diff --git "a/trainer_state.json" "b/trainer_state.json"
new file mode 100644
--- /dev/null
+++ "b/trainer_state.json"
@@ -0,0 +1,86666 @@
+{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0237002109442859, + "eval_steps": 500, + "global_step": 123750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.272324936923522e-05, + "grad_norm": 80462.0859375, + "learning_rate": 8.080808080808081e-09, + "loss": 6007.0633, + "step": 10 + }, + { + "epoch": 0.00016544649873847045, + "grad_norm": 360423.46875, + "learning_rate": 1.6161616161616162e-08, + "loss": 16793.1375, + "step": 20 + }, + { + "epoch": 0.0002481697481077057, + "grad_norm": 260210.0, + "learning_rate": 2.4242424242424243e-08, + "loss": 8807.757, + "step": 30 + }, + { + "epoch": 0.0003308929974769409, + "grad_norm": 333457.03125, + "learning_rate": 3.2323232323232324e-08, + "loss": 6780.607, + "step": 40 + }, + { + "epoch": 0.0004136162468461761, + "grad_norm": 240292.671875, + "learning_rate": 4.040404040404041e-08, + "loss": 10353.4664, + "step": 50 + }, + { + "epoch": 0.0004963394962154114, + "grad_norm": 205636.09375, + "learning_rate": 4.8484848484848486e-08, + "loss": 9239.3687, + "step": 60 + }, + { + "epoch": 0.0005790627455846465, + "grad_norm": 91692.9375, + "learning_rate": 5.656565656565657e-08, + "loss": 5793.2141, + "step": 70 + }, + { + "epoch": 0.0006617859949538818, + "grad_norm": 128958.5859375, + "learning_rate": 6.464646464646465e-08, + "loss": 7645.5141, + "step": 80 + }, + { + "epoch": 0.000744509244323117, + "grad_norm": 198792.46875, + "learning_rate": 7.272727272727274e-08, + "loss": 7540.1, + "step": 90 + }, + { + "epoch": 0.0008272324936923522, + "grad_norm": 143177.3125, + "learning_rate": 8.080808080808082e-08, + "loss": 5863.3023, + "step": 100 + }, + { + "epoch": 0.0009099557430615875, + "grad_norm": 69092.4453125, + "learning_rate": 8.88888888888889e-08, + "loss": 9028.2383, + "step": 110 + }, + { + "epoch": 0.0009926789924308227, + "grad_norm": 135931.546875, + "learning_rate": 9.696969696969697e-08, + "loss": 7231.0602, + "step": 120 + }, + { + "epoch": 0.0010754022418000579, + "grad_norm": 142479.5625, + "learning_rate": 1.0505050505050506e-07, + "loss": 4912.673, + "step": 130 + }, + { + "epoch": 0.001158125491169293, + "grad_norm": 310996.5625, + "learning_rate": 1.1313131313131314e-07, + "loss": 4637.1719, + "step": 140 + }, + { + "epoch": 0.0012408487405385284, + "grad_norm": 195854.375, + "learning_rate": 1.2121212121212122e-07, + "loss": 5191.2895, + "step": 150 + }, + { + "epoch": 0.0013235719899077636, + "grad_norm": 98090.03125, + "learning_rate": 1.292929292929293e-07, + "loss": 6609.8664, + "step": 160 + }, + { + "epoch": 0.0014062952392769987, + "grad_norm": 69154.9765625, + "learning_rate": 1.3737373737373738e-07, + "loss": 6437.2246, + "step": 170 + }, + { + "epoch": 0.001489018488646234, + "grad_norm": 161934.78125, + "learning_rate": 1.4545454545454548e-07, + "loss": 7987.9906, + "step": 180 + }, + { + "epoch": 0.0015717417380154692, + "grad_norm": 72492.5390625, + "learning_rate": 1.5353535353535356e-07, + "loss": 5339.8434, + "step": 190 + }, + { + "epoch": 0.0016544649873847044, + "grad_norm": 28318.041015625, + "learning_rate": 1.6161616161616163e-07, + "loss": 3778.6113, + "step": 200 + }, + { + "epoch": 0.0017371882367539398, + "grad_norm": 78588.4375, + "learning_rate": 1.6969696969696974e-07, + "loss": 5780.0262, + "step": 210 + }, + { + "epoch": 0.001819911486123175,
"grad_norm": 69777.796875, + "learning_rate": 1.777777777777778e-07, + "loss": 3939.0852, + "step": 220 + }, + { + "epoch": 0.00190263473549241, + "grad_norm": 26142.01953125, + "learning_rate": 1.858585858585859e-07, + "loss": 3253.984, + "step": 230 + }, + { + "epoch": 0.0019853579848616455, + "grad_norm": 77199.8828125, + "learning_rate": 1.9393939393939395e-07, + "loss": 3297.4723, + "step": 240 + }, + { + "epoch": 0.0020680812342308806, + "grad_norm": 79269.9921875, + "learning_rate": 2.0202020202020205e-07, + "loss": 3044.4252, + "step": 250 + }, + { + "epoch": 0.0021508044836001158, + "grad_norm": 39357.45703125, + "learning_rate": 2.1010101010101013e-07, + "loss": 3016.1178, + "step": 260 + }, + { + "epoch": 0.002233527732969351, + "grad_norm": 37892.91015625, + "learning_rate": 2.181818181818182e-07, + "loss": 2370.4621, + "step": 270 + }, + { + "epoch": 0.002316250982338586, + "grad_norm": 40799.3984375, + "learning_rate": 2.2626262626262628e-07, + "loss": 2173.1178, + "step": 280 + }, + { + "epoch": 0.0023989742317078217, + "grad_norm": 16129.89453125, + "learning_rate": 2.343434343434344e-07, + "loss": 2872.4018, + "step": 290 + }, + { + "epoch": 0.002481697481077057, + "grad_norm": 12510.9287109375, + "learning_rate": 2.4242424242424244e-07, + "loss": 1385.131, + "step": 300 + }, + { + "epoch": 0.002564420730446292, + "grad_norm": 21687.78515625, + "learning_rate": 2.505050505050505e-07, + "loss": 1238.8357, + "step": 310 + }, + { + "epoch": 0.002647143979815527, + "grad_norm": 26216.89453125, + "learning_rate": 2.585858585858586e-07, + "loss": 1459.9193, + "step": 320 + }, + { + "epoch": 0.0027298672291847623, + "grad_norm": 54759.859375, + "learning_rate": 2.666666666666667e-07, + "loss": 1641.5232, + "step": 330 + }, + { + "epoch": 0.0028125904785539974, + "grad_norm": 11365.4833984375, + "learning_rate": 2.7474747474747475e-07, + "loss": 1190.3224, + "step": 340 + }, + { + "epoch": 0.002895313727923233, + "grad_norm": 16354.1533203125, + "learning_rate": 2.828282828282829e-07, + "loss": 885.4668, + "step": 350 + }, + { + "epoch": 0.002978036977292468, + "grad_norm": 11645.587890625, + "learning_rate": 2.9090909090909096e-07, + "loss": 1400.4568, + "step": 360 + }, + { + "epoch": 0.0030607602266617033, + "grad_norm": 8614.1328125, + "learning_rate": 2.9898989898989904e-07, + "loss": 814.0722, + "step": 370 + }, + { + "epoch": 0.0031434834760309385, + "grad_norm": 11007.8818359375, + "learning_rate": 3.070707070707071e-07, + "loss": 970.882, + "step": 380 + }, + { + "epoch": 0.0032262067254001736, + "grad_norm": 6238.4267578125, + "learning_rate": 3.151515151515152e-07, + "loss": 946.8882, + "step": 390 + }, + { + "epoch": 0.003308929974769409, + "grad_norm": 5815.009765625, + "learning_rate": 3.2323232323232327e-07, + "loss": 792.6276, + "step": 400 + }, + { + "epoch": 0.003391653224138644, + "grad_norm": 34222.5390625, + "learning_rate": 3.3131313131313135e-07, + "loss": 787.8205, + "step": 410 + }, + { + "epoch": 0.0034743764735078795, + "grad_norm": 4778.3984375, + "learning_rate": 3.393939393939395e-07, + "loss": 653.9669, + "step": 420 + }, + { + "epoch": 0.0035570997228771147, + "grad_norm": 2678.25244140625, + "learning_rate": 3.474747474747475e-07, + "loss": 793.3054, + "step": 430 + }, + { + "epoch": 0.00363982297224635, + "grad_norm": 1457.694580078125, + "learning_rate": 3.555555555555556e-07, + "loss": 630.5825, + "step": 440 + }, + { + "epoch": 0.003722546221615585, + "grad_norm": 5033.21923828125, + "learning_rate": 3.6363636363636366e-07, + "loss": 759.3085, 
+ "step": 450 + }, + { + "epoch": 0.00380526947098482, + "grad_norm": 3704.6376953125, + "learning_rate": 3.717171717171718e-07, + "loss": 644.2665, + "step": 460 + }, + { + "epoch": 0.0038879927203540553, + "grad_norm": 5654.28662109375, + "learning_rate": 3.7979797979797987e-07, + "loss": 710.7244, + "step": 470 + }, + { + "epoch": 0.003970715969723291, + "grad_norm": 1483.4691162109375, + "learning_rate": 3.878787878787879e-07, + "loss": 694.1961, + "step": 480 + }, + { + "epoch": 0.004053439219092526, + "grad_norm": 2019.6363525390625, + "learning_rate": 3.9595959595959597e-07, + "loss": 573.5805, + "step": 490 + }, + { + "epoch": 0.004136162468461761, + "grad_norm": 1998.4979248046875, + "learning_rate": 4.040404040404041e-07, + "loss": 624.7649, + "step": 500 + }, + { + "epoch": 0.004218885717830996, + "grad_norm": 2387.564453125, + "learning_rate": 4.121212121212122e-07, + "loss": 626.6972, + "step": 510 + }, + { + "epoch": 0.0043016089672002315, + "grad_norm": 3881.398681640625, + "learning_rate": 4.2020202020202026e-07, + "loss": 759.5687, + "step": 520 + }, + { + "epoch": 0.004384332216569467, + "grad_norm": 1924.41845703125, + "learning_rate": 4.282828282828283e-07, + "loss": 700.7449, + "step": 530 + }, + { + "epoch": 0.004467055465938702, + "grad_norm": 2266.10693359375, + "learning_rate": 4.363636363636364e-07, + "loss": 640.943, + "step": 540 + }, + { + "epoch": 0.004549778715307937, + "grad_norm": 7472.12548828125, + "learning_rate": 4.444444444444445e-07, + "loss": 806.1628, + "step": 550 + }, + { + "epoch": 0.004632501964677172, + "grad_norm": 1539.118408203125, + "learning_rate": 4.5252525252525257e-07, + "loss": 556.9574, + "step": 560 + }, + { + "epoch": 0.004715225214046408, + "grad_norm": 1867.2452392578125, + "learning_rate": 4.6060606060606064e-07, + "loss": 453.8387, + "step": 570 + }, + { + "epoch": 0.004797948463415643, + "grad_norm": 6622.47119140625, + "learning_rate": 4.686868686868688e-07, + "loss": 585.511, + "step": 580 + }, + { + "epoch": 0.0048806717127848785, + "grad_norm": 1117.4276123046875, + "learning_rate": 4.767676767676768e-07, + "loss": 591.8928, + "step": 590 + }, + { + "epoch": 0.004963394962154114, + "grad_norm": 1792.2279052734375, + "learning_rate": 4.848484848484849e-07, + "loss": 659.099, + "step": 600 + }, + { + "epoch": 0.005046118211523349, + "grad_norm": 952.1702880859375, + "learning_rate": 4.929292929292929e-07, + "loss": 558.1813, + "step": 610 + }, + { + "epoch": 0.005128841460892584, + "grad_norm": 1103.0098876953125, + "learning_rate": 5.01010101010101e-07, + "loss": 488.6359, + "step": 620 + }, + { + "epoch": 0.005211564710261819, + "grad_norm": 2055.734130859375, + "learning_rate": 5.090909090909092e-07, + "loss": 611.8516, + "step": 630 + }, + { + "epoch": 0.005294287959631054, + "grad_norm": 1028.85986328125, + "learning_rate": 5.171717171717172e-07, + "loss": 568.1, + "step": 640 + }, + { + "epoch": 0.005377011209000289, + "grad_norm": 1175.14892578125, + "learning_rate": 5.252525252525253e-07, + "loss": 407.9692, + "step": 650 + }, + { + "epoch": 0.005459734458369525, + "grad_norm": 1459.8382568359375, + "learning_rate": 5.333333333333335e-07, + "loss": 496.3047, + "step": 660 + }, + { + "epoch": 0.00554245770773876, + "grad_norm": 3791.671630859375, + "learning_rate": 5.414141414141415e-07, + "loss": 492.2279, + "step": 670 + }, + { + "epoch": 0.005625180957107995, + "grad_norm": 1465.2242431640625, + "learning_rate": 5.494949494949495e-07, + "loss": 518.7601, + "step": 680 + }, + { + "epoch": 0.00570790420647723, + 
"grad_norm": 1057.6175537109375, + "learning_rate": 5.575757575757576e-07, + "loss": 496.9481, + "step": 690 + }, + { + "epoch": 0.005790627455846466, + "grad_norm": 1338.5599365234375, + "learning_rate": 5.656565656565658e-07, + "loss": 493.3083, + "step": 700 + }, + { + "epoch": 0.005873350705215701, + "grad_norm": 1229.9342041015625, + "learning_rate": 5.737373737373738e-07, + "loss": 507.9572, + "step": 710 + }, + { + "epoch": 0.005956073954584936, + "grad_norm": 2031.885498046875, + "learning_rate": 5.818181818181819e-07, + "loss": 545.9699, + "step": 720 + }, + { + "epoch": 0.0060387972039541715, + "grad_norm": 1317.18408203125, + "learning_rate": 5.898989898989899e-07, + "loss": 515.3469, + "step": 730 + }, + { + "epoch": 0.006121520453323407, + "grad_norm": 1839.4345703125, + "learning_rate": 5.979797979797981e-07, + "loss": 377.9613, + "step": 740 + }, + { + "epoch": 0.006204243702692642, + "grad_norm": 1210.0321044921875, + "learning_rate": 6.060606060606061e-07, + "loss": 465.0116, + "step": 750 + }, + { + "epoch": 0.006286966952061877, + "grad_norm": 2228.900390625, + "learning_rate": 6.141414141414142e-07, + "loss": 336.6599, + "step": 760 + }, + { + "epoch": 0.006369690201431112, + "grad_norm": 1690.2906494140625, + "learning_rate": 6.222222222222223e-07, + "loss": 402.5558, + "step": 770 + }, + { + "epoch": 0.006452413450800347, + "grad_norm": 1048.852783203125, + "learning_rate": 6.303030303030304e-07, + "loss": 460.7069, + "step": 780 + }, + { + "epoch": 0.0065351367001695825, + "grad_norm": 1623.499755859375, + "learning_rate": 6.383838383838384e-07, + "loss": 322.7708, + "step": 790 + }, + { + "epoch": 0.006617859949538818, + "grad_norm": 4279.55908203125, + "learning_rate": 6.464646464646465e-07, + "loss": 430.4449, + "step": 800 + }, + { + "epoch": 0.006700583198908053, + "grad_norm": 1473.4871826171875, + "learning_rate": 6.545454545454547e-07, + "loss": 474.9226, + "step": 810 + }, + { + "epoch": 0.006783306448277288, + "grad_norm": 1544.477783203125, + "learning_rate": 6.626262626262627e-07, + "loss": 459.8634, + "step": 820 + }, + { + "epoch": 0.006866029697646524, + "grad_norm": 991.1500854492188, + "learning_rate": 6.707070707070708e-07, + "loss": 339.08, + "step": 830 + }, + { + "epoch": 0.006948752947015759, + "grad_norm": 1124.6473388671875, + "learning_rate": 6.78787878787879e-07, + "loss": 476.3061, + "step": 840 + }, + { + "epoch": 0.007031476196384994, + "grad_norm": 1139.6890869140625, + "learning_rate": 6.868686868686869e-07, + "loss": 504.3425, + "step": 850 + }, + { + "epoch": 0.007114199445754229, + "grad_norm": 1275.02294921875, + "learning_rate": 6.94949494949495e-07, + "loss": 431.0188, + "step": 860 + }, + { + "epoch": 0.0071969226951234646, + "grad_norm": 1159.5965576171875, + "learning_rate": 7.03030303030303e-07, + "loss": 455.7138, + "step": 870 + }, + { + "epoch": 0.0072796459444927, + "grad_norm": 1401.516357421875, + "learning_rate": 7.111111111111112e-07, + "loss": 529.1278, + "step": 880 + }, + { + "epoch": 0.007362369193861935, + "grad_norm": 1474.188720703125, + "learning_rate": 7.191919191919193e-07, + "loss": 326.5665, + "step": 890 + }, + { + "epoch": 0.00744509244323117, + "grad_norm": 917.8528442382812, + "learning_rate": 7.272727272727273e-07, + "loss": 436.4797, + "step": 900 + }, + { + "epoch": 0.007527815692600405, + "grad_norm": 941.6436157226562, + "learning_rate": 7.353535353535354e-07, + "loss": 357.6453, + "step": 910 + }, + { + "epoch": 0.00761053894196964, + "grad_norm": 934.293701171875, + "learning_rate": 
7.434343434343436e-07, + "loss": 433.9422, + "step": 920 + }, + { + "epoch": 0.0076932621913388755, + "grad_norm": 1530.59814453125, + "learning_rate": 7.515151515151516e-07, + "loss": 506.7857, + "step": 930 + }, + { + "epoch": 0.007775985440708111, + "grad_norm": 3313.063720703125, + "learning_rate": 7.595959595959597e-07, + "loss": 386.7442, + "step": 940 + }, + { + "epoch": 0.007858708690077347, + "grad_norm": 9505.775390625, + "learning_rate": 7.676767676767677e-07, + "loss": 393.8395, + "step": 950 + }, + { + "epoch": 0.007941431939446582, + "grad_norm": 9777.1015625, + "learning_rate": 7.757575757575758e-07, + "loss": 444.2824, + "step": 960 + }, + { + "epoch": 0.008024155188815817, + "grad_norm": 952.7528686523438, + "learning_rate": 7.838383838383839e-07, + "loss": 463.8062, + "step": 970 + }, + { + "epoch": 0.008106878438185052, + "grad_norm": 1653.4957275390625, + "learning_rate": 7.919191919191919e-07, + "loss": 422.011, + "step": 980 + }, + { + "epoch": 0.008189601687554287, + "grad_norm": 1222.734619140625, + "learning_rate": 8.000000000000001e-07, + "loss": 370.3142, + "step": 990 + }, + { + "epoch": 0.008272324936923522, + "grad_norm": 1304.0595703125, + "learning_rate": 8.080808080808082e-07, + "loss": 423.2224, + "step": 1000 + }, + { + "epoch": 0.008355048186292758, + "grad_norm": 1451.695556640625, + "learning_rate": 8.161616161616162e-07, + "loss": 333.3123, + "step": 1010 + }, + { + "epoch": 0.008437771435661993, + "grad_norm": 1295.0885009765625, + "learning_rate": 8.242424242424244e-07, + "loss": 395.7991, + "step": 1020 + }, + { + "epoch": 0.008520494685031228, + "grad_norm": 24490.919921875, + "learning_rate": 8.323232323232324e-07, + "loss": 484.7744, + "step": 1030 + }, + { + "epoch": 0.008603217934400463, + "grad_norm": 1630.39306640625, + "learning_rate": 8.404040404040405e-07, + "loss": 507.0378, + "step": 1040 + }, + { + "epoch": 0.008685941183769698, + "grad_norm": 1444.76708984375, + "learning_rate": 8.484848484848486e-07, + "loss": 424.0987, + "step": 1050 + }, + { + "epoch": 0.008768664433138933, + "grad_norm": 9745.6103515625, + "learning_rate": 8.565656565656566e-07, + "loss": 356.5501, + "step": 1060 + }, + { + "epoch": 0.008851387682508169, + "grad_norm": 1603.8682861328125, + "learning_rate": 8.646464646464647e-07, + "loss": 363.1298, + "step": 1070 + }, + { + "epoch": 0.008934110931877404, + "grad_norm": 1848.8529052734375, + "learning_rate": 8.727272727272728e-07, + "loss": 453.0991, + "step": 1080 + }, + { + "epoch": 0.009016834181246639, + "grad_norm": 1721.5032958984375, + "learning_rate": 8.808080808080808e-07, + "loss": 352.8759, + "step": 1090 + }, + { + "epoch": 0.009099557430615874, + "grad_norm": 1215.8017578125, + "learning_rate": 8.88888888888889e-07, + "loss": 358.2751, + "step": 1100 + }, + { + "epoch": 0.00918228067998511, + "grad_norm": 3627.594482421875, + "learning_rate": 8.96969696969697e-07, + "loss": 373.2328, + "step": 1110 + }, + { + "epoch": 0.009265003929354344, + "grad_norm": 1093.44677734375, + "learning_rate": 9.050505050505051e-07, + "loss": 305.7944, + "step": 1120 + }, + { + "epoch": 0.00934772717872358, + "grad_norm": 5584.5224609375, + "learning_rate": 9.131313131313133e-07, + "loss": 371.0138, + "step": 1130 + }, + { + "epoch": 0.009430450428092816, + "grad_norm": 1449.30419921875, + "learning_rate": 9.212121212121213e-07, + "loss": 374.5949, + "step": 1140 + }, + { + "epoch": 0.009513173677462052, + "grad_norm": 1450.5250244140625, + "learning_rate": 9.292929292929294e-07, + "loss": 370.9519, + "step": 1150 + }, + 
{ + "epoch": 0.009595896926831287, + "grad_norm": 1153.1729736328125, + "learning_rate": 9.373737373737376e-07, + "loss": 311.7418, + "step": 1160 + }, + { + "epoch": 0.009678620176200522, + "grad_norm": 1182.4305419921875, + "learning_rate": 9.454545454545455e-07, + "loss": 340.1112, + "step": 1170 + }, + { + "epoch": 0.009761343425569757, + "grad_norm": 2788.3564453125, + "learning_rate": 9.535353535353536e-07, + "loss": 402.735, + "step": 1180 + }, + { + "epoch": 0.009844066674938992, + "grad_norm": 1240.61669921875, + "learning_rate": 9.616161616161617e-07, + "loss": 430.115, + "step": 1190 + }, + { + "epoch": 0.009926789924308227, + "grad_norm": 7811.59228515625, + "learning_rate": 9.696969696969698e-07, + "loss": 294.6839, + "step": 1200 + }, + { + "epoch": 0.010009513173677462, + "grad_norm": 1490.346923828125, + "learning_rate": 9.77777777777778e-07, + "loss": 314.4158, + "step": 1210 + }, + { + "epoch": 0.010092236423046698, + "grad_norm": 1153.1387939453125, + "learning_rate": 9.858585858585858e-07, + "loss": 422.2901, + "step": 1220 + }, + { + "epoch": 0.010174959672415933, + "grad_norm": 1359.1773681640625, + "learning_rate": 9.93939393939394e-07, + "loss": 396.2289, + "step": 1230 + }, + { + "epoch": 0.010257682921785168, + "grad_norm": 1238.34814453125, + "learning_rate": 1.002020202020202e-06, + "loss": 432.7112, + "step": 1240 + }, + { + "epoch": 0.010340406171154403, + "grad_norm": 1150.58154296875, + "learning_rate": 1.01010101010101e-06, + "loss": 533.3193, + "step": 1250 + }, + { + "epoch": 0.010423129420523638, + "grad_norm": 2107.6708984375, + "learning_rate": 1.0181818181818183e-06, + "loss": 386.3309, + "step": 1260 + }, + { + "epoch": 0.010505852669892873, + "grad_norm": 1610.2342529296875, + "learning_rate": 1.0262626262626264e-06, + "loss": 378.974, + "step": 1270 + }, + { + "epoch": 0.010588575919262109, + "grad_norm": 10111.8642578125, + "learning_rate": 1.0343434343434344e-06, + "loss": 440.8467, + "step": 1280 + }, + { + "epoch": 0.010671299168631344, + "grad_norm": 3038.983642578125, + "learning_rate": 1.0424242424242426e-06, + "loss": 531.3744, + "step": 1290 + }, + { + "epoch": 0.010754022418000579, + "grad_norm": 2078.825927734375, + "learning_rate": 1.0505050505050506e-06, + "loss": 358.3208, + "step": 1300 + }, + { + "epoch": 0.010836745667369814, + "grad_norm": 1494.41943359375, + "learning_rate": 1.0585858585858587e-06, + "loss": 368.8838, + "step": 1310 + }, + { + "epoch": 0.01091946891673905, + "grad_norm": 913.7642211914062, + "learning_rate": 1.066666666666667e-06, + "loss": 343.7515, + "step": 1320 + }, + { + "epoch": 0.011002192166108284, + "grad_norm": 1475.224853515625, + "learning_rate": 1.0747474747474747e-06, + "loss": 311.1521, + "step": 1330 + }, + { + "epoch": 0.01108491541547752, + "grad_norm": 6028.0986328125, + "learning_rate": 1.082828282828283e-06, + "loss": 346.5072, + "step": 1340 + }, + { + "epoch": 0.011167638664846755, + "grad_norm": 1047.4739990234375, + "learning_rate": 1.090909090909091e-06, + "loss": 371.3439, + "step": 1350 + }, + { + "epoch": 0.01125036191421599, + "grad_norm": 2157.152099609375, + "learning_rate": 1.098989898989899e-06, + "loss": 396.0025, + "step": 1360 + }, + { + "epoch": 0.011333085163585225, + "grad_norm": 1551.3135986328125, + "learning_rate": 1.1070707070707072e-06, + "loss": 423.25, + "step": 1370 + }, + { + "epoch": 0.01141580841295446, + "grad_norm": 1080.8287353515625, + "learning_rate": 1.1151515151515153e-06, + "loss": 255.2609, + "step": 1380 + }, + { + "epoch": 0.011498531662323695, + 
"grad_norm": 835.9887084960938, + "learning_rate": 1.1232323232323233e-06, + "loss": 284.983, + "step": 1390 + }, + { + "epoch": 0.011581254911692932, + "grad_norm": 774.7077026367188, + "learning_rate": 1.1313131313131315e-06, + "loss": 274.2369, + "step": 1400 + }, + { + "epoch": 0.011663978161062167, + "grad_norm": 1325.56982421875, + "learning_rate": 1.1393939393939395e-06, + "loss": 797.6222, + "step": 1410 + }, + { + "epoch": 0.011746701410431402, + "grad_norm": 1981.0302734375, + "learning_rate": 1.1474747474747476e-06, + "loss": 366.0973, + "step": 1420 + }, + { + "epoch": 0.011829424659800638, + "grad_norm": 2644.739501953125, + "learning_rate": 1.1555555555555556e-06, + "loss": 327.9804, + "step": 1430 + }, + { + "epoch": 0.011912147909169873, + "grad_norm": 1665.543212890625, + "learning_rate": 1.1636363636363638e-06, + "loss": 425.4298, + "step": 1440 + }, + { + "epoch": 0.011994871158539108, + "grad_norm": 1862.1712646484375, + "learning_rate": 1.1717171717171719e-06, + "loss": 324.925, + "step": 1450 + }, + { + "epoch": 0.012077594407908343, + "grad_norm": 1164.2347412109375, + "learning_rate": 1.1797979797979799e-06, + "loss": 298.3789, + "step": 1460 + }, + { + "epoch": 0.012160317657277578, + "grad_norm": 1282.484375, + "learning_rate": 1.187878787878788e-06, + "loss": 399.5682, + "step": 1470 + }, + { + "epoch": 0.012243040906646813, + "grad_norm": 1316.5758056640625, + "learning_rate": 1.1959595959595961e-06, + "loss": 338.0538, + "step": 1480 + }, + { + "epoch": 0.012325764156016049, + "grad_norm": 875.5802001953125, + "learning_rate": 1.2040404040404042e-06, + "loss": 308.0409, + "step": 1490 + }, + { + "epoch": 0.012408487405385284, + "grad_norm": 1360.3880615234375, + "learning_rate": 1.2121212121212122e-06, + "loss": 292.9588, + "step": 1500 + }, + { + "epoch": 0.012491210654754519, + "grad_norm": 1129.11083984375, + "learning_rate": 1.2202020202020202e-06, + "loss": 322.4768, + "step": 1510 + }, + { + "epoch": 0.012573933904123754, + "grad_norm": 2066.356689453125, + "learning_rate": 1.2282828282828285e-06, + "loss": 271.5606, + "step": 1520 + }, + { + "epoch": 0.012656657153492989, + "grad_norm": 1669.0103759765625, + "learning_rate": 1.2363636363636365e-06, + "loss": 283.8805, + "step": 1530 + }, + { + "epoch": 0.012739380402862224, + "grad_norm": 2613.5205078125, + "learning_rate": 1.2444444444444445e-06, + "loss": 376.5511, + "step": 1540 + }, + { + "epoch": 0.01282210365223146, + "grad_norm": 1188.38427734375, + "learning_rate": 1.2525252525252527e-06, + "loss": 429.8225, + "step": 1550 + }, + { + "epoch": 0.012904826901600695, + "grad_norm": 1084.84814453125, + "learning_rate": 1.2606060606060608e-06, + "loss": 289.73, + "step": 1560 + }, + { + "epoch": 0.01298755015096993, + "grad_norm": 2206.3232421875, + "learning_rate": 1.268686868686869e-06, + "loss": 316.7855, + "step": 1570 + }, + { + "epoch": 0.013070273400339165, + "grad_norm": 2453.128662109375, + "learning_rate": 1.2767676767676768e-06, + "loss": 304.9484, + "step": 1580 + }, + { + "epoch": 0.0131529966497084, + "grad_norm": 976.6162719726562, + "learning_rate": 1.2848484848484848e-06, + "loss": 303.9162, + "step": 1590 + }, + { + "epoch": 0.013235719899077635, + "grad_norm": 1493.1630859375, + "learning_rate": 1.292929292929293e-06, + "loss": 356.7861, + "step": 1600 + }, + { + "epoch": 0.01331844314844687, + "grad_norm": 1568.7215576171875, + "learning_rate": 1.301010101010101e-06, + "loss": 556.4589, + "step": 1610 + }, + { + "epoch": 0.013401166397816106, + "grad_norm": 2514.0361328125, + 
"learning_rate": 1.3090909090909093e-06, + "loss": 414.0774, + "step": 1620 + }, + { + "epoch": 0.01348388964718534, + "grad_norm": 1089.7327880859375, + "learning_rate": 1.3171717171717172e-06, + "loss": 330.1746, + "step": 1630 + }, + { + "epoch": 0.013566612896554576, + "grad_norm": 1226.1904296875, + "learning_rate": 1.3252525252525254e-06, + "loss": 408.1962, + "step": 1640 + }, + { + "epoch": 0.013649336145923813, + "grad_norm": 2240.143310546875, + "learning_rate": 1.3333333333333334e-06, + "loss": 344.259, + "step": 1650 + }, + { + "epoch": 0.013732059395293048, + "grad_norm": 1064.2305908203125, + "learning_rate": 1.3414141414141417e-06, + "loss": 348.617, + "step": 1660 + }, + { + "epoch": 0.013814782644662283, + "grad_norm": 1232.1429443359375, + "learning_rate": 1.3494949494949497e-06, + "loss": 406.3739, + "step": 1670 + }, + { + "epoch": 0.013897505894031518, + "grad_norm": 1123.156982421875, + "learning_rate": 1.357575757575758e-06, + "loss": 396.9652, + "step": 1680 + }, + { + "epoch": 0.013980229143400753, + "grad_norm": 1280.202880859375, + "learning_rate": 1.3656565656565657e-06, + "loss": 391.5166, + "step": 1690 + }, + { + "epoch": 0.014062952392769988, + "grad_norm": 1420.6954345703125, + "learning_rate": 1.3737373737373738e-06, + "loss": 437.0295, + "step": 1700 + }, + { + "epoch": 0.014145675642139224, + "grad_norm": 2150.417236328125, + "learning_rate": 1.381818181818182e-06, + "loss": 346.5058, + "step": 1710 + }, + { + "epoch": 0.014228398891508459, + "grad_norm": 1029.7105712890625, + "learning_rate": 1.38989898989899e-06, + "loss": 326.7657, + "step": 1720 + }, + { + "epoch": 0.014311122140877694, + "grad_norm": 1207.562744140625, + "learning_rate": 1.3979797979797982e-06, + "loss": 279.2056, + "step": 1730 + }, + { + "epoch": 0.014393845390246929, + "grad_norm": 919.50244140625, + "learning_rate": 1.406060606060606e-06, + "loss": 264.9025, + "step": 1740 + }, + { + "epoch": 0.014476568639616164, + "grad_norm": 1641.6435546875, + "learning_rate": 1.4141414141414143e-06, + "loss": 239.4691, + "step": 1750 + }, + { + "epoch": 0.0145592918889854, + "grad_norm": 1205.822998046875, + "learning_rate": 1.4222222222222223e-06, + "loss": 333.4085, + "step": 1760 + }, + { + "epoch": 0.014642015138354635, + "grad_norm": 1508.9305419921875, + "learning_rate": 1.4303030303030306e-06, + "loss": 300.8089, + "step": 1770 + }, + { + "epoch": 0.01472473838772387, + "grad_norm": 1087.9306640625, + "learning_rate": 1.4383838383838386e-06, + "loss": 292.6564, + "step": 1780 + }, + { + "epoch": 0.014807461637093105, + "grad_norm": 2015.249755859375, + "learning_rate": 1.4464646464646464e-06, + "loss": 277.2897, + "step": 1790 + }, + { + "epoch": 0.01489018488646234, + "grad_norm": 4033.002685546875, + "learning_rate": 1.4545454545454546e-06, + "loss": 336.2396, + "step": 1800 + }, + { + "epoch": 0.014972908135831575, + "grad_norm": 1528.888916015625, + "learning_rate": 1.4626262626262627e-06, + "loss": 310.2283, + "step": 1810 + }, + { + "epoch": 0.01505563138520081, + "grad_norm": 1040.3310546875, + "learning_rate": 1.470707070707071e-06, + "loss": 296.3051, + "step": 1820 + }, + { + "epoch": 0.015138354634570046, + "grad_norm": 3019.808837890625, + "learning_rate": 1.478787878787879e-06, + "loss": 278.5342, + "step": 1830 + }, + { + "epoch": 0.01522107788393928, + "grad_norm": 1161.161376953125, + "learning_rate": 1.4868686868686872e-06, + "loss": 357.7166, + "step": 1840 + }, + { + "epoch": 0.015303801133308516, + "grad_norm": 2254.221435546875, + "learning_rate": 
1.494949494949495e-06, + "loss": 325.2632, + "step": 1850 + }, + { + "epoch": 0.015386524382677751, + "grad_norm": 3298.8662109375, + "learning_rate": 1.5030303030303032e-06, + "loss": 340.6655, + "step": 1860 + }, + { + "epoch": 0.015469247632046986, + "grad_norm": 1297.3563232421875, + "learning_rate": 1.5111111111111112e-06, + "loss": 327.4712, + "step": 1870 + }, + { + "epoch": 0.015551970881416221, + "grad_norm": 1163.013671875, + "learning_rate": 1.5191919191919195e-06, + "loss": 302.4008, + "step": 1880 + }, + { + "epoch": 0.015634694130785456, + "grad_norm": 3574.491943359375, + "learning_rate": 1.5272727272727275e-06, + "loss": 304.6065, + "step": 1890 + }, + { + "epoch": 0.015717417380154693, + "grad_norm": 953.6083984375, + "learning_rate": 1.5353535353535353e-06, + "loss": 283.16, + "step": 1900 + }, + { + "epoch": 0.015800140629523927, + "grad_norm": 579.375732421875, + "learning_rate": 1.5434343434343435e-06, + "loss": 294.0983, + "step": 1910 + }, + { + "epoch": 0.015882863878893164, + "grad_norm": 904.5838012695312, + "learning_rate": 1.5515151515151516e-06, + "loss": 277.4686, + "step": 1920 + }, + { + "epoch": 0.015965587128262397, + "grad_norm": 878.8187255859375, + "learning_rate": 1.5595959595959598e-06, + "loss": 348.8653, + "step": 1930 + }, + { + "epoch": 0.016048310377631634, + "grad_norm": 1161.1363525390625, + "learning_rate": 1.5676767676767678e-06, + "loss": 374.7675, + "step": 1940 + }, + { + "epoch": 0.016131033627000867, + "grad_norm": 1667.3831787109375, + "learning_rate": 1.5757575757575759e-06, + "loss": 359.4971, + "step": 1950 + }, + { + "epoch": 0.016213756876370104, + "grad_norm": 2197.3330078125, + "learning_rate": 1.5838383838383839e-06, + "loss": 367.9104, + "step": 1960 + }, + { + "epoch": 0.016296480125739338, + "grad_norm": 1425.8123779296875, + "learning_rate": 1.5919191919191921e-06, + "loss": 367.0379, + "step": 1970 + }, + { + "epoch": 0.016379203375108575, + "grad_norm": 4604.79052734375, + "learning_rate": 1.6000000000000001e-06, + "loss": 462.412, + "step": 1980 + }, + { + "epoch": 0.016461926624477808, + "grad_norm": 1806.8238525390625, + "learning_rate": 1.6080808080808084e-06, + "loss": 350.7875, + "step": 1990 + }, + { + "epoch": 0.016544649873847045, + "grad_norm": 1519.6781005859375, + "learning_rate": 1.6161616161616164e-06, + "loss": 272.1075, + "step": 2000 + }, + { + "epoch": 0.01662737312321628, + "grad_norm": 2001.111572265625, + "learning_rate": 1.6242424242424242e-06, + "loss": 264.7772, + "step": 2010 + }, + { + "epoch": 0.016710096372585515, + "grad_norm": 2383.3994140625, + "learning_rate": 1.6323232323232325e-06, + "loss": 293.2167, + "step": 2020 + }, + { + "epoch": 0.016792819621954752, + "grad_norm": 670.5611572265625, + "learning_rate": 1.6404040404040405e-06, + "loss": 379.9431, + "step": 2030 + }, + { + "epoch": 0.016875542871323986, + "grad_norm": 1432.4300537109375, + "learning_rate": 1.6484848484848487e-06, + "loss": 362.749, + "step": 2040 + }, + { + "epoch": 0.016958266120693222, + "grad_norm": 6461.77880859375, + "learning_rate": 1.6565656565656567e-06, + "loss": 407.5534, + "step": 2050 + }, + { + "epoch": 0.017040989370062456, + "grad_norm": 1288.83544921875, + "learning_rate": 1.6646464646464648e-06, + "loss": 315.7526, + "step": 2060 + }, + { + "epoch": 0.017123712619431693, + "grad_norm": 907.4447021484375, + "learning_rate": 1.6727272727272728e-06, + "loss": 292.4799, + "step": 2070 + }, + { + "epoch": 0.017206435868800926, + "grad_norm": 1921.0576171875, + "learning_rate": 1.680808080808081e-06, + 
"loss": 297.3491, + "step": 2080 + }, + { + "epoch": 0.017289159118170163, + "grad_norm": 1620.2481689453125, + "learning_rate": 1.688888888888889e-06, + "loss": 291.1318, + "step": 2090 + }, + { + "epoch": 0.017371882367539396, + "grad_norm": 892.2626342773438, + "learning_rate": 1.6969696969696973e-06, + "loss": 337.7454, + "step": 2100 + }, + { + "epoch": 0.017454605616908633, + "grad_norm": 1724.4583740234375, + "learning_rate": 1.705050505050505e-06, + "loss": 386.2352, + "step": 2110 + }, + { + "epoch": 0.017537328866277867, + "grad_norm": 1373.0406494140625, + "learning_rate": 1.7131313131313131e-06, + "loss": 312.9512, + "step": 2120 + }, + { + "epoch": 0.017620052115647104, + "grad_norm": 1802.06884765625, + "learning_rate": 1.7212121212121214e-06, + "loss": 392.6038, + "step": 2130 + }, + { + "epoch": 0.017702775365016337, + "grad_norm": 1823.2747802734375, + "learning_rate": 1.7292929292929294e-06, + "loss": 276.9635, + "step": 2140 + }, + { + "epoch": 0.017785498614385574, + "grad_norm": 2303.46484375, + "learning_rate": 1.7373737373737376e-06, + "loss": 363.1688, + "step": 2150 + }, + { + "epoch": 0.017868221863754807, + "grad_norm": 939.9644165039062, + "learning_rate": 1.7454545454545456e-06, + "loss": 371.3494, + "step": 2160 + }, + { + "epoch": 0.017950945113124044, + "grad_norm": 803.9099731445312, + "learning_rate": 1.7535353535353537e-06, + "loss": 325.9621, + "step": 2170 + }, + { + "epoch": 0.018033668362493278, + "grad_norm": 1147.0989990234375, + "learning_rate": 1.7616161616161617e-06, + "loss": 339.9603, + "step": 2180 + }, + { + "epoch": 0.018116391611862515, + "grad_norm": 1110.0518798828125, + "learning_rate": 1.76969696969697e-06, + "loss": 311.3122, + "step": 2190 + }, + { + "epoch": 0.018199114861231748, + "grad_norm": 1060.744140625, + "learning_rate": 1.777777777777778e-06, + "loss": 350.8291, + "step": 2200 + }, + { + "epoch": 0.018281838110600985, + "grad_norm": 2863.481201171875, + "learning_rate": 1.7858585858585862e-06, + "loss": 347.886, + "step": 2210 + }, + { + "epoch": 0.01836456135997022, + "grad_norm": 1049.102294921875, + "learning_rate": 1.793939393939394e-06, + "loss": 255.8531, + "step": 2220 + }, + { + "epoch": 0.018447284609339455, + "grad_norm": 1447.9163818359375, + "learning_rate": 1.802020202020202e-06, + "loss": 270.9402, + "step": 2230 + }, + { + "epoch": 0.01853000785870869, + "grad_norm": 5666.75634765625, + "learning_rate": 1.8101010101010103e-06, + "loss": 265.4302, + "step": 2240 + }, + { + "epoch": 0.018612731108077925, + "grad_norm": 901.1162109375, + "learning_rate": 1.8181818181818183e-06, + "loss": 327.2734, + "step": 2250 + }, + { + "epoch": 0.01869545435744716, + "grad_norm": 1582.9307861328125, + "learning_rate": 1.8262626262626265e-06, + "loss": 295.4947, + "step": 2260 + }, + { + "epoch": 0.018778177606816396, + "grad_norm": 910.0087280273438, + "learning_rate": 1.8343434343434343e-06, + "loss": 254.5832, + "step": 2270 + }, + { + "epoch": 0.018860900856185633, + "grad_norm": 1553.7003173828125, + "learning_rate": 1.8424242424242426e-06, + "loss": 312.1798, + "step": 2280 + }, + { + "epoch": 0.018943624105554866, + "grad_norm": 1657.673828125, + "learning_rate": 1.8505050505050506e-06, + "loss": 333.7471, + "step": 2290 + }, + { + "epoch": 0.019026347354924103, + "grad_norm": 2194.053466796875, + "learning_rate": 1.8585858585858588e-06, + "loss": 252.629, + "step": 2300 + }, + { + "epoch": 0.019109070604293336, + "grad_norm": 968.1193237304688, + "learning_rate": 1.8666666666666669e-06, + "loss": 317.2808, + "step": 
2310 + }, + { + "epoch": 0.019191793853662573, + "grad_norm": 2099.283203125, + "learning_rate": 1.874747474747475e-06, + "loss": 318.3145, + "step": 2320 + }, + { + "epoch": 0.019274517103031807, + "grad_norm": 1018.1463012695312, + "learning_rate": 1.882828282828283e-06, + "loss": 310.5461, + "step": 2330 + }, + { + "epoch": 0.019357240352401044, + "grad_norm": 1319.893310546875, + "learning_rate": 1.890909090909091e-06, + "loss": 403.0323, + "step": 2340 + }, + { + "epoch": 0.019439963601770277, + "grad_norm": 3296.14404296875, + "learning_rate": 1.8989898989898992e-06, + "loss": 277.1991, + "step": 2350 + }, + { + "epoch": 0.019522686851139514, + "grad_norm": 2120.728515625, + "learning_rate": 1.9070707070707072e-06, + "loss": 266.7476, + "step": 2360 + }, + { + "epoch": 0.019605410100508747, + "grad_norm": 1736.4859619140625, + "learning_rate": 1.9151515151515154e-06, + "loss": 385.987, + "step": 2370 + }, + { + "epoch": 0.019688133349877984, + "grad_norm": 1218.7796630859375, + "learning_rate": 1.9232323232323235e-06, + "loss": 337.6978, + "step": 2380 + }, + { + "epoch": 0.019770856599247218, + "grad_norm": 4797.71337890625, + "learning_rate": 1.9313131313131315e-06, + "loss": 255.6176, + "step": 2390 + }, + { + "epoch": 0.019853579848616455, + "grad_norm": 1956.2137451171875, + "learning_rate": 1.9393939393939395e-06, + "loss": 283.0651, + "step": 2400 + }, + { + "epoch": 0.019936303097985688, + "grad_norm": 918.0914916992188, + "learning_rate": 1.9474747474747475e-06, + "loss": 312.682, + "step": 2410 + }, + { + "epoch": 0.020019026347354925, + "grad_norm": 2107.04736328125, + "learning_rate": 1.955555555555556e-06, + "loss": 384.748, + "step": 2420 + }, + { + "epoch": 0.02010174959672416, + "grad_norm": 861.828125, + "learning_rate": 1.9636363636363636e-06, + "loss": 340.8865, + "step": 2430 + }, + { + "epoch": 0.020184472846093395, + "grad_norm": 6480.6171875, + "learning_rate": 1.9717171717171716e-06, + "loss": 416.4972, + "step": 2440 + }, + { + "epoch": 0.02026719609546263, + "grad_norm": 1926.79296875, + "learning_rate": 1.97979797979798e-06, + "loss": 260.5365, + "step": 2450 + }, + { + "epoch": 0.020349919344831865, + "grad_norm": 1225.735107421875, + "learning_rate": 1.987878787878788e-06, + "loss": 261.068, + "step": 2460 + }, + { + "epoch": 0.0204326425942011, + "grad_norm": 857.5597534179688, + "learning_rate": 1.995959595959596e-06, + "loss": 241.7558, + "step": 2470 + }, + { + "epoch": 0.020515365843570336, + "grad_norm": 42987.5546875, + "learning_rate": 2.004040404040404e-06, + "loss": 380.0271, + "step": 2480 + }, + { + "epoch": 0.02059808909293957, + "grad_norm": 1689.1239013671875, + "learning_rate": 2.012121212121212e-06, + "loss": 390.1416, + "step": 2490 + }, + { + "epoch": 0.020680812342308806, + "grad_norm": 4134.138671875, + "learning_rate": 2.02020202020202e-06, + "loss": 265.3958, + "step": 2500 + }, + { + "epoch": 0.02076353559167804, + "grad_norm": 1039.45703125, + "learning_rate": 2.0282828282828286e-06, + "loss": 297.8323, + "step": 2510 + }, + { + "epoch": 0.020846258841047276, + "grad_norm": 2275.5380859375, + "learning_rate": 2.0363636363636367e-06, + "loss": 326.8338, + "step": 2520 + }, + { + "epoch": 0.02092898209041651, + "grad_norm": 1341.4542236328125, + "learning_rate": 2.0444444444444447e-06, + "loss": 255.4626, + "step": 2530 + }, + { + "epoch": 0.021011705339785747, + "grad_norm": 1704.446044921875, + "learning_rate": 2.0525252525252527e-06, + "loss": 311.6551, + "step": 2540 + }, + { + "epoch": 0.021094428589154984, + "grad_norm": 
1272.9852294921875, + "learning_rate": 2.0606060606060607e-06, + "loss": 278.1134, + "step": 2550 + }, + { + "epoch": 0.021177151838524217, + "grad_norm": 788.3895263671875, + "learning_rate": 2.0686868686868688e-06, + "loss": 282.1122, + "step": 2560 + }, + { + "epoch": 0.021259875087893454, + "grad_norm": 1385.612548828125, + "learning_rate": 2.0767676767676768e-06, + "loss": 314.3244, + "step": 2570 + }, + { + "epoch": 0.021342598337262687, + "grad_norm": 1955.3095703125, + "learning_rate": 2.0848484848484852e-06, + "loss": 262.7597, + "step": 2580 + }, + { + "epoch": 0.021425321586631924, + "grad_norm": 1727.3680419921875, + "learning_rate": 2.092929292929293e-06, + "loss": 264.8611, + "step": 2590 + }, + { + "epoch": 0.021508044836001158, + "grad_norm": 1702.6575927734375, + "learning_rate": 2.1010101010101013e-06, + "loss": 286.7952, + "step": 2600 + }, + { + "epoch": 0.021590768085370395, + "grad_norm": 1000.8258666992188, + "learning_rate": 2.1090909090909093e-06, + "loss": 259.9386, + "step": 2610 + }, + { + "epoch": 0.021673491334739628, + "grad_norm": 1225.5982666015625, + "learning_rate": 2.1171717171717173e-06, + "loss": 248.9738, + "step": 2620 + }, + { + "epoch": 0.021756214584108865, + "grad_norm": 2182.8955078125, + "learning_rate": 2.1252525252525254e-06, + "loss": 290.0493, + "step": 2630 + }, + { + "epoch": 0.0218389378334781, + "grad_norm": 946.896484375, + "learning_rate": 2.133333333333334e-06, + "loss": 285.7407, + "step": 2640 + }, + { + "epoch": 0.021921661082847335, + "grad_norm": 965.7489624023438, + "learning_rate": 2.1414141414141414e-06, + "loss": 277.0386, + "step": 2650 + }, + { + "epoch": 0.02200438433221657, + "grad_norm": 1855.605712890625, + "learning_rate": 2.1494949494949494e-06, + "loss": 308.2425, + "step": 2660 + }, + { + "epoch": 0.022087107581585805, + "grad_norm": 945.2372436523438, + "learning_rate": 2.157575757575758e-06, + "loss": 314.9084, + "step": 2670 + }, + { + "epoch": 0.02216983083095504, + "grad_norm": 2101.0771484375, + "learning_rate": 2.165656565656566e-06, + "loss": 277.5356, + "step": 2680 + }, + { + "epoch": 0.022252554080324276, + "grad_norm": 931.0570678710938, + "learning_rate": 2.173737373737374e-06, + "loss": 380.9075, + "step": 2690 + }, + { + "epoch": 0.02233527732969351, + "grad_norm": 849.65380859375, + "learning_rate": 2.181818181818182e-06, + "loss": 266.7404, + "step": 2700 + }, + { + "epoch": 0.022418000579062746, + "grad_norm": 795.3492431640625, + "learning_rate": 2.18989898989899e-06, + "loss": 301.0031, + "step": 2710 + }, + { + "epoch": 0.02250072382843198, + "grad_norm": 998.2992553710938, + "learning_rate": 2.197979797979798e-06, + "loss": 236.0826, + "step": 2720 + }, + { + "epoch": 0.022583447077801216, + "grad_norm": 3029.24462890625, + "learning_rate": 2.2060606060606064e-06, + "loss": 284.6466, + "step": 2730 + }, + { + "epoch": 0.02266617032717045, + "grad_norm": 991.4857177734375, + "learning_rate": 2.2141414141414145e-06, + "loss": 480.9545, + "step": 2740 + }, + { + "epoch": 0.022748893576539687, + "grad_norm": 610.8831787109375, + "learning_rate": 2.222222222222222e-06, + "loss": 280.1348, + "step": 2750 + }, + { + "epoch": 0.02283161682590892, + "grad_norm": 1224.61083984375, + "learning_rate": 2.2303030303030305e-06, + "loss": 287.0002, + "step": 2760 + }, + { + "epoch": 0.022914340075278157, + "grad_norm": 967.6873779296875, + "learning_rate": 2.2383838383838385e-06, + "loss": 302.4269, + "step": 2770 + }, + { + "epoch": 0.02299706332464739, + "grad_norm": 1249.24169921875, + "learning_rate": 
2.2464646464646466e-06, + "loss": 324.5018, + "step": 2780 + }, + { + "epoch": 0.023079786574016627, + "grad_norm": 2674.385498046875, + "learning_rate": 2.254545454545455e-06, + "loss": 253.8143, + "step": 2790 + }, + { + "epoch": 0.023162509823385864, + "grad_norm": 1356.0936279296875, + "learning_rate": 2.262626262626263e-06, + "loss": 287.3596, + "step": 2800 + }, + { + "epoch": 0.023245233072755098, + "grad_norm": 1003.8848266601562, + "learning_rate": 2.2707070707070706e-06, + "loss": 259.5809, + "step": 2810 + }, + { + "epoch": 0.023327956322124335, + "grad_norm": 1609.186767578125, + "learning_rate": 2.278787878787879e-06, + "loss": 243.4104, + "step": 2820 + }, + { + "epoch": 0.023410679571493568, + "grad_norm": 1286.5640869140625, + "learning_rate": 2.286868686868687e-06, + "loss": 303.2196, + "step": 2830 + }, + { + "epoch": 0.023493402820862805, + "grad_norm": 1285.9228515625, + "learning_rate": 2.294949494949495e-06, + "loss": 274.7912, + "step": 2840 + }, + { + "epoch": 0.02357612607023204, + "grad_norm": 1015.9469604492188, + "learning_rate": 2.303030303030303e-06, + "loss": 255.5114, + "step": 2850 + }, + { + "epoch": 0.023658849319601275, + "grad_norm": 1524.3387451171875, + "learning_rate": 2.311111111111111e-06, + "loss": 394.2463, + "step": 2860 + }, + { + "epoch": 0.02374157256897051, + "grad_norm": 4101.37548828125, + "learning_rate": 2.3191919191919192e-06, + "loss": 343.4752, + "step": 2870 + }, + { + "epoch": 0.023824295818339745, + "grad_norm": 1837.815673828125, + "learning_rate": 2.3272727272727277e-06, + "loss": 306.1534, + "step": 2880 + }, + { + "epoch": 0.02390701906770898, + "grad_norm": 1742.3822021484375, + "learning_rate": 2.3353535353535357e-06, + "loss": 382.2382, + "step": 2890 + }, + { + "epoch": 0.023989742317078216, + "grad_norm": 922.0031127929688, + "learning_rate": 2.3434343434343437e-06, + "loss": 230.2729, + "step": 2900 + }, + { + "epoch": 0.02407246556644745, + "grad_norm": 860.6764526367188, + "learning_rate": 2.3515151515151517e-06, + "loss": 257.8272, + "step": 2910 + }, + { + "epoch": 0.024155188815816686, + "grad_norm": 3094.787841796875, + "learning_rate": 2.3595959595959598e-06, + "loss": 247.0388, + "step": 2920 + }, + { + "epoch": 0.02423791206518592, + "grad_norm": 1400.10009765625, + "learning_rate": 2.367676767676768e-06, + "loss": 350.3922, + "step": 2930 + }, + { + "epoch": 0.024320635314555156, + "grad_norm": 1306.7431640625, + "learning_rate": 2.375757575757576e-06, + "loss": 318.7807, + "step": 2940 + }, + { + "epoch": 0.02440335856392439, + "grad_norm": 859.3858032226562, + "learning_rate": 2.3838383838383843e-06, + "loss": 292.0828, + "step": 2950 + }, + { + "epoch": 0.024486081813293627, + "grad_norm": 1207.4444580078125, + "learning_rate": 2.3919191919191923e-06, + "loss": 268.6456, + "step": 2960 + }, + { + "epoch": 0.02456880506266286, + "grad_norm": 1388.7242431640625, + "learning_rate": 2.4000000000000003e-06, + "loss": 327.8167, + "step": 2970 + }, + { + "epoch": 0.024651528312032097, + "grad_norm": 1601.5413818359375, + "learning_rate": 2.4080808080808083e-06, + "loss": 217.4461, + "step": 2980 + }, + { + "epoch": 0.02473425156140133, + "grad_norm": 1094.072509765625, + "learning_rate": 2.4161616161616164e-06, + "loss": 237.2505, + "step": 2990 + }, + { + "epoch": 0.024816974810770567, + "grad_norm": 1064.4403076171875, + "learning_rate": 2.4242424242424244e-06, + "loss": 274.2577, + "step": 3000 + }, + { + "epoch": 0.0248996980601398, + "grad_norm": 2712.60986328125, + "learning_rate": 2.432323232323233e-06, + 
"loss": 336.2089, + "step": 3010 + }, + { + "epoch": 0.024982421309509038, + "grad_norm": 657.0144653320312, + "learning_rate": 2.4404040404040404e-06, + "loss": 285.5267, + "step": 3020 + }, + { + "epoch": 0.02506514455887827, + "grad_norm": 1818.3843994140625, + "learning_rate": 2.4484848484848485e-06, + "loss": 345.3333, + "step": 3030 + }, + { + "epoch": 0.025147867808247508, + "grad_norm": 1924.058349609375, + "learning_rate": 2.456565656565657e-06, + "loss": 254.806, + "step": 3040 + }, + { + "epoch": 0.025230591057616745, + "grad_norm": 926.4883422851562, + "learning_rate": 2.464646464646465e-06, + "loss": 248.8595, + "step": 3050 + }, + { + "epoch": 0.025313314306985978, + "grad_norm": 1217.055419921875, + "learning_rate": 2.472727272727273e-06, + "loss": 258.9547, + "step": 3060 + }, + { + "epoch": 0.025396037556355215, + "grad_norm": 837.9513549804688, + "learning_rate": 2.480808080808081e-06, + "loss": 239.7319, + "step": 3070 + }, + { + "epoch": 0.02547876080572445, + "grad_norm": 702.333740234375, + "learning_rate": 2.488888888888889e-06, + "loss": 290.1943, + "step": 3080 + }, + { + "epoch": 0.025561484055093685, + "grad_norm": 1217.6317138671875, + "learning_rate": 2.496969696969697e-06, + "loss": 282.1116, + "step": 3090 + }, + { + "epoch": 0.02564420730446292, + "grad_norm": 1642.226806640625, + "learning_rate": 2.5050505050505055e-06, + "loss": 242.3538, + "step": 3100 + }, + { + "epoch": 0.025726930553832156, + "grad_norm": 1036.6883544921875, + "learning_rate": 2.5131313131313135e-06, + "loss": 339.3384, + "step": 3110 + }, + { + "epoch": 0.02580965380320139, + "grad_norm": 2547.31396484375, + "learning_rate": 2.5212121212121215e-06, + "loss": 268.7113, + "step": 3120 + }, + { + "epoch": 0.025892377052570626, + "grad_norm": 2499.7900390625, + "learning_rate": 2.5292929292929296e-06, + "loss": 371.6728, + "step": 3130 + }, + { + "epoch": 0.02597510030193986, + "grad_norm": 835.4087524414062, + "learning_rate": 2.537373737373738e-06, + "loss": 331.6685, + "step": 3140 + }, + { + "epoch": 0.026057823551309096, + "grad_norm": 1723.9932861328125, + "learning_rate": 2.5454545454545456e-06, + "loss": 241.2475, + "step": 3150 + }, + { + "epoch": 0.02614054680067833, + "grad_norm": 892.8982543945312, + "learning_rate": 2.5535353535353536e-06, + "loss": 239.1675, + "step": 3160 + }, + { + "epoch": 0.026223270050047567, + "grad_norm": 1679.0596923828125, + "learning_rate": 2.5616161616161617e-06, + "loss": 251.3017, + "step": 3170 + }, + { + "epoch": 0.0263059932994168, + "grad_norm": 1314.4605712890625, + "learning_rate": 2.5696969696969697e-06, + "loss": 291.6142, + "step": 3180 + }, + { + "epoch": 0.026388716548786037, + "grad_norm": 1959.02490234375, + "learning_rate": 2.577777777777778e-06, + "loss": 335.8238, + "step": 3190 + }, + { + "epoch": 0.02647143979815527, + "grad_norm": 1912.5706787109375, + "learning_rate": 2.585858585858586e-06, + "loss": 374.3031, + "step": 3200 + }, + { + "epoch": 0.026554163047524507, + "grad_norm": 3296.021728515625, + "learning_rate": 2.593939393939394e-06, + "loss": 258.0628, + "step": 3210 + }, + { + "epoch": 0.02663688629689374, + "grad_norm": 1115.4268798828125, + "learning_rate": 2.602020202020202e-06, + "loss": 334.8846, + "step": 3220 + }, + { + "epoch": 0.026719609546262978, + "grad_norm": 973.0841064453125, + "learning_rate": 2.6101010101010107e-06, + "loss": 275.7638, + "step": 3230 + }, + { + "epoch": 0.02680233279563221, + "grad_norm": 1405.8780517578125, + "learning_rate": 2.6181818181818187e-06, + "loss": 290.556, + "step": 3240 
+ }, + { + "epoch": 0.026885056045001448, + "grad_norm": 2512.458740234375, + "learning_rate": 2.6262626262626267e-06, + "loss": 255.2991, + "step": 3250 + }, + { + "epoch": 0.02696777929437068, + "grad_norm": 1015.4577026367188, + "learning_rate": 2.6343434343434343e-06, + "loss": 293.1441, + "step": 3260 + }, + { + "epoch": 0.027050502543739918, + "grad_norm": 1590.4383544921875, + "learning_rate": 2.6424242424242423e-06, + "loss": 315.3852, + "step": 3270 + }, + { + "epoch": 0.02713322579310915, + "grad_norm": 1241.1497802734375, + "learning_rate": 2.6505050505050508e-06, + "loss": 279.314, + "step": 3280 + }, + { + "epoch": 0.02721594904247839, + "grad_norm": 803.2537841796875, + "learning_rate": 2.658585858585859e-06, + "loss": 237.7475, + "step": 3290 + }, + { + "epoch": 0.027298672291847625, + "grad_norm": 1308.884765625, + "learning_rate": 2.666666666666667e-06, + "loss": 289.3862, + "step": 3300 + }, + { + "epoch": 0.02738139554121686, + "grad_norm": 1037.373291015625, + "learning_rate": 2.674747474747475e-06, + "loss": 278.7418, + "step": 3310 + }, + { + "epoch": 0.027464118790586096, + "grad_norm": 1414.7493896484375, + "learning_rate": 2.6828282828282833e-06, + "loss": 388.9876, + "step": 3320 + }, + { + "epoch": 0.02754684203995533, + "grad_norm": 1686.8880615234375, + "learning_rate": 2.6909090909090913e-06, + "loss": 224.3872, + "step": 3330 + }, + { + "epoch": 0.027629565289324566, + "grad_norm": 1130.1907958984375, + "learning_rate": 2.6989898989898994e-06, + "loss": 219.274, + "step": 3340 + }, + { + "epoch": 0.0277122885386938, + "grad_norm": 1604.05419921875, + "learning_rate": 2.7070707070707074e-06, + "loss": 313.1014, + "step": 3350 + }, + { + "epoch": 0.027795011788063036, + "grad_norm": 1515.8199462890625, + "learning_rate": 2.715151515151516e-06, + "loss": 351.9699, + "step": 3360 + }, + { + "epoch": 0.02787773503743227, + "grad_norm": 1105.636474609375, + "learning_rate": 2.7232323232323234e-06, + "loss": 258.1985, + "step": 3370 + }, + { + "epoch": 0.027960458286801507, + "grad_norm": 2343.5537109375, + "learning_rate": 2.7313131313131315e-06, + "loss": 247.2701, + "step": 3380 + }, + { + "epoch": 0.02804318153617074, + "grad_norm": 3364.70068359375, + "learning_rate": 2.7393939393939395e-06, + "loss": 320.3825, + "step": 3390 + }, + { + "epoch": 0.028125904785539977, + "grad_norm": 1226.61279296875, + "learning_rate": 2.7474747474747475e-06, + "loss": 308.3539, + "step": 3400 + }, + { + "epoch": 0.02820862803490921, + "grad_norm": 1365.4581298828125, + "learning_rate": 2.755555555555556e-06, + "loss": 265.8813, + "step": 3410 + }, + { + "epoch": 0.028291351284278447, + "grad_norm": 1471.4437255859375, + "learning_rate": 2.763636363636364e-06, + "loss": 246.5465, + "step": 3420 + }, + { + "epoch": 0.02837407453364768, + "grad_norm": 1522.9873046875, + "learning_rate": 2.771717171717172e-06, + "loss": 294.6199, + "step": 3430 + }, + { + "epoch": 0.028456797783016918, + "grad_norm": 2295.73095703125, + "learning_rate": 2.77979797979798e-06, + "loss": 471.7754, + "step": 3440 + }, + { + "epoch": 0.02853952103238615, + "grad_norm": 1030.5484619140625, + "learning_rate": 2.7878787878787885e-06, + "loss": 315.6531, + "step": 3450 + }, + { + "epoch": 0.028622244281755388, + "grad_norm": 1534.0970458984375, + "learning_rate": 2.7959595959595965e-06, + "loss": 236.7922, + "step": 3460 + }, + { + "epoch": 0.02870496753112462, + "grad_norm": 1193.41748046875, + "learning_rate": 2.804040404040404e-06, + "loss": 301.1876, + "step": 3470 + }, + { + "epoch": 
0.028787690780493858, + "grad_norm": 884.4568481445312, + "learning_rate": 2.812121212121212e-06, + "loss": 248.0099, + "step": 3480 + }, + { + "epoch": 0.02887041402986309, + "grad_norm": 1417.0870361328125, + "learning_rate": 2.82020202020202e-06, + "loss": 214.0175, + "step": 3490 + }, + { + "epoch": 0.02895313727923233, + "grad_norm": 906.0728149414062, + "learning_rate": 2.8282828282828286e-06, + "loss": 337.6909, + "step": 3500 + }, + { + "epoch": 0.029035860528601562, + "grad_norm": 720.5711059570312, + "learning_rate": 2.8363636363636366e-06, + "loss": 237.9402, + "step": 3510 + }, + { + "epoch": 0.0291185837779708, + "grad_norm": 2159.7626953125, + "learning_rate": 2.8444444444444446e-06, + "loss": 290.2204, + "step": 3520 + }, + { + "epoch": 0.029201307027340032, + "grad_norm": 1583.6937255859375, + "learning_rate": 2.8525252525252527e-06, + "loss": 339.3829, + "step": 3530 + }, + { + "epoch": 0.02928403027670927, + "grad_norm": 1711.7625732421875, + "learning_rate": 2.860606060606061e-06, + "loss": 338.8203, + "step": 3540 + }, + { + "epoch": 0.029366753526078506, + "grad_norm": 1867.3909912109375, + "learning_rate": 2.868686868686869e-06, + "loss": 295.4354, + "step": 3550 + }, + { + "epoch": 0.02944947677544774, + "grad_norm": 1350.51708984375, + "learning_rate": 2.876767676767677e-06, + "loss": 255.1478, + "step": 3560 + }, + { + "epoch": 0.029532200024816976, + "grad_norm": 1835.9559326171875, + "learning_rate": 2.884848484848485e-06, + "loss": 235.7582, + "step": 3570 + }, + { + "epoch": 0.02961492327418621, + "grad_norm": 1273.539794921875, + "learning_rate": 2.892929292929293e-06, + "loss": 305.7688, + "step": 3580 + }, + { + "epoch": 0.029697646523555447, + "grad_norm": 1092.1051025390625, + "learning_rate": 2.9010101010101012e-06, + "loss": 229.7812, + "step": 3590 + }, + { + "epoch": 0.02978036977292468, + "grad_norm": 1232.7423095703125, + "learning_rate": 2.9090909090909093e-06, + "loss": 308.136, + "step": 3600 + }, + { + "epoch": 0.029863093022293917, + "grad_norm": 860.4628295898438, + "learning_rate": 2.9171717171717173e-06, + "loss": 245.5094, + "step": 3610 + }, + { + "epoch": 0.02994581627166315, + "grad_norm": 1105.772705078125, + "learning_rate": 2.9252525252525253e-06, + "loss": 225.044, + "step": 3620 + }, + { + "epoch": 0.030028539521032387, + "grad_norm": 1092.992431640625, + "learning_rate": 2.9333333333333338e-06, + "loss": 240.9013, + "step": 3630 + }, + { + "epoch": 0.03011126277040162, + "grad_norm": 1877.3671875, + "learning_rate": 2.941414141414142e-06, + "loss": 304.4779, + "step": 3640 + }, + { + "epoch": 0.030193986019770858, + "grad_norm": 1553.412109375, + "learning_rate": 2.94949494949495e-06, + "loss": 253.1373, + "step": 3650 + }, + { + "epoch": 0.03027670926914009, + "grad_norm": 1163.033447265625, + "learning_rate": 2.957575757575758e-06, + "loss": 272.4007, + "step": 3660 + }, + { + "epoch": 0.030359432518509328, + "grad_norm": 1105.1661376953125, + "learning_rate": 2.9656565656565663e-06, + "loss": 243.5992, + "step": 3670 + }, + { + "epoch": 0.03044215576787856, + "grad_norm": 1370.2781982421875, + "learning_rate": 2.9737373737373743e-06, + "loss": 232.3612, + "step": 3680 + }, + { + "epoch": 0.030524879017247798, + "grad_norm": 925.8299560546875, + "learning_rate": 2.981818181818182e-06, + "loss": 194.9076, + "step": 3690 + }, + { + "epoch": 0.03060760226661703, + "grad_norm": 4565.89208984375, + "learning_rate": 2.98989898989899e-06, + "loss": 317.9731, + "step": 3700 + }, + { + "epoch": 0.03069032551598627, + "grad_norm": 
1785.587890625, + "learning_rate": 2.997979797979798e-06, + "loss": 261.7598, + "step": 3710 + }, + { + "epoch": 0.030773048765355502, + "grad_norm": 1177.1761474609375, + "learning_rate": 3.0060606060606064e-06, + "loss": 211.625, + "step": 3720 + }, + { + "epoch": 0.03085577201472474, + "grad_norm": 980.7220458984375, + "learning_rate": 3.0141414141414144e-06, + "loss": 198.16, + "step": 3730 + }, + { + "epoch": 0.030938495264093972, + "grad_norm": 1450.34033203125, + "learning_rate": 3.0222222222222225e-06, + "loss": 281.4796, + "step": 3740 + }, + { + "epoch": 0.03102121851346321, + "grad_norm": 917.07275390625, + "learning_rate": 3.0303030303030305e-06, + "loss": 266.2501, + "step": 3750 + }, + { + "epoch": 0.031103941762832443, + "grad_norm": 1657.814697265625, + "learning_rate": 3.038383838383839e-06, + "loss": 279.4682, + "step": 3760 + }, + { + "epoch": 0.03118666501220168, + "grad_norm": 1294.77685546875, + "learning_rate": 3.046464646464647e-06, + "loss": 259.5207, + "step": 3770 + }, + { + "epoch": 0.03126938826157091, + "grad_norm": 1622.716796875, + "learning_rate": 3.054545454545455e-06, + "loss": 298.1313, + "step": 3780 + }, + { + "epoch": 0.031352111510940146, + "grad_norm": 1660.8116455078125, + "learning_rate": 3.0626262626262626e-06, + "loss": 250.9933, + "step": 3790 + }, + { + "epoch": 0.03143483476030939, + "grad_norm": 799.0031127929688, + "learning_rate": 3.0707070707070706e-06, + "loss": 310.6956, + "step": 3800 + }, + { + "epoch": 0.03151755800967862, + "grad_norm": 1199.9678955078125, + "learning_rate": 3.078787878787879e-06, + "loss": 283.1158, + "step": 3810 + }, + { + "epoch": 0.031600281259047854, + "grad_norm": 1449.6212158203125, + "learning_rate": 3.086868686868687e-06, + "loss": 249.93, + "step": 3820 + }, + { + "epoch": 0.031683004508417094, + "grad_norm": 1299.2960205078125, + "learning_rate": 3.094949494949495e-06, + "loss": 224.7813, + "step": 3830 + }, + { + "epoch": 0.03176572775778633, + "grad_norm": 2239.721435546875, + "learning_rate": 3.103030303030303e-06, + "loss": 223.2453, + "step": 3840 + }, + { + "epoch": 0.03184845100715556, + "grad_norm": 1760.5396728515625, + "learning_rate": 3.1111111111111116e-06, + "loss": 252.7971, + "step": 3850 + }, + { + "epoch": 0.031931174256524794, + "grad_norm": 682.6892700195312, + "learning_rate": 3.1191919191919196e-06, + "loss": 216.5466, + "step": 3860 + }, + { + "epoch": 0.032013897505894034, + "grad_norm": 2052.32861328125, + "learning_rate": 3.1272727272727276e-06, + "loss": 288.775, + "step": 3870 + }, + { + "epoch": 0.03209662075526327, + "grad_norm": 1602.2225341796875, + "learning_rate": 3.1353535353535357e-06, + "loss": 328.757, + "step": 3880 + }, + { + "epoch": 0.0321793440046325, + "grad_norm": 681.2459106445312, + "learning_rate": 3.143434343434344e-06, + "loss": 190.3252, + "step": 3890 + }, + { + "epoch": 0.032262067254001735, + "grad_norm": 1182.54052734375, + "learning_rate": 3.1515151515151517e-06, + "loss": 268.2503, + "step": 3900 + }, + { + "epoch": 0.032344790503370975, + "grad_norm": 1219.3660888671875, + "learning_rate": 3.1595959595959597e-06, + "loss": 239.1843, + "step": 3910 + }, + { + "epoch": 0.03242751375274021, + "grad_norm": 1041.707275390625, + "learning_rate": 3.1676767676767678e-06, + "loss": 273.016, + "step": 3920 + }, + { + "epoch": 0.03251023700210944, + "grad_norm": 1269.7681884765625, + "learning_rate": 3.1757575757575758e-06, + "loss": 249.6547, + "step": 3930 + }, + { + "epoch": 0.032592960251478675, + "grad_norm": 1372.903564453125, + "learning_rate": 
3.1838383838383842e-06, + "loss": 311.8126, + "step": 3940 + }, + { + "epoch": 0.032675683500847916, + "grad_norm": 2617.830322265625, + "learning_rate": 3.1919191919191923e-06, + "loss": 299.153, + "step": 3950 + }, + { + "epoch": 0.03275840675021715, + "grad_norm": 1350.3302001953125, + "learning_rate": 3.2000000000000003e-06, + "loss": 238.4413, + "step": 3960 + }, + { + "epoch": 0.03284112999958638, + "grad_norm": 2270.457763671875, + "learning_rate": 3.2080808080808083e-06, + "loss": 267.5316, + "step": 3970 + }, + { + "epoch": 0.032923853248955616, + "grad_norm": 1409.05712890625, + "learning_rate": 3.2161616161616168e-06, + "loss": 237.9799, + "step": 3980 + }, + { + "epoch": 0.033006576498324856, + "grad_norm": 1641.050537109375, + "learning_rate": 3.2242424242424248e-06, + "loss": 352.6279, + "step": 3990 + }, + { + "epoch": 0.03308929974769409, + "grad_norm": 1178.6591796875, + "learning_rate": 3.232323232323233e-06, + "loss": 200.0928, + "step": 4000 + }, + { + "epoch": 0.03317202299706332, + "grad_norm": 895.1983032226562, + "learning_rate": 3.2404040404040404e-06, + "loss": 274.1366, + "step": 4010 + }, + { + "epoch": 0.03325474624643256, + "grad_norm": 2193.698486328125, + "learning_rate": 3.2484848484848484e-06, + "loss": 205.6771, + "step": 4020 + }, + { + "epoch": 0.0333374694958018, + "grad_norm": 1705.0743408203125, + "learning_rate": 3.256565656565657e-06, + "loss": 253.3518, + "step": 4030 + }, + { + "epoch": 0.03342019274517103, + "grad_norm": 1552.232666015625, + "learning_rate": 3.264646464646465e-06, + "loss": 243.4265, + "step": 4040 + }, + { + "epoch": 0.033502915994540264, + "grad_norm": 4037.46875, + "learning_rate": 3.272727272727273e-06, + "loss": 229.4004, + "step": 4050 + }, + { + "epoch": 0.033585639243909504, + "grad_norm": 3386.515869140625, + "learning_rate": 3.280808080808081e-06, + "loss": 338.8573, + "step": 4060 + }, + { + "epoch": 0.03366836249327874, + "grad_norm": 1145.6063232421875, + "learning_rate": 3.2888888888888894e-06, + "loss": 254.9263, + "step": 4070 + }, + { + "epoch": 0.03375108574264797, + "grad_norm": 2452.519775390625, + "learning_rate": 3.2969696969696974e-06, + "loss": 365.3762, + "step": 4080 + }, + { + "epoch": 0.033833808992017204, + "grad_norm": 1752.8304443359375, + "learning_rate": 3.3050505050505054e-06, + "loss": 242.0698, + "step": 4090 + }, + { + "epoch": 0.033916532241386445, + "grad_norm": 1975.453857421875, + "learning_rate": 3.3131313131313135e-06, + "loss": 294.2446, + "step": 4100 + }, + { + "epoch": 0.03399925549075568, + "grad_norm": 1159.05615234375, + "learning_rate": 3.321212121212121e-06, + "loss": 254.9454, + "step": 4110 + }, + { + "epoch": 0.03408197874012491, + "grad_norm": 904.747802734375, + "learning_rate": 3.3292929292929295e-06, + "loss": 291.38, + "step": 4120 + }, + { + "epoch": 0.034164701989494145, + "grad_norm": 1563.5689697265625, + "learning_rate": 3.3373737373737375e-06, + "loss": 212.5943, + "step": 4130 + }, + { + "epoch": 0.034247425238863385, + "grad_norm": 1204.7821044921875, + "learning_rate": 3.3454545454545456e-06, + "loss": 265.8186, + "step": 4140 + }, + { + "epoch": 0.03433014848823262, + "grad_norm": 904.9209594726562, + "learning_rate": 3.3535353535353536e-06, + "loss": 260.1104, + "step": 4150 + }, + { + "epoch": 0.03441287173760185, + "grad_norm": 1248.727783203125, + "learning_rate": 3.361616161616162e-06, + "loss": 291.2336, + "step": 4160 + }, + { + "epoch": 0.034495594986971086, + "grad_norm": 893.3284912109375, + "learning_rate": 3.36969696969697e-06, + "loss": 233.5644, 
+ "step": 4170 + }, + { + "epoch": 0.034578318236340326, + "grad_norm": 1158.58984375, + "learning_rate": 3.377777777777778e-06, + "loss": 246.782, + "step": 4180 + }, + { + "epoch": 0.03466104148570956, + "grad_norm": 1969.4088134765625, + "learning_rate": 3.385858585858586e-06, + "loss": 237.9205, + "step": 4190 + }, + { + "epoch": 0.03474376473507879, + "grad_norm": 1035.88427734375, + "learning_rate": 3.3939393939393946e-06, + "loss": 321.7581, + "step": 4200 + }, + { + "epoch": 0.034826487984448026, + "grad_norm": 918.1465454101562, + "learning_rate": 3.4020202020202026e-06, + "loss": 272.7782, + "step": 4210 + }, + { + "epoch": 0.03490921123381727, + "grad_norm": 1070.0615234375, + "learning_rate": 3.41010101010101e-06, + "loss": 224.1239, + "step": 4220 + }, + { + "epoch": 0.0349919344831865, + "grad_norm": 1230.9432373046875, + "learning_rate": 3.4181818181818182e-06, + "loss": 276.4893, + "step": 4230 + }, + { + "epoch": 0.035074657732555733, + "grad_norm": 1131.748046875, + "learning_rate": 3.4262626262626262e-06, + "loss": 217.2287, + "step": 4240 + }, + { + "epoch": 0.03515738098192497, + "grad_norm": 2427.749267578125, + "learning_rate": 3.4343434343434347e-06, + "loss": 251.6459, + "step": 4250 + }, + { + "epoch": 0.03524010423129421, + "grad_norm": 1668.80810546875, + "learning_rate": 3.4424242424242427e-06, + "loss": 317.6187, + "step": 4260 + }, + { + "epoch": 0.03532282748066344, + "grad_norm": 372.29510498046875, + "learning_rate": 3.4505050505050507e-06, + "loss": 179.444, + "step": 4270 + }, + { + "epoch": 0.035405550730032674, + "grad_norm": 1011.8233032226562, + "learning_rate": 3.4585858585858588e-06, + "loss": 219.8903, + "step": 4280 + }, + { + "epoch": 0.03548827397940191, + "grad_norm": 1418.88232421875, + "learning_rate": 3.4666666666666672e-06, + "loss": 286.1539, + "step": 4290 + }, + { + "epoch": 0.03557099722877115, + "grad_norm": 2228.23095703125, + "learning_rate": 3.4747474747474752e-06, + "loss": 293.0923, + "step": 4300 + }, + { + "epoch": 0.03565372047814038, + "grad_norm": 715.6218872070312, + "learning_rate": 3.4828282828282833e-06, + "loss": 282.1017, + "step": 4310 + }, + { + "epoch": 0.035736443727509615, + "grad_norm": 742.7416381835938, + "learning_rate": 3.4909090909090913e-06, + "loss": 204.9258, + "step": 4320 + }, + { + "epoch": 0.035819166976878855, + "grad_norm": 1228.9534912109375, + "learning_rate": 3.498989898989899e-06, + "loss": 259.2133, + "step": 4330 + }, + { + "epoch": 0.03590189022624809, + "grad_norm": 967.5274047851562, + "learning_rate": 3.5070707070707073e-06, + "loss": 288.3385, + "step": 4340 + }, + { + "epoch": 0.03598461347561732, + "grad_norm": 1587.67724609375, + "learning_rate": 3.5151515151515154e-06, + "loss": 277.6203, + "step": 4350 + }, + { + "epoch": 0.036067336724986555, + "grad_norm": 1431.6448974609375, + "learning_rate": 3.5232323232323234e-06, + "loss": 213.064, + "step": 4360 + }, + { + "epoch": 0.036150059974355796, + "grad_norm": 1135.779052734375, + "learning_rate": 3.5313131313131314e-06, + "loss": 221.4357, + "step": 4370 + }, + { + "epoch": 0.03623278322372503, + "grad_norm": 1519.3890380859375, + "learning_rate": 3.53939393939394e-06, + "loss": 277.6575, + "step": 4380 + }, + { + "epoch": 0.03631550647309426, + "grad_norm": 1360.25048828125, + "learning_rate": 3.547474747474748e-06, + "loss": 195.2055, + "step": 4390 + }, + { + "epoch": 0.036398229722463496, + "grad_norm": 2059.44287109375, + "learning_rate": 3.555555555555556e-06, + "loss": 324.8405, + "step": 4400 + }, + { + "epoch": 
0.036480952971832736, + "grad_norm": 780.7919921875, + "learning_rate": 3.563636363636364e-06, + "loss": 212.4319, + "step": 4410 + }, + { + "epoch": 0.03656367622120197, + "grad_norm": 1413.5367431640625, + "learning_rate": 3.5717171717171724e-06, + "loss": 255.6041, + "step": 4420 + }, + { + "epoch": 0.0366463994705712, + "grad_norm": 637.59521484375, + "learning_rate": 3.57979797979798e-06, + "loss": 193.1592, + "step": 4430 + }, + { + "epoch": 0.03672912271994044, + "grad_norm": 1240.098876953125, + "learning_rate": 3.587878787878788e-06, + "loss": 331.8386, + "step": 4440 + }, + { + "epoch": 0.03681184596930968, + "grad_norm": 980.0120239257812, + "learning_rate": 3.595959595959596e-06, + "loss": 276.4505, + "step": 4450 + }, + { + "epoch": 0.03689456921867891, + "grad_norm": 2805.927001953125, + "learning_rate": 3.604040404040404e-06, + "loss": 269.0381, + "step": 4460 + }, + { + "epoch": 0.036977292468048144, + "grad_norm": 2359.113525390625, + "learning_rate": 3.6121212121212125e-06, + "loss": 258.4454, + "step": 4470 + }, + { + "epoch": 0.03706001571741738, + "grad_norm": 1293.63720703125, + "learning_rate": 3.6202020202020205e-06, + "loss": 225.2379, + "step": 4480 + }, + { + "epoch": 0.03714273896678662, + "grad_norm": 2243.848876953125, + "learning_rate": 3.6282828282828286e-06, + "loss": 213.6965, + "step": 4490 + }, + { + "epoch": 0.03722546221615585, + "grad_norm": 770.7854614257812, + "learning_rate": 3.6363636363636366e-06, + "loss": 231.6815, + "step": 4500 + }, + { + "epoch": 0.037308185465525084, + "grad_norm": 1308.199462890625, + "learning_rate": 3.644444444444445e-06, + "loss": 232.9941, + "step": 4510 + }, + { + "epoch": 0.03739090871489432, + "grad_norm": 883.5875244140625, + "learning_rate": 3.652525252525253e-06, + "loss": 216.6295, + "step": 4520 + }, + { + "epoch": 0.03747363196426356, + "grad_norm": 672.0443115234375, + "learning_rate": 3.660606060606061e-06, + "loss": 201.8103, + "step": 4530 + }, + { + "epoch": 0.03755635521363279, + "grad_norm": 1314.2528076171875, + "learning_rate": 3.6686868686868687e-06, + "loss": 256.3278, + "step": 4540 + }, + { + "epoch": 0.037639078463002025, + "grad_norm": 1359.0018310546875, + "learning_rate": 3.6767676767676767e-06, + "loss": 216.419, + "step": 4550 + }, + { + "epoch": 0.037721801712371265, + "grad_norm": 1884.207763671875, + "learning_rate": 3.684848484848485e-06, + "loss": 211.731, + "step": 4560 + }, + { + "epoch": 0.0378045249617405, + "grad_norm": 1213.398193359375, + "learning_rate": 3.692929292929293e-06, + "loss": 272.2958, + "step": 4570 + }, + { + "epoch": 0.03788724821110973, + "grad_norm": 2207.1171875, + "learning_rate": 3.701010101010101e-06, + "loss": 265.4331, + "step": 4580 + }, + { + "epoch": 0.037969971460478966, + "grad_norm": 1944.73876953125, + "learning_rate": 3.7090909090909092e-06, + "loss": 272.1659, + "step": 4590 + }, + { + "epoch": 0.038052694709848206, + "grad_norm": 1226.4189453125, + "learning_rate": 3.7171717171717177e-06, + "loss": 226.3827, + "step": 4600 + }, + { + "epoch": 0.03813541795921744, + "grad_norm": 1100.8829345703125, + "learning_rate": 3.7252525252525257e-06, + "loss": 221.5738, + "step": 4610 + }, + { + "epoch": 0.03821814120858667, + "grad_norm": 1859.399169921875, + "learning_rate": 3.7333333333333337e-06, + "loss": 229.0128, + "step": 4620 + }, + { + "epoch": 0.038300864457955906, + "grad_norm": 1107.61572265625, + "learning_rate": 3.7414141414141418e-06, + "loss": 224.0653, + "step": 4630 + }, + { + "epoch": 0.03838358770732515, + "grad_norm": 
1617.0789794921875, + "learning_rate": 3.74949494949495e-06, + "loss": 240.6666, + "step": 4640 + }, + { + "epoch": 0.03846631095669438, + "grad_norm": 2554.2158203125, + "learning_rate": 3.757575757575758e-06, + "loss": 258.9923, + "step": 4650 + }, + { + "epoch": 0.038549034206063613, + "grad_norm": 3580.094482421875, + "learning_rate": 3.765656565656566e-06, + "loss": 270.2465, + "step": 4660 + }, + { + "epoch": 0.03863175745543285, + "grad_norm": 1391.0946044921875, + "learning_rate": 3.773737373737374e-06, + "loss": 184.8681, + "step": 4670 + }, + { + "epoch": 0.03871448070480209, + "grad_norm": 1413.2205810546875, + "learning_rate": 3.781818181818182e-06, + "loss": 210.8876, + "step": 4680 + }, + { + "epoch": 0.03879720395417132, + "grad_norm": 1123.7315673828125, + "learning_rate": 3.7898989898989903e-06, + "loss": 226.6469, + "step": 4690 + }, + { + "epoch": 0.038879927203540554, + "grad_norm": 1507.7408447265625, + "learning_rate": 3.7979797979797984e-06, + "loss": 243.835, + "step": 4700 + }, + { + "epoch": 0.03896265045290979, + "grad_norm": 1154.9927978515625, + "learning_rate": 3.8060606060606064e-06, + "loss": 275.5142, + "step": 4710 + }, + { + "epoch": 0.03904537370227903, + "grad_norm": 1263.9693603515625, + "learning_rate": 3.8141414141414144e-06, + "loss": 208.4059, + "step": 4720 + }, + { + "epoch": 0.03912809695164826, + "grad_norm": 2182.939208984375, + "learning_rate": 3.8222222222222224e-06, + "loss": 225.735, + "step": 4730 + }, + { + "epoch": 0.039210820201017495, + "grad_norm": 1238.331298828125, + "learning_rate": 3.830303030303031e-06, + "loss": 215.773, + "step": 4740 + }, + { + "epoch": 0.03929354345038673, + "grad_norm": 1922.897216796875, + "learning_rate": 3.8383838383838385e-06, + "loss": 205.833, + "step": 4750 + }, + { + "epoch": 0.03937626669975597, + "grad_norm": 1248.5205078125, + "learning_rate": 3.846464646464647e-06, + "loss": 212.8604, + "step": 4760 + }, + { + "epoch": 0.0394589899491252, + "grad_norm": 1999.810791015625, + "learning_rate": 3.8545454545454545e-06, + "loss": 234.7124, + "step": 4770 + }, + { + "epoch": 0.039541713198494435, + "grad_norm": 1900.56787109375, + "learning_rate": 3.862626262626263e-06, + "loss": 206.3176, + "step": 4780 + }, + { + "epoch": 0.03962443644786367, + "grad_norm": 1776.326171875, + "learning_rate": 3.8707070707070706e-06, + "loss": 235.4771, + "step": 4790 + }, + { + "epoch": 0.03970715969723291, + "grad_norm": 1206.546630859375, + "learning_rate": 3.878787878787879e-06, + "loss": 232.6159, + "step": 4800 + }, + { + "epoch": 0.03978988294660214, + "grad_norm": 1193.0535888671875, + "learning_rate": 3.8868686868686875e-06, + "loss": 239.4891, + "step": 4810 + }, + { + "epoch": 0.039872606195971376, + "grad_norm": 1266.81103515625, + "learning_rate": 3.894949494949495e-06, + "loss": 264.1928, + "step": 4820 + }, + { + "epoch": 0.039955329445340616, + "grad_norm": 1152.444580078125, + "learning_rate": 3.9030303030303035e-06, + "loss": 197.2479, + "step": 4830 + }, + { + "epoch": 0.04003805269470985, + "grad_norm": 1796.5777587890625, + "learning_rate": 3.911111111111112e-06, + "loss": 232.1052, + "step": 4840 + }, + { + "epoch": 0.04012077594407908, + "grad_norm": 1190.642822265625, + "learning_rate": 3.9191919191919196e-06, + "loss": 246.6287, + "step": 4850 + }, + { + "epoch": 0.04020349919344832, + "grad_norm": 1134.314453125, + "learning_rate": 3.927272727272727e-06, + "loss": 268.4593, + "step": 4860 + }, + { + "epoch": 0.04028622244281756, + "grad_norm": 1696.9732666015625, + "learning_rate": 
3.935353535353536e-06, + "loss": 226.5233, + "step": 4870 + }, + { + "epoch": 0.04036894569218679, + "grad_norm": 1237.491455078125, + "learning_rate": 3.943434343434343e-06, + "loss": 196.7495, + "step": 4880 + }, + { + "epoch": 0.040451668941556024, + "grad_norm": 2431.97265625, + "learning_rate": 3.951515151515152e-06, + "loss": 220.9505, + "step": 4890 + }, + { + "epoch": 0.04053439219092526, + "grad_norm": 1421.2952880859375, + "learning_rate": 3.95959595959596e-06, + "loss": 253.9248, + "step": 4900 + }, + { + "epoch": 0.0406171154402945, + "grad_norm": 1305.2431640625, + "learning_rate": 3.967676767676768e-06, + "loss": 302.1668, + "step": 4910 + }, + { + "epoch": 0.04069983868966373, + "grad_norm": 1975.3135986328125, + "learning_rate": 3.975757575757576e-06, + "loss": 282.536, + "step": 4920 + }, + { + "epoch": 0.040782561939032964, + "grad_norm": 1655.1588134765625, + "learning_rate": 3.983838383838385e-06, + "loss": 217.2957, + "step": 4930 + }, + { + "epoch": 0.0408652851884022, + "grad_norm": 1051.86083984375, + "learning_rate": 3.991919191919192e-06, + "loss": 240.2383, + "step": 4940 + }, + { + "epoch": 0.04094800843777144, + "grad_norm": 773.5546875, + "learning_rate": 4.000000000000001e-06, + "loss": 221.8524, + "step": 4950 + }, + { + "epoch": 0.04103073168714067, + "grad_norm": 1287.1944580078125, + "learning_rate": 4.008080808080808e-06, + "loss": 229.3614, + "step": 4960 + }, + { + "epoch": 0.041113454936509905, + "grad_norm": 1897.0220947265625, + "learning_rate": 4.016161616161616e-06, + "loss": 233.9596, + "step": 4970 + }, + { + "epoch": 0.04119617818587914, + "grad_norm": 1463.894287109375, + "learning_rate": 4.024242424242424e-06, + "loss": 205.995, + "step": 4980 + }, + { + "epoch": 0.04127890143524838, + "grad_norm": 981.1106567382812, + "learning_rate": 4.032323232323233e-06, + "loss": 226.2633, + "step": 4990 + }, + { + "epoch": 0.04136162468461761, + "grad_norm": 3404.24365234375, + "learning_rate": 4.04040404040404e-06, + "loss": 249.0778, + "step": 5000 + }, + { + "epoch": 0.041444347933986846, + "grad_norm": 1160.4622802734375, + "learning_rate": 4.048484848484849e-06, + "loss": 201.7348, + "step": 5010 + }, + { + "epoch": 0.04152707118335608, + "grad_norm": 1339.532470703125, + "learning_rate": 4.056565656565657e-06, + "loss": 296.6651, + "step": 5020 + }, + { + "epoch": 0.04160979443272532, + "grad_norm": 1330.253173828125, + "learning_rate": 4.064646464646465e-06, + "loss": 222.7661, + "step": 5030 + }, + { + "epoch": 0.04169251768209455, + "grad_norm": 1871.73095703125, + "learning_rate": 4.072727272727273e-06, + "loss": 185.6875, + "step": 5040 + }, + { + "epoch": 0.041775240931463786, + "grad_norm": 989.5170288085938, + "learning_rate": 4.080808080808081e-06, + "loss": 184.853, + "step": 5050 + }, + { + "epoch": 0.04185796418083302, + "grad_norm": 1325.712158203125, + "learning_rate": 4.088888888888889e-06, + "loss": 177.4656, + "step": 5060 + }, + { + "epoch": 0.04194068743020226, + "grad_norm": 1084.450927734375, + "learning_rate": 4.096969696969697e-06, + "loss": 218.0059, + "step": 5070 + }, + { + "epoch": 0.04202341067957149, + "grad_norm": 1230.160888671875, + "learning_rate": 4.105050505050505e-06, + "loss": 202.8811, + "step": 5080 + }, + { + "epoch": 0.04210613392894073, + "grad_norm": 909.71728515625, + "learning_rate": 4.113131313131313e-06, + "loss": 229.2698, + "step": 5090 + }, + { + "epoch": 0.04218885717830997, + "grad_norm": 2449.3154296875, + "learning_rate": 4.1212121212121215e-06, + "loss": 211.7621, + "step": 5100 + }, + { + 
"epoch": 0.0422715804276792, + "grad_norm": 578.8762817382812, + "learning_rate": 4.12929292929293e-06, + "loss": 220.8765, + "step": 5110 + }, + { + "epoch": 0.042354303677048434, + "grad_norm": 1294.6402587890625, + "learning_rate": 4.1373737373737375e-06, + "loss": 252.3842, + "step": 5120 + }, + { + "epoch": 0.04243702692641767, + "grad_norm": 911.5681762695312, + "learning_rate": 4.145454545454546e-06, + "loss": 294.7006, + "step": 5130 + }, + { + "epoch": 0.04251975017578691, + "grad_norm": 1328.2413330078125, + "learning_rate": 4.1535353535353536e-06, + "loss": 211.482, + "step": 5140 + }, + { + "epoch": 0.04260247342515614, + "grad_norm": 1352.004638671875, + "learning_rate": 4.161616161616162e-06, + "loss": 210.5752, + "step": 5150 + }, + { + "epoch": 0.042685196674525375, + "grad_norm": 1081.876953125, + "learning_rate": 4.1696969696969705e-06, + "loss": 224.4739, + "step": 5160 + }, + { + "epoch": 0.04276791992389461, + "grad_norm": 2295.4033203125, + "learning_rate": 4.177777777777778e-06, + "loss": 229.6548, + "step": 5170 + }, + { + "epoch": 0.04285064317326385, + "grad_norm": 1036.4749755859375, + "learning_rate": 4.185858585858586e-06, + "loss": 208.1364, + "step": 5180 + }, + { + "epoch": 0.04293336642263308, + "grad_norm": 1558.9718017578125, + "learning_rate": 4.193939393939394e-06, + "loss": 262.7995, + "step": 5190 + }, + { + "epoch": 0.043016089672002315, + "grad_norm": 1155.65478515625, + "learning_rate": 4.2020202020202026e-06, + "loss": 251.0647, + "step": 5200 + }, + { + "epoch": 0.04309881292137155, + "grad_norm": 1081.0423583984375, + "learning_rate": 4.21010101010101e-06, + "loss": 235.0402, + "step": 5210 + }, + { + "epoch": 0.04318153617074079, + "grad_norm": 725.3358764648438, + "learning_rate": 4.218181818181819e-06, + "loss": 310.5599, + "step": 5220 + }, + { + "epoch": 0.04326425942011002, + "grad_norm": 1182.2801513671875, + "learning_rate": 4.226262626262626e-06, + "loss": 178.3826, + "step": 5230 + }, + { + "epoch": 0.043346982669479256, + "grad_norm": 1413.02587890625, + "learning_rate": 4.234343434343435e-06, + "loss": 257.1767, + "step": 5240 + }, + { + "epoch": 0.04342970591884849, + "grad_norm": 1222.1142578125, + "learning_rate": 4.242424242424243e-06, + "loss": 218.1251, + "step": 5250 + }, + { + "epoch": 0.04351242916821773, + "grad_norm": 998.1055297851562, + "learning_rate": 4.250505050505051e-06, + "loss": 211.1936, + "step": 5260 + }, + { + "epoch": 0.04359515241758696, + "grad_norm": 844.65478515625, + "learning_rate": 4.258585858585859e-06, + "loss": 235.3665, + "step": 5270 + }, + { + "epoch": 0.0436778756669562, + "grad_norm": 691.6785278320312, + "learning_rate": 4.266666666666668e-06, + "loss": 173.3902, + "step": 5280 + }, + { + "epoch": 0.04376059891632543, + "grad_norm": 1113.0621337890625, + "learning_rate": 4.274747474747475e-06, + "loss": 169.7838, + "step": 5290 + }, + { + "epoch": 0.04384332216569467, + "grad_norm": 1622.1474609375, + "learning_rate": 4.282828282828283e-06, + "loss": 217.8731, + "step": 5300 + }, + { + "epoch": 0.043926045415063904, + "grad_norm": 1753.8427734375, + "learning_rate": 4.290909090909091e-06, + "loss": 251.789, + "step": 5310 + }, + { + "epoch": 0.04400876866443314, + "grad_norm": 918.1004638671875, + "learning_rate": 4.298989898989899e-06, + "loss": 201.8801, + "step": 5320 + }, + { + "epoch": 0.04409149191380238, + "grad_norm": 899.498779296875, + "learning_rate": 4.307070707070707e-06, + "loss": 230.2721, + "step": 5330 + }, + { + "epoch": 0.04417421516317161, + "grad_norm": 1194.7314453125, + 
"learning_rate": 4.315151515151516e-06, + "loss": 302.4559, + "step": 5340 + }, + { + "epoch": 0.044256938412540844, + "grad_norm": 745.4715576171875, + "learning_rate": 4.323232323232323e-06, + "loss": 217.5794, + "step": 5350 + }, + { + "epoch": 0.04433966166191008, + "grad_norm": 3024.5693359375, + "learning_rate": 4.331313131313132e-06, + "loss": 215.8062, + "step": 5360 + }, + { + "epoch": 0.04442238491127932, + "grad_norm": 692.5128173828125, + "learning_rate": 4.33939393939394e-06, + "loss": 206.9033, + "step": 5370 + }, + { + "epoch": 0.04450510816064855, + "grad_norm": 638.6357421875, + "learning_rate": 4.347474747474748e-06, + "loss": 277.6233, + "step": 5380 + }, + { + "epoch": 0.044587831410017785, + "grad_norm": 1237.0350341796875, + "learning_rate": 4.3555555555555555e-06, + "loss": 202.0422, + "step": 5390 + }, + { + "epoch": 0.04467055465938702, + "grad_norm": 1546.1956787109375, + "learning_rate": 4.363636363636364e-06, + "loss": 195.5879, + "step": 5400 + }, + { + "epoch": 0.04475327790875626, + "grad_norm": 1744.332763671875, + "learning_rate": 4.3717171717171715e-06, + "loss": 308.2887, + "step": 5410 + }, + { + "epoch": 0.04483600115812549, + "grad_norm": 1885.436767578125, + "learning_rate": 4.37979797979798e-06, + "loss": 257.4365, + "step": 5420 + }, + { + "epoch": 0.044918724407494726, + "grad_norm": 1949.8758544921875, + "learning_rate": 4.387878787878788e-06, + "loss": 288.0396, + "step": 5430 + }, + { + "epoch": 0.04500144765686396, + "grad_norm": 2583.996826171875, + "learning_rate": 4.395959595959596e-06, + "loss": 243.6479, + "step": 5440 + }, + { + "epoch": 0.0450841709062332, + "grad_norm": 940.0198974609375, + "learning_rate": 4.4040404040404044e-06, + "loss": 209.4657, + "step": 5450 + }, + { + "epoch": 0.04516689415560243, + "grad_norm": 897.3545532226562, + "learning_rate": 4.412121212121213e-06, + "loss": 244.724, + "step": 5460 + }, + { + "epoch": 0.045249617404971666, + "grad_norm": 1344.112060546875, + "learning_rate": 4.4202020202020205e-06, + "loss": 210.6839, + "step": 5470 + }, + { + "epoch": 0.0453323406543409, + "grad_norm": 2638.66357421875, + "learning_rate": 4.428282828282829e-06, + "loss": 281.9325, + "step": 5480 + }, + { + "epoch": 0.04541506390371014, + "grad_norm": 1480.95068359375, + "learning_rate": 4.436363636363637e-06, + "loss": 227.8617, + "step": 5490 + }, + { + "epoch": 0.04549778715307937, + "grad_norm": 1484.61962890625, + "learning_rate": 4.444444444444444e-06, + "loss": 307.6048, + "step": 5500 + }, + { + "epoch": 0.04558051040244861, + "grad_norm": 921.8578491210938, + "learning_rate": 4.452525252525253e-06, + "loss": 283.7532, + "step": 5510 + }, + { + "epoch": 0.04566323365181784, + "grad_norm": 1015.0638427734375, + "learning_rate": 4.460606060606061e-06, + "loss": 232.6935, + "step": 5520 + }, + { + "epoch": 0.04574595690118708, + "grad_norm": 1154.900146484375, + "learning_rate": 4.468686868686869e-06, + "loss": 237.65, + "step": 5530 + }, + { + "epoch": 0.045828680150556314, + "grad_norm": 1067.7034912109375, + "learning_rate": 4.476767676767677e-06, + "loss": 178.2383, + "step": 5540 + }, + { + "epoch": 0.04591140339992555, + "grad_norm": 1582.29541015625, + "learning_rate": 4.4848484848484855e-06, + "loss": 208.2042, + "step": 5550 + }, + { + "epoch": 0.04599412664929478, + "grad_norm": 1326.168212890625, + "learning_rate": 4.492929292929293e-06, + "loss": 206.2426, + "step": 5560 + }, + { + "epoch": 0.04607684989866402, + "grad_norm": 1166.575927734375, + "learning_rate": 4.501010101010102e-06, + "loss": 246.9012, 
+ "step": 5570 + }, + { + "epoch": 0.046159573148033255, + "grad_norm": 1051.04345703125, + "learning_rate": 4.50909090909091e-06, + "loss": 220.1656, + "step": 5580 + }, + { + "epoch": 0.04624229639740249, + "grad_norm": 994.1414794921875, + "learning_rate": 4.517171717171718e-06, + "loss": 246.7432, + "step": 5590 + }, + { + "epoch": 0.04632501964677173, + "grad_norm": 1792.089111328125, + "learning_rate": 4.525252525252526e-06, + "loss": 195.5538, + "step": 5600 + }, + { + "epoch": 0.04640774289614096, + "grad_norm": 889.2850341796875, + "learning_rate": 4.533333333333334e-06, + "loss": 205.7484, + "step": 5610 + }, + { + "epoch": 0.046490466145510195, + "grad_norm": 986.1232299804688, + "learning_rate": 4.541414141414141e-06, + "loss": 259.9392, + "step": 5620 + }, + { + "epoch": 0.04657318939487943, + "grad_norm": 1162.5496826171875, + "learning_rate": 4.54949494949495e-06, + "loss": 215.2503, + "step": 5630 + }, + { + "epoch": 0.04665591264424867, + "grad_norm": 1568.9271240234375, + "learning_rate": 4.557575757575758e-06, + "loss": 198.5209, + "step": 5640 + }, + { + "epoch": 0.0467386358936179, + "grad_norm": 1163.0565185546875, + "learning_rate": 4.565656565656566e-06, + "loss": 199.4977, + "step": 5650 + }, + { + "epoch": 0.046821359142987136, + "grad_norm": 918.8221435546875, + "learning_rate": 4.573737373737374e-06, + "loss": 292.0952, + "step": 5660 + }, + { + "epoch": 0.04690408239235637, + "grad_norm": 1420.076171875, + "learning_rate": 4.581818181818183e-06, + "loss": 273.1453, + "step": 5670 + }, + { + "epoch": 0.04698680564172561, + "grad_norm": 751.3970947265625, + "learning_rate": 4.58989898989899e-06, + "loss": 227.5678, + "step": 5680 + }, + { + "epoch": 0.04706952889109484, + "grad_norm": 4106.84814453125, + "learning_rate": 4.597979797979799e-06, + "loss": 257.4646, + "step": 5690 + }, + { + "epoch": 0.04715225214046408, + "grad_norm": 1618.319091796875, + "learning_rate": 4.606060606060606e-06, + "loss": 242.2784, + "step": 5700 + }, + { + "epoch": 0.04723497538983331, + "grad_norm": 1682.986083984375, + "learning_rate": 4.614141414141414e-06, + "loss": 270.8784, + "step": 5710 + }, + { + "epoch": 0.04731769863920255, + "grad_norm": 2293.774658203125, + "learning_rate": 4.622222222222222e-06, + "loss": 310.2911, + "step": 5720 + }, + { + "epoch": 0.047400421888571784, + "grad_norm": 896.1268920898438, + "learning_rate": 4.630303030303031e-06, + "loss": 262.9586, + "step": 5730 + }, + { + "epoch": 0.04748314513794102, + "grad_norm": 1522.369384765625, + "learning_rate": 4.6383838383838384e-06, + "loss": 200.667, + "step": 5740 + }, + { + "epoch": 0.04756586838731025, + "grad_norm": 1271.126220703125, + "learning_rate": 4.646464646464647e-06, + "loss": 211.7462, + "step": 5750 + }, + { + "epoch": 0.04764859163667949, + "grad_norm": 1342.4813232421875, + "learning_rate": 4.654545454545455e-06, + "loss": 271.1495, + "step": 5760 + }, + { + "epoch": 0.047731314886048724, + "grad_norm": 1042.1790771484375, + "learning_rate": 4.662626262626263e-06, + "loss": 207.4777, + "step": 5770 + }, + { + "epoch": 0.04781403813541796, + "grad_norm": 1017.2898559570312, + "learning_rate": 4.670707070707071e-06, + "loss": 272.7402, + "step": 5780 + }, + { + "epoch": 0.04789676138478719, + "grad_norm": 1240.869140625, + "learning_rate": 4.678787878787879e-06, + "loss": 224.074, + "step": 5790 + }, + { + "epoch": 0.04797948463415643, + "grad_norm": 1895.0374755859375, + "learning_rate": 4.6868686868686874e-06, + "loss": 348.4789, + "step": 5800 + }, + { + "epoch": 0.048062207883525665, + 
"grad_norm": 1665.503662109375, + "learning_rate": 4.694949494949496e-06, + "loss": 287.3858, + "step": 5810 + }, + { + "epoch": 0.0481449311328949, + "grad_norm": 888.2938842773438, + "learning_rate": 4.7030303030303035e-06, + "loss": 186.7068, + "step": 5820 + }, + { + "epoch": 0.04822765438226414, + "grad_norm": 1077.885009765625, + "learning_rate": 4.711111111111111e-06, + "loss": 188.8208, + "step": 5830 + }, + { + "epoch": 0.04831037763163337, + "grad_norm": 1367.186279296875, + "learning_rate": 4.7191919191919195e-06, + "loss": 235.7021, + "step": 5840 + }, + { + "epoch": 0.048393100881002606, + "grad_norm": 880.5682373046875, + "learning_rate": 4.727272727272728e-06, + "loss": 150.1969, + "step": 5850 + }, + { + "epoch": 0.04847582413037184, + "grad_norm": 1563.840576171875, + "learning_rate": 4.735353535353536e-06, + "loss": 213.3092, + "step": 5860 + }, + { + "epoch": 0.04855854737974108, + "grad_norm": 1928.1456298828125, + "learning_rate": 4.743434343434344e-06, + "loss": 258.8421, + "step": 5870 + }, + { + "epoch": 0.04864127062911031, + "grad_norm": 1255.3079833984375, + "learning_rate": 4.751515151515152e-06, + "loss": 242.0784, + "step": 5880 + }, + { + "epoch": 0.048723993878479546, + "grad_norm": 1542.299072265625, + "learning_rate": 4.75959595959596e-06, + "loss": 303.837, + "step": 5890 + }, + { + "epoch": 0.04880671712784878, + "grad_norm": 2009.2406005859375, + "learning_rate": 4.7676767676767685e-06, + "loss": 155.5049, + "step": 5900 + }, + { + "epoch": 0.04888944037721802, + "grad_norm": 1701.25830078125, + "learning_rate": 4.775757575757576e-06, + "loss": 289.9675, + "step": 5910 + }, + { + "epoch": 0.04897216362658725, + "grad_norm": 1794.075439453125, + "learning_rate": 4.783838383838385e-06, + "loss": 206.6406, + "step": 5920 + }, + { + "epoch": 0.04905488687595649, + "grad_norm": 2225.670654296875, + "learning_rate": 4.791919191919192e-06, + "loss": 241.3904, + "step": 5930 + }, + { + "epoch": 0.04913761012532572, + "grad_norm": 1904.87939453125, + "learning_rate": 4.800000000000001e-06, + "loss": 245.3052, + "step": 5940 + }, + { + "epoch": 0.04922033337469496, + "grad_norm": 1126.554931640625, + "learning_rate": 4.808080808080808e-06, + "loss": 188.594, + "step": 5950 + }, + { + "epoch": 0.049303056624064194, + "grad_norm": 691.1046142578125, + "learning_rate": 4.816161616161617e-06, + "loss": 248.3712, + "step": 5960 + }, + { + "epoch": 0.04938577987343343, + "grad_norm": 1169.880859375, + "learning_rate": 4.824242424242424e-06, + "loss": 245.4409, + "step": 5970 + }, + { + "epoch": 0.04946850312280266, + "grad_norm": 1042.1029052734375, + "learning_rate": 4.832323232323233e-06, + "loss": 202.4587, + "step": 5980 + }, + { + "epoch": 0.0495512263721719, + "grad_norm": 1452.813720703125, + "learning_rate": 4.840404040404041e-06, + "loss": 257.6912, + "step": 5990 + }, + { + "epoch": 0.049633949621541135, + "grad_norm": 1247.24609375, + "learning_rate": 4.848484848484849e-06, + "loss": 210.8361, + "step": 6000 + }, + { + "epoch": 0.04971667287091037, + "grad_norm": 1635.0625, + "learning_rate": 4.856565656565657e-06, + "loss": 241.0676, + "step": 6010 + }, + { + "epoch": 0.0497993961202796, + "grad_norm": 1609.6522216796875, + "learning_rate": 4.864646464646466e-06, + "loss": 188.2872, + "step": 6020 + }, + { + "epoch": 0.04988211936964884, + "grad_norm": 1085.433837890625, + "learning_rate": 4.872727272727273e-06, + "loss": 194.121, + "step": 6030 + }, + { + "epoch": 0.049964842619018075, + "grad_norm": 1211.4505615234375, + "learning_rate": 
4.880808080808081e-06, + "loss": 237.9093, + "step": 6040 + }, + { + "epoch": 0.05004756586838731, + "grad_norm": 1017.5370483398438, + "learning_rate": 4.888888888888889e-06, + "loss": 298.4579, + "step": 6050 + }, + { + "epoch": 0.05013028911775654, + "grad_norm": 1557.1414794921875, + "learning_rate": 4.896969696969697e-06, + "loss": 254.7002, + "step": 6060 + }, + { + "epoch": 0.05021301236712578, + "grad_norm": 966.5237426757812, + "learning_rate": 4.905050505050505e-06, + "loss": 218.6844, + "step": 6070 + }, + { + "epoch": 0.050295735616495016, + "grad_norm": 1119.049072265625, + "learning_rate": 4.913131313131314e-06, + "loss": 185.6555, + "step": 6080 + }, + { + "epoch": 0.05037845886586425, + "grad_norm": 813.0082397460938, + "learning_rate": 4.9212121212121214e-06, + "loss": 193.8167, + "step": 6090 + }, + { + "epoch": 0.05046118211523349, + "grad_norm": 1165.0540771484375, + "learning_rate": 4.92929292929293e-06, + "loss": 270.3936, + "step": 6100 + }, + { + "epoch": 0.05054390536460272, + "grad_norm": 774.7178344726562, + "learning_rate": 4.937373737373738e-06, + "loss": 187.8722, + "step": 6110 + }, + { + "epoch": 0.050626628613971957, + "grad_norm": 1326.367919921875, + "learning_rate": 4.945454545454546e-06, + "loss": 243.3256, + "step": 6120 + }, + { + "epoch": 0.05070935186334119, + "grad_norm": 1486.3304443359375, + "learning_rate": 4.953535353535354e-06, + "loss": 186.0064, + "step": 6130 + }, + { + "epoch": 0.05079207511271043, + "grad_norm": 1937.40234375, + "learning_rate": 4.961616161616162e-06, + "loss": 246.7979, + "step": 6140 + }, + { + "epoch": 0.050874798362079664, + "grad_norm": 1517.21923828125, + "learning_rate": 4.9696969696969696e-06, + "loss": 231.4793, + "step": 6150 + }, + { + "epoch": 0.0509575216114489, + "grad_norm": 5960.52197265625, + "learning_rate": 4.977777777777778e-06, + "loss": 197.895, + "step": 6160 + }, + { + "epoch": 0.05104024486081813, + "grad_norm": 1942.2510986328125, + "learning_rate": 4.9858585858585865e-06, + "loss": 300.5423, + "step": 6170 + }, + { + "epoch": 0.05112296811018737, + "grad_norm": 1276.1055908203125, + "learning_rate": 4.993939393939394e-06, + "loss": 288.2531, + "step": 6180 + }, + { + "epoch": 0.051205691359556604, + "grad_norm": 2154.565673828125, + "learning_rate": 5.0020202020202025e-06, + "loss": 259.9872, + "step": 6190 + }, + { + "epoch": 0.05128841460892584, + "grad_norm": 961.3928833007812, + "learning_rate": 5.010101010101011e-06, + "loss": 177.8174, + "step": 6200 + }, + { + "epoch": 0.05137113785829507, + "grad_norm": 1438.574462890625, + "learning_rate": 5.0181818181818186e-06, + "loss": 197.5066, + "step": 6210 + }, + { + "epoch": 0.05145386110766431, + "grad_norm": 766.4448852539062, + "learning_rate": 5.026262626262627e-06, + "loss": 257.3491, + "step": 6220 + }, + { + "epoch": 0.051536584357033545, + "grad_norm": 793.5451049804688, + "learning_rate": 5.034343434343435e-06, + "loss": 184.9512, + "step": 6230 + }, + { + "epoch": 0.05161930760640278, + "grad_norm": 869.6885375976562, + "learning_rate": 5.042424242424243e-06, + "loss": 253.1606, + "step": 6240 + }, + { + "epoch": 0.05170203085577201, + "grad_norm": 1229.890869140625, + "learning_rate": 5.0505050505050515e-06, + "loss": 184.6765, + "step": 6250 + }, + { + "epoch": 0.05178475410514125, + "grad_norm": 804.93994140625, + "learning_rate": 5.058585858585859e-06, + "loss": 162.1275, + "step": 6260 + }, + { + "epoch": 0.051867477354510486, + "grad_norm": 1098.615478515625, + "learning_rate": 5.0666666666666676e-06, + "loss": 174.5565, + 
"step": 6270 + }, + { + "epoch": 0.05195020060387972, + "grad_norm": 570.5386352539062, + "learning_rate": 5.074747474747476e-06, + "loss": 182.8037, + "step": 6280 + }, + { + "epoch": 0.05203292385324895, + "grad_norm": 3363.004150390625, + "learning_rate": 5.082828282828284e-06, + "loss": 265.7634, + "step": 6290 + }, + { + "epoch": 0.05211564710261819, + "grad_norm": 2040.7962646484375, + "learning_rate": 5.090909090909091e-06, + "loss": 240.1976, + "step": 6300 + }, + { + "epoch": 0.052198370351987426, + "grad_norm": 1316.55908203125, + "learning_rate": 5.098989898989899e-06, + "loss": 249.6734, + "step": 6310 + }, + { + "epoch": 0.05228109360135666, + "grad_norm": 2261.989013671875, + "learning_rate": 5.107070707070707e-06, + "loss": 208.5467, + "step": 6320 + }, + { + "epoch": 0.0523638168507259, + "grad_norm": 1246.7120361328125, + "learning_rate": 5.115151515151515e-06, + "loss": 207.1795, + "step": 6330 + }, + { + "epoch": 0.05244654010009513, + "grad_norm": 1235.8924560546875, + "learning_rate": 5.123232323232323e-06, + "loss": 231.5552, + "step": 6340 + }, + { + "epoch": 0.05252926334946437, + "grad_norm": 805.1367797851562, + "learning_rate": 5.131313131313132e-06, + "loss": 217.3461, + "step": 6350 + }, + { + "epoch": 0.0526119865988336, + "grad_norm": 902.8432006835938, + "learning_rate": 5.139393939393939e-06, + "loss": 250.8667, + "step": 6360 + }, + { + "epoch": 0.05269470984820284, + "grad_norm": 1314.75634765625, + "learning_rate": 5.147474747474748e-06, + "loss": 188.1431, + "step": 6370 + }, + { + "epoch": 0.052777433097572074, + "grad_norm": 1282.649169921875, + "learning_rate": 5.155555555555556e-06, + "loss": 198.1481, + "step": 6380 + }, + { + "epoch": 0.05286015634694131, + "grad_norm": 1167.904052734375, + "learning_rate": 5.163636363636364e-06, + "loss": 192.307, + "step": 6390 + }, + { + "epoch": 0.05294287959631054, + "grad_norm": 686.8750610351562, + "learning_rate": 5.171717171717172e-06, + "loss": 225.3385, + "step": 6400 + }, + { + "epoch": 0.05302560284567978, + "grad_norm": 1144.7125244140625, + "learning_rate": 5.17979797979798e-06, + "loss": 188.2769, + "step": 6410 + }, + { + "epoch": 0.053108326095049015, + "grad_norm": 1010.5237426757812, + "learning_rate": 5.187878787878788e-06, + "loss": 225.7106, + "step": 6420 + }, + { + "epoch": 0.05319104934441825, + "grad_norm": 1336.3463134765625, + "learning_rate": 5.195959595959597e-06, + "loss": 208.7707, + "step": 6430 + }, + { + "epoch": 0.05327377259378748, + "grad_norm": 1576.3480224609375, + "learning_rate": 5.204040404040404e-06, + "loss": 236.1072, + "step": 6440 + }, + { + "epoch": 0.05335649584315672, + "grad_norm": 1445.076416015625, + "learning_rate": 5.212121212121213e-06, + "loss": 271.4749, + "step": 6450 + }, + { + "epoch": 0.053439219092525955, + "grad_norm": 895.9810791015625, + "learning_rate": 5.220202020202021e-06, + "loss": 258.855, + "step": 6460 + }, + { + "epoch": 0.05352194234189519, + "grad_norm": 952.8675537109375, + "learning_rate": 5.228282828282829e-06, + "loss": 175.3387, + "step": 6470 + }, + { + "epoch": 0.05360466559126442, + "grad_norm": 1066.5716552734375, + "learning_rate": 5.236363636363637e-06, + "loss": 218.2292, + "step": 6480 + }, + { + "epoch": 0.05368738884063366, + "grad_norm": 849.3695678710938, + "learning_rate": 5.244444444444445e-06, + "loss": 221.0137, + "step": 6490 + }, + { + "epoch": 0.053770112090002896, + "grad_norm": 1675.2149658203125, + "learning_rate": 5.252525252525253e-06, + "loss": 223.2969, + "step": 6500 + }, + { + "epoch": 
0.05385283533937213, + "grad_norm": 1693.541748046875, + "learning_rate": 5.26060606060606e-06, + "loss": 229.6179, + "step": 6510 + }, + { + "epoch": 0.05393555858874136, + "grad_norm": 1324.1651611328125, + "learning_rate": 5.268686868686869e-06, + "loss": 244.8779, + "step": 6520 + }, + { + "epoch": 0.0540182818381106, + "grad_norm": 1403.1368408203125, + "learning_rate": 5.276767676767677e-06, + "loss": 268.8457, + "step": 6530 + }, + { + "epoch": 0.054101005087479836, + "grad_norm": 1165.638671875, + "learning_rate": 5.284848484848485e-06, + "loss": 239.8147, + "step": 6540 + }, + { + "epoch": 0.05418372833684907, + "grad_norm": 995.2945556640625, + "learning_rate": 5.292929292929293e-06, + "loss": 229.9961, + "step": 6550 + }, + { + "epoch": 0.0542664515862183, + "grad_norm": 1505.5985107421875, + "learning_rate": 5.3010101010101016e-06, + "loss": 223.1589, + "step": 6560 + }, + { + "epoch": 0.054349174835587544, + "grad_norm": 1667.1300048828125, + "learning_rate": 5.309090909090909e-06, + "loss": 217.6828, + "step": 6570 + }, + { + "epoch": 0.05443189808495678, + "grad_norm": 1639.0364990234375, + "learning_rate": 5.317171717171718e-06, + "loss": 205.0005, + "step": 6580 + }, + { + "epoch": 0.05451462133432601, + "grad_norm": 1414.21240234375, + "learning_rate": 5.325252525252525e-06, + "loss": 215.9356, + "step": 6590 + }, + { + "epoch": 0.05459734458369525, + "grad_norm": 586.2081298828125, + "learning_rate": 5.333333333333334e-06, + "loss": 227.373, + "step": 6600 + }, + { + "epoch": 0.054680067833064484, + "grad_norm": 2097.269775390625, + "learning_rate": 5.341414141414142e-06, + "loss": 234.4245, + "step": 6610 + }, + { + "epoch": 0.05476279108243372, + "grad_norm": 1416.9189453125, + "learning_rate": 5.34949494949495e-06, + "loss": 262.4427, + "step": 6620 + }, + { + "epoch": 0.05484551433180295, + "grad_norm": 1144.749755859375, + "learning_rate": 5.357575757575758e-06, + "loss": 201.3394, + "step": 6630 + }, + { + "epoch": 0.05492823758117219, + "grad_norm": 1287.0030517578125, + "learning_rate": 5.365656565656567e-06, + "loss": 281.6339, + "step": 6640 + }, + { + "epoch": 0.055010960830541425, + "grad_norm": 1555.04150390625, + "learning_rate": 5.373737373737374e-06, + "loss": 225.9727, + "step": 6650 + }, + { + "epoch": 0.05509368407991066, + "grad_norm": 1129.1629638671875, + "learning_rate": 5.381818181818183e-06, + "loss": 159.6996, + "step": 6660 + }, + { + "epoch": 0.05517640732927989, + "grad_norm": 1159.3424072265625, + "learning_rate": 5.38989898989899e-06, + "loss": 226.1631, + "step": 6670 + }, + { + "epoch": 0.05525913057864913, + "grad_norm": 1065.8494873046875, + "learning_rate": 5.397979797979799e-06, + "loss": 213.2366, + "step": 6680 + }, + { + "epoch": 0.055341853828018366, + "grad_norm": 801.6089477539062, + "learning_rate": 5.406060606060607e-06, + "loss": 180.9512, + "step": 6690 + }, + { + "epoch": 0.0554245770773876, + "grad_norm": 1254.9515380859375, + "learning_rate": 5.414141414141415e-06, + "loss": 223.2975, + "step": 6700 + }, + { + "epoch": 0.05550730032675683, + "grad_norm": 1734.849609375, + "learning_rate": 5.422222222222223e-06, + "loss": 179.4333, + "step": 6710 + }, + { + "epoch": 0.05559002357612607, + "grad_norm": 1987.9564208984375, + "learning_rate": 5.430303030303032e-06, + "loss": 248.2462, + "step": 6720 + }, + { + "epoch": 0.055672746825495306, + "grad_norm": 1181.2037353515625, + "learning_rate": 5.438383838383838e-06, + "loss": 182.878, + "step": 6730 + }, + { + "epoch": 0.05575547007486454, + "grad_norm": 1901.66015625, + 
"learning_rate": 5.446464646464647e-06, + "loss": 251.8006, + "step": 6740 + }, + { + "epoch": 0.05583819332423377, + "grad_norm": 632.8333740234375, + "learning_rate": 5.4545454545454545e-06, + "loss": 196.6042, + "step": 6750 + }, + { + "epoch": 0.05592091657360301, + "grad_norm": 604.5985717773438, + "learning_rate": 5.462626262626263e-06, + "loss": 177.947, + "step": 6760 + }, + { + "epoch": 0.05600363982297225, + "grad_norm": 1062.9954833984375, + "learning_rate": 5.4707070707070705e-06, + "loss": 312.2176, + "step": 6770 + }, + { + "epoch": 0.05608636307234148, + "grad_norm": 1964.1070556640625, + "learning_rate": 5.478787878787879e-06, + "loss": 192.9068, + "step": 6780 + }, + { + "epoch": 0.056169086321710714, + "grad_norm": 1671.50146484375, + "learning_rate": 5.486868686868687e-06, + "loss": 236.6793, + "step": 6790 + }, + { + "epoch": 0.056251809571079954, + "grad_norm": 1363.7489013671875, + "learning_rate": 5.494949494949495e-06, + "loss": 207.5258, + "step": 6800 + }, + { + "epoch": 0.05633453282044919, + "grad_norm": 1217.836669921875, + "learning_rate": 5.5030303030303034e-06, + "loss": 185.7013, + "step": 6810 + }, + { + "epoch": 0.05641725606981842, + "grad_norm": 1585.8988037109375, + "learning_rate": 5.511111111111112e-06, + "loss": 217.5803, + "step": 6820 + }, + { + "epoch": 0.056499979319187654, + "grad_norm": 983.3916625976562, + "learning_rate": 5.5191919191919195e-06, + "loss": 224.2803, + "step": 6830 + }, + { + "epoch": 0.056582702568556895, + "grad_norm": 1328.7664794921875, + "learning_rate": 5.527272727272728e-06, + "loss": 213.4681, + "step": 6840 + }, + { + "epoch": 0.05666542581792613, + "grad_norm": 908.5962524414062, + "learning_rate": 5.5353535353535355e-06, + "loss": 191.1099, + "step": 6850 + }, + { + "epoch": 0.05674814906729536, + "grad_norm": 965.225341796875, + "learning_rate": 5.543434343434344e-06, + "loss": 180.1947, + "step": 6860 + }, + { + "epoch": 0.0568308723166646, + "grad_norm": 1269.7061767578125, + "learning_rate": 5.5515151515151524e-06, + "loss": 235.1135, + "step": 6870 + }, + { + "epoch": 0.056913595566033835, + "grad_norm": 1158.512939453125, + "learning_rate": 5.55959595959596e-06, + "loss": 215.2743, + "step": 6880 + }, + { + "epoch": 0.05699631881540307, + "grad_norm": 1231.1844482421875, + "learning_rate": 5.5676767676767685e-06, + "loss": 327.6613, + "step": 6890 + }, + { + "epoch": 0.0570790420647723, + "grad_norm": 1180.1192626953125, + "learning_rate": 5.575757575757577e-06, + "loss": 228.0458, + "step": 6900 + }, + { + "epoch": 0.05716176531414154, + "grad_norm": 952.682861328125, + "learning_rate": 5.5838383838383845e-06, + "loss": 250.6257, + "step": 6910 + }, + { + "epoch": 0.057244488563510776, + "grad_norm": 16013.0146484375, + "learning_rate": 5.591919191919193e-06, + "loss": 250.4285, + "step": 6920 + }, + { + "epoch": 0.05732721181288001, + "grad_norm": 1266.31787109375, + "learning_rate": 5.600000000000001e-06, + "loss": 213.8897, + "step": 6930 + }, + { + "epoch": 0.05740993506224924, + "grad_norm": 1832.4088134765625, + "learning_rate": 5.608080808080808e-06, + "loss": 218.5001, + "step": 6940 + }, + { + "epoch": 0.05749265831161848, + "grad_norm": 1213.3984375, + "learning_rate": 5.616161616161616e-06, + "loss": 211.7139, + "step": 6950 + }, + { + "epoch": 0.057575381560987716, + "grad_norm": 1369.9437255859375, + "learning_rate": 5.624242424242424e-06, + "loss": 192.9413, + "step": 6960 + }, + { + "epoch": 0.05765810481035695, + "grad_norm": 1018.5819091796875, + "learning_rate": 5.632323232323233e-06, + 
"loss": 150.2073, + "step": 6970 + }, + { + "epoch": 0.05774082805972618, + "grad_norm": 1435.1353759765625, + "learning_rate": 5.64040404040404e-06, + "loss": 187.5981, + "step": 6980 + }, + { + "epoch": 0.057823551309095424, + "grad_norm": 1339.41650390625, + "learning_rate": 5.648484848484849e-06, + "loss": 165.7803, + "step": 6990 + }, + { + "epoch": 0.05790627455846466, + "grad_norm": 3150.48974609375, + "learning_rate": 5.656565656565657e-06, + "loss": 183.762, + "step": 7000 + }, + { + "epoch": 0.05798899780783389, + "grad_norm": 1491.4512939453125, + "learning_rate": 5.664646464646465e-06, + "loss": 205.812, + "step": 7010 + }, + { + "epoch": 0.058071721057203124, + "grad_norm": 1045.4873046875, + "learning_rate": 5.672727272727273e-06, + "loss": 222.1431, + "step": 7020 + }, + { + "epoch": 0.058154444306572364, + "grad_norm": 1604.70556640625, + "learning_rate": 5.680808080808081e-06, + "loss": 312.9387, + "step": 7030 + }, + { + "epoch": 0.0582371675559416, + "grad_norm": 794.0164184570312, + "learning_rate": 5.688888888888889e-06, + "loss": 148.5473, + "step": 7040 + }, + { + "epoch": 0.05831989080531083, + "grad_norm": 1710.36328125, + "learning_rate": 5.696969696969698e-06, + "loss": 187.5921, + "step": 7050 + }, + { + "epoch": 0.058402614054680065, + "grad_norm": 1974.2613525390625, + "learning_rate": 5.705050505050505e-06, + "loss": 192.2352, + "step": 7060 + }, + { + "epoch": 0.058485337304049305, + "grad_norm": 1533.34619140625, + "learning_rate": 5.713131313131314e-06, + "loss": 189.592, + "step": 7070 + }, + { + "epoch": 0.05856806055341854, + "grad_norm": 1127.27392578125, + "learning_rate": 5.721212121212122e-06, + "loss": 211.6275, + "step": 7080 + }, + { + "epoch": 0.05865078380278777, + "grad_norm": 1902.27001953125, + "learning_rate": 5.72929292929293e-06, + "loss": 189.9227, + "step": 7090 + }, + { + "epoch": 0.05873350705215701, + "grad_norm": 1564.533203125, + "learning_rate": 5.737373737373738e-06, + "loss": 231.5597, + "step": 7100 + }, + { + "epoch": 0.058816230301526246, + "grad_norm": 2943.590576171875, + "learning_rate": 5.745454545454546e-06, + "loss": 177.4852, + "step": 7110 + }, + { + "epoch": 0.05889895355089548, + "grad_norm": 1309.1622314453125, + "learning_rate": 5.753535353535354e-06, + "loss": 165.6322, + "step": 7120 + }, + { + "epoch": 0.05898167680026471, + "grad_norm": 1501.813720703125, + "learning_rate": 5.761616161616163e-06, + "loss": 189.5367, + "step": 7130 + }, + { + "epoch": 0.05906440004963395, + "grad_norm": 1574.177978515625, + "learning_rate": 5.76969696969697e-06, + "loss": 234.3292, + "step": 7140 + }, + { + "epoch": 0.059147123299003186, + "grad_norm": 1052.1170654296875, + "learning_rate": 5.777777777777778e-06, + "loss": 214.4601, + "step": 7150 + }, + { + "epoch": 0.05922984654837242, + "grad_norm": 1018.6898803710938, + "learning_rate": 5.785858585858586e-06, + "loss": 156.9678, + "step": 7160 + }, + { + "epoch": 0.05931256979774165, + "grad_norm": 1917.61865234375, + "learning_rate": 5.793939393939394e-06, + "loss": 233.6657, + "step": 7170 + }, + { + "epoch": 0.05939529304711089, + "grad_norm": 1654.3441162109375, + "learning_rate": 5.8020202020202025e-06, + "loss": 173.1022, + "step": 7180 + }, + { + "epoch": 0.05947801629648013, + "grad_norm": 1147.4951171875, + "learning_rate": 5.81010101010101e-06, + "loss": 209.7125, + "step": 7190 + }, + { + "epoch": 0.05956073954584936, + "grad_norm": 1505.357177734375, + "learning_rate": 5.8181818181818185e-06, + "loss": 239.9892, + "step": 7200 + }, + { + "epoch": 
0.059643462795218594, + "grad_norm": 3524.7958984375, + "learning_rate": 5.826262626262626e-06, + "loss": 199.2161, + "step": 7210 + }, + { + "epoch": 0.059726186044587834, + "grad_norm": 2917.4033203125, + "learning_rate": 5.834343434343435e-06, + "loss": 282.2665, + "step": 7220 + }, + { + "epoch": 0.05980890929395707, + "grad_norm": 976.106689453125, + "learning_rate": 5.842424242424243e-06, + "loss": 220.0685, + "step": 7230 + }, + { + "epoch": 0.0598916325433263, + "grad_norm": 1532.435791015625, + "learning_rate": 5.850505050505051e-06, + "loss": 247.5289, + "step": 7240 + }, + { + "epoch": 0.059974355792695534, + "grad_norm": 868.5018920898438, + "learning_rate": 5.858585858585859e-06, + "loss": 204.2799, + "step": 7250 + }, + { + "epoch": 0.060057079042064775, + "grad_norm": 1144.6495361328125, + "learning_rate": 5.8666666666666675e-06, + "loss": 233.1151, + "step": 7260 + }, + { + "epoch": 0.06013980229143401, + "grad_norm": 1262.310302734375, + "learning_rate": 5.874747474747475e-06, + "loss": 215.0127, + "step": 7270 + }, + { + "epoch": 0.06022252554080324, + "grad_norm": 1465.82568359375, + "learning_rate": 5.882828282828284e-06, + "loss": 214.6379, + "step": 7280 + }, + { + "epoch": 0.060305248790172475, + "grad_norm": 890.1478271484375, + "learning_rate": 5.890909090909091e-06, + "loss": 212.3409, + "step": 7290 + }, + { + "epoch": 0.060387972039541715, + "grad_norm": 804.8695068359375, + "learning_rate": 5.8989898989899e-06, + "loss": 161.0226, + "step": 7300 + }, + { + "epoch": 0.06047069528891095, + "grad_norm": 1049.8092041015625, + "learning_rate": 5.907070707070708e-06, + "loss": 228.0541, + "step": 7310 + }, + { + "epoch": 0.06055341853828018, + "grad_norm": 1314.708984375, + "learning_rate": 5.915151515151516e-06, + "loss": 169.0287, + "step": 7320 + }, + { + "epoch": 0.060636141787649415, + "grad_norm": 877.56982421875, + "learning_rate": 5.923232323232324e-06, + "loss": 182.722, + "step": 7330 + }, + { + "epoch": 0.060718865037018656, + "grad_norm": 1187.692138671875, + "learning_rate": 5.9313131313131326e-06, + "loss": 216.9126, + "step": 7340 + }, + { + "epoch": 0.06080158828638789, + "grad_norm": 910.7957763671875, + "learning_rate": 5.93939393939394e-06, + "loss": 156.4217, + "step": 7350 + }, + { + "epoch": 0.06088431153575712, + "grad_norm": 937.8931274414062, + "learning_rate": 5.947474747474749e-06, + "loss": 206.2811, + "step": 7360 + }, + { + "epoch": 0.06096703478512636, + "grad_norm": 1068.356201171875, + "learning_rate": 5.955555555555555e-06, + "loss": 209.255, + "step": 7370 + }, + { + "epoch": 0.061049758034495596, + "grad_norm": 1415.2979736328125, + "learning_rate": 5.963636363636364e-06, + "loss": 180.4899, + "step": 7380 + }, + { + "epoch": 0.06113248128386483, + "grad_norm": 1216.490966796875, + "learning_rate": 5.9717171717171714e-06, + "loss": 181.2195, + "step": 7390 + }, + { + "epoch": 0.06121520453323406, + "grad_norm": 1969.770751953125, + "learning_rate": 5.97979797979798e-06, + "loss": 185.2952, + "step": 7400 + }, + { + "epoch": 0.061297927782603304, + "grad_norm": 1073.1572265625, + "learning_rate": 5.987878787878788e-06, + "loss": 194.6978, + "step": 7410 + }, + { + "epoch": 0.06138065103197254, + "grad_norm": 1329.2269287109375, + "learning_rate": 5.995959595959596e-06, + "loss": 225.0991, + "step": 7420 + }, + { + "epoch": 0.06146337428134177, + "grad_norm": 1447.0704345703125, + "learning_rate": 6.004040404040404e-06, + "loss": 222.2881, + "step": 7430 + }, + { + "epoch": 0.061546097530711004, + "grad_norm": 1847.17578125, + 
"learning_rate": 6.012121212121213e-06, + "loss": 250.8386, + "step": 7440 + }, + { + "epoch": 0.061628820780080244, + "grad_norm": 1011.4767456054688, + "learning_rate": 6.0202020202020204e-06, + "loss": 217.8489, + "step": 7450 + }, + { + "epoch": 0.06171154402944948, + "grad_norm": 905.0313110351562, + "learning_rate": 6.028282828282829e-06, + "loss": 176.8829, + "step": 7460 + }, + { + "epoch": 0.06179426727881871, + "grad_norm": 1627.3341064453125, + "learning_rate": 6.0363636363636365e-06, + "loss": 245.858, + "step": 7470 + }, + { + "epoch": 0.061876990528187945, + "grad_norm": 1333.339111328125, + "learning_rate": 6.044444444444445e-06, + "loss": 172.512, + "step": 7480 + }, + { + "epoch": 0.061959713777557185, + "grad_norm": 981.724609375, + "learning_rate": 6.052525252525253e-06, + "loss": 188.6218, + "step": 7490 + }, + { + "epoch": 0.06204243702692642, + "grad_norm": 963.2952880859375, + "learning_rate": 6.060606060606061e-06, + "loss": 200.9197, + "step": 7500 + }, + { + "epoch": 0.06212516027629565, + "grad_norm": 1976.6124267578125, + "learning_rate": 6.068686868686869e-06, + "loss": 206.6058, + "step": 7510 + }, + { + "epoch": 0.062207883525664885, + "grad_norm": 964.7189331054688, + "learning_rate": 6.076767676767678e-06, + "loss": 244.5135, + "step": 7520 + }, + { + "epoch": 0.062290606775034126, + "grad_norm": 940.2135009765625, + "learning_rate": 6.0848484848484855e-06, + "loss": 286.4893, + "step": 7530 + }, + { + "epoch": 0.06237333002440336, + "grad_norm": 1086.095947265625, + "learning_rate": 6.092929292929294e-06, + "loss": 166.7001, + "step": 7540 + }, + { + "epoch": 0.06245605327377259, + "grad_norm": 824.2728881835938, + "learning_rate": 6.1010101010101015e-06, + "loss": 178.9928, + "step": 7550 + }, + { + "epoch": 0.06253877652314183, + "grad_norm": 1227.130859375, + "learning_rate": 6.10909090909091e-06, + "loss": 188.424, + "step": 7560 + }, + { + "epoch": 0.06262149977251107, + "grad_norm": 943.979736328125, + "learning_rate": 6.117171717171718e-06, + "loss": 170.1881, + "step": 7570 + }, + { + "epoch": 0.06270422302188029, + "grad_norm": 893.2919921875, + "learning_rate": 6.125252525252525e-06, + "loss": 203.6881, + "step": 7580 + }, + { + "epoch": 0.06278694627124953, + "grad_norm": 1502.2542724609375, + "learning_rate": 6.133333333333334e-06, + "loss": 227.4439, + "step": 7590 + }, + { + "epoch": 0.06286966952061877, + "grad_norm": 2379.823974609375, + "learning_rate": 6.141414141414141e-06, + "loss": 216.3211, + "step": 7600 + }, + { + "epoch": 0.062952392769988, + "grad_norm": 2946.320068359375, + "learning_rate": 6.14949494949495e-06, + "loss": 181.0182, + "step": 7610 + }, + { + "epoch": 0.06303511601935724, + "grad_norm": 1127.286865234375, + "learning_rate": 6.157575757575758e-06, + "loss": 219.9298, + "step": 7620 + }, + { + "epoch": 0.06311783926872648, + "grad_norm": 2195.520751953125, + "learning_rate": 6.165656565656566e-06, + "loss": 225.9704, + "step": 7630 + }, + { + "epoch": 0.06320056251809571, + "grad_norm": 1115.85107421875, + "learning_rate": 6.173737373737374e-06, + "loss": 192.5431, + "step": 7640 + }, + { + "epoch": 0.06328328576746495, + "grad_norm": 1610.82861328125, + "learning_rate": 6.181818181818182e-06, + "loss": 234.5798, + "step": 7650 + }, + { + "epoch": 0.06336600901683419, + "grad_norm": 2228.57080078125, + "learning_rate": 6.18989898989899e-06, + "loss": 203.8422, + "step": 7660 + }, + { + "epoch": 0.06344873226620341, + "grad_norm": 1072.0853271484375, + "learning_rate": 6.197979797979799e-06, + "loss": 136.4943, + 
"step": 7670 + }, + { + "epoch": 0.06353145551557265, + "grad_norm": 987.9502563476562, + "learning_rate": 6.206060606060606e-06, + "loss": 176.214, + "step": 7680 + }, + { + "epoch": 0.06361417876494188, + "grad_norm": 1846.4237060546875, + "learning_rate": 6.214141414141415e-06, + "loss": 328.0838, + "step": 7690 + }, + { + "epoch": 0.06369690201431112, + "grad_norm": 1142.6070556640625, + "learning_rate": 6.222222222222223e-06, + "loss": 197.7707, + "step": 7700 + }, + { + "epoch": 0.06377962526368036, + "grad_norm": 2267.01904296875, + "learning_rate": 6.230303030303031e-06, + "loss": 246.8396, + "step": 7710 + }, + { + "epoch": 0.06386234851304959, + "grad_norm": 1803.31591796875, + "learning_rate": 6.238383838383839e-06, + "loss": 156.6524, + "step": 7720 + }, + { + "epoch": 0.06394507176241883, + "grad_norm": 1524.9769287109375, + "learning_rate": 6.246464646464647e-06, + "loss": 252.2839, + "step": 7730 + }, + { + "epoch": 0.06402779501178807, + "grad_norm": 1911.2991943359375, + "learning_rate": 6.254545454545455e-06, + "loss": 185.8043, + "step": 7740 + }, + { + "epoch": 0.0641105182611573, + "grad_norm": 1456.3634033203125, + "learning_rate": 6.262626262626264e-06, + "loss": 189.6922, + "step": 7750 + }, + { + "epoch": 0.06419324151052654, + "grad_norm": 872.6129150390625, + "learning_rate": 6.270707070707071e-06, + "loss": 211.5073, + "step": 7760 + }, + { + "epoch": 0.06427596475989576, + "grad_norm": 2161.432861328125, + "learning_rate": 6.27878787878788e-06, + "loss": 227.991, + "step": 7770 + }, + { + "epoch": 0.064358688009265, + "grad_norm": 1427.72900390625, + "learning_rate": 6.286868686868688e-06, + "loss": 187.7627, + "step": 7780 + }, + { + "epoch": 0.06444141125863424, + "grad_norm": 1048.0509033203125, + "learning_rate": 6.294949494949495e-06, + "loss": 216.9241, + "step": 7790 + }, + { + "epoch": 0.06452413450800347, + "grad_norm": 831.1154174804688, + "learning_rate": 6.303030303030303e-06, + "loss": 156.4739, + "step": 7800 + }, + { + "epoch": 0.06460685775737271, + "grad_norm": 1421.5413818359375, + "learning_rate": 6.311111111111111e-06, + "loss": 240.7841, + "step": 7810 + }, + { + "epoch": 0.06468958100674195, + "grad_norm": 1609.0704345703125, + "learning_rate": 6.3191919191919195e-06, + "loss": 202.0292, + "step": 7820 + }, + { + "epoch": 0.06477230425611118, + "grad_norm": 1056.16064453125, + "learning_rate": 6.327272727272727e-06, + "loss": 150.6357, + "step": 7830 + }, + { + "epoch": 0.06485502750548042, + "grad_norm": 3895.295166015625, + "learning_rate": 6.3353535353535355e-06, + "loss": 166.0538, + "step": 7840 + }, + { + "epoch": 0.06493775075484966, + "grad_norm": 1299.444580078125, + "learning_rate": 6.343434343434344e-06, + "loss": 160.5584, + "step": 7850 + }, + { + "epoch": 0.06502047400421888, + "grad_norm": 712.9761352539062, + "learning_rate": 6.3515151515151516e-06, + "loss": 197.3979, + "step": 7860 + }, + { + "epoch": 0.06510319725358812, + "grad_norm": 1037.716064453125, + "learning_rate": 6.35959595959596e-06, + "loss": 200.6918, + "step": 7870 + }, + { + "epoch": 0.06518592050295735, + "grad_norm": 1494.057373046875, + "learning_rate": 6.3676767676767685e-06, + "loss": 238.1615, + "step": 7880 + }, + { + "epoch": 0.06526864375232659, + "grad_norm": 1253.9837646484375, + "learning_rate": 6.375757575757576e-06, + "loss": 190.7486, + "step": 7890 + }, + { + "epoch": 0.06535136700169583, + "grad_norm": 1362.12353515625, + "learning_rate": 6.3838383838383845e-06, + "loss": 214.7477, + "step": 7900 + }, + { + "epoch": 0.06543409025106506, + 
"grad_norm": 890.4193725585938, + "learning_rate": 6.391919191919192e-06, + "loss": 170.1652, + "step": 7910 + }, + { + "epoch": 0.0655168135004343, + "grad_norm": 1644.9490966796875, + "learning_rate": 6.4000000000000006e-06, + "loss": 175.0597, + "step": 7920 + }, + { + "epoch": 0.06559953674980354, + "grad_norm": 777.9044799804688, + "learning_rate": 6.408080808080809e-06, + "loss": 238.5318, + "step": 7930 + }, + { + "epoch": 0.06568225999917277, + "grad_norm": 1498.41455078125, + "learning_rate": 6.416161616161617e-06, + "loss": 183.5011, + "step": 7940 + }, + { + "epoch": 0.065764983248542, + "grad_norm": 1392.91748046875, + "learning_rate": 6.424242424242425e-06, + "loss": 164.5937, + "step": 7950 + }, + { + "epoch": 0.06584770649791123, + "grad_norm": 1324.7021484375, + "learning_rate": 6.4323232323232335e-06, + "loss": 230.2011, + "step": 7960 + }, + { + "epoch": 0.06593042974728047, + "grad_norm": 1371.1768798828125, + "learning_rate": 6.440404040404041e-06, + "loss": 226.2801, + "step": 7970 + }, + { + "epoch": 0.06601315299664971, + "grad_norm": 2229.455078125, + "learning_rate": 6.4484848484848496e-06, + "loss": 196.2276, + "step": 7980 + }, + { + "epoch": 0.06609587624601894, + "grad_norm": 1298.6649169921875, + "learning_rate": 6.456565656565658e-06, + "loss": 211.2707, + "step": 7990 + }, + { + "epoch": 0.06617859949538818, + "grad_norm": 784.8013305664062, + "learning_rate": 6.464646464646466e-06, + "loss": 207.776, + "step": 8000 + }, + { + "epoch": 0.06626132274475742, + "grad_norm": 911.982666015625, + "learning_rate": 6.472727272727272e-06, + "loss": 162.3888, + "step": 8010 + }, + { + "epoch": 0.06634404599412665, + "grad_norm": 1066.7635498046875, + "learning_rate": 6.480808080808081e-06, + "loss": 182.5109, + "step": 8020 + }, + { + "epoch": 0.06642676924349589, + "grad_norm": 733.5493774414062, + "learning_rate": 6.488888888888889e-06, + "loss": 140.7046, + "step": 8030 + }, + { + "epoch": 0.06650949249286511, + "grad_norm": 1074.85888671875, + "learning_rate": 6.496969696969697e-06, + "loss": 182.7351, + "step": 8040 + }, + { + "epoch": 0.06659221574223435, + "grad_norm": 1245.099365234375, + "learning_rate": 6.505050505050505e-06, + "loss": 228.4951, + "step": 8050 + }, + { + "epoch": 0.0666749389916036, + "grad_norm": 820.326171875, + "learning_rate": 6.513131313131314e-06, + "loss": 172.91, + "step": 8060 + }, + { + "epoch": 0.06675766224097282, + "grad_norm": 1347.561279296875, + "learning_rate": 6.521212121212121e-06, + "loss": 175.6294, + "step": 8070 + }, + { + "epoch": 0.06684038549034206, + "grad_norm": 2743.030517578125, + "learning_rate": 6.52929292929293e-06, + "loss": 240.5082, + "step": 8080 + }, + { + "epoch": 0.0669231087397113, + "grad_norm": 1014.7551879882812, + "learning_rate": 6.537373737373737e-06, + "loss": 181.1287, + "step": 8090 + }, + { + "epoch": 0.06700583198908053, + "grad_norm": 1014.5245971679688, + "learning_rate": 6.545454545454546e-06, + "loss": 189.7482, + "step": 8100 + }, + { + "epoch": 0.06708855523844977, + "grad_norm": 778.7886962890625, + "learning_rate": 6.553535353535354e-06, + "loss": 170.7764, + "step": 8110 + }, + { + "epoch": 0.06717127848781901, + "grad_norm": 2081.901611328125, + "learning_rate": 6.561616161616162e-06, + "loss": 245.8872, + "step": 8120 + }, + { + "epoch": 0.06725400173718823, + "grad_norm": 1881.017333984375, + "learning_rate": 6.56969696969697e-06, + "loss": 252.341, + "step": 8130 + }, + { + "epoch": 0.06733672498655748, + "grad_norm": 910.3214111328125, + "learning_rate": 6.577777777777779e-06, 
+ "loss": 179.5906, + "step": 8140 + }, + { + "epoch": 0.0674194482359267, + "grad_norm": 1163.5965576171875, + "learning_rate": 6.585858585858586e-06, + "loss": 233.8698, + "step": 8150 + }, + { + "epoch": 0.06750217148529594, + "grad_norm": 884.6065673828125, + "learning_rate": 6.593939393939395e-06, + "loss": 139.5692, + "step": 8160 + }, + { + "epoch": 0.06758489473466518, + "grad_norm": 1140.02392578125, + "learning_rate": 6.602020202020203e-06, + "loss": 195.1031, + "step": 8170 + }, + { + "epoch": 0.06766761798403441, + "grad_norm": 1378.050537109375, + "learning_rate": 6.610101010101011e-06, + "loss": 215.5058, + "step": 8180 + }, + { + "epoch": 0.06775034123340365, + "grad_norm": 1974.1138916015625, + "learning_rate": 6.618181818181819e-06, + "loss": 294.2934, + "step": 8190 + }, + { + "epoch": 0.06783306448277289, + "grad_norm": 832.8023681640625, + "learning_rate": 6.626262626262627e-06, + "loss": 184.4631, + "step": 8200 + }, + { + "epoch": 0.06791578773214212, + "grad_norm": 856.9111938476562, + "learning_rate": 6.634343434343435e-06, + "loss": 170.4937, + "step": 8210 + }, + { + "epoch": 0.06799851098151136, + "grad_norm": 916.113525390625, + "learning_rate": 6.642424242424242e-06, + "loss": 170.3063, + "step": 8220 + }, + { + "epoch": 0.06808123423088058, + "grad_norm": 1267.204345703125, + "learning_rate": 6.650505050505051e-06, + "loss": 154.5532, + "step": 8230 + }, + { + "epoch": 0.06816395748024982, + "grad_norm": 1163.3558349609375, + "learning_rate": 6.658585858585859e-06, + "loss": 196.9229, + "step": 8240 + }, + { + "epoch": 0.06824668072961906, + "grad_norm": 1783.79541015625, + "learning_rate": 6.666666666666667e-06, + "loss": 210.5392, + "step": 8250 + }, + { + "epoch": 0.06832940397898829, + "grad_norm": 917.7781982421875, + "learning_rate": 6.674747474747475e-06, + "loss": 177.4379, + "step": 8260 + }, + { + "epoch": 0.06841212722835753, + "grad_norm": 674.7391967773438, + "learning_rate": 6.682828282828283e-06, + "loss": 186.2342, + "step": 8270 + }, + { + "epoch": 0.06849485047772677, + "grad_norm": 654.15234375, + "learning_rate": 6.690909090909091e-06, + "loss": 136.4612, + "step": 8280 + }, + { + "epoch": 0.068577573727096, + "grad_norm": 1094.0560302734375, + "learning_rate": 6.6989898989899e-06, + "loss": 170.5708, + "step": 8290 + }, + { + "epoch": 0.06866029697646524, + "grad_norm": 1474.585693359375, + "learning_rate": 6.707070707070707e-06, + "loss": 203.4748, + "step": 8300 + }, + { + "epoch": 0.06874302022583446, + "grad_norm": 1227.391845703125, + "learning_rate": 6.715151515151516e-06, + "loss": 222.0174, + "step": 8310 + }, + { + "epoch": 0.0688257434752037, + "grad_norm": 1838.2691650390625, + "learning_rate": 6.723232323232324e-06, + "loss": 224.6444, + "step": 8320 + }, + { + "epoch": 0.06890846672457294, + "grad_norm": 1169.060791015625, + "learning_rate": 6.731313131313132e-06, + "loss": 142.1164, + "step": 8330 + }, + { + "epoch": 0.06899118997394217, + "grad_norm": 547.6967163085938, + "learning_rate": 6.73939393939394e-06, + "loss": 153.1282, + "step": 8340 + }, + { + "epoch": 0.06907391322331141, + "grad_norm": 1083.063720703125, + "learning_rate": 6.747474747474749e-06, + "loss": 184.0296, + "step": 8350 + }, + { + "epoch": 0.06915663647268065, + "grad_norm": 5596.943359375, + "learning_rate": 6.755555555555556e-06, + "loss": 173.6666, + "step": 8360 + }, + { + "epoch": 0.06923935972204988, + "grad_norm": 1326.8721923828125, + "learning_rate": 6.763636363636365e-06, + "loss": 190.1673, + "step": 8370 + }, + { + "epoch": 
0.06932208297141912, + "grad_norm": 1101.3780517578125, + "learning_rate": 6.771717171717172e-06, + "loss": 220.6135, + "step": 8380 + }, + { + "epoch": 0.06940480622078836, + "grad_norm": 1435.279296875, + "learning_rate": 6.779797979797981e-06, + "loss": 178.5454, + "step": 8390 + }, + { + "epoch": 0.06948752947015759, + "grad_norm": 1620.093994140625, + "learning_rate": 6.787878787878789e-06, + "loss": 151.146, + "step": 8400 + }, + { + "epoch": 0.06957025271952683, + "grad_norm": 1524.4627685546875, + "learning_rate": 6.795959595959597e-06, + "loss": 185.1478, + "step": 8410 + }, + { + "epoch": 0.06965297596889605, + "grad_norm": 1806.7978515625, + "learning_rate": 6.804040404040405e-06, + "loss": 173.0628, + "step": 8420 + }, + { + "epoch": 0.06973569921826529, + "grad_norm": 1826.8575439453125, + "learning_rate": 6.812121212121212e-06, + "loss": 167.8523, + "step": 8430 + }, + { + "epoch": 0.06981842246763453, + "grad_norm": 1353.9345703125, + "learning_rate": 6.82020202020202e-06, + "loss": 194.5669, + "step": 8440 + }, + { + "epoch": 0.06990114571700376, + "grad_norm": 2063.500244140625, + "learning_rate": 6.828282828282828e-06, + "loss": 184.4123, + "step": 8450 + }, + { + "epoch": 0.069983868966373, + "grad_norm": 1244.160400390625, + "learning_rate": 6.8363636363636364e-06, + "loss": 234.5416, + "step": 8460 + }, + { + "epoch": 0.07006659221574224, + "grad_norm": 894.0686645507812, + "learning_rate": 6.844444444444445e-06, + "loss": 157.8413, + "step": 8470 + }, + { + "epoch": 0.07014931546511147, + "grad_norm": 953.1651000976562, + "learning_rate": 6.8525252525252525e-06, + "loss": 187.5382, + "step": 8480 + }, + { + "epoch": 0.07023203871448071, + "grad_norm": 868.2008056640625, + "learning_rate": 6.860606060606061e-06, + "loss": 153.3808, + "step": 8490 + }, + { + "epoch": 0.07031476196384993, + "grad_norm": 1246.61328125, + "learning_rate": 6.868686868686869e-06, + "loss": 198.8839, + "step": 8500 + }, + { + "epoch": 0.07039748521321917, + "grad_norm": 1278.021484375, + "learning_rate": 6.876767676767677e-06, + "loss": 162.5859, + "step": 8510 + }, + { + "epoch": 0.07048020846258841, + "grad_norm": 1224.3677978515625, + "learning_rate": 6.8848484848484854e-06, + "loss": 216.0751, + "step": 8520 + }, + { + "epoch": 0.07056293171195764, + "grad_norm": 629.5072021484375, + "learning_rate": 6.892929292929294e-06, + "loss": 219.3646, + "step": 8530 + }, + { + "epoch": 0.07064565496132688, + "grad_norm": 873.804443359375, + "learning_rate": 6.9010101010101015e-06, + "loss": 176.4796, + "step": 8540 + }, + { + "epoch": 0.07072837821069612, + "grad_norm": 1832.682373046875, + "learning_rate": 6.90909090909091e-06, + "loss": 256.3848, + "step": 8550 + }, + { + "epoch": 0.07081110146006535, + "grad_norm": 1244.7239990234375, + "learning_rate": 6.9171717171717175e-06, + "loss": 215.3179, + "step": 8560 + }, + { + "epoch": 0.07089382470943459, + "grad_norm": 938.3501586914062, + "learning_rate": 6.925252525252526e-06, + "loss": 169.5242, + "step": 8570 + }, + { + "epoch": 0.07097654795880382, + "grad_norm": 2076.439697265625, + "learning_rate": 6.9333333333333344e-06, + "loss": 200.3409, + "step": 8580 + }, + { + "epoch": 0.07105927120817306, + "grad_norm": 800.4453735351562, + "learning_rate": 6.941414141414142e-06, + "loss": 192.4127, + "step": 8590 + }, + { + "epoch": 0.0711419944575423, + "grad_norm": 1373.4234619140625, + "learning_rate": 6.9494949494949505e-06, + "loss": 161.1228, + "step": 8600 + }, + { + "epoch": 0.07122471770691152, + "grad_norm": 950.77197265625, + 
"learning_rate": 6.957575757575759e-06, + "loss": 179.3362, + "step": 8610 + }, + { + "epoch": 0.07130744095628076, + "grad_norm": 1121.46044921875, + "learning_rate": 6.9656565656565665e-06, + "loss": 196.0917, + "step": 8620 + }, + { + "epoch": 0.07139016420565, + "grad_norm": 906.2406005859375, + "learning_rate": 6.973737373737375e-06, + "loss": 139.1432, + "step": 8630 + }, + { + "epoch": 0.07147288745501923, + "grad_norm": 2017.980712890625, + "learning_rate": 6.981818181818183e-06, + "loss": 212.8915, + "step": 8640 + }, + { + "epoch": 0.07155561070438847, + "grad_norm": 1261.973388671875, + "learning_rate": 6.98989898989899e-06, + "loss": 200.444, + "step": 8650 + }, + { + "epoch": 0.07163833395375771, + "grad_norm": 855.2569580078125, + "learning_rate": 6.997979797979798e-06, + "loss": 222.0111, + "step": 8660 + }, + { + "epoch": 0.07172105720312694, + "grad_norm": 1921.39599609375, + "learning_rate": 7.006060606060606e-06, + "loss": 222.5992, + "step": 8670 + }, + { + "epoch": 0.07180378045249618, + "grad_norm": 857.9965209960938, + "learning_rate": 7.014141414141415e-06, + "loss": 219.586, + "step": 8680 + }, + { + "epoch": 0.0718865037018654, + "grad_norm": 915.5559692382812, + "learning_rate": 7.022222222222222e-06, + "loss": 184.9689, + "step": 8690 + }, + { + "epoch": 0.07196922695123464, + "grad_norm": 1838.1370849609375, + "learning_rate": 7.030303030303031e-06, + "loss": 232.5889, + "step": 8700 + }, + { + "epoch": 0.07205195020060388, + "grad_norm": 932.5780639648438, + "learning_rate": 7.038383838383839e-06, + "loss": 171.6244, + "step": 8710 + }, + { + "epoch": 0.07213467344997311, + "grad_norm": 1225.2701416015625, + "learning_rate": 7.046464646464647e-06, + "loss": 153.8994, + "step": 8720 + }, + { + "epoch": 0.07221739669934235, + "grad_norm": 1276.4774169921875, + "learning_rate": 7.054545454545455e-06, + "loss": 195.4201, + "step": 8730 + }, + { + "epoch": 0.07230011994871159, + "grad_norm": 1169.0623779296875, + "learning_rate": 7.062626262626263e-06, + "loss": 184.115, + "step": 8740 + }, + { + "epoch": 0.07238284319808082, + "grad_norm": 1720.584716796875, + "learning_rate": 7.070707070707071e-06, + "loss": 225.3465, + "step": 8750 + }, + { + "epoch": 0.07246556644745006, + "grad_norm": 811.720947265625, + "learning_rate": 7.07878787878788e-06, + "loss": 155.7371, + "step": 8760 + }, + { + "epoch": 0.07254828969681928, + "grad_norm": 1744.4354248046875, + "learning_rate": 7.086868686868687e-06, + "loss": 193.3227, + "step": 8770 + }, + { + "epoch": 0.07263101294618853, + "grad_norm": 862.0523681640625, + "learning_rate": 7.094949494949496e-06, + "loss": 170.9686, + "step": 8780 + }, + { + "epoch": 0.07271373619555777, + "grad_norm": 2165.270751953125, + "learning_rate": 7.103030303030304e-06, + "loss": 234.9279, + "step": 8790 + }, + { + "epoch": 0.07279645944492699, + "grad_norm": 1295.5478515625, + "learning_rate": 7.111111111111112e-06, + "loss": 211.1977, + "step": 8800 + }, + { + "epoch": 0.07287918269429623, + "grad_norm": 957.7193603515625, + "learning_rate": 7.11919191919192e-06, + "loss": 191.1015, + "step": 8810 + }, + { + "epoch": 0.07296190594366547, + "grad_norm": 1650.5340576171875, + "learning_rate": 7.127272727272728e-06, + "loss": 164.5978, + "step": 8820 + }, + { + "epoch": 0.0730446291930347, + "grad_norm": 1488.514404296875, + "learning_rate": 7.135353535353536e-06, + "loss": 187.6664, + "step": 8830 + }, + { + "epoch": 0.07312735244240394, + "grad_norm": 1756.99853515625, + "learning_rate": 7.143434343434345e-06, + "loss": 219.4122, + 
"step": 8840 + }, + { + "epoch": 0.07321007569177317, + "grad_norm": 924.37158203125, + "learning_rate": 7.151515151515152e-06, + "loss": 157.6518, + "step": 8850 + }, + { + "epoch": 0.0732927989411424, + "grad_norm": 1157.555908203125, + "learning_rate": 7.15959595959596e-06, + "loss": 162.5851, + "step": 8860 + }, + { + "epoch": 0.07337552219051165, + "grad_norm": 668.1375732421875, + "learning_rate": 7.1676767676767676e-06, + "loss": 153.8082, + "step": 8870 + }, + { + "epoch": 0.07345824543988087, + "grad_norm": 1706.42333984375, + "learning_rate": 7.175757575757576e-06, + "loss": 145.3626, + "step": 8880 + }, + { + "epoch": 0.07354096868925011, + "grad_norm": 2563.07861328125, + "learning_rate": 7.1838383838383845e-06, + "loss": 217.706, + "step": 8890 + }, + { + "epoch": 0.07362369193861935, + "grad_norm": 1756.02392578125, + "learning_rate": 7.191919191919192e-06, + "loss": 177.5173, + "step": 8900 + }, + { + "epoch": 0.07370641518798858, + "grad_norm": 1470.858642578125, + "learning_rate": 7.2000000000000005e-06, + "loss": 218.2071, + "step": 8910 + }, + { + "epoch": 0.07378913843735782, + "grad_norm": 1429.1534423828125, + "learning_rate": 7.208080808080808e-06, + "loss": 192.6579, + "step": 8920 + }, + { + "epoch": 0.07387186168672706, + "grad_norm": 1044.5311279296875, + "learning_rate": 7.2161616161616166e-06, + "loss": 175.2781, + "step": 8930 + }, + { + "epoch": 0.07395458493609629, + "grad_norm": 1081.16748046875, + "learning_rate": 7.224242424242425e-06, + "loss": 195.337, + "step": 8940 + }, + { + "epoch": 0.07403730818546553, + "grad_norm": 1816.50634765625, + "learning_rate": 7.232323232323233e-06, + "loss": 164.159, + "step": 8950 + }, + { + "epoch": 0.07412003143483475, + "grad_norm": 941.095947265625, + "learning_rate": 7.240404040404041e-06, + "loss": 167.4316, + "step": 8960 + }, + { + "epoch": 0.074202754684204, + "grad_norm": 1430.9918212890625, + "learning_rate": 7.2484848484848495e-06, + "loss": 142.7207, + "step": 8970 + }, + { + "epoch": 0.07428547793357324, + "grad_norm": 987.8670043945312, + "learning_rate": 7.256565656565657e-06, + "loss": 185.2262, + "step": 8980 + }, + { + "epoch": 0.07436820118294246, + "grad_norm": 1886.202392578125, + "learning_rate": 7.2646464646464656e-06, + "loss": 201.7182, + "step": 8990 + }, + { + "epoch": 0.0744509244323117, + "grad_norm": 6469.64501953125, + "learning_rate": 7.272727272727273e-06, + "loss": 242.3246, + "step": 9000 + }, + { + "epoch": 0.07453364768168094, + "grad_norm": 1446.23388671875, + "learning_rate": 7.280808080808082e-06, + "loss": 185.7719, + "step": 9010 + }, + { + "epoch": 0.07461637093105017, + "grad_norm": 1158.1019287109375, + "learning_rate": 7.28888888888889e-06, + "loss": 135.7039, + "step": 9020 + }, + { + "epoch": 0.07469909418041941, + "grad_norm": 1710.8179931640625, + "learning_rate": 7.296969696969698e-06, + "loss": 214.3662, + "step": 9030 + }, + { + "epoch": 0.07478181742978864, + "grad_norm": 693.0316772460938, + "learning_rate": 7.305050505050506e-06, + "loss": 168.4758, + "step": 9040 + }, + { + "epoch": 0.07486454067915788, + "grad_norm": 1307.99267578125, + "learning_rate": 7.3131313131313146e-06, + "loss": 221.4772, + "step": 9050 + }, + { + "epoch": 0.07494726392852712, + "grad_norm": 578.5460815429688, + "learning_rate": 7.321212121212122e-06, + "loss": 172.2857, + "step": 9060 + }, + { + "epoch": 0.07502998717789634, + "grad_norm": 4154.9052734375, + "learning_rate": 7.32929292929293e-06, + "loss": 200.6511, + "step": 9070 + }, + { + "epoch": 0.07511271042726558, + "grad_norm": 
3818.542236328125, + "learning_rate": 7.337373737373737e-06, + "loss": 198.6826, + "step": 9080 + }, + { + "epoch": 0.07519543367663482, + "grad_norm": 630.0006713867188, + "learning_rate": 7.345454545454546e-06, + "loss": 146.3737, + "step": 9090 + }, + { + "epoch": 0.07527815692600405, + "grad_norm": 1059.06640625, + "learning_rate": 7.353535353535353e-06, + "loss": 163.5028, + "step": 9100 + }, + { + "epoch": 0.07536088017537329, + "grad_norm": 668.64111328125, + "learning_rate": 7.361616161616162e-06, + "loss": 170.1274, + "step": 9110 + }, + { + "epoch": 0.07544360342474253, + "grad_norm": 1227.2437744140625, + "learning_rate": 7.36969696969697e-06, + "loss": 167.1523, + "step": 9120 + }, + { + "epoch": 0.07552632667411176, + "grad_norm": 989.3958740234375, + "learning_rate": 7.377777777777778e-06, + "loss": 179.0444, + "step": 9130 + }, + { + "epoch": 0.075609049923481, + "grad_norm": 2812.70166015625, + "learning_rate": 7.385858585858586e-06, + "loss": 170.3505, + "step": 9140 + }, + { + "epoch": 0.07569177317285022, + "grad_norm": 1429.5191650390625, + "learning_rate": 7.393939393939395e-06, + "loss": 198.7005, + "step": 9150 + }, + { + "epoch": 0.07577449642221946, + "grad_norm": 937.4029541015625, + "learning_rate": 7.402020202020202e-06, + "loss": 149.7339, + "step": 9160 + }, + { + "epoch": 0.0758572196715887, + "grad_norm": 6428.83642578125, + "learning_rate": 7.410101010101011e-06, + "loss": 224.5054, + "step": 9170 + }, + { + "epoch": 0.07593994292095793, + "grad_norm": 1135.309814453125, + "learning_rate": 7.4181818181818185e-06, + "loss": 206.6003, + "step": 9180 + }, + { + "epoch": 0.07602266617032717, + "grad_norm": 663.953125, + "learning_rate": 7.426262626262627e-06, + "loss": 148.6399, + "step": 9190 + }, + { + "epoch": 0.07610538941969641, + "grad_norm": 1465.2401123046875, + "learning_rate": 7.434343434343435e-06, + "loss": 129.7046, + "step": 9200 + }, + { + "epoch": 0.07618811266906564, + "grad_norm": 840.3555297851562, + "learning_rate": 7.442424242424243e-06, + "loss": 212.8563, + "step": 9210 + }, + { + "epoch": 0.07627083591843488, + "grad_norm": 1056.4595947265625, + "learning_rate": 7.450505050505051e-06, + "loss": 219.987, + "step": 9220 + }, + { + "epoch": 0.0763535591678041, + "grad_norm": 1585.2525634765625, + "learning_rate": 7.45858585858586e-06, + "loss": 254.3914, + "step": 9230 + }, + { + "epoch": 0.07643628241717335, + "grad_norm": 1167.71728515625, + "learning_rate": 7.4666666666666675e-06, + "loss": 173.4369, + "step": 9240 + }, + { + "epoch": 0.07651900566654259, + "grad_norm": 1241.547119140625, + "learning_rate": 7.474747474747476e-06, + "loss": 196.5241, + "step": 9250 + }, + { + "epoch": 0.07660172891591181, + "grad_norm": 1076.36279296875, + "learning_rate": 7.4828282828282835e-06, + "loss": 141.0298, + "step": 9260 + }, + { + "epoch": 0.07668445216528105, + "grad_norm": 894.4515991210938, + "learning_rate": 7.490909090909092e-06, + "loss": 182.4499, + "step": 9270 + }, + { + "epoch": 0.0767671754146503, + "grad_norm": 1318.6171875, + "learning_rate": 7.4989898989899e-06, + "loss": 294.3835, + "step": 9280 + }, + { + "epoch": 0.07684989866401952, + "grad_norm": 1835.936279296875, + "learning_rate": 7.507070707070707e-06, + "loss": 200.7481, + "step": 9290 + }, + { + "epoch": 0.07693262191338876, + "grad_norm": 1094.5806884765625, + "learning_rate": 7.515151515151516e-06, + "loss": 155.6739, + "step": 9300 + }, + { + "epoch": 0.07701534516275799, + "grad_norm": 2438.4599609375, + "learning_rate": 7.523232323232323e-06, + "loss": 168.196, + 
"step": 9310 + }, + { + "epoch": 0.07709806841212723, + "grad_norm": 1409.369384765625, + "learning_rate": 7.531313131313132e-06, + "loss": 168.5339, + "step": 9320 + }, + { + "epoch": 0.07718079166149647, + "grad_norm": 792.833740234375, + "learning_rate": 7.53939393939394e-06, + "loss": 204.8236, + "step": 9330 + }, + { + "epoch": 0.0772635149108657, + "grad_norm": 4106.85107421875, + "learning_rate": 7.547474747474748e-06, + "loss": 206.0253, + "step": 9340 + }, + { + "epoch": 0.07734623816023493, + "grad_norm": 1158.9046630859375, + "learning_rate": 7.555555555555556e-06, + "loss": 200.0595, + "step": 9350 + }, + { + "epoch": 0.07742896140960417, + "grad_norm": 829.5303344726562, + "learning_rate": 7.563636363636364e-06, + "loss": 152.1526, + "step": 9360 + }, + { + "epoch": 0.0775116846589734, + "grad_norm": 1672.3824462890625, + "learning_rate": 7.571717171717172e-06, + "loss": 196.9728, + "step": 9370 + }, + { + "epoch": 0.07759440790834264, + "grad_norm": 1697.9437255859375, + "learning_rate": 7.579797979797981e-06, + "loss": 140.7818, + "step": 9380 + }, + { + "epoch": 0.07767713115771188, + "grad_norm": 1467.42138671875, + "learning_rate": 7.587878787878788e-06, + "loss": 237.8365, + "step": 9390 + }, + { + "epoch": 0.07775985440708111, + "grad_norm": 1372.5516357421875, + "learning_rate": 7.595959595959597e-06, + "loss": 201.537, + "step": 9400 + }, + { + "epoch": 0.07784257765645035, + "grad_norm": 1280.147216796875, + "learning_rate": 7.604040404040405e-06, + "loss": 168.6732, + "step": 9410 + }, + { + "epoch": 0.07792530090581958, + "grad_norm": 1045.321533203125, + "learning_rate": 7.612121212121213e-06, + "loss": 154.092, + "step": 9420 + }, + { + "epoch": 0.07800802415518882, + "grad_norm": 6423.11962890625, + "learning_rate": 7.620202020202021e-06, + "loss": 251.8613, + "step": 9430 + }, + { + "epoch": 0.07809074740455806, + "grad_norm": 1650.4332275390625, + "learning_rate": 7.628282828282829e-06, + "loss": 218.2032, + "step": 9440 + }, + { + "epoch": 0.07817347065392728, + "grad_norm": 1520.5958251953125, + "learning_rate": 7.636363636363638e-06, + "loss": 194.5603, + "step": 9450 + }, + { + "epoch": 0.07825619390329652, + "grad_norm": 1989.30126953125, + "learning_rate": 7.644444444444445e-06, + "loss": 167.2613, + "step": 9460 + }, + { + "epoch": 0.07833891715266576, + "grad_norm": 1363.1075439453125, + "learning_rate": 7.652525252525253e-06, + "loss": 193.0324, + "step": 9470 + }, + { + "epoch": 0.07842164040203499, + "grad_norm": 1044.422607421875, + "learning_rate": 7.660606060606062e-06, + "loss": 239.5535, + "step": 9480 + }, + { + "epoch": 0.07850436365140423, + "grad_norm": 1506.1378173828125, + "learning_rate": 7.66868686868687e-06, + "loss": 157.3207, + "step": 9490 + }, + { + "epoch": 0.07858708690077346, + "grad_norm": 1023.1807250976562, + "learning_rate": 7.676767676767677e-06, + "loss": 177.0086, + "step": 9500 + }, + { + "epoch": 0.0786698101501427, + "grad_norm": 766.3860473632812, + "learning_rate": 7.684848484848485e-06, + "loss": 180.8027, + "step": 9510 + }, + { + "epoch": 0.07875253339951194, + "grad_norm": 1220.765380859375, + "learning_rate": 7.692929292929294e-06, + "loss": 199.764, + "step": 9520 + }, + { + "epoch": 0.07883525664888116, + "grad_norm": 798.783447265625, + "learning_rate": 7.7010101010101e-06, + "loss": 178.7332, + "step": 9530 + }, + { + "epoch": 0.0789179798982504, + "grad_norm": 1135.2913818359375, + "learning_rate": 7.709090909090909e-06, + "loss": 138.7885, + "step": 9540 + }, + { + "epoch": 0.07900070314761964, + 
"grad_norm": 820.0831298828125, + "learning_rate": 7.717171717171717e-06, + "loss": 192.7479, + "step": 9550 + }, + { + "epoch": 0.07908342639698887, + "grad_norm": 1487.1383056640625, + "learning_rate": 7.725252525252526e-06, + "loss": 202.4578, + "step": 9560 + }, + { + "epoch": 0.07916614964635811, + "grad_norm": 3294.249267578125, + "learning_rate": 7.733333333333334e-06, + "loss": 154.1931, + "step": 9570 + }, + { + "epoch": 0.07924887289572734, + "grad_norm": 1593.376708984375, + "learning_rate": 7.741414141414141e-06, + "loss": 220.1035, + "step": 9580 + }, + { + "epoch": 0.07933159614509658, + "grad_norm": 1254.923095703125, + "learning_rate": 7.74949494949495e-06, + "loss": 162.3905, + "step": 9590 + }, + { + "epoch": 0.07941431939446582, + "grad_norm": 989.010009765625, + "learning_rate": 7.757575757575758e-06, + "loss": 150.6632, + "step": 9600 + }, + { + "epoch": 0.07949704264383504, + "grad_norm": 668.603759765625, + "learning_rate": 7.765656565656566e-06, + "loss": 180.0927, + "step": 9610 + }, + { + "epoch": 0.07957976589320429, + "grad_norm": 918.044677734375, + "learning_rate": 7.773737373737375e-06, + "loss": 216.1173, + "step": 9620 + }, + { + "epoch": 0.07966248914257353, + "grad_norm": 1150.803955078125, + "learning_rate": 7.781818181818183e-06, + "loss": 165.5561, + "step": 9630 + }, + { + "epoch": 0.07974521239194275, + "grad_norm": 1339.1085205078125, + "learning_rate": 7.78989898989899e-06, + "loss": 120.4266, + "step": 9640 + }, + { + "epoch": 0.07982793564131199, + "grad_norm": 1119.98388671875, + "learning_rate": 7.797979797979799e-06, + "loss": 186.139, + "step": 9650 + }, + { + "epoch": 0.07991065889068123, + "grad_norm": 845.5322875976562, + "learning_rate": 7.806060606060607e-06, + "loss": 147.2671, + "step": 9660 + }, + { + "epoch": 0.07999338214005046, + "grad_norm": 1525.1080322265625, + "learning_rate": 7.814141414141415e-06, + "loss": 227.4823, + "step": 9670 + }, + { + "epoch": 0.0800761053894197, + "grad_norm": 936.9368286132812, + "learning_rate": 7.822222222222224e-06, + "loss": 147.8492, + "step": 9680 + }, + { + "epoch": 0.08015882863878893, + "grad_norm": 1528.34375, + "learning_rate": 7.83030303030303e-06, + "loss": 176.4171, + "step": 9690 + }, + { + "epoch": 0.08024155188815817, + "grad_norm": 1028.9813232421875, + "learning_rate": 7.838383838383839e-06, + "loss": 168.1568, + "step": 9700 + }, + { + "epoch": 0.0803242751375274, + "grad_norm": 3313.676513671875, + "learning_rate": 7.846464646464646e-06, + "loss": 211.1931, + "step": 9710 + }, + { + "epoch": 0.08040699838689663, + "grad_norm": 698.055419921875, + "learning_rate": 7.854545454545454e-06, + "loss": 147.0731, + "step": 9720 + }, + { + "epoch": 0.08048972163626587, + "grad_norm": 1256.8193359375, + "learning_rate": 7.862626262626263e-06, + "loss": 141.3686, + "step": 9730 + }, + { + "epoch": 0.08057244488563511, + "grad_norm": 1118.2659912109375, + "learning_rate": 7.870707070707071e-06, + "loss": 191.6792, + "step": 9740 + }, + { + "epoch": 0.08065516813500434, + "grad_norm": 1615.0177001953125, + "learning_rate": 7.87878787878788e-06, + "loss": 171.2617, + "step": 9750 + }, + { + "epoch": 0.08073789138437358, + "grad_norm": 1359.4404296875, + "learning_rate": 7.886868686868686e-06, + "loss": 233.8179, + "step": 9760 + }, + { + "epoch": 0.08082061463374281, + "grad_norm": 1080.4814453125, + "learning_rate": 7.894949494949495e-06, + "loss": 167.5727, + "step": 9770 + }, + { + "epoch": 0.08090333788311205, + "grad_norm": 1568.5997314453125, + "learning_rate": 7.903030303030303e-06, + 
"loss": 180.408, + "step": 9780 + }, + { + "epoch": 0.08098606113248129, + "grad_norm": 1154.7166748046875, + "learning_rate": 7.911111111111112e-06, + "loss": 141.3988, + "step": 9790 + }, + { + "epoch": 0.08106878438185051, + "grad_norm": 1465.5701904296875, + "learning_rate": 7.91919191919192e-06, + "loss": 235.8258, + "step": 9800 + }, + { + "epoch": 0.08115150763121975, + "grad_norm": 1399.992919921875, + "learning_rate": 7.927272727272729e-06, + "loss": 197.8094, + "step": 9810 + }, + { + "epoch": 0.081234230880589, + "grad_norm": 1609.233154296875, + "learning_rate": 7.935353535353535e-06, + "loss": 169.6143, + "step": 9820 + }, + { + "epoch": 0.08131695412995822, + "grad_norm": 771.2155151367188, + "learning_rate": 7.943434343434344e-06, + "loss": 174.0228, + "step": 9830 + }, + { + "epoch": 0.08139967737932746, + "grad_norm": 1397.8433837890625, + "learning_rate": 7.951515151515152e-06, + "loss": 195.8236, + "step": 9840 + }, + { + "epoch": 0.08148240062869669, + "grad_norm": 1942.0560302734375, + "learning_rate": 7.95959595959596e-06, + "loss": 174.9514, + "step": 9850 + }, + { + "epoch": 0.08156512387806593, + "grad_norm": 881.597900390625, + "learning_rate": 7.96767676767677e-06, + "loss": 155.3664, + "step": 9860 + }, + { + "epoch": 0.08164784712743517, + "grad_norm": 982.1563110351562, + "learning_rate": 7.975757575757576e-06, + "loss": 166.2256, + "step": 9870 + }, + { + "epoch": 0.0817305703768044, + "grad_norm": 1786.5938720703125, + "learning_rate": 7.983838383838384e-06, + "loss": 179.014, + "step": 9880 + }, + { + "epoch": 0.08181329362617364, + "grad_norm": 1422.998046875, + "learning_rate": 7.991919191919193e-06, + "loss": 185.8182, + "step": 9890 + }, + { + "epoch": 0.08189601687554288, + "grad_norm": 1009.948486328125, + "learning_rate": 8.000000000000001e-06, + "loss": 153.7416, + "step": 9900 + }, + { + "epoch": 0.0819787401249121, + "grad_norm": 929.018798828125, + "learning_rate": 8.00808080808081e-06, + "loss": 178.9259, + "step": 9910 + }, + { + "epoch": 0.08206146337428134, + "grad_norm": 654.3829345703125, + "learning_rate": 8.016161616161617e-06, + "loss": 130.9901, + "step": 9920 + }, + { + "epoch": 0.08214418662365058, + "grad_norm": 1169.20068359375, + "learning_rate": 8.024242424242425e-06, + "loss": 125.0886, + "step": 9930 + }, + { + "epoch": 0.08222690987301981, + "grad_norm": 1203.2630615234375, + "learning_rate": 8.032323232323232e-06, + "loss": 191.309, + "step": 9940 + }, + { + "epoch": 0.08230963312238905, + "grad_norm": 1353.7027587890625, + "learning_rate": 8.04040404040404e-06, + "loss": 239.4487, + "step": 9950 + }, + { + "epoch": 0.08239235637175828, + "grad_norm": 1199.4378662109375, + "learning_rate": 8.048484848484849e-06, + "loss": 173.5184, + "step": 9960 + }, + { + "epoch": 0.08247507962112752, + "grad_norm": 1027.1614990234375, + "learning_rate": 8.056565656565657e-06, + "loss": 236.1041, + "step": 9970 + }, + { + "epoch": 0.08255780287049676, + "grad_norm": 2071.078125, + "learning_rate": 8.064646464646466e-06, + "loss": 210.167, + "step": 9980 + }, + { + "epoch": 0.08264052611986598, + "grad_norm": 931.7273559570312, + "learning_rate": 8.072727272727274e-06, + "loss": 169.0378, + "step": 9990 + }, + { + "epoch": 0.08272324936923522, + "grad_norm": 1780.4796142578125, + "learning_rate": 8.08080808080808e-06, + "loss": 182.981, + "step": 10000 + }, + { + "epoch": 0.08280597261860446, + "grad_norm": 1484.16845703125, + "learning_rate": 8.08888888888889e-06, + "loss": 190.9732, + "step": 10010 + }, + { + "epoch": 0.08288869586797369, + 
"grad_norm": 1517.7740478515625, + "learning_rate": 8.096969696969698e-06, + "loss": 214.248, + "step": 10020 + }, + { + "epoch": 0.08297141911734293, + "grad_norm": 4507.2001953125, + "learning_rate": 8.105050505050506e-06, + "loss": 149.1075, + "step": 10030 + }, + { + "epoch": 0.08305414236671216, + "grad_norm": 1202.4232177734375, + "learning_rate": 8.113131313131315e-06, + "loss": 167.8956, + "step": 10040 + }, + { + "epoch": 0.0831368656160814, + "grad_norm": 3145.95556640625, + "learning_rate": 8.121212121212121e-06, + "loss": 218.0878, + "step": 10050 + }, + { + "epoch": 0.08321958886545064, + "grad_norm": 833.5780029296875, + "learning_rate": 8.12929292929293e-06, + "loss": 183.5963, + "step": 10060 + }, + { + "epoch": 0.08330231211481987, + "grad_norm": 1200.9332275390625, + "learning_rate": 8.137373737373738e-06, + "loss": 200.6742, + "step": 10070 + }, + { + "epoch": 0.0833850353641891, + "grad_norm": 2536.378662109375, + "learning_rate": 8.145454545454547e-06, + "loss": 244.585, + "step": 10080 + }, + { + "epoch": 0.08346775861355835, + "grad_norm": 0.0, + "learning_rate": 8.153535353535355e-06, + "loss": 151.4828, + "step": 10090 + }, + { + "epoch": 0.08355048186292757, + "grad_norm": 1108.849853515625, + "learning_rate": 8.161616161616162e-06, + "loss": 178.3399, + "step": 10100 + }, + { + "epoch": 0.08363320511229681, + "grad_norm": 809.9747314453125, + "learning_rate": 8.16969696969697e-06, + "loss": 201.301, + "step": 10110 + }, + { + "epoch": 0.08371592836166604, + "grad_norm": 1331.628173828125, + "learning_rate": 8.177777777777779e-06, + "loss": 141.3669, + "step": 10120 + }, + { + "epoch": 0.08379865161103528, + "grad_norm": 990.9824829101562, + "learning_rate": 8.185858585858587e-06, + "loss": 169.3207, + "step": 10130 + }, + { + "epoch": 0.08388137486040452, + "grad_norm": 1191.9073486328125, + "learning_rate": 8.193939393939394e-06, + "loss": 136.0156, + "step": 10140 + }, + { + "epoch": 0.08396409810977375, + "grad_norm": 829.3964233398438, + "learning_rate": 8.202020202020202e-06, + "loss": 165.8068, + "step": 10150 + }, + { + "epoch": 0.08404682135914299, + "grad_norm": 781.4263305664062, + "learning_rate": 8.21010101010101e-06, + "loss": 186.8697, + "step": 10160 + }, + { + "epoch": 0.08412954460851223, + "grad_norm": 815.2606811523438, + "learning_rate": 8.21818181818182e-06, + "loss": 257.8631, + "step": 10170 + }, + { + "epoch": 0.08421226785788145, + "grad_norm": 1860.0218505859375, + "learning_rate": 8.226262626262626e-06, + "loss": 187.696, + "step": 10180 + }, + { + "epoch": 0.0842949911072507, + "grad_norm": 2099.6728515625, + "learning_rate": 8.234343434343434e-06, + "loss": 205.9569, + "step": 10190 + }, + { + "epoch": 0.08437771435661993, + "grad_norm": 1526.5322265625, + "learning_rate": 8.242424242424243e-06, + "loss": 200.3001, + "step": 10200 + }, + { + "epoch": 0.08446043760598916, + "grad_norm": 1122.8441162109375, + "learning_rate": 8.250505050505051e-06, + "loss": 132.1188, + "step": 10210 + }, + { + "epoch": 0.0845431608553584, + "grad_norm": 952.8866577148438, + "learning_rate": 8.25858585858586e-06, + "loss": 180.2814, + "step": 10220 + }, + { + "epoch": 0.08462588410472763, + "grad_norm": 1098.6842041015625, + "learning_rate": 8.266666666666667e-06, + "loss": 186.863, + "step": 10230 + }, + { + "epoch": 0.08470860735409687, + "grad_norm": 1201.576904296875, + "learning_rate": 8.274747474747475e-06, + "loss": 166.658, + "step": 10240 + }, + { + "epoch": 0.08479133060346611, + "grad_norm": 3628.583984375, + "learning_rate": 
8.282828282828283e-06, + "loss": 280.9522, + "step": 10250 + }, + { + "epoch": 0.08487405385283533, + "grad_norm": 956.4686279296875, + "learning_rate": 8.290909090909092e-06, + "loss": 177.9082, + "step": 10260 + }, + { + "epoch": 0.08495677710220458, + "grad_norm": 1213.8653564453125, + "learning_rate": 8.2989898989899e-06, + "loss": 176.0302, + "step": 10270 + }, + { + "epoch": 0.08503950035157382, + "grad_norm": 1263.095458984375, + "learning_rate": 8.307070707070707e-06, + "loss": 171.6865, + "step": 10280 + }, + { + "epoch": 0.08512222360094304, + "grad_norm": 1094.4754638671875, + "learning_rate": 8.315151515151516e-06, + "loss": 158.2062, + "step": 10290 + }, + { + "epoch": 0.08520494685031228, + "grad_norm": 957.9495239257812, + "learning_rate": 8.323232323232324e-06, + "loss": 169.5506, + "step": 10300 + }, + { + "epoch": 0.08528767009968151, + "grad_norm": 1073.7796630859375, + "learning_rate": 8.331313131313132e-06, + "loss": 147.6098, + "step": 10310 + }, + { + "epoch": 0.08537039334905075, + "grad_norm": 1453.0513916015625, + "learning_rate": 8.339393939393941e-06, + "loss": 189.7265, + "step": 10320 + }, + { + "epoch": 0.08545311659841999, + "grad_norm": 791.720703125, + "learning_rate": 8.34747474747475e-06, + "loss": 214.0726, + "step": 10330 + }, + { + "epoch": 0.08553583984778922, + "grad_norm": 1202.690673828125, + "learning_rate": 8.355555555555556e-06, + "loss": 197.4867, + "step": 10340 + }, + { + "epoch": 0.08561856309715846, + "grad_norm": 1175.0545654296875, + "learning_rate": 8.363636363636365e-06, + "loss": 153.0685, + "step": 10350 + }, + { + "epoch": 0.0857012863465277, + "grad_norm": 745.9451293945312, + "learning_rate": 8.371717171717171e-06, + "loss": 170.8535, + "step": 10360 + }, + { + "epoch": 0.08578400959589692, + "grad_norm": 1357.7061767578125, + "learning_rate": 8.37979797979798e-06, + "loss": 162.8026, + "step": 10370 + }, + { + "epoch": 0.08586673284526616, + "grad_norm": 1293.4013671875, + "learning_rate": 8.387878787878788e-06, + "loss": 152.5479, + "step": 10380 + }, + { + "epoch": 0.0859494560946354, + "grad_norm": 1522.9151611328125, + "learning_rate": 8.395959595959597e-06, + "loss": 180.1763, + "step": 10390 + }, + { + "epoch": 0.08603217934400463, + "grad_norm": 2097.494140625, + "learning_rate": 8.404040404040405e-06, + "loss": 168.4439, + "step": 10400 + }, + { + "epoch": 0.08611490259337387, + "grad_norm": 1112.0291748046875, + "learning_rate": 8.412121212121212e-06, + "loss": 193.0542, + "step": 10410 + }, + { + "epoch": 0.0861976258427431, + "grad_norm": 927.9266357421875, + "learning_rate": 8.42020202020202e-06, + "loss": 211.676, + "step": 10420 + }, + { + "epoch": 0.08628034909211234, + "grad_norm": 1505.004150390625, + "learning_rate": 8.428282828282829e-06, + "loss": 203.2798, + "step": 10430 + }, + { + "epoch": 0.08636307234148158, + "grad_norm": 1962.483154296875, + "learning_rate": 8.436363636363637e-06, + "loss": 217.3728, + "step": 10440 + }, + { + "epoch": 0.0864457955908508, + "grad_norm": 2074.048583984375, + "learning_rate": 8.444444444444446e-06, + "loss": 224.9844, + "step": 10450 + }, + { + "epoch": 0.08652851884022005, + "grad_norm": 1316.5950927734375, + "learning_rate": 8.452525252525252e-06, + "loss": 165.5474, + "step": 10460 + }, + { + "epoch": 0.08661124208958929, + "grad_norm": 1096.7896728515625, + "learning_rate": 8.460606060606061e-06, + "loss": 138.2374, + "step": 10470 + }, + { + "epoch": 0.08669396533895851, + "grad_norm": 1451.124755859375, + "learning_rate": 8.46868686868687e-06, + "loss": 141.8237, + 
"step": 10480 + }, + { + "epoch": 0.08677668858832775, + "grad_norm": 1239.64697265625, + "learning_rate": 8.476767676767678e-06, + "loss": 156.3808, + "step": 10490 + }, + { + "epoch": 0.08685941183769698, + "grad_norm": 689.178955078125, + "learning_rate": 8.484848484848486e-06, + "loss": 143.1666, + "step": 10500 + }, + { + "epoch": 0.08694213508706622, + "grad_norm": 684.1348266601562, + "learning_rate": 8.492929292929295e-06, + "loss": 182.6362, + "step": 10510 + }, + { + "epoch": 0.08702485833643546, + "grad_norm": 1299.8443603515625, + "learning_rate": 8.501010101010101e-06, + "loss": 160.0256, + "step": 10520 + }, + { + "epoch": 0.08710758158580469, + "grad_norm": 845.9354858398438, + "learning_rate": 8.50909090909091e-06, + "loss": 184.4307, + "step": 10530 + }, + { + "epoch": 0.08719030483517393, + "grad_norm": 1035.8701171875, + "learning_rate": 8.517171717171718e-06, + "loss": 228.6084, + "step": 10540 + }, + { + "epoch": 0.08727302808454317, + "grad_norm": 668.9830932617188, + "learning_rate": 8.525252525252527e-06, + "loss": 200.758, + "step": 10550 + }, + { + "epoch": 0.0873557513339124, + "grad_norm": 1058.86181640625, + "learning_rate": 8.533333333333335e-06, + "loss": 204.7663, + "step": 10560 + }, + { + "epoch": 0.08743847458328163, + "grad_norm": 1521.737548828125, + "learning_rate": 8.541414141414142e-06, + "loss": 192.2928, + "step": 10570 + }, + { + "epoch": 0.08752119783265086, + "grad_norm": 1117.1220703125, + "learning_rate": 8.54949494949495e-06, + "loss": 142.4138, + "step": 10580 + }, + { + "epoch": 0.0876039210820201, + "grad_norm": 1011.63525390625, + "learning_rate": 8.557575757575757e-06, + "loss": 193.8034, + "step": 10590 + }, + { + "epoch": 0.08768664433138934, + "grad_norm": 2114.847900390625, + "learning_rate": 8.565656565656566e-06, + "loss": 184.3933, + "step": 10600 + }, + { + "epoch": 0.08776936758075857, + "grad_norm": 823.4564819335938, + "learning_rate": 8.573737373737374e-06, + "loss": 186.983, + "step": 10610 + }, + { + "epoch": 0.08785209083012781, + "grad_norm": 1262.8677978515625, + "learning_rate": 8.581818181818183e-06, + "loss": 145.9637, + "step": 10620 + }, + { + "epoch": 0.08793481407949705, + "grad_norm": 1084.4609375, + "learning_rate": 8.589898989898991e-06, + "loss": 199.8638, + "step": 10630 + }, + { + "epoch": 0.08801753732886627, + "grad_norm": 1483.7017822265625, + "learning_rate": 8.597979797979798e-06, + "loss": 183.0729, + "step": 10640 + }, + { + "epoch": 0.08810026057823551, + "grad_norm": 1132.1685791015625, + "learning_rate": 8.606060606060606e-06, + "loss": 160.7905, + "step": 10650 + }, + { + "epoch": 0.08818298382760476, + "grad_norm": 1912.802978515625, + "learning_rate": 8.614141414141415e-06, + "loss": 235.4324, + "step": 10660 + }, + { + "epoch": 0.08826570707697398, + "grad_norm": 1315.4351806640625, + "learning_rate": 8.622222222222223e-06, + "loss": 186.3054, + "step": 10670 + }, + { + "epoch": 0.08834843032634322, + "grad_norm": 884.4146118164062, + "learning_rate": 8.630303030303032e-06, + "loss": 157.2276, + "step": 10680 + }, + { + "epoch": 0.08843115357571245, + "grad_norm": 1087.9329833984375, + "learning_rate": 8.63838383838384e-06, + "loss": 154.3768, + "step": 10690 + }, + { + "epoch": 0.08851387682508169, + "grad_norm": 913.88818359375, + "learning_rate": 8.646464646464647e-06, + "loss": 185.6447, + "step": 10700 + }, + { + "epoch": 0.08859660007445093, + "grad_norm": 2494.311279296875, + "learning_rate": 8.654545454545455e-06, + "loss": 195.7933, + "step": 10710 + }, + { + "epoch": 
0.08867932332382016, + "grad_norm": 1353.1029052734375, + "learning_rate": 8.662626262626264e-06, + "loss": 179.2149, + "step": 10720 + }, + { + "epoch": 0.0887620465731894, + "grad_norm": 1235.08056640625, + "learning_rate": 8.670707070707072e-06, + "loss": 140.4072, + "step": 10730 + }, + { + "epoch": 0.08884476982255864, + "grad_norm": 1513.452880859375, + "learning_rate": 8.67878787878788e-06, + "loss": 188.1772, + "step": 10740 + }, + { + "epoch": 0.08892749307192786, + "grad_norm": 576.0892333984375, + "learning_rate": 8.686868686868687e-06, + "loss": 149.6942, + "step": 10750 + }, + { + "epoch": 0.0890102163212971, + "grad_norm": 1103.5166015625, + "learning_rate": 8.694949494949496e-06, + "loss": 165.9619, + "step": 10760 + }, + { + "epoch": 0.08909293957066633, + "grad_norm": 964.4625244140625, + "learning_rate": 8.703030303030304e-06, + "loss": 138.5367, + "step": 10770 + }, + { + "epoch": 0.08917566282003557, + "grad_norm": 1614.7874755859375, + "learning_rate": 8.711111111111111e-06, + "loss": 175.2976, + "step": 10780 + }, + { + "epoch": 0.08925838606940481, + "grad_norm": 1648.361328125, + "learning_rate": 8.71919191919192e-06, + "loss": 187.3021, + "step": 10790 + }, + { + "epoch": 0.08934110931877404, + "grad_norm": 1707.2655029296875, + "learning_rate": 8.727272727272728e-06, + "loss": 258.8253, + "step": 10800 + }, + { + "epoch": 0.08942383256814328, + "grad_norm": 1160.01806640625, + "learning_rate": 8.735353535353536e-06, + "loss": 194.5743, + "step": 10810 + }, + { + "epoch": 0.08950655581751252, + "grad_norm": 1146.39990234375, + "learning_rate": 8.743434343434343e-06, + "loss": 203.5324, + "step": 10820 + }, + { + "epoch": 0.08958927906688174, + "grad_norm": 3626.928955078125, + "learning_rate": 8.751515151515151e-06, + "loss": 253.2095, + "step": 10830 + }, + { + "epoch": 0.08967200231625098, + "grad_norm": 728.5478515625, + "learning_rate": 8.75959595959596e-06, + "loss": 145.7956, + "step": 10840 + }, + { + "epoch": 0.08975472556562021, + "grad_norm": 1002.9689331054688, + "learning_rate": 8.767676767676768e-06, + "loss": 181.9694, + "step": 10850 + }, + { + "epoch": 0.08983744881498945, + "grad_norm": 1103.3865966796875, + "learning_rate": 8.775757575757577e-06, + "loss": 177.4934, + "step": 10860 + }, + { + "epoch": 0.08992017206435869, + "grad_norm": 1987.7679443359375, + "learning_rate": 8.783838383838385e-06, + "loss": 190.5272, + "step": 10870 + }, + { + "epoch": 0.09000289531372792, + "grad_norm": 1918.5572509765625, + "learning_rate": 8.791919191919192e-06, + "loss": 176.4213, + "step": 10880 + }, + { + "epoch": 0.09008561856309716, + "grad_norm": 979.5933837890625, + "learning_rate": 8.8e-06, + "loss": 208.7932, + "step": 10890 + }, + { + "epoch": 0.0901683418124664, + "grad_norm": 1254.18603515625, + "learning_rate": 8.808080808080809e-06, + "loss": 179.7351, + "step": 10900 + }, + { + "epoch": 0.09025106506183563, + "grad_norm": 1393.46044921875, + "learning_rate": 8.816161616161617e-06, + "loss": 239.5334, + "step": 10910 + }, + { + "epoch": 0.09033378831120487, + "grad_norm": 932.0426025390625, + "learning_rate": 8.824242424242426e-06, + "loss": 159.2398, + "step": 10920 + }, + { + "epoch": 0.0904165115605741, + "grad_norm": 1175.749755859375, + "learning_rate": 8.832323232323233e-06, + "loss": 196.0824, + "step": 10930 + }, + { + "epoch": 0.09049923480994333, + "grad_norm": 669.9056396484375, + "learning_rate": 8.840404040404041e-06, + "loss": 195.3154, + "step": 10940 + }, + { + "epoch": 0.09058195805931257, + "grad_norm": 2161.71826171875, + 
"learning_rate": 8.84848484848485e-06, + "loss": 201.2208, + "step": 10950 + }, + { + "epoch": 0.0906646813086818, + "grad_norm": 1083.76318359375, + "learning_rate": 8.856565656565658e-06, + "loss": 181.3205, + "step": 10960 + }, + { + "epoch": 0.09074740455805104, + "grad_norm": 1360.2738037109375, + "learning_rate": 8.864646464646466e-06, + "loss": 173.5026, + "step": 10970 + }, + { + "epoch": 0.09083012780742028, + "grad_norm": 1274.3690185546875, + "learning_rate": 8.872727272727275e-06, + "loss": 191.3013, + "step": 10980 + }, + { + "epoch": 0.0909128510567895, + "grad_norm": 1275.59814453125, + "learning_rate": 8.880808080808082e-06, + "loss": 180.379, + "step": 10990 + }, + { + "epoch": 0.09099557430615875, + "grad_norm": 1908.28125, + "learning_rate": 8.888888888888888e-06, + "loss": 226.928, + "step": 11000 + }, + { + "epoch": 0.09107829755552799, + "grad_norm": 1636.098388671875, + "learning_rate": 8.896969696969697e-06, + "loss": 166.5137, + "step": 11010 + }, + { + "epoch": 0.09116102080489721, + "grad_norm": 1362.2508544921875, + "learning_rate": 8.905050505050505e-06, + "loss": 162.6263, + "step": 11020 + }, + { + "epoch": 0.09124374405426645, + "grad_norm": 958.8033447265625, + "learning_rate": 8.913131313131314e-06, + "loss": 126.4212, + "step": 11030 + }, + { + "epoch": 0.09132646730363568, + "grad_norm": 4022.8896484375, + "learning_rate": 8.921212121212122e-06, + "loss": 171.8223, + "step": 11040 + }, + { + "epoch": 0.09140919055300492, + "grad_norm": 1437.0355224609375, + "learning_rate": 8.92929292929293e-06, + "loss": 150.3858, + "step": 11050 + }, + { + "epoch": 0.09149191380237416, + "grad_norm": 1457.9029541015625, + "learning_rate": 8.937373737373737e-06, + "loss": 191.1608, + "step": 11060 + }, + { + "epoch": 0.09157463705174339, + "grad_norm": 1079.6673583984375, + "learning_rate": 8.945454545454546e-06, + "loss": 169.316, + "step": 11070 + }, + { + "epoch": 0.09165736030111263, + "grad_norm": 1028.742431640625, + "learning_rate": 8.953535353535354e-06, + "loss": 141.8136, + "step": 11080 + }, + { + "epoch": 0.09174008355048187, + "grad_norm": 1539.916259765625, + "learning_rate": 8.961616161616163e-06, + "loss": 196.1172, + "step": 11090 + }, + { + "epoch": 0.0918228067998511, + "grad_norm": 1310.006591796875, + "learning_rate": 8.969696969696971e-06, + "loss": 153.3139, + "step": 11100 + }, + { + "epoch": 0.09190553004922034, + "grad_norm": 3632.826416015625, + "learning_rate": 8.977777777777778e-06, + "loss": 274.3921, + "step": 11110 + }, + { + "epoch": 0.09198825329858956, + "grad_norm": 1360.5455322265625, + "learning_rate": 8.985858585858586e-06, + "loss": 199.4904, + "step": 11120 + }, + { + "epoch": 0.0920709765479588, + "grad_norm": 1219.487548828125, + "learning_rate": 8.993939393939395e-06, + "loss": 138.8559, + "step": 11130 + }, + { + "epoch": 0.09215369979732804, + "grad_norm": 1320.4920654296875, + "learning_rate": 9.002020202020203e-06, + "loss": 179.0958, + "step": 11140 + }, + { + "epoch": 0.09223642304669727, + "grad_norm": 754.0531616210938, + "learning_rate": 9.010101010101012e-06, + "loss": 183.8919, + "step": 11150 + }, + { + "epoch": 0.09231914629606651, + "grad_norm": 1132.8018798828125, + "learning_rate": 9.01818181818182e-06, + "loss": 138.0276, + "step": 11160 + }, + { + "epoch": 0.09240186954543575, + "grad_norm": 1015.0302124023438, + "learning_rate": 9.026262626262627e-06, + "loss": 169.833, + "step": 11170 + }, + { + "epoch": 0.09248459279480498, + "grad_norm": 1256.841552734375, + "learning_rate": 9.034343434343435e-06, + 
"loss": 192.5117, + "step": 11180 + }, + { + "epoch": 0.09256731604417422, + "grad_norm": 637.6029663085938, + "learning_rate": 9.042424242424244e-06, + "loss": 136.4471, + "step": 11190 + }, + { + "epoch": 0.09265003929354346, + "grad_norm": 2063.305419921875, + "learning_rate": 9.050505050505052e-06, + "loss": 187.3035, + "step": 11200 + }, + { + "epoch": 0.09273276254291268, + "grad_norm": 893.4026489257812, + "learning_rate": 9.058585858585859e-06, + "loss": 197.6645, + "step": 11210 + }, + { + "epoch": 0.09281548579228192, + "grad_norm": 11727.92578125, + "learning_rate": 9.066666666666667e-06, + "loss": 184.4137, + "step": 11220 + }, + { + "epoch": 0.09289820904165115, + "grad_norm": 939.4194946289062, + "learning_rate": 9.074747474747476e-06, + "loss": 163.7697, + "step": 11230 + }, + { + "epoch": 0.09298093229102039, + "grad_norm": 1211.7532958984375, + "learning_rate": 9.082828282828283e-06, + "loss": 150.1024, + "step": 11240 + }, + { + "epoch": 0.09306365554038963, + "grad_norm": 1020.1021118164062, + "learning_rate": 9.090909090909091e-06, + "loss": 176.2725, + "step": 11250 + }, + { + "epoch": 0.09314637878975886, + "grad_norm": 1597.582763671875, + "learning_rate": 9.0989898989899e-06, + "loss": 161.5863, + "step": 11260 + }, + { + "epoch": 0.0932291020391281, + "grad_norm": 1205.4434814453125, + "learning_rate": 9.107070707070708e-06, + "loss": 191.3436, + "step": 11270 + }, + { + "epoch": 0.09331182528849734, + "grad_norm": 1347.427001953125, + "learning_rate": 9.115151515151516e-06, + "loss": 147.056, + "step": 11280 + }, + { + "epoch": 0.09339454853786656, + "grad_norm": 1340.0323486328125, + "learning_rate": 9.123232323232323e-06, + "loss": 166.0659, + "step": 11290 + }, + { + "epoch": 0.0934772717872358, + "grad_norm": 1243.469482421875, + "learning_rate": 9.131313131313132e-06, + "loss": 160.2771, + "step": 11300 + }, + { + "epoch": 0.09355999503660503, + "grad_norm": 1073.79638671875, + "learning_rate": 9.13939393939394e-06, + "loss": 189.4957, + "step": 11310 + }, + { + "epoch": 0.09364271828597427, + "grad_norm": 1185.810546875, + "learning_rate": 9.147474747474748e-06, + "loss": 169.7764, + "step": 11320 + }, + { + "epoch": 0.09372544153534351, + "grad_norm": 1002.9879760742188, + "learning_rate": 9.155555555555557e-06, + "loss": 117.5662, + "step": 11330 + }, + { + "epoch": 0.09380816478471274, + "grad_norm": 1063.5313720703125, + "learning_rate": 9.163636363636365e-06, + "loss": 199.0167, + "step": 11340 + }, + { + "epoch": 0.09389088803408198, + "grad_norm": 975.7847290039062, + "learning_rate": 9.171717171717172e-06, + "loss": 178.3596, + "step": 11350 + }, + { + "epoch": 0.09397361128345122, + "grad_norm": 1078.6475830078125, + "learning_rate": 9.17979797979798e-06, + "loss": 167.8156, + "step": 11360 + }, + { + "epoch": 0.09405633453282045, + "grad_norm": 2150.571044921875, + "learning_rate": 9.187878787878789e-06, + "loss": 151.6369, + "step": 11370 + }, + { + "epoch": 0.09413905778218969, + "grad_norm": 1107.6488037109375, + "learning_rate": 9.195959595959597e-06, + "loss": 138.8753, + "step": 11380 + }, + { + "epoch": 0.09422178103155891, + "grad_norm": 2013.7427978515625, + "learning_rate": 9.204040404040406e-06, + "loss": 168.8607, + "step": 11390 + }, + { + "epoch": 0.09430450428092815, + "grad_norm": 926.5513916015625, + "learning_rate": 9.212121212121213e-06, + "loss": 155.2859, + "step": 11400 + }, + { + "epoch": 0.0943872275302974, + "grad_norm": 640.766357421875, + "learning_rate": 9.220202020202021e-06, + "loss": 170.4931, + "step": 11410 + }, + { 
+ "epoch": 0.09446995077966662, + "grad_norm": 2521.883544921875, + "learning_rate": 9.228282828282828e-06, + "loss": 163.8456, + "step": 11420 + }, + { + "epoch": 0.09455267402903586, + "grad_norm": 2574.5419921875, + "learning_rate": 9.236363636363636e-06, + "loss": 192.8077, + "step": 11430 + }, + { + "epoch": 0.0946353972784051, + "grad_norm": 1514.116943359375, + "learning_rate": 9.244444444444445e-06, + "loss": 190.4653, + "step": 11440 + }, + { + "epoch": 0.09471812052777433, + "grad_norm": 1116.5655517578125, + "learning_rate": 9.252525252525253e-06, + "loss": 150.5008, + "step": 11450 + }, + { + "epoch": 0.09480084377714357, + "grad_norm": 7808.6015625, + "learning_rate": 9.260606060606062e-06, + "loss": 282.1488, + "step": 11460 + }, + { + "epoch": 0.09488356702651281, + "grad_norm": 1106.9825439453125, + "learning_rate": 9.268686868686868e-06, + "loss": 195.7642, + "step": 11470 + }, + { + "epoch": 0.09496629027588203, + "grad_norm": 3581.72900390625, + "learning_rate": 9.276767676767677e-06, + "loss": 179.7523, + "step": 11480 + }, + { + "epoch": 0.09504901352525127, + "grad_norm": 1101.411865234375, + "learning_rate": 9.284848484848485e-06, + "loss": 199.7394, + "step": 11490 + }, + { + "epoch": 0.0951317367746205, + "grad_norm": 1929.9052734375, + "learning_rate": 9.292929292929294e-06, + "loss": 174.8564, + "step": 11500 + }, + { + "epoch": 0.09521446002398974, + "grad_norm": 1121.5028076171875, + "learning_rate": 9.301010101010102e-06, + "loss": 134.3296, + "step": 11510 + }, + { + "epoch": 0.09529718327335898, + "grad_norm": 1080.38671875, + "learning_rate": 9.30909090909091e-06, + "loss": 173.9918, + "step": 11520 + }, + { + "epoch": 0.09537990652272821, + "grad_norm": 1371.1961669921875, + "learning_rate": 9.317171717171717e-06, + "loss": 213.5175, + "step": 11530 + }, + { + "epoch": 0.09546262977209745, + "grad_norm": 1480.9495849609375, + "learning_rate": 9.325252525252526e-06, + "loss": 136.4908, + "step": 11540 + }, + { + "epoch": 0.09554535302146669, + "grad_norm": 912.1797485351562, + "learning_rate": 9.333333333333334e-06, + "loss": 215.026, + "step": 11550 + }, + { + "epoch": 0.09562807627083592, + "grad_norm": 2468.00341796875, + "learning_rate": 9.341414141414143e-06, + "loss": 175.5986, + "step": 11560 + }, + { + "epoch": 0.09571079952020516, + "grad_norm": 1296.7786865234375, + "learning_rate": 9.349494949494951e-06, + "loss": 209.1408, + "step": 11570 + }, + { + "epoch": 0.09579352276957438, + "grad_norm": 1264.5654296875, + "learning_rate": 9.357575757575758e-06, + "loss": 200.8274, + "step": 11580 + }, + { + "epoch": 0.09587624601894362, + "grad_norm": 1155.8309326171875, + "learning_rate": 9.365656565656566e-06, + "loss": 164.7913, + "step": 11590 + }, + { + "epoch": 0.09595896926831286, + "grad_norm": 1020.3081665039062, + "learning_rate": 9.373737373737375e-06, + "loss": 155.3867, + "step": 11600 + }, + { + "epoch": 0.09604169251768209, + "grad_norm": 1648.6768798828125, + "learning_rate": 9.381818181818183e-06, + "loss": 150.0429, + "step": 11610 + }, + { + "epoch": 0.09612441576705133, + "grad_norm": 1153.5174560546875, + "learning_rate": 9.389898989898992e-06, + "loss": 183.8605, + "step": 11620 + }, + { + "epoch": 0.09620713901642057, + "grad_norm": 1894.8492431640625, + "learning_rate": 9.397979797979799e-06, + "loss": 178.0279, + "step": 11630 + }, + { + "epoch": 0.0962898622657898, + "grad_norm": 1127.750244140625, + "learning_rate": 9.406060606060607e-06, + "loss": 162.6299, + "step": 11640 + }, + { + "epoch": 0.09637258551515904, + "grad_norm": 
1268.74267578125, + "learning_rate": 9.414141414141414e-06, + "loss": 155.4865, + "step": 11650 + }, + { + "epoch": 0.09645530876452828, + "grad_norm": 1007.6961669921875, + "learning_rate": 9.422222222222222e-06, + "loss": 209.233, + "step": 11660 + }, + { + "epoch": 0.0965380320138975, + "grad_norm": 1457.4285888671875, + "learning_rate": 9.43030303030303e-06, + "loss": 163.3144, + "step": 11670 + }, + { + "epoch": 0.09662075526326674, + "grad_norm": 1042.603515625, + "learning_rate": 9.438383838383839e-06, + "loss": 182.3341, + "step": 11680 + }, + { + "epoch": 0.09670347851263597, + "grad_norm": 1047.1702880859375, + "learning_rate": 9.446464646464648e-06, + "loss": 167.2329, + "step": 11690 + }, + { + "epoch": 0.09678620176200521, + "grad_norm": 1840.010009765625, + "learning_rate": 9.454545454545456e-06, + "loss": 204.4755, + "step": 11700 + }, + { + "epoch": 0.09686892501137445, + "grad_norm": 573.9185180664062, + "learning_rate": 9.462626262626263e-06, + "loss": 106.0682, + "step": 11710 + }, + { + "epoch": 0.09695164826074368, + "grad_norm": 1605.5093994140625, + "learning_rate": 9.470707070707071e-06, + "loss": 214.7734, + "step": 11720 + }, + { + "epoch": 0.09703437151011292, + "grad_norm": 2460.1376953125, + "learning_rate": 9.47878787878788e-06, + "loss": 180.7382, + "step": 11730 + }, + { + "epoch": 0.09711709475948216, + "grad_norm": 1100.57080078125, + "learning_rate": 9.486868686868688e-06, + "loss": 220.1118, + "step": 11740 + }, + { + "epoch": 0.09719981800885139, + "grad_norm": 937.7584838867188, + "learning_rate": 9.494949494949497e-06, + "loss": 176.8332, + "step": 11750 + }, + { + "epoch": 0.09728254125822063, + "grad_norm": 1534.452880859375, + "learning_rate": 9.503030303030303e-06, + "loss": 173.9935, + "step": 11760 + }, + { + "epoch": 0.09736526450758985, + "grad_norm": 1466.6376953125, + "learning_rate": 9.511111111111112e-06, + "loss": 196.6087, + "step": 11770 + }, + { + "epoch": 0.09744798775695909, + "grad_norm": 1731.87841796875, + "learning_rate": 9.51919191919192e-06, + "loss": 179.0696, + "step": 11780 + }, + { + "epoch": 0.09753071100632833, + "grad_norm": 1235.0560302734375, + "learning_rate": 9.527272727272729e-06, + "loss": 164.9756, + "step": 11790 + }, + { + "epoch": 0.09761343425569756, + "grad_norm": 662.07568359375, + "learning_rate": 9.535353535353537e-06, + "loss": 155.2859, + "step": 11800 + }, + { + "epoch": 0.0976961575050668, + "grad_norm": 642.6212158203125, + "learning_rate": 9.543434343434344e-06, + "loss": 183.2646, + "step": 11810 + }, + { + "epoch": 0.09777888075443604, + "grad_norm": 861.9931030273438, + "learning_rate": 9.551515151515152e-06, + "loss": 140.6058, + "step": 11820 + }, + { + "epoch": 0.09786160400380527, + "grad_norm": 2064.848388671875, + "learning_rate": 9.55959595959596e-06, + "loss": 175.8456, + "step": 11830 + }, + { + "epoch": 0.0979443272531745, + "grad_norm": 1402.7391357421875, + "learning_rate": 9.56767676767677e-06, + "loss": 168.1851, + "step": 11840 + }, + { + "epoch": 0.09802705050254373, + "grad_norm": 1274.8916015625, + "learning_rate": 9.575757575757576e-06, + "loss": 177.3613, + "step": 11850 + }, + { + "epoch": 0.09810977375191297, + "grad_norm": 1175.4793701171875, + "learning_rate": 9.583838383838384e-06, + "loss": 155.9946, + "step": 11860 + }, + { + "epoch": 0.09819249700128221, + "grad_norm": 1113.3656005859375, + "learning_rate": 9.591919191919193e-06, + "loss": 174.7684, + "step": 11870 + }, + { + "epoch": 0.09827522025065144, + "grad_norm": 1227.9544677734375, + "learning_rate": 
9.600000000000001e-06, + "loss": 173.7199, + "step": 11880 + }, + { + "epoch": 0.09835794350002068, + "grad_norm": 1131.5029296875, + "learning_rate": 9.608080808080808e-06, + "loss": 179.6841, + "step": 11890 + }, + { + "epoch": 0.09844066674938992, + "grad_norm": 1879.9422607421875, + "learning_rate": 9.616161616161616e-06, + "loss": 201.8642, + "step": 11900 + }, + { + "epoch": 0.09852338999875915, + "grad_norm": 1383.9271240234375, + "learning_rate": 9.624242424242425e-06, + "loss": 155.471, + "step": 11910 + }, + { + "epoch": 0.09860611324812839, + "grad_norm": 1202.215576171875, + "learning_rate": 9.632323232323233e-06, + "loss": 176.2691, + "step": 11920 + }, + { + "epoch": 0.09868883649749763, + "grad_norm": 713.47216796875, + "learning_rate": 9.640404040404042e-06, + "loss": 129.0435, + "step": 11930 + }, + { + "epoch": 0.09877155974686685, + "grad_norm": 1676.49658203125, + "learning_rate": 9.648484848484849e-06, + "loss": 158.0141, + "step": 11940 + }, + { + "epoch": 0.0988542829962361, + "grad_norm": 822.8709716796875, + "learning_rate": 9.656565656565657e-06, + "loss": 128.7683, + "step": 11950 + }, + { + "epoch": 0.09893700624560532, + "grad_norm": 2505.596923828125, + "learning_rate": 9.664646464646465e-06, + "loss": 173.4101, + "step": 11960 + }, + { + "epoch": 0.09901972949497456, + "grad_norm": 764.0625610351562, + "learning_rate": 9.672727272727274e-06, + "loss": 186.7536, + "step": 11970 + }, + { + "epoch": 0.0991024527443438, + "grad_norm": 1434.5181884765625, + "learning_rate": 9.680808080808082e-06, + "loss": 195.2917, + "step": 11980 + }, + { + "epoch": 0.09918517599371303, + "grad_norm": 916.0157470703125, + "learning_rate": 9.688888888888889e-06, + "loss": 151.0252, + "step": 11990 + }, + { + "epoch": 0.09926789924308227, + "grad_norm": 1034.74072265625, + "learning_rate": 9.696969696969698e-06, + "loss": 163.2282, + "step": 12000 + }, + { + "epoch": 0.09935062249245151, + "grad_norm": 1380.8863525390625, + "learning_rate": 9.705050505050506e-06, + "loss": 139.9053, + "step": 12010 + }, + { + "epoch": 0.09943334574182074, + "grad_norm": 911.8162231445312, + "learning_rate": 9.713131313131314e-06, + "loss": 159.4434, + "step": 12020 + }, + { + "epoch": 0.09951606899118998, + "grad_norm": 1710.5338134765625, + "learning_rate": 9.721212121212123e-06, + "loss": 152.8905, + "step": 12030 + }, + { + "epoch": 0.0995987922405592, + "grad_norm": 1827.9671630859375, + "learning_rate": 9.729292929292931e-06, + "loss": 195.7561, + "step": 12040 + }, + { + "epoch": 0.09968151548992844, + "grad_norm": 1594.53759765625, + "learning_rate": 9.737373737373738e-06, + "loss": 172.4959, + "step": 12050 + }, + { + "epoch": 0.09976423873929768, + "grad_norm": 985.9189453125, + "learning_rate": 9.745454545454547e-06, + "loss": 167.3074, + "step": 12060 + }, + { + "epoch": 0.09984696198866691, + "grad_norm": 1075.2579345703125, + "learning_rate": 9.753535353535353e-06, + "loss": 163.0599, + "step": 12070 + }, + { + "epoch": 0.09992968523803615, + "grad_norm": 2030.679443359375, + "learning_rate": 9.761616161616162e-06, + "loss": 163.872, + "step": 12080 + }, + { + "epoch": 0.10001240848740539, + "grad_norm": 1216.58984375, + "learning_rate": 9.76969696969697e-06, + "loss": 158.4474, + "step": 12090 + }, + { + "epoch": 0.10009513173677462, + "grad_norm": 1842.97998046875, + "learning_rate": 9.777777777777779e-06, + "loss": 225.2362, + "step": 12100 + }, + { + "epoch": 0.10017785498614386, + "grad_norm": 1359.1461181640625, + "learning_rate": 9.785858585858587e-06, + "loss": 172.9057, + 
"step": 12110 + }, + { + "epoch": 0.10026057823551308, + "grad_norm": 1055.8148193359375, + "learning_rate": 9.793939393939394e-06, + "loss": 153.7417, + "step": 12120 + }, + { + "epoch": 0.10034330148488232, + "grad_norm": 1045.6163330078125, + "learning_rate": 9.802020202020202e-06, + "loss": 151.8674, + "step": 12130 + }, + { + "epoch": 0.10042602473425156, + "grad_norm": 702.3483276367188, + "learning_rate": 9.81010101010101e-06, + "loss": 154.9376, + "step": 12140 + }, + { + "epoch": 0.10050874798362079, + "grad_norm": 582.2290649414062, + "learning_rate": 9.81818181818182e-06, + "loss": 191.1368, + "step": 12150 + }, + { + "epoch": 0.10059147123299003, + "grad_norm": 1067.549072265625, + "learning_rate": 9.826262626262628e-06, + "loss": 215.4936, + "step": 12160 + }, + { + "epoch": 0.10067419448235927, + "grad_norm": 1477.08251953125, + "learning_rate": 9.834343434343434e-06, + "loss": 179.9154, + "step": 12170 + }, + { + "epoch": 0.1007569177317285, + "grad_norm": 1615.7445068359375, + "learning_rate": 9.842424242424243e-06, + "loss": 151.6433, + "step": 12180 + }, + { + "epoch": 0.10083964098109774, + "grad_norm": 1283.7108154296875, + "learning_rate": 9.850505050505051e-06, + "loss": 176.6817, + "step": 12190 + }, + { + "epoch": 0.10092236423046698, + "grad_norm": 1741.5172119140625, + "learning_rate": 9.85858585858586e-06, + "loss": 148.4978, + "step": 12200 + }, + { + "epoch": 0.1010050874798362, + "grad_norm": 956.2932739257812, + "learning_rate": 9.866666666666668e-06, + "loss": 178.0466, + "step": 12210 + }, + { + "epoch": 0.10108781072920545, + "grad_norm": 771.7984008789062, + "learning_rate": 9.874747474747477e-06, + "loss": 177.3261, + "step": 12220 + }, + { + "epoch": 0.10117053397857467, + "grad_norm": 703.4427490234375, + "learning_rate": 9.882828282828283e-06, + "loss": 175.3549, + "step": 12230 + }, + { + "epoch": 0.10125325722794391, + "grad_norm": 931.7167358398438, + "learning_rate": 9.890909090909092e-06, + "loss": 158.6873, + "step": 12240 + }, + { + "epoch": 0.10133598047731315, + "grad_norm": 1080.5570068359375, + "learning_rate": 9.8989898989899e-06, + "loss": 109.4355, + "step": 12250 + }, + { + "epoch": 0.10141870372668238, + "grad_norm": 1163.09423828125, + "learning_rate": 9.907070707070709e-06, + "loss": 164.6523, + "step": 12260 + }, + { + "epoch": 0.10150142697605162, + "grad_norm": 870.4569091796875, + "learning_rate": 9.915151515151515e-06, + "loss": 162.8174, + "step": 12270 + }, + { + "epoch": 0.10158415022542086, + "grad_norm": 976.0827026367188, + "learning_rate": 9.923232323232324e-06, + "loss": 149.6927, + "step": 12280 + }, + { + "epoch": 0.10166687347479009, + "grad_norm": 881.8626098632812, + "learning_rate": 9.931313131313132e-06, + "loss": 128.7362, + "step": 12290 + }, + { + "epoch": 0.10174959672415933, + "grad_norm": 612.5805053710938, + "learning_rate": 9.939393939393939e-06, + "loss": 129.0437, + "step": 12300 + }, + { + "epoch": 0.10183231997352855, + "grad_norm": 1372.064453125, + "learning_rate": 9.947474747474748e-06, + "loss": 188.9469, + "step": 12310 + }, + { + "epoch": 0.1019150432228978, + "grad_norm": 1295.59375, + "learning_rate": 9.955555555555556e-06, + "loss": 167.0848, + "step": 12320 + }, + { + "epoch": 0.10199776647226703, + "grad_norm": 1037.228515625, + "learning_rate": 9.963636363636364e-06, + "loss": 176.8787, + "step": 12330 + }, + { + "epoch": 0.10208048972163626, + "grad_norm": 1227.047607421875, + "learning_rate": 9.971717171717173e-06, + "loss": 198.2504, + "step": 12340 + }, + { + "epoch": 
0.1021632129710055, + "grad_norm": 1416.220458984375, + "learning_rate": 9.97979797979798e-06, + "loss": 179.9958, + "step": 12350 + }, + { + "epoch": 0.10224593622037474, + "grad_norm": 1424.654052734375, + "learning_rate": 9.987878787878788e-06, + "loss": 191.8388, + "step": 12360 + }, + { + "epoch": 0.10232865946974397, + "grad_norm": 1317.3236083984375, + "learning_rate": 9.995959595959597e-06, + "loss": 167.6917, + "step": 12370 + }, + { + "epoch": 0.10241138271911321, + "grad_norm": 868.8121337890625, + "learning_rate": 9.99999995027162e-06, + "loss": 156.1878, + "step": 12380 + }, + { + "epoch": 0.10249410596848244, + "grad_norm": 889.690185546875, + "learning_rate": 9.99999955244457e-06, + "loss": 188.0309, + "step": 12390 + }, + { + "epoch": 0.10257682921785168, + "grad_norm": 964.9615478515625, + "learning_rate": 9.999998756790503e-06, + "loss": 162.0736, + "step": 12400 + }, + { + "epoch": 0.10265955246722092, + "grad_norm": 2292.847900390625, + "learning_rate": 9.999997563309483e-06, + "loss": 138.4502, + "step": 12410 + }, + { + "epoch": 0.10274227571659014, + "grad_norm": 1073.1864013671875, + "learning_rate": 9.999995972001602e-06, + "loss": 183.7101, + "step": 12420 + }, + { + "epoch": 0.10282499896595938, + "grad_norm": 1411.09521484375, + "learning_rate": 9.99999398286699e-06, + "loss": 172.5208, + "step": 12430 + }, + { + "epoch": 0.10290772221532862, + "grad_norm": 1048.4619140625, + "learning_rate": 9.999991595905803e-06, + "loss": 169.999, + "step": 12440 + }, + { + "epoch": 0.10299044546469785, + "grad_norm": 986.3778686523438, + "learning_rate": 9.999988811118232e-06, + "loss": 146.7089, + "step": 12450 + }, + { + "epoch": 0.10307316871406709, + "grad_norm": 795.2116088867188, + "learning_rate": 9.999985628504498e-06, + "loss": 159.4188, + "step": 12460 + }, + { + "epoch": 0.10315589196343633, + "grad_norm": 919.0172119140625, + "learning_rate": 9.999982048064854e-06, + "loss": 144.6598, + "step": 12470 + }, + { + "epoch": 0.10323861521280556, + "grad_norm": 1297.25732421875, + "learning_rate": 9.999978069799585e-06, + "loss": 171.9091, + "step": 12480 + }, + { + "epoch": 0.1033213384621748, + "grad_norm": 1352.2923583984375, + "learning_rate": 9.999973693709008e-06, + "loss": 151.0232, + "step": 12490 + }, + { + "epoch": 0.10340406171154402, + "grad_norm": 1723.31640625, + "learning_rate": 9.99996891979347e-06, + "loss": 131.0583, + "step": 12500 + }, + { + "epoch": 0.10348678496091326, + "grad_norm": 1077.4090576171875, + "learning_rate": 9.999963748053354e-06, + "loss": 155.0065, + "step": 12510 + }, + { + "epoch": 0.1035695082102825, + "grad_norm": 881.5154418945312, + "learning_rate": 9.999958178489069e-06, + "loss": 150.2538, + "step": 12520 + }, + { + "epoch": 0.10365223145965173, + "grad_norm": 1021.1143188476562, + "learning_rate": 9.999952211101056e-06, + "loss": 154.2661, + "step": 12530 + }, + { + "epoch": 0.10373495470902097, + "grad_norm": 1220.9776611328125, + "learning_rate": 9.999945845889795e-06, + "loss": 154.2683, + "step": 12540 + }, + { + "epoch": 0.10381767795839021, + "grad_norm": 1148.5760498046875, + "learning_rate": 9.999939082855788e-06, + "loss": 137.0497, + "step": 12550 + }, + { + "epoch": 0.10390040120775944, + "grad_norm": 1300.0975341796875, + "learning_rate": 9.999931921999575e-06, + "loss": 169.4536, + "step": 12560 + }, + { + "epoch": 0.10398312445712868, + "grad_norm": 1183.8826904296875, + "learning_rate": 9.999924363321726e-06, + "loss": 166.6497, + "step": 12570 + }, + { + "epoch": 0.1040658477064979, + "grad_norm": 
718.7395629882812, + "learning_rate": 9.999916406822843e-06, + "loss": 109.411, + "step": 12580 + }, + { + "epoch": 0.10414857095586715, + "grad_norm": 841.6725463867188, + "learning_rate": 9.999908052503557e-06, + "loss": 146.8344, + "step": 12590 + }, + { + "epoch": 0.10423129420523639, + "grad_norm": 918.6251831054688, + "learning_rate": 9.999899300364534e-06, + "loss": 162.9566, + "step": 12600 + }, + { + "epoch": 0.10431401745460561, + "grad_norm": 1467.325927734375, + "learning_rate": 9.99989015040647e-06, + "loss": 189.8623, + "step": 12610 + }, + { + "epoch": 0.10439674070397485, + "grad_norm": 2947.025146484375, + "learning_rate": 9.999880602630092e-06, + "loss": 141.2983, + "step": 12620 + }, + { + "epoch": 0.10447946395334409, + "grad_norm": 1483.4896240234375, + "learning_rate": 9.999870657036161e-06, + "loss": 162.2035, + "step": 12630 + }, + { + "epoch": 0.10456218720271332, + "grad_norm": 1846.94580078125, + "learning_rate": 9.99986031362547e-06, + "loss": 150.0733, + "step": 12640 + }, + { + "epoch": 0.10464491045208256, + "grad_norm": 1538.964111328125, + "learning_rate": 9.99984957239884e-06, + "loss": 206.5021, + "step": 12650 + }, + { + "epoch": 0.1047276337014518, + "grad_norm": 1100.305908203125, + "learning_rate": 9.999838433357124e-06, + "loss": 197.57, + "step": 12660 + }, + { + "epoch": 0.10481035695082103, + "grad_norm": 594.6061401367188, + "learning_rate": 9.99982689650121e-06, + "loss": 179.5414, + "step": 12670 + }, + { + "epoch": 0.10489308020019027, + "grad_norm": 737.6015014648438, + "learning_rate": 9.999814961832018e-06, + "loss": 170.4644, + "step": 12680 + }, + { + "epoch": 0.1049758034495595, + "grad_norm": 1346.4503173828125, + "learning_rate": 9.999802629350492e-06, + "loss": 202.0369, + "step": 12690 + }, + { + "epoch": 0.10505852669892873, + "grad_norm": 2559.85546875, + "learning_rate": 9.99978989905762e-06, + "loss": 221.5045, + "step": 12700 + }, + { + "epoch": 0.10514124994829797, + "grad_norm": 1314.4476318359375, + "learning_rate": 9.999776770954411e-06, + "loss": 173.8742, + "step": 12710 + }, + { + "epoch": 0.1052239731976672, + "grad_norm": 3551.19677734375, + "learning_rate": 9.99976324504191e-06, + "loss": 149.5013, + "step": 12720 + }, + { + "epoch": 0.10530669644703644, + "grad_norm": 1428.34814453125, + "learning_rate": 9.999749321321192e-06, + "loss": 251.0179, + "step": 12730 + }, + { + "epoch": 0.10538941969640568, + "grad_norm": 753.5355224609375, + "learning_rate": 9.999734999793369e-06, + "loss": 163.6853, + "step": 12740 + }, + { + "epoch": 0.10547214294577491, + "grad_norm": 1578.79541015625, + "learning_rate": 9.999720280459576e-06, + "loss": 154.6416, + "step": 12750 + }, + { + "epoch": 0.10555486619514415, + "grad_norm": 1111.734130859375, + "learning_rate": 9.999705163320987e-06, + "loss": 177.3941, + "step": 12760 + }, + { + "epoch": 0.10563758944451337, + "grad_norm": 1722.9654541015625, + "learning_rate": 9.999689648378801e-06, + "loss": 179.8888, + "step": 12770 + }, + { + "epoch": 0.10572031269388261, + "grad_norm": 1038.706298828125, + "learning_rate": 9.999673735634259e-06, + "loss": 127.1906, + "step": 12780 + }, + { + "epoch": 0.10580303594325186, + "grad_norm": 11962.8779296875, + "learning_rate": 9.99965742508862e-06, + "loss": 213.939, + "step": 12790 + }, + { + "epoch": 0.10588575919262108, + "grad_norm": 1482.868408203125, + "learning_rate": 9.999640716743186e-06, + "loss": 133.2547, + "step": 12800 + }, + { + "epoch": 0.10596848244199032, + "grad_norm": 3282.25390625, + "learning_rate": 
9.999623610599287e-06, + "loss": 160.013, + "step": 12810 + }, + { + "epoch": 0.10605120569135956, + "grad_norm": 1836.9112548828125, + "learning_rate": 9.999606106658282e-06, + "loss": 201.4363, + "step": 12820 + }, + { + "epoch": 0.10613392894072879, + "grad_norm": 816.5084838867188, + "learning_rate": 9.999588204921562e-06, + "loss": 174.8686, + "step": 12830 + }, + { + "epoch": 0.10621665219009803, + "grad_norm": 1352.0396728515625, + "learning_rate": 9.999569905390556e-06, + "loss": 167.4276, + "step": 12840 + }, + { + "epoch": 0.10629937543946726, + "grad_norm": 533.4453125, + "learning_rate": 9.999551208066716e-06, + "loss": 100.8425, + "step": 12850 + }, + { + "epoch": 0.1063820986888365, + "grad_norm": 1191.3555908203125, + "learning_rate": 9.99953211295153e-06, + "loss": 147.7438, + "step": 12860 + }, + { + "epoch": 0.10646482193820574, + "grad_norm": 1642.895263671875, + "learning_rate": 9.999512620046523e-06, + "loss": 170.0533, + "step": 12870 + }, + { + "epoch": 0.10654754518757496, + "grad_norm": 1362.4990234375, + "learning_rate": 9.999492729353238e-06, + "loss": 177.1871, + "step": 12880 + }, + { + "epoch": 0.1066302684369442, + "grad_norm": 879.8888549804688, + "learning_rate": 9.999472440873261e-06, + "loss": 160.7778, + "step": 12890 + }, + { + "epoch": 0.10671299168631344, + "grad_norm": 1243.377685546875, + "learning_rate": 9.999451754608208e-06, + "loss": 150.4305, + "step": 12900 + }, + { + "epoch": 0.10679571493568267, + "grad_norm": 984.1234130859375, + "learning_rate": 9.999430670559723e-06, + "loss": 139.6723, + "step": 12910 + }, + { + "epoch": 0.10687843818505191, + "grad_norm": 852.8709716796875, + "learning_rate": 9.999409188729484e-06, + "loss": 134.8206, + "step": 12920 + }, + { + "epoch": 0.10696116143442115, + "grad_norm": 886.4750366210938, + "learning_rate": 9.999387309119198e-06, + "loss": 150.4601, + "step": 12930 + }, + { + "epoch": 0.10704388468379038, + "grad_norm": 1050.5213623046875, + "learning_rate": 9.999365031730609e-06, + "loss": 162.7591, + "step": 12940 + }, + { + "epoch": 0.10712660793315962, + "grad_norm": 1679.367919921875, + "learning_rate": 9.99934235656549e-06, + "loss": 146.1199, + "step": 12950 + }, + { + "epoch": 0.10720933118252884, + "grad_norm": 1546.12939453125, + "learning_rate": 9.999319283625641e-06, + "loss": 207.9114, + "step": 12960 + }, + { + "epoch": 0.10729205443189808, + "grad_norm": 856.313720703125, + "learning_rate": 9.999295812912902e-06, + "loss": 168.0912, + "step": 12970 + }, + { + "epoch": 0.10737477768126732, + "grad_norm": 1466.169677734375, + "learning_rate": 9.999271944429139e-06, + "loss": 202.6795, + "step": 12980 + }, + { + "epoch": 0.10745750093063655, + "grad_norm": 1105.8492431640625, + "learning_rate": 9.99924767817625e-06, + "loss": 139.1426, + "step": 12990 + }, + { + "epoch": 0.10754022418000579, + "grad_norm": 1072.4610595703125, + "learning_rate": 9.999223014156167e-06, + "loss": 196.2584, + "step": 13000 + }, + { + "epoch": 0.10762294742937503, + "grad_norm": 820.6765747070312, + "learning_rate": 9.999197952370851e-06, + "loss": 177.7073, + "step": 13010 + }, + { + "epoch": 0.10770567067874426, + "grad_norm": 788.7813720703125, + "learning_rate": 9.9991724928223e-06, + "loss": 157.0522, + "step": 13020 + }, + { + "epoch": 0.1077883939281135, + "grad_norm": 1179.322265625, + "learning_rate": 9.999146635512535e-06, + "loss": 189.2783, + "step": 13030 + }, + { + "epoch": 0.10787111717748273, + "grad_norm": 1035.255615234375, + "learning_rate": 9.999120380443614e-06, + "loss": 145.2693, + 
"step": 13040 + }, + { + "epoch": 0.10795384042685197, + "grad_norm": 1015.682861328125, + "learning_rate": 9.99909372761763e-06, + "loss": 143.6347, + "step": 13050 + }, + { + "epoch": 0.1080365636762212, + "grad_norm": 2936.619873046875, + "learning_rate": 9.9990666770367e-06, + "loss": 179.5399, + "step": 13060 + }, + { + "epoch": 0.10811928692559043, + "grad_norm": 1349.863037109375, + "learning_rate": 9.999039228702975e-06, + "loss": 176.5338, + "step": 13070 + }, + { + "epoch": 0.10820201017495967, + "grad_norm": 1154.5147705078125, + "learning_rate": 9.999011382618644e-06, + "loss": 166.4444, + "step": 13080 + }, + { + "epoch": 0.10828473342432891, + "grad_norm": 1495.1844482421875, + "learning_rate": 9.998983138785919e-06, + "loss": 149.649, + "step": 13090 + }, + { + "epoch": 0.10836745667369814, + "grad_norm": 1207.58984375, + "learning_rate": 9.998954497207045e-06, + "loss": 149.596, + "step": 13100 + }, + { + "epoch": 0.10845017992306738, + "grad_norm": 1018.6047973632812, + "learning_rate": 9.998925457884307e-06, + "loss": 154.7165, + "step": 13110 + }, + { + "epoch": 0.1085329031724366, + "grad_norm": 1084.2874755859375, + "learning_rate": 9.99889602082001e-06, + "loss": 145.5709, + "step": 13120 + }, + { + "epoch": 0.10861562642180585, + "grad_norm": 1162.242919921875, + "learning_rate": 9.998866186016501e-06, + "loss": 168.6215, + "step": 13130 + }, + { + "epoch": 0.10869834967117509, + "grad_norm": 1217.2177734375, + "learning_rate": 9.99883595347615e-06, + "loss": 150.9769, + "step": 13140 + }, + { + "epoch": 0.10878107292054431, + "grad_norm": 884.3536376953125, + "learning_rate": 9.998805323201364e-06, + "loss": 161.5837, + "step": 13150 + }, + { + "epoch": 0.10886379616991355, + "grad_norm": 1069.2374267578125, + "learning_rate": 9.998774295194579e-06, + "loss": 167.3784, + "step": 13160 + }, + { + "epoch": 0.1089465194192828, + "grad_norm": 749.6088256835938, + "learning_rate": 9.998742869458264e-06, + "loss": 115.7084, + "step": 13170 + }, + { + "epoch": 0.10902924266865202, + "grad_norm": 703.9533081054688, + "learning_rate": 9.998711045994922e-06, + "loss": 162.0743, + "step": 13180 + }, + { + "epoch": 0.10911196591802126, + "grad_norm": 1521.7666015625, + "learning_rate": 9.998678824807082e-06, + "loss": 189.686, + "step": 13190 + }, + { + "epoch": 0.1091946891673905, + "grad_norm": 1742.9715576171875, + "learning_rate": 9.99864620589731e-06, + "loss": 163.9251, + "step": 13200 + }, + { + "epoch": 0.10927741241675973, + "grad_norm": 999.13623046875, + "learning_rate": 9.998613189268197e-06, + "loss": 142.0809, + "step": 13210 + }, + { + "epoch": 0.10936013566612897, + "grad_norm": 1409.1549072265625, + "learning_rate": 9.998579774922377e-06, + "loss": 149.1874, + "step": 13220 + }, + { + "epoch": 0.1094428589154982, + "grad_norm": 675.3158569335938, + "learning_rate": 9.998545962862503e-06, + "loss": 117.2426, + "step": 13230 + }, + { + "epoch": 0.10952558216486744, + "grad_norm": 1737.0997314453125, + "learning_rate": 9.998511753091267e-06, + "loss": 155.6872, + "step": 13240 + }, + { + "epoch": 0.10960830541423668, + "grad_norm": 4492.49853515625, + "learning_rate": 9.998477145611389e-06, + "loss": 195.1054, + "step": 13250 + }, + { + "epoch": 0.1096910286636059, + "grad_norm": 1431.2005615234375, + "learning_rate": 9.998442140425625e-06, + "loss": 189.3633, + "step": 13260 + }, + { + "epoch": 0.10977375191297514, + "grad_norm": 1214.457763671875, + "learning_rate": 9.998406737536761e-06, + "loss": 167.9152, + "step": 13270 + }, + { + "epoch": 
0.10985647516234438, + "grad_norm": 553.1201171875, + "learning_rate": 9.998370936947614e-06, + "loss": 154.5592, + "step": 13280 + }, + { + "epoch": 0.10993919841171361, + "grad_norm": 1713.546875, + "learning_rate": 9.998334738661028e-06, + "loss": 178.8647, + "step": 13290 + }, + { + "epoch": 0.11002192166108285, + "grad_norm": 1338.5965576171875, + "learning_rate": 9.998298142679888e-06, + "loss": 209.809, + "step": 13300 + }, + { + "epoch": 0.11010464491045208, + "grad_norm": 1447.3448486328125, + "learning_rate": 9.998261149007104e-06, + "loss": 151.2987, + "step": 13310 + }, + { + "epoch": 0.11018736815982132, + "grad_norm": 1176.512939453125, + "learning_rate": 9.998223757645618e-06, + "loss": 154.0995, + "step": 13320 + }, + { + "epoch": 0.11027009140919056, + "grad_norm": 1491.28466796875, + "learning_rate": 9.998185968598407e-06, + "loss": 172.6219, + "step": 13330 + }, + { + "epoch": 0.11035281465855978, + "grad_norm": 872.3587036132812, + "learning_rate": 9.998147781868477e-06, + "loss": 136.3148, + "step": 13340 + }, + { + "epoch": 0.11043553790792902, + "grad_norm": 1718.0472412109375, + "learning_rate": 9.998109197458865e-06, + "loss": 147.2434, + "step": 13350 + }, + { + "epoch": 0.11051826115729826, + "grad_norm": 1724.40966796875, + "learning_rate": 9.998070215372645e-06, + "loss": 155.5677, + "step": 13360 + }, + { + "epoch": 0.11060098440666749, + "grad_norm": 1238.861572265625, + "learning_rate": 9.998030835612914e-06, + "loss": 177.9599, + "step": 13370 + }, + { + "epoch": 0.11068370765603673, + "grad_norm": 1163.747802734375, + "learning_rate": 9.997991058182807e-06, + "loss": 159.718, + "step": 13380 + }, + { + "epoch": 0.11076643090540596, + "grad_norm": 1202.32763671875, + "learning_rate": 9.997950883085492e-06, + "loss": 161.9838, + "step": 13390 + }, + { + "epoch": 0.1108491541547752, + "grad_norm": 1815.2415771484375, + "learning_rate": 9.99791031032416e-06, + "loss": 187.6479, + "step": 13400 + }, + { + "epoch": 0.11093187740414444, + "grad_norm": 1177.880126953125, + "learning_rate": 9.997869339902043e-06, + "loss": 180.2671, + "step": 13410 + }, + { + "epoch": 0.11101460065351366, + "grad_norm": 1133.1861572265625, + "learning_rate": 9.9978279718224e-06, + "loss": 161.116, + "step": 13420 + }, + { + "epoch": 0.1110973239028829, + "grad_norm": 1010.6656494140625, + "learning_rate": 9.99778620608852e-06, + "loss": 178.742, + "step": 13430 + }, + { + "epoch": 0.11118004715225215, + "grad_norm": 1107.10986328125, + "learning_rate": 9.997744042703731e-06, + "loss": 139.692, + "step": 13440 + }, + { + "epoch": 0.11126277040162137, + "grad_norm": 1005.2382202148438, + "learning_rate": 9.997701481671384e-06, + "loss": 178.2121, + "step": 13450 + }, + { + "epoch": 0.11134549365099061, + "grad_norm": 2284.540283203125, + "learning_rate": 9.997658522994867e-06, + "loss": 156.5188, + "step": 13460 + }, + { + "epoch": 0.11142821690035985, + "grad_norm": 5658.24658203125, + "learning_rate": 9.997615166677597e-06, + "loss": 146.769, + "step": 13470 + }, + { + "epoch": 0.11151094014972908, + "grad_norm": 457.560302734375, + "learning_rate": 9.997571412723024e-06, + "loss": 119.6845, + "step": 13480 + }, + { + "epoch": 0.11159366339909832, + "grad_norm": 1155.69677734375, + "learning_rate": 9.99752726113463e-06, + "loss": 126.1926, + "step": 13490 + }, + { + "epoch": 0.11167638664846755, + "grad_norm": 1057.7230224609375, + "learning_rate": 9.997482711915926e-06, + "loss": 145.3562, + "step": 13500 + }, + { + "epoch": 0.11175910989783679, + "grad_norm": 1107.236328125, + 
"learning_rate": 9.99743776507046e-06, + "loss": 189.204, + "step": 13510 + }, + { + "epoch": 0.11184183314720603, + "grad_norm": 1068.25634765625, + "learning_rate": 9.997392420601804e-06, + "loss": 135.3788, + "step": 13520 + }, + { + "epoch": 0.11192455639657525, + "grad_norm": 875.785888671875, + "learning_rate": 9.99734667851357e-06, + "loss": 152.927, + "step": 13530 + }, + { + "epoch": 0.1120072796459445, + "grad_norm": 1772.181396484375, + "learning_rate": 9.997300538809394e-06, + "loss": 222.4996, + "step": 13540 + }, + { + "epoch": 0.11209000289531373, + "grad_norm": 1264.271240234375, + "learning_rate": 9.99725400149295e-06, + "loss": 145.0414, + "step": 13550 + }, + { + "epoch": 0.11217272614468296, + "grad_norm": 2198.46630859375, + "learning_rate": 9.997207066567939e-06, + "loss": 194.5429, + "step": 13560 + }, + { + "epoch": 0.1122554493940522, + "grad_norm": 1130.449951171875, + "learning_rate": 9.997159734038096e-06, + "loss": 166.257, + "step": 13570 + }, + { + "epoch": 0.11233817264342143, + "grad_norm": 1828.01513671875, + "learning_rate": 9.997112003907186e-06, + "loss": 152.8911, + "step": 13580 + }, + { + "epoch": 0.11242089589279067, + "grad_norm": 1196.669677734375, + "learning_rate": 9.997063876179007e-06, + "loss": 129.7313, + "step": 13590 + }, + { + "epoch": 0.11250361914215991, + "grad_norm": 1254.476806640625, + "learning_rate": 9.997015350857391e-06, + "loss": 169.0213, + "step": 13600 + }, + { + "epoch": 0.11258634239152913, + "grad_norm": 1430.6507568359375, + "learning_rate": 9.996966427946195e-06, + "loss": 150.1627, + "step": 13610 + }, + { + "epoch": 0.11266906564089837, + "grad_norm": 906.8787231445312, + "learning_rate": 9.996917107449313e-06, + "loss": 174.3134, + "step": 13620 + }, + { + "epoch": 0.11275178889026762, + "grad_norm": 812.057373046875, + "learning_rate": 9.99686738937067e-06, + "loss": 138.5191, + "step": 13630 + }, + { + "epoch": 0.11283451213963684, + "grad_norm": 1879.1136474609375, + "learning_rate": 9.996817273714222e-06, + "loss": 174.5974, + "step": 13640 + }, + { + "epoch": 0.11291723538900608, + "grad_norm": 945.3467407226562, + "learning_rate": 9.996766760483955e-06, + "loss": 161.973, + "step": 13650 + }, + { + "epoch": 0.11299995863837531, + "grad_norm": 862.3170776367188, + "learning_rate": 9.996715849683889e-06, + "loss": 137.5633, + "step": 13660 + }, + { + "epoch": 0.11308268188774455, + "grad_norm": 690.8417358398438, + "learning_rate": 9.996664541318076e-06, + "loss": 141.2179, + "step": 13670 + }, + { + "epoch": 0.11316540513711379, + "grad_norm": 972.900634765625, + "learning_rate": 9.996612835390596e-06, + "loss": 115.8736, + "step": 13680 + }, + { + "epoch": 0.11324812838648302, + "grad_norm": 1904.7318115234375, + "learning_rate": 9.996560731905565e-06, + "loss": 154.7887, + "step": 13690 + }, + { + "epoch": 0.11333085163585226, + "grad_norm": 851.8038940429688, + "learning_rate": 9.996508230867126e-06, + "loss": 137.4024, + "step": 13700 + }, + { + "epoch": 0.1134135748852215, + "grad_norm": 1190.90380859375, + "learning_rate": 9.996455332279458e-06, + "loss": 153.5202, + "step": 13710 + }, + { + "epoch": 0.11349629813459072, + "grad_norm": 2241.255126953125, + "learning_rate": 9.99640203614677e-06, + "loss": 173.7966, + "step": 13720 + }, + { + "epoch": 0.11357902138395996, + "grad_norm": 1084.945068359375, + "learning_rate": 9.996348342473304e-06, + "loss": 159.4762, + "step": 13730 + }, + { + "epoch": 0.1136617446333292, + "grad_norm": 1753.5206298828125, + "learning_rate": 9.99629425126333e-06, + "loss": 
218.3281, + "step": 13740 + }, + { + "epoch": 0.11374446788269843, + "grad_norm": 802.15576171875, + "learning_rate": 9.996239762521152e-06, + "loss": 153.0149, + "step": 13750 + }, + { + "epoch": 0.11382719113206767, + "grad_norm": 2257.760986328125, + "learning_rate": 9.996184876251105e-06, + "loss": 134.656, + "step": 13760 + }, + { + "epoch": 0.1139099143814369, + "grad_norm": 1485.7083740234375, + "learning_rate": 9.996129592457558e-06, + "loss": 119.6472, + "step": 13770 + }, + { + "epoch": 0.11399263763080614, + "grad_norm": 1323.489990234375, + "learning_rate": 9.996073911144907e-06, + "loss": 135.8627, + "step": 13780 + }, + { + "epoch": 0.11407536088017538, + "grad_norm": 1439.948974609375, + "learning_rate": 9.996017832317583e-06, + "loss": 105.2017, + "step": 13790 + }, + { + "epoch": 0.1141580841295446, + "grad_norm": 755.0234375, + "learning_rate": 9.995961355980052e-06, + "loss": 119.7319, + "step": 13800 + }, + { + "epoch": 0.11424080737891384, + "grad_norm": 1265.96826171875, + "learning_rate": 9.995904482136803e-06, + "loss": 144.368, + "step": 13810 + }, + { + "epoch": 0.11432353062828308, + "grad_norm": 1014.328857421875, + "learning_rate": 9.99584721079236e-06, + "loss": 180.5596, + "step": 13820 + }, + { + "epoch": 0.11440625387765231, + "grad_norm": 1355.7919921875, + "learning_rate": 9.995789541951287e-06, + "loss": 169.1609, + "step": 13830 + }, + { + "epoch": 0.11448897712702155, + "grad_norm": 1709.610107421875, + "learning_rate": 9.995731475618163e-06, + "loss": 152.0147, + "step": 13840 + }, + { + "epoch": 0.11457170037639078, + "grad_norm": 1426.723388671875, + "learning_rate": 9.995673011797615e-06, + "loss": 142.7122, + "step": 13850 + }, + { + "epoch": 0.11465442362576002, + "grad_norm": 3322.473876953125, + "learning_rate": 9.995614150494293e-06, + "loss": 192.3159, + "step": 13860 + }, + { + "epoch": 0.11473714687512926, + "grad_norm": 852.6958618164062, + "learning_rate": 9.995554891712879e-06, + "loss": 221.4455, + "step": 13870 + }, + { + "epoch": 0.11481987012449849, + "grad_norm": 679.8392944335938, + "learning_rate": 9.995495235458087e-06, + "loss": 205.1969, + "step": 13880 + }, + { + "epoch": 0.11490259337386773, + "grad_norm": 1818.237548828125, + "learning_rate": 9.99543518173467e-06, + "loss": 157.8227, + "step": 13890 + }, + { + "epoch": 0.11498531662323697, + "grad_norm": 911.2511596679688, + "learning_rate": 9.995374730547397e-06, + "loss": 213.6541, + "step": 13900 + }, + { + "epoch": 0.11506803987260619, + "grad_norm": 1822.821044921875, + "learning_rate": 9.995313881901085e-06, + "loss": 198.7188, + "step": 13910 + }, + { + "epoch": 0.11515076312197543, + "grad_norm": 653.8102416992188, + "learning_rate": 9.995252635800572e-06, + "loss": 127.7723, + "step": 13920 + }, + { + "epoch": 0.11523348637134467, + "grad_norm": 1713.5816650390625, + "learning_rate": 9.995190992250732e-06, + "loss": 225.4239, + "step": 13930 + }, + { + "epoch": 0.1153162096207139, + "grad_norm": 1139.3336181640625, + "learning_rate": 9.995128951256469e-06, + "loss": 140.1807, + "step": 13940 + }, + { + "epoch": 0.11539893287008314, + "grad_norm": 1414.932861328125, + "learning_rate": 9.99506651282272e-06, + "loss": 161.69, + "step": 13950 + }, + { + "epoch": 0.11548165611945237, + "grad_norm": 1176.623291015625, + "learning_rate": 9.995003676954454e-06, + "loss": 151.0156, + "step": 13960 + }, + { + "epoch": 0.11556437936882161, + "grad_norm": 784.6156616210938, + "learning_rate": 9.994940443656668e-06, + "loss": 198.3028, + "step": 13970 + }, + { + "epoch": 
0.11564710261819085, + "grad_norm": 862.885986328125, + "learning_rate": 9.994876812934395e-06, + "loss": 153.6012, + "step": 13980 + }, + { + "epoch": 0.11572982586756007, + "grad_norm": 2193.662109375, + "learning_rate": 9.994812784792698e-06, + "loss": 165.4299, + "step": 13990 + }, + { + "epoch": 0.11581254911692931, + "grad_norm": 1005.8052978515625, + "learning_rate": 9.99474835923667e-06, + "loss": 117.067, + "step": 14000 + }, + { + "epoch": 0.11589527236629855, + "grad_norm": 870.8054809570312, + "learning_rate": 9.994683536271437e-06, + "loss": 177.513, + "step": 14010 + }, + { + "epoch": 0.11597799561566778, + "grad_norm": 1938.4117431640625, + "learning_rate": 9.994618315902161e-06, + "loss": 147.8295, + "step": 14020 + }, + { + "epoch": 0.11606071886503702, + "grad_norm": 1145.3619384765625, + "learning_rate": 9.994552698134023e-06, + "loss": 126.2492, + "step": 14030 + }, + { + "epoch": 0.11614344211440625, + "grad_norm": 704.4757080078125, + "learning_rate": 9.994486682972253e-06, + "loss": 183.3489, + "step": 14040 + }, + { + "epoch": 0.11622616536377549, + "grad_norm": 799.8057250976562, + "learning_rate": 9.994420270422096e-06, + "loss": 155.8286, + "step": 14050 + }, + { + "epoch": 0.11630888861314473, + "grad_norm": 2168.39794921875, + "learning_rate": 9.994353460488842e-06, + "loss": 165.6206, + "step": 14060 + }, + { + "epoch": 0.11639161186251396, + "grad_norm": 1048.3438720703125, + "learning_rate": 9.994286253177803e-06, + "loss": 196.4472, + "step": 14070 + }, + { + "epoch": 0.1164743351118832, + "grad_norm": 1240.358642578125, + "learning_rate": 9.994218648494327e-06, + "loss": 169.1644, + "step": 14080 + }, + { + "epoch": 0.11655705836125244, + "grad_norm": 1450.99169921875, + "learning_rate": 9.994150646443793e-06, + "loss": 119.286, + "step": 14090 + }, + { + "epoch": 0.11663978161062166, + "grad_norm": 1026.7149658203125, + "learning_rate": 9.994082247031613e-06, + "loss": 166.7578, + "step": 14100 + }, + { + "epoch": 0.1167225048599909, + "grad_norm": 1081.6656494140625, + "learning_rate": 9.99401345026323e-06, + "loss": 141.0757, + "step": 14110 + }, + { + "epoch": 0.11680522810936013, + "grad_norm": 746.9979248046875, + "learning_rate": 9.993944256144115e-06, + "loss": 124.9759, + "step": 14120 + }, + { + "epoch": 0.11688795135872937, + "grad_norm": 891.9210815429688, + "learning_rate": 9.993874664679774e-06, + "loss": 150.3685, + "step": 14130 + }, + { + "epoch": 0.11697067460809861, + "grad_norm": 1401.4002685546875, + "learning_rate": 9.993804675875744e-06, + "loss": 168.8493, + "step": 14140 + }, + { + "epoch": 0.11705339785746784, + "grad_norm": 1588.7640380859375, + "learning_rate": 9.993734289737596e-06, + "loss": 141.4464, + "step": 14150 + }, + { + "epoch": 0.11713612110683708, + "grad_norm": 1859.66552734375, + "learning_rate": 9.993663506270928e-06, + "loss": 162.024, + "step": 14160 + }, + { + "epoch": 0.11721884435620632, + "grad_norm": 1133.1839599609375, + "learning_rate": 9.993592325481373e-06, + "loss": 166.6096, + "step": 14170 + }, + { + "epoch": 0.11730156760557554, + "grad_norm": 1811.849365234375, + "learning_rate": 9.993520747374594e-06, + "loss": 127.2197, + "step": 14180 + }, + { + "epoch": 0.11738429085494478, + "grad_norm": 909.2362060546875, + "learning_rate": 9.993448771956285e-06, + "loss": 189.4919, + "step": 14190 + }, + { + "epoch": 0.11746701410431402, + "grad_norm": 1350.44140625, + "learning_rate": 9.993376399232175e-06, + "loss": 142.4382, + "step": 14200 + }, + { + "epoch": 0.11754973735368325, + "grad_norm": 
1765.2679443359375, + "learning_rate": 9.993303629208023e-06, + "loss": 148.8411, + "step": 14210 + }, + { + "epoch": 0.11763246060305249, + "grad_norm": 2343.818359375, + "learning_rate": 9.993230461889616e-06, + "loss": 212.7168, + "step": 14220 + }, + { + "epoch": 0.11771518385242172, + "grad_norm": 683.275146484375, + "learning_rate": 9.993156897282776e-06, + "loss": 148.4446, + "step": 14230 + }, + { + "epoch": 0.11779790710179096, + "grad_norm": 998.6349487304688, + "learning_rate": 9.99308293539336e-06, + "loss": 117.4103, + "step": 14240 + }, + { + "epoch": 0.1178806303511602, + "grad_norm": 823.5191040039062, + "learning_rate": 9.993008576227248e-06, + "loss": 130.8048, + "step": 14250 + }, + { + "epoch": 0.11796335360052942, + "grad_norm": 1420.8035888671875, + "learning_rate": 9.992933819790358e-06, + "loss": 163.5295, + "step": 14260 + }, + { + "epoch": 0.11804607684989867, + "grad_norm": 1167.037109375, + "learning_rate": 9.992858666088638e-06, + "loss": 164.6194, + "step": 14270 + }, + { + "epoch": 0.1181288000992679, + "grad_norm": 1515.985107421875, + "learning_rate": 9.992783115128072e-06, + "loss": 163.406, + "step": 14280 + }, + { + "epoch": 0.11821152334863713, + "grad_norm": 1549.8917236328125, + "learning_rate": 9.992707166914662e-06, + "loss": 168.7726, + "step": 14290 + }, + { + "epoch": 0.11829424659800637, + "grad_norm": 1190.7861328125, + "learning_rate": 9.992630821454458e-06, + "loss": 140.9276, + "step": 14300 + }, + { + "epoch": 0.1183769698473756, + "grad_norm": 1568.7037353515625, + "learning_rate": 9.992554078753534e-06, + "loss": 147.5554, + "step": 14310 + }, + { + "epoch": 0.11845969309674484, + "grad_norm": 1000.02880859375, + "learning_rate": 9.992476938817994e-06, + "loss": 180.1213, + "step": 14320 + }, + { + "epoch": 0.11854241634611408, + "grad_norm": 1296.5947265625, + "learning_rate": 9.992399401653976e-06, + "loss": 137.781, + "step": 14330 + }, + { + "epoch": 0.1186251395954833, + "grad_norm": 1144.0504150390625, + "learning_rate": 9.99232146726765e-06, + "loss": 140.0204, + "step": 14340 + }, + { + "epoch": 0.11870786284485255, + "grad_norm": 1281.876708984375, + "learning_rate": 9.992243135665217e-06, + "loss": 154.8919, + "step": 14350 + }, + { + "epoch": 0.11879058609422179, + "grad_norm": 871.8610229492188, + "learning_rate": 9.992164406852908e-06, + "loss": 186.1516, + "step": 14360 + }, + { + "epoch": 0.11887330934359101, + "grad_norm": 1534.3536376953125, + "learning_rate": 9.992085280836988e-06, + "loss": 160.6092, + "step": 14370 + }, + { + "epoch": 0.11895603259296025, + "grad_norm": 988.0948486328125, + "learning_rate": 9.992005757623753e-06, + "loss": 203.4977, + "step": 14380 + }, + { + "epoch": 0.11903875584232948, + "grad_norm": 1357.1668701171875, + "learning_rate": 9.991925837219532e-06, + "loss": 160.5042, + "step": 14390 + }, + { + "epoch": 0.11912147909169872, + "grad_norm": 636.4329223632812, + "learning_rate": 9.991845519630679e-06, + "loss": 137.9073, + "step": 14400 + }, + { + "epoch": 0.11920420234106796, + "grad_norm": 1027.252197265625, + "learning_rate": 9.991764804863588e-06, + "loss": 122.9028, + "step": 14410 + }, + { + "epoch": 0.11928692559043719, + "grad_norm": 2356.058837890625, + "learning_rate": 9.991683692924682e-06, + "loss": 155.1582, + "step": 14420 + }, + { + "epoch": 0.11936964883980643, + "grad_norm": 743.142578125, + "learning_rate": 9.991602183820412e-06, + "loss": 142.9998, + "step": 14430 + }, + { + "epoch": 0.11945237208917567, + "grad_norm": 1039.77978515625, + "learning_rate": 
9.991520277557266e-06, + "loss": 156.9646, + "step": 14440 + }, + { + "epoch": 0.1195350953385449, + "grad_norm": 1624.250732421875, + "learning_rate": 9.991437974141759e-06, + "loss": 165.5059, + "step": 14450 + }, + { + "epoch": 0.11961781858791413, + "grad_norm": 589.4429321289062, + "learning_rate": 9.99135527358044e-06, + "loss": 119.63, + "step": 14460 + }, + { + "epoch": 0.11970054183728338, + "grad_norm": 1199.8302001953125, + "learning_rate": 9.991272175879888e-06, + "loss": 175.7935, + "step": 14470 + }, + { + "epoch": 0.1197832650866526, + "grad_norm": 1233.6771240234375, + "learning_rate": 9.991188681046718e-06, + "loss": 192.3081, + "step": 14480 + }, + { + "epoch": 0.11986598833602184, + "grad_norm": 1209.5980224609375, + "learning_rate": 9.991104789087568e-06, + "loss": 139.1357, + "step": 14490 + }, + { + "epoch": 0.11994871158539107, + "grad_norm": 1937.0008544921875, + "learning_rate": 9.991020500009118e-06, + "loss": 138.8762, + "step": 14500 + }, + { + "epoch": 0.12003143483476031, + "grad_norm": 545.6674194335938, + "learning_rate": 9.990935813818073e-06, + "loss": 139.6014, + "step": 14510 + }, + { + "epoch": 0.12011415808412955, + "grad_norm": 650.5000610351562, + "learning_rate": 9.99085073052117e-06, + "loss": 195.5592, + "step": 14520 + }, + { + "epoch": 0.12019688133349878, + "grad_norm": 566.500732421875, + "learning_rate": 9.990765250125179e-06, + "loss": 154.2413, + "step": 14530 + }, + { + "epoch": 0.12027960458286802, + "grad_norm": 1750.378662109375, + "learning_rate": 9.990679372636902e-06, + "loss": 161.8778, + "step": 14540 + }, + { + "epoch": 0.12036232783223726, + "grad_norm": 1349.2432861328125, + "learning_rate": 9.99059309806317e-06, + "loss": 172.9573, + "step": 14550 + }, + { + "epoch": 0.12044505108160648, + "grad_norm": 975.1179809570312, + "learning_rate": 9.990506426410851e-06, + "loss": 128.1013, + "step": 14560 + }, + { + "epoch": 0.12052777433097572, + "grad_norm": 3406.892333984375, + "learning_rate": 9.990419357686839e-06, + "loss": 177.5993, + "step": 14570 + }, + { + "epoch": 0.12061049758034495, + "grad_norm": 428.1646423339844, + "learning_rate": 9.99033189189806e-06, + "loss": 128.9519, + "step": 14580 + }, + { + "epoch": 0.12069322082971419, + "grad_norm": 996.7293701171875, + "learning_rate": 9.990244029051475e-06, + "loss": 149.3833, + "step": 14590 + }, + { + "epoch": 0.12077594407908343, + "grad_norm": 1458.9307861328125, + "learning_rate": 9.990155769154077e-06, + "loss": 160.3518, + "step": 14600 + }, + { + "epoch": 0.12085866732845266, + "grad_norm": 714.3155517578125, + "learning_rate": 9.990067112212884e-06, + "loss": 128.5733, + "step": 14610 + }, + { + "epoch": 0.1209413905778219, + "grad_norm": 629.7801513671875, + "learning_rate": 9.989978058234952e-06, + "loss": 141.1231, + "step": 14620 + }, + { + "epoch": 0.12102411382719114, + "grad_norm": 1056.1544189453125, + "learning_rate": 9.989888607227369e-06, + "loss": 173.8705, + "step": 14630 + }, + { + "epoch": 0.12110683707656036, + "grad_norm": 1272.472412109375, + "learning_rate": 9.989798759197247e-06, + "loss": 146.9385, + "step": 14640 + }, + { + "epoch": 0.1211895603259296, + "grad_norm": 1054.3629150390625, + "learning_rate": 9.989708514151739e-06, + "loss": 164.1719, + "step": 14650 + }, + { + "epoch": 0.12127228357529883, + "grad_norm": 659.7613525390625, + "learning_rate": 9.989617872098026e-06, + "loss": 149.6539, + "step": 14660 + }, + { + "epoch": 0.12135500682466807, + "grad_norm": 815.8479614257812, + "learning_rate": 9.989526833043316e-06, + "loss": 
140.1702, + "step": 14670 + }, + { + "epoch": 0.12143773007403731, + "grad_norm": 1148.9129638671875, + "learning_rate": 9.989435396994856e-06, + "loss": 125.2471, + "step": 14680 + }, + { + "epoch": 0.12152045332340654, + "grad_norm": 3006.224609375, + "learning_rate": 9.989343563959919e-06, + "loss": 150.3076, + "step": 14690 + }, + { + "epoch": 0.12160317657277578, + "grad_norm": 1365.89892578125, + "learning_rate": 9.989251333945813e-06, + "loss": 179.2145, + "step": 14700 + }, + { + "epoch": 0.12168589982214502, + "grad_norm": 982.8682861328125, + "learning_rate": 9.989158706959875e-06, + "loss": 137.9394, + "step": 14710 + }, + { + "epoch": 0.12176862307151425, + "grad_norm": 1103.151123046875, + "learning_rate": 9.989065683009477e-06, + "loss": 150.3043, + "step": 14720 + }, + { + "epoch": 0.12185134632088349, + "grad_norm": 1068.439208984375, + "learning_rate": 9.988972262102018e-06, + "loss": 115.0475, + "step": 14730 + }, + { + "epoch": 0.12193406957025273, + "grad_norm": 1475.9112548828125, + "learning_rate": 9.988878444244937e-06, + "loss": 162.3183, + "step": 14740 + }, + { + "epoch": 0.12201679281962195, + "grad_norm": 1475.7916259765625, + "learning_rate": 9.988784229445689e-06, + "loss": 132.9056, + "step": 14750 + }, + { + "epoch": 0.12209951606899119, + "grad_norm": 1580.335205078125, + "learning_rate": 9.988689617711777e-06, + "loss": 180.2133, + "step": 14760 + }, + { + "epoch": 0.12218223931836042, + "grad_norm": 1742.8638916015625, + "learning_rate": 9.988594609050726e-06, + "loss": 170.6644, + "step": 14770 + }, + { + "epoch": 0.12226496256772966, + "grad_norm": 778.4093017578125, + "learning_rate": 9.988499203470097e-06, + "loss": 163.6835, + "step": 14780 + }, + { + "epoch": 0.1223476858170989, + "grad_norm": 908.4758911132812, + "learning_rate": 9.988403400977482e-06, + "loss": 143.079, + "step": 14790 + }, + { + "epoch": 0.12243040906646813, + "grad_norm": 1540.624755859375, + "learning_rate": 9.9883072015805e-06, + "loss": 160.3763, + "step": 14800 + }, + { + "epoch": 0.12251313231583737, + "grad_norm": 919.294677734375, + "learning_rate": 9.98821060528681e-06, + "loss": 165.4283, + "step": 14810 + }, + { + "epoch": 0.12259585556520661, + "grad_norm": 865.2339477539062, + "learning_rate": 9.988113612104093e-06, + "loss": 128.951, + "step": 14820 + }, + { + "epoch": 0.12267857881457583, + "grad_norm": 2098.492919921875, + "learning_rate": 9.988016222040067e-06, + "loss": 151.1649, + "step": 14830 + }, + { + "epoch": 0.12276130206394507, + "grad_norm": 847.621337890625, + "learning_rate": 9.987918435102484e-06, + "loss": 121.6645, + "step": 14840 + }, + { + "epoch": 0.1228440253133143, + "grad_norm": 1472.7208251953125, + "learning_rate": 9.987820251299121e-06, + "loss": 140.8588, + "step": 14850 + }, + { + "epoch": 0.12292674856268354, + "grad_norm": 1310.5726318359375, + "learning_rate": 9.987721670637794e-06, + "loss": 132.6207, + "step": 14860 + }, + { + "epoch": 0.12300947181205278, + "grad_norm": 788.9578247070312, + "learning_rate": 9.987622693126342e-06, + "loss": 139.2334, + "step": 14870 + }, + { + "epoch": 0.12309219506142201, + "grad_norm": 1761.3287353515625, + "learning_rate": 9.987523318772644e-06, + "loss": 156.3363, + "step": 14880 + }, + { + "epoch": 0.12317491831079125, + "grad_norm": 1024.44140625, + "learning_rate": 9.987423547584605e-06, + "loss": 167.0266, + "step": 14890 + }, + { + "epoch": 0.12325764156016049, + "grad_norm": 891.505126953125, + "learning_rate": 9.987323379570161e-06, + "loss": 144.4436, + "step": 14900 + }, + { + "epoch": 
0.12334036480952972, + "grad_norm": 1373.43359375, + "learning_rate": 9.987222814737287e-06, + "loss": 139.1032, + "step": 14910 + }, + { + "epoch": 0.12342308805889896, + "grad_norm": 2005.6689453125, + "learning_rate": 9.987121853093982e-06, + "loss": 179.2018, + "step": 14920 + }, + { + "epoch": 0.12350581130826818, + "grad_norm": 1276.0216064453125, + "learning_rate": 9.987020494648279e-06, + "loss": 157.102, + "step": 14930 + }, + { + "epoch": 0.12358853455763742, + "grad_norm": 1294.2474365234375, + "learning_rate": 9.986918739408241e-06, + "loss": 176.7196, + "step": 14940 + }, + { + "epoch": 0.12367125780700666, + "grad_norm": 1715.0291748046875, + "learning_rate": 9.986816587381966e-06, + "loss": 150.2139, + "step": 14950 + }, + { + "epoch": 0.12375398105637589, + "grad_norm": 1020.9821166992188, + "learning_rate": 9.986714038577582e-06, + "loss": 145.5669, + "step": 14960 + }, + { + "epoch": 0.12383670430574513, + "grad_norm": 969.8027954101562, + "learning_rate": 9.986611093003249e-06, + "loss": 129.0563, + "step": 14970 + }, + { + "epoch": 0.12391942755511437, + "grad_norm": 1188.594482421875, + "learning_rate": 9.986507750667157e-06, + "loss": 130.371, + "step": 14980 + }, + { + "epoch": 0.1240021508044836, + "grad_norm": 2070.416015625, + "learning_rate": 9.986404011577525e-06, + "loss": 164.5774, + "step": 14990 + }, + { + "epoch": 0.12408487405385284, + "grad_norm": 1122.52587890625, + "learning_rate": 9.986299875742612e-06, + "loss": 187.0694, + "step": 15000 + }, + { + "epoch": 0.12416759730322208, + "grad_norm": 1119.6961669921875, + "learning_rate": 9.986195343170703e-06, + "loss": 180.5289, + "step": 15010 + }, + { + "epoch": 0.1242503205525913, + "grad_norm": 1574.2568359375, + "learning_rate": 9.986090413870114e-06, + "loss": 144.7522, + "step": 15020 + }, + { + "epoch": 0.12433304380196054, + "grad_norm": 1006.7045288085938, + "learning_rate": 9.985985087849193e-06, + "loss": 143.7221, + "step": 15030 + }, + { + "epoch": 0.12441576705132977, + "grad_norm": 1300.2181396484375, + "learning_rate": 9.98587936511632e-06, + "loss": 150.1932, + "step": 15040 + }, + { + "epoch": 0.12449849030069901, + "grad_norm": 821.7041625976562, + "learning_rate": 9.98577324567991e-06, + "loss": 139.0086, + "step": 15050 + }, + { + "epoch": 0.12458121355006825, + "grad_norm": 830.7269287109375, + "learning_rate": 9.985666729548404e-06, + "loss": 146.4651, + "step": 15060 + }, + { + "epoch": 0.12466393679943748, + "grad_norm": 1310.355224609375, + "learning_rate": 9.985559816730277e-06, + "loss": 141.5489, + "step": 15070 + }, + { + "epoch": 0.12474666004880672, + "grad_norm": 1190.0335693359375, + "learning_rate": 9.985452507234037e-06, + "loss": 144.9001, + "step": 15080 + }, + { + "epoch": 0.12482938329817596, + "grad_norm": 2714.714599609375, + "learning_rate": 9.98534480106822e-06, + "loss": 154.9118, + "step": 15090 + }, + { + "epoch": 0.12491210654754518, + "grad_norm": 792.223388671875, + "learning_rate": 9.985236698241396e-06, + "loss": 149.7406, + "step": 15100 + }, + { + "epoch": 0.12499482979691443, + "grad_norm": 1287.8345947265625, + "learning_rate": 9.985128198762168e-06, + "loss": 171.4261, + "step": 15110 + }, + { + "epoch": 0.12507755304628365, + "grad_norm": 957.4619140625, + "learning_rate": 9.98501930263917e-06, + "loss": 234.8733, + "step": 15120 + }, + { + "epoch": 0.1251602762956529, + "grad_norm": 862.0460205078125, + "learning_rate": 9.984910009881062e-06, + "loss": 112.6332, + "step": 15130 + }, + { + "epoch": 0.12524299954502213, + "grad_norm": 
1100.07080078125, + "learning_rate": 9.984800320496542e-06, + "loss": 139.7673, + "step": 15140 + }, + { + "epoch": 0.12532572279439136, + "grad_norm": 1111.9737548828125, + "learning_rate": 9.984690234494338e-06, + "loss": 106.7051, + "step": 15150 + }, + { + "epoch": 0.12540844604376059, + "grad_norm": 744.8794555664062, + "learning_rate": 9.98457975188321e-06, + "loss": 142.1312, + "step": 15160 + }, + { + "epoch": 0.12549116929312984, + "grad_norm": 923.189697265625, + "learning_rate": 9.984468872671945e-06, + "loss": 139.3656, + "step": 15170 + }, + { + "epoch": 0.12557389254249907, + "grad_norm": 1322.687255859375, + "learning_rate": 9.984357596869369e-06, + "loss": 148.6495, + "step": 15180 + }, + { + "epoch": 0.1256566157918683, + "grad_norm": 882.3487548828125, + "learning_rate": 9.984245924484334e-06, + "loss": 141.7766, + "step": 15190 + }, + { + "epoch": 0.12573933904123755, + "grad_norm": 725.9840698242188, + "learning_rate": 9.984133855525723e-06, + "loss": 138.5364, + "step": 15200 + }, + { + "epoch": 0.12582206229060677, + "grad_norm": 1547.522705078125, + "learning_rate": 9.984021390002458e-06, + "loss": 136.4458, + "step": 15210 + }, + { + "epoch": 0.125904785539976, + "grad_norm": 1425.56494140625, + "learning_rate": 9.983908527923486e-06, + "loss": 222.0387, + "step": 15220 + }, + { + "epoch": 0.12598750878934525, + "grad_norm": 911.7035522460938, + "learning_rate": 9.983795269297782e-06, + "loss": 169.7902, + "step": 15230 + }, + { + "epoch": 0.12607023203871448, + "grad_norm": 1066.0264892578125, + "learning_rate": 9.983681614134363e-06, + "loss": 122.5573, + "step": 15240 + }, + { + "epoch": 0.1261529552880837, + "grad_norm": 1829.2509765625, + "learning_rate": 9.98356756244227e-06, + "loss": 154.7958, + "step": 15250 + }, + { + "epoch": 0.12623567853745296, + "grad_norm": 1402.93408203125, + "learning_rate": 9.983453114230575e-06, + "loss": 145.442, + "step": 15260 + }, + { + "epoch": 0.1263184017868222, + "grad_norm": 990.7800903320312, + "learning_rate": 9.98333826950839e-06, + "loss": 138.916, + "step": 15270 + }, + { + "epoch": 0.12640112503619141, + "grad_norm": 861.1292724609375, + "learning_rate": 9.983223028284847e-06, + "loss": 152.3527, + "step": 15280 + }, + { + "epoch": 0.12648384828556067, + "grad_norm": 887.3511962890625, + "learning_rate": 9.983107390569118e-06, + "loss": 129.7973, + "step": 15290 + }, + { + "epoch": 0.1265665715349299, + "grad_norm": 1043.2373046875, + "learning_rate": 9.982991356370404e-06, + "loss": 116.1451, + "step": 15300 + }, + { + "epoch": 0.12664929478429912, + "grad_norm": 1244.5079345703125, + "learning_rate": 9.982874925697937e-06, + "loss": 221.0664, + "step": 15310 + }, + { + "epoch": 0.12673201803366838, + "grad_norm": 1715.1995849609375, + "learning_rate": 9.982758098560978e-06, + "loss": 186.7455, + "step": 15320 + }, + { + "epoch": 0.1268147412830376, + "grad_norm": 679.9988403320312, + "learning_rate": 9.982640874968827e-06, + "loss": 171.8672, + "step": 15330 + }, + { + "epoch": 0.12689746453240683, + "grad_norm": 595.40625, + "learning_rate": 9.98252325493081e-06, + "loss": 130.3511, + "step": 15340 + }, + { + "epoch": 0.12698018778177605, + "grad_norm": 915.3275146484375, + "learning_rate": 9.982405238456281e-06, + "loss": 153.7831, + "step": 15350 + }, + { + "epoch": 0.1270629110311453, + "grad_norm": 1383.0423583984375, + "learning_rate": 9.982286825554636e-06, + "loss": 155.1486, + "step": 15360 + }, + { + "epoch": 0.12714563428051454, + "grad_norm": 1527.0670166015625, + "learning_rate": 
9.982168016235292e-06, + "loss": 235.3831, + "step": 15370 + }, + { + "epoch": 0.12722835752988376, + "grad_norm": 1168.0416259765625, + "learning_rate": 9.982048810507706e-06, + "loss": 175.3166, + "step": 15380 + }, + { + "epoch": 0.12731108077925302, + "grad_norm": 1577.9107666015625, + "learning_rate": 9.98192920838136e-06, + "loss": 136.4098, + "step": 15390 + }, + { + "epoch": 0.12739380402862224, + "grad_norm": 2239.125244140625, + "learning_rate": 9.98180920986577e-06, + "loss": 162.4811, + "step": 15400 + }, + { + "epoch": 0.12747652727799147, + "grad_norm": 1140.6561279296875, + "learning_rate": 9.981688814970485e-06, + "loss": 159.3877, + "step": 15410 + }, + { + "epoch": 0.12755925052736072, + "grad_norm": 929.8948974609375, + "learning_rate": 9.981568023705085e-06, + "loss": 113.0717, + "step": 15420 + }, + { + "epoch": 0.12764197377672995, + "grad_norm": 1146.56396484375, + "learning_rate": 9.981446836079178e-06, + "loss": 121.9914, + "step": 15430 + }, + { + "epoch": 0.12772469702609918, + "grad_norm": 877.8550415039062, + "learning_rate": 9.981325252102408e-06, + "loss": 173.6141, + "step": 15440 + }, + { + "epoch": 0.12780742027546843, + "grad_norm": 969.0079956054688, + "learning_rate": 9.98120327178445e-06, + "loss": 178.5706, + "step": 15450 + }, + { + "epoch": 0.12789014352483766, + "grad_norm": 1263.2391357421875, + "learning_rate": 9.981080895135007e-06, + "loss": 180.7431, + "step": 15460 + }, + { + "epoch": 0.12797286677420688, + "grad_norm": 742.5184326171875, + "learning_rate": 9.980958122163818e-06, + "loss": 111.0224, + "step": 15470 + }, + { + "epoch": 0.12805559002357614, + "grad_norm": 1423.19873046875, + "learning_rate": 9.980834952880652e-06, + "loss": 128.3473, + "step": 15480 + }, + { + "epoch": 0.12813831327294536, + "grad_norm": 1583.2940673828125, + "learning_rate": 9.980711387295306e-06, + "loss": 149.955, + "step": 15490 + }, + { + "epoch": 0.1282210365223146, + "grad_norm": 1052.7265625, + "learning_rate": 9.980587425417612e-06, + "loss": 159.9205, + "step": 15500 + }, + { + "epoch": 0.12830375977168385, + "grad_norm": 2138.17724609375, + "learning_rate": 9.980463067257437e-06, + "loss": 169.7366, + "step": 15510 + }, + { + "epoch": 0.12838648302105307, + "grad_norm": 1006.1878662109375, + "learning_rate": 9.980338312824672e-06, + "loss": 193.1612, + "step": 15520 + }, + { + "epoch": 0.1284692062704223, + "grad_norm": 1047.7593994140625, + "learning_rate": 9.980213162129244e-06, + "loss": 175.5892, + "step": 15530 + }, + { + "epoch": 0.12855192951979152, + "grad_norm": 1267.4644775390625, + "learning_rate": 9.980087615181111e-06, + "loss": 149.4357, + "step": 15540 + }, + { + "epoch": 0.12863465276916078, + "grad_norm": 1171.0859375, + "learning_rate": 9.979961671990263e-06, + "loss": 165.6414, + "step": 15550 + }, + { + "epoch": 0.12871737601853, + "grad_norm": 911.0418701171875, + "learning_rate": 9.979835332566719e-06, + "loss": 155.2462, + "step": 15560 + }, + { + "epoch": 0.12880009926789923, + "grad_norm": 1016.1674194335938, + "learning_rate": 9.97970859692053e-06, + "loss": 142.4974, + "step": 15570 + }, + { + "epoch": 0.12888282251726849, + "grad_norm": 653.232421875, + "learning_rate": 9.979581465061784e-06, + "loss": 155.5012, + "step": 15580 + }, + { + "epoch": 0.1289655457666377, + "grad_norm": 1058.5008544921875, + "learning_rate": 9.979453937000594e-06, + "loss": 101.9423, + "step": 15590 + }, + { + "epoch": 0.12904826901600694, + "grad_norm": 820.3455200195312, + "learning_rate": 9.979326012747106e-06, + "loss": 117.5258, + "step": 
15600 + }, + { + "epoch": 0.1291309922653762, + "grad_norm": 1352.7105712890625, + "learning_rate": 9.9791976923115e-06, + "loss": 99.8209, + "step": 15610 + }, + { + "epoch": 0.12921371551474542, + "grad_norm": 1026.9713134765625, + "learning_rate": 9.979068975703984e-06, + "loss": 166.7305, + "step": 15620 + }, + { + "epoch": 0.12929643876411465, + "grad_norm": 1245.904296875, + "learning_rate": 9.978939862934802e-06, + "loss": 126.3938, + "step": 15630 + }, + { + "epoch": 0.1293791620134839, + "grad_norm": 1658.7640380859375, + "learning_rate": 9.978810354014223e-06, + "loss": 135.5493, + "step": 15640 + }, + { + "epoch": 0.12946188526285313, + "grad_norm": 1517.82373046875, + "learning_rate": 9.978680448952556e-06, + "loss": 139.2036, + "step": 15650 + }, + { + "epoch": 0.12954460851222235, + "grad_norm": 1112.2469482421875, + "learning_rate": 9.978550147760133e-06, + "loss": 127.4167, + "step": 15660 + }, + { + "epoch": 0.1296273317615916, + "grad_norm": 995.6966552734375, + "learning_rate": 9.978419450447325e-06, + "loss": 128.1456, + "step": 15670 + }, + { + "epoch": 0.12971005501096083, + "grad_norm": 785.5421142578125, + "learning_rate": 9.978288357024527e-06, + "loss": 142.7447, + "step": 15680 + }, + { + "epoch": 0.12979277826033006, + "grad_norm": 3418.008544921875, + "learning_rate": 9.978156867502173e-06, + "loss": 161.2918, + "step": 15690 + }, + { + "epoch": 0.12987550150969931, + "grad_norm": 1545.32568359375, + "learning_rate": 9.978024981890724e-06, + "loss": 107.0028, + "step": 15700 + }, + { + "epoch": 0.12995822475906854, + "grad_norm": 963.7340087890625, + "learning_rate": 9.977892700200673e-06, + "loss": 140.569, + "step": 15710 + }, + { + "epoch": 0.13004094800843777, + "grad_norm": 692.7611694335938, + "learning_rate": 9.977760022442545e-06, + "loss": 110.664, + "step": 15720 + }, + { + "epoch": 0.130123671257807, + "grad_norm": 1015.7322998046875, + "learning_rate": 9.977626948626897e-06, + "loss": 158.9243, + "step": 15730 + }, + { + "epoch": 0.13020639450717625, + "grad_norm": 1334.6917724609375, + "learning_rate": 9.977493478764316e-06, + "loss": 152.3215, + "step": 15740 + }, + { + "epoch": 0.13028911775654548, + "grad_norm": 963.1575927734375, + "learning_rate": 9.977359612865424e-06, + "loss": 137.1868, + "step": 15750 + }, + { + "epoch": 0.1303718410059147, + "grad_norm": 1332.0909423828125, + "learning_rate": 9.97722535094087e-06, + "loss": 130.2153, + "step": 15760 + }, + { + "epoch": 0.13045456425528396, + "grad_norm": 864.9472045898438, + "learning_rate": 9.977090693001336e-06, + "loss": 142.6017, + "step": 15770 + }, + { + "epoch": 0.13053728750465318, + "grad_norm": 1091.3128662109375, + "learning_rate": 9.976955639057539e-06, + "loss": 126.0693, + "step": 15780 + }, + { + "epoch": 0.1306200107540224, + "grad_norm": 1137.7115478515625, + "learning_rate": 9.976820189120223e-06, + "loss": 147.4185, + "step": 15790 + }, + { + "epoch": 0.13070273400339166, + "grad_norm": 1658.2982177734375, + "learning_rate": 9.976684343200164e-06, + "loss": 135.441, + "step": 15800 + }, + { + "epoch": 0.1307854572527609, + "grad_norm": 1823.3642578125, + "learning_rate": 9.976548101308173e-06, + "loss": 138.8229, + "step": 15810 + }, + { + "epoch": 0.13086818050213012, + "grad_norm": 1511.6375732421875, + "learning_rate": 9.976411463455088e-06, + "loss": 140.3549, + "step": 15820 + }, + { + "epoch": 0.13095090375149937, + "grad_norm": 970.9559326171875, + "learning_rate": 9.976274429651783e-06, + "loss": 188.4605, + "step": 15830 + }, + { + "epoch": 0.1310336270008686, 
+ "grad_norm": 1540.7110595703125, + "learning_rate": 9.976136999909156e-06, + "loss": 106.2589, + "step": 15840 + }, + { + "epoch": 0.13111635025023782, + "grad_norm": 829.7328491210938, + "learning_rate": 9.97599917423815e-06, + "loss": 166.2885, + "step": 15850 + }, + { + "epoch": 0.13119907349960708, + "grad_norm": 0.0, + "learning_rate": 9.975860952649724e-06, + "loss": 180.9173, + "step": 15860 + }, + { + "epoch": 0.1312817967489763, + "grad_norm": 1011.6902465820312, + "learning_rate": 9.975722335154876e-06, + "loss": 161.2201, + "step": 15870 + }, + { + "epoch": 0.13136451999834553, + "grad_norm": 1166.7960205078125, + "learning_rate": 9.975583321764638e-06, + "loss": 144.3113, + "step": 15880 + }, + { + "epoch": 0.13144724324771476, + "grad_norm": 1041.4771728515625, + "learning_rate": 9.975443912490073e-06, + "loss": 149.5042, + "step": 15890 + }, + { + "epoch": 0.131529966497084, + "grad_norm": 2316.5087890625, + "learning_rate": 9.975304107342268e-06, + "loss": 179.2303, + "step": 15900 + }, + { + "epoch": 0.13161268974645324, + "grad_norm": 692.1578369140625, + "learning_rate": 9.97516390633235e-06, + "loss": 133.4318, + "step": 15910 + }, + { + "epoch": 0.13169541299582246, + "grad_norm": 1125.5006103515625, + "learning_rate": 9.975023309471473e-06, + "loss": 156.1001, + "step": 15920 + }, + { + "epoch": 0.13177813624519172, + "grad_norm": 1064.159423828125, + "learning_rate": 9.974882316770823e-06, + "loss": 147.876, + "step": 15930 + }, + { + "epoch": 0.13186085949456094, + "grad_norm": 1456.9761962890625, + "learning_rate": 9.974740928241617e-06, + "loss": 146.098, + "step": 15940 + }, + { + "epoch": 0.13194358274393017, + "grad_norm": 1191.022705078125, + "learning_rate": 9.974599143895107e-06, + "loss": 139.6693, + "step": 15950 + }, + { + "epoch": 0.13202630599329943, + "grad_norm": 2010.4088134765625, + "learning_rate": 9.974456963742573e-06, + "loss": 152.4677, + "step": 15960 + }, + { + "epoch": 0.13210902924266865, + "grad_norm": 1077.85205078125, + "learning_rate": 9.97431438779533e-06, + "loss": 199.6097, + "step": 15970 + }, + { + "epoch": 0.13219175249203788, + "grad_norm": 975.5093994140625, + "learning_rate": 9.974171416064719e-06, + "loss": 110.029, + "step": 15980 + }, + { + "epoch": 0.13227447574140713, + "grad_norm": 1180.7437744140625, + "learning_rate": 9.974028048562118e-06, + "loss": 136.7102, + "step": 15990 + }, + { + "epoch": 0.13235719899077636, + "grad_norm": 1241.7110595703125, + "learning_rate": 9.973884285298932e-06, + "loss": 154.7749, + "step": 16000 + }, + { + "epoch": 0.13243992224014559, + "grad_norm": 1181.715576171875, + "learning_rate": 9.9737401262866e-06, + "loss": 163.9049, + "step": 16010 + }, + { + "epoch": 0.13252264548951484, + "grad_norm": 1441.7060546875, + "learning_rate": 9.973595571536593e-06, + "loss": 131.6654, + "step": 16020 + }, + { + "epoch": 0.13260536873888407, + "grad_norm": 1810.145751953125, + "learning_rate": 9.973450621060412e-06, + "loss": 155.4361, + "step": 16030 + }, + { + "epoch": 0.1326880919882533, + "grad_norm": 1024.084716796875, + "learning_rate": 9.97330527486959e-06, + "loss": 130.6234, + "step": 16040 + }, + { + "epoch": 0.13277081523762255, + "grad_norm": 1294.561279296875, + "learning_rate": 9.973159532975691e-06, + "loss": 122.6079, + "step": 16050 + }, + { + "epoch": 0.13285353848699177, + "grad_norm": 1282.573486328125, + "learning_rate": 9.973013395390314e-06, + "loss": 173.6021, + "step": 16060 + }, + { + "epoch": 0.132936261736361, + "grad_norm": 1436.6795654296875, + "learning_rate": 
9.972866862125083e-06, + "loss": 201.6667, + "step": 16070 + }, + { + "epoch": 0.13301898498573023, + "grad_norm": 880.5997924804688, + "learning_rate": 9.972719933191657e-06, + "loss": 121.1312, + "step": 16080 + }, + { + "epoch": 0.13310170823509948, + "grad_norm": 720.5911254882812, + "learning_rate": 9.97257260860173e-06, + "loss": 117.1484, + "step": 16090 + }, + { + "epoch": 0.1331844314844687, + "grad_norm": 1505.3927001953125, + "learning_rate": 9.972424888367019e-06, + "loss": 146.7309, + "step": 16100 + }, + { + "epoch": 0.13326715473383793, + "grad_norm": 958.6402587890625, + "learning_rate": 9.972276772499281e-06, + "loss": 156.9766, + "step": 16110 + }, + { + "epoch": 0.1333498779832072, + "grad_norm": 877.50244140625, + "learning_rate": 9.9721282610103e-06, + "loss": 191.0899, + "step": 16120 + }, + { + "epoch": 0.13343260123257641, + "grad_norm": 1021.2138671875, + "learning_rate": 9.971979353911891e-06, + "loss": 133.9165, + "step": 16130 + }, + { + "epoch": 0.13351532448194564, + "grad_norm": 847.0870971679688, + "learning_rate": 9.971830051215905e-06, + "loss": 101.3374, + "step": 16140 + }, + { + "epoch": 0.1335980477313149, + "grad_norm": 2785.597412109375, + "learning_rate": 9.97168035293422e-06, + "loss": 267.7292, + "step": 16150 + }, + { + "epoch": 0.13368077098068412, + "grad_norm": 801.3421020507812, + "learning_rate": 9.971530259078743e-06, + "loss": 111.4734, + "step": 16160 + }, + { + "epoch": 0.13376349423005335, + "grad_norm": 768.2542114257812, + "learning_rate": 9.971379769661422e-06, + "loss": 149.4196, + "step": 16170 + }, + { + "epoch": 0.1338462174794226, + "grad_norm": 893.0291748046875, + "learning_rate": 9.971228884694228e-06, + "loss": 122.37, + "step": 16180 + }, + { + "epoch": 0.13392894072879183, + "grad_norm": 1295.072509765625, + "learning_rate": 9.971077604189166e-06, + "loss": 156.3286, + "step": 16190 + }, + { + "epoch": 0.13401166397816106, + "grad_norm": 998.2085571289062, + "learning_rate": 9.970925928158275e-06, + "loss": 122.403, + "step": 16200 + }, + { + "epoch": 0.1340943872275303, + "grad_norm": 701.3370361328125, + "learning_rate": 9.970773856613617e-06, + "loss": 140.6802, + "step": 16210 + }, + { + "epoch": 0.13417711047689954, + "grad_norm": 971.6983032226562, + "learning_rate": 9.970621389567301e-06, + "loss": 178.1052, + "step": 16220 + }, + { + "epoch": 0.13425983372626876, + "grad_norm": 2665.119384765625, + "learning_rate": 9.97046852703145e-06, + "loss": 138.6044, + "step": 16230 + }, + { + "epoch": 0.13434255697563802, + "grad_norm": 2127.31884765625, + "learning_rate": 9.970315269018231e-06, + "loss": 157.2493, + "step": 16240 + }, + { + "epoch": 0.13442528022500724, + "grad_norm": 1778.2391357421875, + "learning_rate": 9.970161615539837e-06, + "loss": 134.0471, + "step": 16250 + }, + { + "epoch": 0.13450800347437647, + "grad_norm": 993.4716796875, + "learning_rate": 9.970007566608492e-06, + "loss": 146.2506, + "step": 16260 + }, + { + "epoch": 0.1345907267237457, + "grad_norm": 798.2664184570312, + "learning_rate": 9.969853122236455e-06, + "loss": 114.1296, + "step": 16270 + }, + { + "epoch": 0.13467344997311495, + "grad_norm": 703.0869750976562, + "learning_rate": 9.969698282436013e-06, + "loss": 120.5299, + "step": 16280 + }, + { + "epoch": 0.13475617322248418, + "grad_norm": 1201.6317138671875, + "learning_rate": 9.969543047219487e-06, + "loss": 125.8007, + "step": 16290 + }, + { + "epoch": 0.1348388964718534, + "grad_norm": 1785.0177001953125, + "learning_rate": 9.969387416599227e-06, + "loss": 144.5029, + "step": 
16300 + }, + { + "epoch": 0.13492161972122266, + "grad_norm": 1228.9619140625, + "learning_rate": 9.969231390587618e-06, + "loss": 164.9693, + "step": 16310 + }, + { + "epoch": 0.13500434297059188, + "grad_norm": 864.3604736328125, + "learning_rate": 9.969074969197072e-06, + "loss": 168.7043, + "step": 16320 + }, + { + "epoch": 0.1350870662199611, + "grad_norm": 1214.023681640625, + "learning_rate": 9.968918152440036e-06, + "loss": 172.751, + "step": 16330 + }, + { + "epoch": 0.13516978946933036, + "grad_norm": 928.501220703125, + "learning_rate": 9.968760940328987e-06, + "loss": 131.5311, + "step": 16340 + }, + { + "epoch": 0.1352525127186996, + "grad_norm": 510.1147155761719, + "learning_rate": 9.968603332876435e-06, + "loss": 171.1721, + "step": 16350 + }, + { + "epoch": 0.13533523596806882, + "grad_norm": 1110.3807373046875, + "learning_rate": 9.968445330094915e-06, + "loss": 169.255, + "step": 16360 + }, + { + "epoch": 0.13541795921743807, + "grad_norm": 1672.8614501953125, + "learning_rate": 9.968286931997004e-06, + "loss": 112.5926, + "step": 16370 + }, + { + "epoch": 0.1355006824668073, + "grad_norm": 1014.0128784179688, + "learning_rate": 9.968128138595304e-06, + "loss": 100.9882, + "step": 16380 + }, + { + "epoch": 0.13558340571617652, + "grad_norm": 1446.147216796875, + "learning_rate": 9.967968949902448e-06, + "loss": 185.0402, + "step": 16390 + }, + { + "epoch": 0.13566612896554578, + "grad_norm": 753.0343627929688, + "learning_rate": 9.967809365931102e-06, + "loss": 148.759, + "step": 16400 + }, + { + "epoch": 0.135748852214915, + "grad_norm": 909.8871459960938, + "learning_rate": 9.967649386693964e-06, + "loss": 123.6662, + "step": 16410 + }, + { + "epoch": 0.13583157546428423, + "grad_norm": 1223.7244873046875, + "learning_rate": 9.967489012203765e-06, + "loss": 132.6178, + "step": 16420 + }, + { + "epoch": 0.13591429871365346, + "grad_norm": 1106.0858154296875, + "learning_rate": 9.967328242473261e-06, + "loss": 146.9553, + "step": 16430 + }, + { + "epoch": 0.1359970219630227, + "grad_norm": 1789.8829345703125, + "learning_rate": 9.967167077515246e-06, + "loss": 133.0784, + "step": 16440 + }, + { + "epoch": 0.13607974521239194, + "grad_norm": 741.1414184570312, + "learning_rate": 9.967005517342544e-06, + "loss": 143.1583, + "step": 16450 + }, + { + "epoch": 0.13616246846176117, + "grad_norm": 1324.021240234375, + "learning_rate": 9.966843561968005e-06, + "loss": 108.1861, + "step": 16460 + }, + { + "epoch": 0.13624519171113042, + "grad_norm": 866.0011596679688, + "learning_rate": 9.966681211404521e-06, + "loss": 138.6324, + "step": 16470 + }, + { + "epoch": 0.13632791496049965, + "grad_norm": 520.3377685546875, + "learning_rate": 9.966518465665007e-06, + "loss": 113.3134, + "step": 16480 + }, + { + "epoch": 0.13641063820986887, + "grad_norm": 883.1153564453125, + "learning_rate": 9.966355324762412e-06, + "loss": 163.313, + "step": 16490 + }, + { + "epoch": 0.13649336145923813, + "grad_norm": 1007.1843872070312, + "learning_rate": 9.966191788709716e-06, + "loss": 140.2184, + "step": 16500 + }, + { + "epoch": 0.13657608470860735, + "grad_norm": 1669.2816162109375, + "learning_rate": 9.966027857519931e-06, + "loss": 188.2176, + "step": 16510 + }, + { + "epoch": 0.13665880795797658, + "grad_norm": 772.6116943359375, + "learning_rate": 9.9658635312061e-06, + "loss": 163.7544, + "step": 16520 + }, + { + "epoch": 0.13674153120734583, + "grad_norm": 706.4850463867188, + "learning_rate": 9.965698809781298e-06, + "loss": 121.3989, + "step": 16530 + }, + { + "epoch": 
0.13682425445671506, + "grad_norm": 766.0828247070312, + "learning_rate": 9.965533693258632e-06, + "loss": 213.4713, + "step": 16540 + }, + { + "epoch": 0.1369069777060843, + "grad_norm": 957.917724609375, + "learning_rate": 9.965368181651239e-06, + "loss": 183.1273, + "step": 16550 + }, + { + "epoch": 0.13698970095545354, + "grad_norm": 696.8062744140625, + "learning_rate": 9.965202274972288e-06, + "loss": 112.6891, + "step": 16560 + }, + { + "epoch": 0.13707242420482277, + "grad_norm": 902.3621215820312, + "learning_rate": 9.965035973234977e-06, + "loss": 113.6838, + "step": 16570 + }, + { + "epoch": 0.137155147454192, + "grad_norm": 1020.390625, + "learning_rate": 9.964869276452542e-06, + "loss": 106.0109, + "step": 16580 + }, + { + "epoch": 0.13723787070356125, + "grad_norm": 1181.8326416015625, + "learning_rate": 9.964702184638244e-06, + "loss": 139.7021, + "step": 16590 + }, + { + "epoch": 0.13732059395293048, + "grad_norm": 629.9285278320312, + "learning_rate": 9.964534697805377e-06, + "loss": 193.1732, + "step": 16600 + }, + { + "epoch": 0.1374033172022997, + "grad_norm": 1531.7962646484375, + "learning_rate": 9.96436681596727e-06, + "loss": 154.7776, + "step": 16610 + }, + { + "epoch": 0.13748604045166893, + "grad_norm": 1220.1796875, + "learning_rate": 9.964198539137277e-06, + "loss": 191.2195, + "step": 16620 + }, + { + "epoch": 0.13756876370103818, + "grad_norm": 0.0, + "learning_rate": 9.964029867328791e-06, + "loss": 112.8693, + "step": 16630 + }, + { + "epoch": 0.1376514869504074, + "grad_norm": 1105.817626953125, + "learning_rate": 9.963860800555228e-06, + "loss": 103.0777, + "step": 16640 + }, + { + "epoch": 0.13773421019977664, + "grad_norm": 472.4584655761719, + "learning_rate": 9.963691338830045e-06, + "loss": 123.1952, + "step": 16650 + }, + { + "epoch": 0.1378169334491459, + "grad_norm": 990.940673828125, + "learning_rate": 9.963521482166718e-06, + "loss": 136.4567, + "step": 16660 + }, + { + "epoch": 0.13789965669851512, + "grad_norm": 1503.9461669921875, + "learning_rate": 9.96335123057877e-06, + "loss": 136.2858, + "step": 16670 + }, + { + "epoch": 0.13798237994788434, + "grad_norm": 1348.58740234375, + "learning_rate": 9.963180584079741e-06, + "loss": 137.5341, + "step": 16680 + }, + { + "epoch": 0.1380651031972536, + "grad_norm": 1100.0037841796875, + "learning_rate": 9.963009542683214e-06, + "loss": 199.9709, + "step": 16690 + }, + { + "epoch": 0.13814782644662282, + "grad_norm": 718.8609619140625, + "learning_rate": 9.962838106402791e-06, + "loss": 184.6782, + "step": 16700 + }, + { + "epoch": 0.13823054969599205, + "grad_norm": 865.5576782226562, + "learning_rate": 9.962666275252117e-06, + "loss": 104.1854, + "step": 16710 + }, + { + "epoch": 0.1383132729453613, + "grad_norm": 1161.63525390625, + "learning_rate": 9.962494049244866e-06, + "loss": 169.3983, + "step": 16720 + }, + { + "epoch": 0.13839599619473053, + "grad_norm": 589.0774536132812, + "learning_rate": 9.962321428394735e-06, + "loss": 165.776, + "step": 16730 + }, + { + "epoch": 0.13847871944409976, + "grad_norm": 2693.160888671875, + "learning_rate": 9.962148412715464e-06, + "loss": 154.1448, + "step": 16740 + }, + { + "epoch": 0.138561442693469, + "grad_norm": 1310.2269287109375, + "learning_rate": 9.961975002220816e-06, + "loss": 166.3599, + "step": 16750 + }, + { + "epoch": 0.13864416594283824, + "grad_norm": 1167.153076171875, + "learning_rate": 9.96180119692459e-06, + "loss": 171.0495, + "step": 16760 + }, + { + "epoch": 0.13872688919220746, + "grad_norm": 1377.29833984375, + "learning_rate": 
9.961626996840613e-06, + "loss": 102.7167, + "step": 16770 + }, + { + "epoch": 0.13880961244157672, + "grad_norm": 977.5831909179688, + "learning_rate": 9.961452401982748e-06, + "loss": 136.4004, + "step": 16780 + }, + { + "epoch": 0.13889233569094595, + "grad_norm": 1010.1982421875, + "learning_rate": 9.961277412364884e-06, + "loss": 146.971, + "step": 16790 + }, + { + "epoch": 0.13897505894031517, + "grad_norm": 814.7576293945312, + "learning_rate": 9.961102028000948e-06, + "loss": 213.2676, + "step": 16800 + }, + { + "epoch": 0.1390577821896844, + "grad_norm": 881.7014770507812, + "learning_rate": 9.96092624890489e-06, + "loss": 91.0271, + "step": 16810 + }, + { + "epoch": 0.13914050543905365, + "grad_norm": 4899.205078125, + "learning_rate": 9.960750075090698e-06, + "loss": 166.8467, + "step": 16820 + }, + { + "epoch": 0.13922322868842288, + "grad_norm": 1270.030029296875, + "learning_rate": 9.960573506572391e-06, + "loss": 186.535, + "step": 16830 + }, + { + "epoch": 0.1393059519377921, + "grad_norm": 1338.3089599609375, + "learning_rate": 9.960396543364013e-06, + "loss": 192.4324, + "step": 16840 + }, + { + "epoch": 0.13938867518716136, + "grad_norm": 1512.3917236328125, + "learning_rate": 9.96021918547965e-06, + "loss": 124.9194, + "step": 16850 + }, + { + "epoch": 0.13947139843653059, + "grad_norm": 1637.7535400390625, + "learning_rate": 9.96004143293341e-06, + "loss": 131.2566, + "step": 16860 + }, + { + "epoch": 0.1395541216858998, + "grad_norm": 1564.211669921875, + "learning_rate": 9.959863285739436e-06, + "loss": 124.8255, + "step": 16870 + }, + { + "epoch": 0.13963684493526907, + "grad_norm": 720.8834228515625, + "learning_rate": 9.959684743911904e-06, + "loss": 140.7759, + "step": 16880 + }, + { + "epoch": 0.1397195681846383, + "grad_norm": 796.6300659179688, + "learning_rate": 9.959505807465018e-06, + "loss": 120.1176, + "step": 16890 + }, + { + "epoch": 0.13980229143400752, + "grad_norm": 1232.4276123046875, + "learning_rate": 9.959326476413016e-06, + "loss": 130.2664, + "step": 16900 + }, + { + "epoch": 0.13988501468337677, + "grad_norm": 457.3919677734375, + "learning_rate": 9.959146750770167e-06, + "loss": 124.8512, + "step": 16910 + }, + { + "epoch": 0.139967737932746, + "grad_norm": 708.2092895507812, + "learning_rate": 9.95896663055077e-06, + "loss": 120.5444, + "step": 16920 + }, + { + "epoch": 0.14005046118211523, + "grad_norm": 995.7003784179688, + "learning_rate": 9.958786115769157e-06, + "loss": 114.9213, + "step": 16930 + }, + { + "epoch": 0.14013318443148448, + "grad_norm": 1515.4827880859375, + "learning_rate": 9.958605206439692e-06, + "loss": 146.7894, + "step": 16940 + }, + { + "epoch": 0.1402159076808537, + "grad_norm": 814.6317138671875, + "learning_rate": 9.958423902576764e-06, + "loss": 99.1024, + "step": 16950 + }, + { + "epoch": 0.14029863093022293, + "grad_norm": 760.5602416992188, + "learning_rate": 9.958242204194804e-06, + "loss": 160.827, + "step": 16960 + }, + { + "epoch": 0.1403813541795922, + "grad_norm": 738.33349609375, + "learning_rate": 9.958060111308267e-06, + "loss": 136.0457, + "step": 16970 + }, + { + "epoch": 0.14046407742896141, + "grad_norm": 1149.28857421875, + "learning_rate": 9.957877623931642e-06, + "loss": 151.1577, + "step": 16980 + }, + { + "epoch": 0.14054680067833064, + "grad_norm": 1362.2108154296875, + "learning_rate": 9.95769474207945e-06, + "loss": 173.5694, + "step": 16990 + }, + { + "epoch": 0.14062952392769987, + "grad_norm": 1314.1846923828125, + "learning_rate": 9.957511465766236e-06, + "loss": 169.4035, + "step": 
17000 + }, + { + "epoch": 0.14071224717706912, + "grad_norm": 1065.3922119140625, + "learning_rate": 9.957327795006589e-06, + "loss": 169.1779, + "step": 17010 + }, + { + "epoch": 0.14079497042643835, + "grad_norm": 1382.638427734375, + "learning_rate": 9.95714372981512e-06, + "loss": 145.6161, + "step": 17020 + }, + { + "epoch": 0.14087769367580757, + "grad_norm": 1485.4481201171875, + "learning_rate": 9.956959270206474e-06, + "loss": 131.7884, + "step": 17030 + }, + { + "epoch": 0.14096041692517683, + "grad_norm": 901.7747192382812, + "learning_rate": 9.956774416195329e-06, + "loss": 129.2612, + "step": 17040 + }, + { + "epoch": 0.14104314017454606, + "grad_norm": 1346.950439453125, + "learning_rate": 9.956589167796392e-06, + "loss": 108.1172, + "step": 17050 + }, + { + "epoch": 0.14112586342391528, + "grad_norm": 857.8418579101562, + "learning_rate": 9.956403525024402e-06, + "loss": 132.697, + "step": 17060 + }, + { + "epoch": 0.14120858667328454, + "grad_norm": 1938.1868896484375, + "learning_rate": 9.956217487894131e-06, + "loss": 165.6452, + "step": 17070 + }, + { + "epoch": 0.14129130992265376, + "grad_norm": 749.3518676757812, + "learning_rate": 9.95603105642038e-06, + "loss": 212.4321, + "step": 17080 + }, + { + "epoch": 0.141374033172023, + "grad_norm": 709.408447265625, + "learning_rate": 9.955844230617985e-06, + "loss": 156.41, + "step": 17090 + }, + { + "epoch": 0.14145675642139224, + "grad_norm": 1008.6261596679688, + "learning_rate": 9.955657010501807e-06, + "loss": 118.0272, + "step": 17100 + }, + { + "epoch": 0.14153947967076147, + "grad_norm": 828.895751953125, + "learning_rate": 9.955469396086743e-06, + "loss": 138.8411, + "step": 17110 + }, + { + "epoch": 0.1416222029201307, + "grad_norm": 1362.32421875, + "learning_rate": 9.955281387387724e-06, + "loss": 145.7589, + "step": 17120 + }, + { + "epoch": 0.14170492616949995, + "grad_norm": 1597.079345703125, + "learning_rate": 9.955092984419705e-06, + "loss": 170.475, + "step": 17130 + }, + { + "epoch": 0.14178764941886918, + "grad_norm": 1059.4306640625, + "learning_rate": 9.954904187197679e-06, + "loss": 158.0434, + "step": 17140 + }, + { + "epoch": 0.1418703726682384, + "grad_norm": 694.0506591796875, + "learning_rate": 9.954714995736667e-06, + "loss": 142.6755, + "step": 17150 + }, + { + "epoch": 0.14195309591760763, + "grad_norm": 1392.7862548828125, + "learning_rate": 9.95452541005172e-06, + "loss": 192.9698, + "step": 17160 + }, + { + "epoch": 0.14203581916697688, + "grad_norm": 1239.712646484375, + "learning_rate": 9.954335430157926e-06, + "loss": 126.2119, + "step": 17170 + }, + { + "epoch": 0.1421185424163461, + "grad_norm": 949.230712890625, + "learning_rate": 9.9541450560704e-06, + "loss": 76.8772, + "step": 17180 + }, + { + "epoch": 0.14220126566571534, + "grad_norm": 1190.4364013671875, + "learning_rate": 9.953954287804286e-06, + "loss": 156.7768, + "step": 17190 + }, + { + "epoch": 0.1422839889150846, + "grad_norm": 1422.4742431640625, + "learning_rate": 9.953763125374767e-06, + "loss": 107.7513, + "step": 17200 + }, + { + "epoch": 0.14236671216445382, + "grad_norm": 1076.408935546875, + "learning_rate": 9.953571568797049e-06, + "loss": 136.0641, + "step": 17210 + }, + { + "epoch": 0.14244943541382304, + "grad_norm": 930.828125, + "learning_rate": 9.953379618086377e-06, + "loss": 143.9599, + "step": 17220 + }, + { + "epoch": 0.1425321586631923, + "grad_norm": 1367.8873291015625, + "learning_rate": 9.95318727325802e-06, + "loss": 128.7768, + "step": 17230 + }, + { + "epoch": 0.14261488191256153, + "grad_norm": 
1150.171875, + "learning_rate": 9.952994534327283e-06, + "loss": 124.427, + "step": 17240 + }, + { + "epoch": 0.14269760516193075, + "grad_norm": 821.237548828125, + "learning_rate": 9.952801401309504e-06, + "loss": 137.096, + "step": 17250 + }, + { + "epoch": 0.1427803284113, + "grad_norm": 1357.8616943359375, + "learning_rate": 9.952607874220048e-06, + "loss": 201.047, + "step": 17260 + }, + { + "epoch": 0.14286305166066923, + "grad_norm": 1452.91650390625, + "learning_rate": 9.952413953074312e-06, + "loss": 199.8793, + "step": 17270 + }, + { + "epoch": 0.14294577491003846, + "grad_norm": 965.8828125, + "learning_rate": 9.952219637887725e-06, + "loss": 129.7407, + "step": 17280 + }, + { + "epoch": 0.1430284981594077, + "grad_norm": 1721.4344482421875, + "learning_rate": 9.952024928675752e-06, + "loss": 177.8543, + "step": 17290 + }, + { + "epoch": 0.14311122140877694, + "grad_norm": 3541.317626953125, + "learning_rate": 9.951829825453881e-06, + "loss": 167.7698, + "step": 17300 + }, + { + "epoch": 0.14319394465814617, + "grad_norm": 2036.2423095703125, + "learning_rate": 9.951634328237635e-06, + "loss": 141.8449, + "step": 17310 + }, + { + "epoch": 0.14327666790751542, + "grad_norm": 880.5416870117188, + "learning_rate": 9.951438437042572e-06, + "loss": 198.8033, + "step": 17320 + }, + { + "epoch": 0.14335939115688465, + "grad_norm": 807.236572265625, + "learning_rate": 9.951242151884275e-06, + "loss": 112.0078, + "step": 17330 + }, + { + "epoch": 0.14344211440625387, + "grad_norm": 1530.7301025390625, + "learning_rate": 9.951045472778365e-06, + "loss": 133.3953, + "step": 17340 + }, + { + "epoch": 0.1435248376556231, + "grad_norm": 1775.3485107421875, + "learning_rate": 9.950848399740488e-06, + "loss": 132.5112, + "step": 17350 + }, + { + "epoch": 0.14360756090499235, + "grad_norm": 1216.1314697265625, + "learning_rate": 9.950650932786325e-06, + "loss": 150.7454, + "step": 17360 + }, + { + "epoch": 0.14369028415436158, + "grad_norm": 756.1212158203125, + "learning_rate": 9.95045307193159e-06, + "loss": 114.4585, + "step": 17370 + }, + { + "epoch": 0.1437730074037308, + "grad_norm": 987.248779296875, + "learning_rate": 9.95025481719202e-06, + "loss": 140.8504, + "step": 17380 + }, + { + "epoch": 0.14385573065310006, + "grad_norm": 1126.249267578125, + "learning_rate": 9.950056168583395e-06, + "loss": 225.9696, + "step": 17390 + }, + { + "epoch": 0.1439384539024693, + "grad_norm": 706.3463745117188, + "learning_rate": 9.949857126121519e-06, + "loss": 113.696, + "step": 17400 + }, + { + "epoch": 0.14402117715183851, + "grad_norm": 892.3402099609375, + "learning_rate": 9.949657689822226e-06, + "loss": 162.9231, + "step": 17410 + }, + { + "epoch": 0.14410390040120777, + "grad_norm": 856.6466674804688, + "learning_rate": 9.949457859701388e-06, + "loss": 99.4635, + "step": 17420 + }, + { + "epoch": 0.144186623650577, + "grad_norm": 775.4996948242188, + "learning_rate": 9.949257635774903e-06, + "loss": 152.7363, + "step": 17430 + }, + { + "epoch": 0.14426934689994622, + "grad_norm": 842.1768798828125, + "learning_rate": 9.9490570180587e-06, + "loss": 85.8346, + "step": 17440 + }, + { + "epoch": 0.14435207014931548, + "grad_norm": 1798.95849609375, + "learning_rate": 9.948856006568746e-06, + "loss": 197.5757, + "step": 17450 + }, + { + "epoch": 0.1444347933986847, + "grad_norm": 1381.5155029296875, + "learning_rate": 9.94865460132103e-06, + "loss": 150.2531, + "step": 17460 + }, + { + "epoch": 0.14451751664805393, + "grad_norm": 997.7630004882812, + "learning_rate": 9.948452802331578e-06, + 
"loss": 133.1603, + "step": 17470 + }, + { + "epoch": 0.14460023989742318, + "grad_norm": 1275.1690673828125, + "learning_rate": 9.948250609616449e-06, + "loss": 168.5733, + "step": 17480 + }, + { + "epoch": 0.1446829631467924, + "grad_norm": 1112.8721923828125, + "learning_rate": 9.948048023191728e-06, + "loss": 182.301, + "step": 17490 + }, + { + "epoch": 0.14476568639616164, + "grad_norm": 950.4414672851562, + "learning_rate": 9.947845043073533e-06, + "loss": 149.5477, + "step": 17500 + }, + { + "epoch": 0.1448484096455309, + "grad_norm": 1122.95751953125, + "learning_rate": 9.947641669278016e-06, + "loss": 123.1119, + "step": 17510 + }, + { + "epoch": 0.14493113289490012, + "grad_norm": 1148.9334716796875, + "learning_rate": 9.947437901821358e-06, + "loss": 128.3063, + "step": 17520 + }, + { + "epoch": 0.14501385614426934, + "grad_norm": 1392.179443359375, + "learning_rate": 9.947233740719772e-06, + "loss": 139.3278, + "step": 17530 + }, + { + "epoch": 0.14509657939363857, + "grad_norm": 604.5231323242188, + "learning_rate": 9.947029185989501e-06, + "loss": 163.2896, + "step": 17540 + }, + { + "epoch": 0.14517930264300782, + "grad_norm": 1102.948486328125, + "learning_rate": 9.946824237646823e-06, + "loss": 153.8839, + "step": 17550 + }, + { + "epoch": 0.14526202589237705, + "grad_norm": 2167.79638671875, + "learning_rate": 9.946618895708043e-06, + "loss": 172.0367, + "step": 17560 + }, + { + "epoch": 0.14534474914174628, + "grad_norm": 1476.7362060546875, + "learning_rate": 9.946413160189498e-06, + "loss": 138.3295, + "step": 17570 + }, + { + "epoch": 0.14542747239111553, + "grad_norm": 882.1810913085938, + "learning_rate": 9.946207031107562e-06, + "loss": 186.2194, + "step": 17580 + }, + { + "epoch": 0.14551019564048476, + "grad_norm": 2111.673095703125, + "learning_rate": 9.94600050847863e-06, + "loss": 170.1872, + "step": 17590 + }, + { + "epoch": 0.14559291888985398, + "grad_norm": 1099.032958984375, + "learning_rate": 9.945793592319137e-06, + "loss": 128.6498, + "step": 17600 + }, + { + "epoch": 0.14567564213922324, + "grad_norm": 1059.4005126953125, + "learning_rate": 9.945586282645545e-06, + "loss": 134.5357, + "step": 17610 + }, + { + "epoch": 0.14575836538859246, + "grad_norm": 1566.564208984375, + "learning_rate": 9.945378579474351e-06, + "loss": 164.359, + "step": 17620 + }, + { + "epoch": 0.1458410886379617, + "grad_norm": 782.61279296875, + "learning_rate": 9.945170482822079e-06, + "loss": 106.899, + "step": 17630 + }, + { + "epoch": 0.14592381188733095, + "grad_norm": 1026.7816162109375, + "learning_rate": 9.944961992705288e-06, + "loss": 142.0462, + "step": 17640 + }, + { + "epoch": 0.14600653513670017, + "grad_norm": 817.039306640625, + "learning_rate": 9.944753109140564e-06, + "loss": 166.4367, + "step": 17650 + }, + { + "epoch": 0.1460892583860694, + "grad_norm": 856.3842163085938, + "learning_rate": 9.94454383214453e-06, + "loss": 131.3289, + "step": 17660 + }, + { + "epoch": 0.14617198163543865, + "grad_norm": 1656.999755859375, + "learning_rate": 9.944334161733835e-06, + "loss": 129.1978, + "step": 17670 + }, + { + "epoch": 0.14625470488480788, + "grad_norm": 1338.8382568359375, + "learning_rate": 9.944124097925161e-06, + "loss": 184.4288, + "step": 17680 + }, + { + "epoch": 0.1463374281341771, + "grad_norm": 1011.0686645507812, + "learning_rate": 9.943913640735224e-06, + "loss": 127.4451, + "step": 17690 + }, + { + "epoch": 0.14642015138354633, + "grad_norm": 923.1884765625, + "learning_rate": 9.94370279018077e-06, + "loss": 120.5529, + "step": 17700 + }, + { + 
"epoch": 0.1465028746329156, + "grad_norm": 1001.093505859375, + "learning_rate": 9.94349154627857e-06, + "loss": 129.3988, + "step": 17710 + }, + { + "epoch": 0.1465855978822848, + "grad_norm": 1354.6356201171875, + "learning_rate": 9.943279909045438e-06, + "loss": 122.9835, + "step": 17720 + }, + { + "epoch": 0.14666832113165404, + "grad_norm": 1260.7392578125, + "learning_rate": 9.94306787849821e-06, + "loss": 101.6319, + "step": 17730 + }, + { + "epoch": 0.1467510443810233, + "grad_norm": 1424.63330078125, + "learning_rate": 9.942855454653755e-06, + "loss": 179.1118, + "step": 17740 + }, + { + "epoch": 0.14683376763039252, + "grad_norm": 1053.8809814453125, + "learning_rate": 9.942642637528977e-06, + "loss": 167.5939, + "step": 17750 + }, + { + "epoch": 0.14691649087976175, + "grad_norm": 936.3515014648438, + "learning_rate": 9.942429427140807e-06, + "loss": 154.7948, + "step": 17760 + }, + { + "epoch": 0.146999214129131, + "grad_norm": 876.3916015625, + "learning_rate": 9.942215823506211e-06, + "loss": 114.5722, + "step": 17770 + }, + { + "epoch": 0.14708193737850023, + "grad_norm": 1172.0423583984375, + "learning_rate": 9.942001826642184e-06, + "loss": 142.9646, + "step": 17780 + }, + { + "epoch": 0.14716466062786945, + "grad_norm": 1635.97802734375, + "learning_rate": 9.941787436565751e-06, + "loss": 150.69, + "step": 17790 + }, + { + "epoch": 0.1472473838772387, + "grad_norm": 728.3792724609375, + "learning_rate": 9.941572653293974e-06, + "loss": 97.5937, + "step": 17800 + }, + { + "epoch": 0.14733010712660793, + "grad_norm": 935.0343627929688, + "learning_rate": 9.941357476843938e-06, + "loss": 135.0443, + "step": 17810 + }, + { + "epoch": 0.14741283037597716, + "grad_norm": 583.3887329101562, + "learning_rate": 9.941141907232766e-06, + "loss": 134.4311, + "step": 17820 + }, + { + "epoch": 0.14749555362534642, + "grad_norm": 1191.19677734375, + "learning_rate": 9.940925944477608e-06, + "loss": 129.727, + "step": 17830 + }, + { + "epoch": 0.14757827687471564, + "grad_norm": 1111.1417236328125, + "learning_rate": 9.940709588595649e-06, + "loss": 171.4274, + "step": 17840 + }, + { + "epoch": 0.14766100012408487, + "grad_norm": 2006.4134521484375, + "learning_rate": 9.940492839604103e-06, + "loss": 152.9817, + "step": 17850 + }, + { + "epoch": 0.14774372337345412, + "grad_norm": 1163.596923828125, + "learning_rate": 9.940275697520216e-06, + "loss": 169.9584, + "step": 17860 + }, + { + "epoch": 0.14782644662282335, + "grad_norm": 1189.015869140625, + "learning_rate": 9.940058162361264e-06, + "loss": 152.1794, + "step": 17870 + }, + { + "epoch": 0.14790916987219258, + "grad_norm": 998.8855590820312, + "learning_rate": 9.939840234144556e-06, + "loss": 129.5204, + "step": 17880 + }, + { + "epoch": 0.1479918931215618, + "grad_norm": 442.9149475097656, + "learning_rate": 9.939621912887431e-06, + "loss": 106.7805, + "step": 17890 + }, + { + "epoch": 0.14807461637093106, + "grad_norm": 830.00927734375, + "learning_rate": 9.93940319860726e-06, + "loss": 139.6457, + "step": 17900 + }, + { + "epoch": 0.14815733962030028, + "grad_norm": 1069.5220947265625, + "learning_rate": 9.939184091321445e-06, + "loss": 129.1493, + "step": 17910 + }, + { + "epoch": 0.1482400628696695, + "grad_norm": 1180.868896484375, + "learning_rate": 9.938964591047421e-06, + "loss": 108.2578, + "step": 17920 + }, + { + "epoch": 0.14832278611903876, + "grad_norm": 1095.6793212890625, + "learning_rate": 9.938744697802651e-06, + "loss": 145.4649, + "step": 17930 + }, + { + "epoch": 0.148405509368408, + "grad_norm": 
1292.62744140625, + "learning_rate": 9.938524411604631e-06, + "loss": 145.161, + "step": 17940 + }, + { + "epoch": 0.14848823261777722, + "grad_norm": 1319.2213134765625, + "learning_rate": 9.938303732470888e-06, + "loss": 129.5037, + "step": 17950 + }, + { + "epoch": 0.14857095586714647, + "grad_norm": 697.8318481445312, + "learning_rate": 9.938082660418981e-06, + "loss": 103.5571, + "step": 17960 + }, + { + "epoch": 0.1486536791165157, + "grad_norm": 784.6300659179688, + "learning_rate": 9.937861195466498e-06, + "loss": 133.7046, + "step": 17970 + }, + { + "epoch": 0.14873640236588492, + "grad_norm": 966.1806030273438, + "learning_rate": 9.937639337631064e-06, + "loss": 170.2544, + "step": 17980 + }, + { + "epoch": 0.14881912561525418, + "grad_norm": 862.203857421875, + "learning_rate": 9.937417086930328e-06, + "loss": 129.5846, + "step": 17990 + }, + { + "epoch": 0.1489018488646234, + "grad_norm": 3391.59716796875, + "learning_rate": 9.937194443381972e-06, + "loss": 195.0929, + "step": 18000 + }, + { + "epoch": 0.14898457211399263, + "grad_norm": 737.4010009765625, + "learning_rate": 9.936971407003714e-06, + "loss": 110.9804, + "step": 18010 + }, + { + "epoch": 0.14906729536336188, + "grad_norm": 1527.3822021484375, + "learning_rate": 9.936747977813299e-06, + "loss": 124.5241, + "step": 18020 + }, + { + "epoch": 0.1491500186127311, + "grad_norm": 1383.10986328125, + "learning_rate": 9.936524155828503e-06, + "loss": 138.0007, + "step": 18030 + }, + { + "epoch": 0.14923274186210034, + "grad_norm": 850.4631958007812, + "learning_rate": 9.936299941067137e-06, + "loss": 131.9197, + "step": 18040 + }, + { + "epoch": 0.1493154651114696, + "grad_norm": 736.7586059570312, + "learning_rate": 9.93607533354704e-06, + "loss": 116.8003, + "step": 18050 + }, + { + "epoch": 0.14939818836083882, + "grad_norm": 3558.953857421875, + "learning_rate": 9.935850333286081e-06, + "loss": 236.4352, + "step": 18060 + }, + { + "epoch": 0.14948091161020804, + "grad_norm": 2170.6923828125, + "learning_rate": 9.935624940302165e-06, + "loss": 162.2385, + "step": 18070 + }, + { + "epoch": 0.14956363485957727, + "grad_norm": 890.1776123046875, + "learning_rate": 9.93539915461322e-06, + "loss": 152.4946, + "step": 18080 + }, + { + "epoch": 0.14964635810894653, + "grad_norm": 1055.08447265625, + "learning_rate": 9.935172976237218e-06, + "loss": 179.4581, + "step": 18090 + }, + { + "epoch": 0.14972908135831575, + "grad_norm": 1069.735595703125, + "learning_rate": 9.934946405192152e-06, + "loss": 109.1896, + "step": 18100 + }, + { + "epoch": 0.14981180460768498, + "grad_norm": 936.50048828125, + "learning_rate": 9.934719441496048e-06, + "loss": 170.2172, + "step": 18110 + }, + { + "epoch": 0.14989452785705423, + "grad_norm": 1424.594970703125, + "learning_rate": 9.934492085166965e-06, + "loss": 120.5943, + "step": 18120 + }, + { + "epoch": 0.14997725110642346, + "grad_norm": 1259.637939453125, + "learning_rate": 9.934264336222992e-06, + "loss": 141.8418, + "step": 18130 + }, + { + "epoch": 0.15005997435579269, + "grad_norm": 1107.58447265625, + "learning_rate": 9.934036194682253e-06, + "loss": 132.8073, + "step": 18140 + }, + { + "epoch": 0.15014269760516194, + "grad_norm": 1031.5169677734375, + "learning_rate": 9.933807660562898e-06, + "loss": 122.9906, + "step": 18150 + }, + { + "epoch": 0.15022542085453117, + "grad_norm": 683.3692626953125, + "learning_rate": 9.933578733883109e-06, + "loss": 175.373, + "step": 18160 + }, + { + "epoch": 0.1503081441039004, + "grad_norm": 841.3174438476562, + "learning_rate": 
9.933349414661103e-06, + "loss": 143.8702, + "step": 18170 + }, + { + "epoch": 0.15039086735326965, + "grad_norm": 1086.541015625, + "learning_rate": 9.933119702915125e-06, + "loss": 149.0898, + "step": 18180 + }, + { + "epoch": 0.15047359060263887, + "grad_norm": 1380.8690185546875, + "learning_rate": 9.932889598663452e-06, + "loss": 142.0298, + "step": 18190 + }, + { + "epoch": 0.1505563138520081, + "grad_norm": 800.4336547851562, + "learning_rate": 9.932659101924393e-06, + "loss": 169.3204, + "step": 18200 + }, + { + "epoch": 0.15063903710137735, + "grad_norm": 883.3157348632812, + "learning_rate": 9.932428212716287e-06, + "loss": 183.8594, + "step": 18210 + }, + { + "epoch": 0.15072176035074658, + "grad_norm": 922.6904907226562, + "learning_rate": 9.932196931057505e-06, + "loss": 157.8369, + "step": 18220 + }, + { + "epoch": 0.1508044836001158, + "grad_norm": 1918.9375, + "learning_rate": 9.931965256966449e-06, + "loss": 143.9471, + "step": 18230 + }, + { + "epoch": 0.15088720684948506, + "grad_norm": 1153.788818359375, + "learning_rate": 9.931733190461552e-06, + "loss": 167.4599, + "step": 18240 + }, + { + "epoch": 0.1509699300988543, + "grad_norm": 1510.7779541015625, + "learning_rate": 9.931500731561279e-06, + "loss": 123.1982, + "step": 18250 + }, + { + "epoch": 0.15105265334822351, + "grad_norm": 939.08447265625, + "learning_rate": 9.931267880284124e-06, + "loss": 128.6788, + "step": 18260 + }, + { + "epoch": 0.15113537659759274, + "grad_norm": 521.10693359375, + "learning_rate": 9.931034636648616e-06, + "loss": 110.3548, + "step": 18270 + }, + { + "epoch": 0.151218099846962, + "grad_norm": 1793.3514404296875, + "learning_rate": 9.930801000673314e-06, + "loss": 226.4601, + "step": 18280 + }, + { + "epoch": 0.15130082309633122, + "grad_norm": 1056.244384765625, + "learning_rate": 9.930566972376803e-06, + "loss": 137.7991, + "step": 18290 + }, + { + "epoch": 0.15138354634570045, + "grad_norm": 1053.8623046875, + "learning_rate": 9.930332551777709e-06, + "loss": 126.393, + "step": 18300 + }, + { + "epoch": 0.1514662695950697, + "grad_norm": 466.3129577636719, + "learning_rate": 9.930097738894679e-06, + "loss": 142.9212, + "step": 18310 + }, + { + "epoch": 0.15154899284443893, + "grad_norm": 1002.7549438476562, + "learning_rate": 9.929862533746398e-06, + "loss": 142.7721, + "step": 18320 + }, + { + "epoch": 0.15163171609380816, + "grad_norm": 758.2431030273438, + "learning_rate": 9.92962693635158e-06, + "loss": 138.2474, + "step": 18330 + }, + { + "epoch": 0.1517144393431774, + "grad_norm": 640.2601318359375, + "learning_rate": 9.929390946728972e-06, + "loss": 127.6863, + "step": 18340 + }, + { + "epoch": 0.15179716259254664, + "grad_norm": 974.4703979492188, + "learning_rate": 9.929154564897347e-06, + "loss": 127.8559, + "step": 18350 + }, + { + "epoch": 0.15187988584191586, + "grad_norm": 989.0883178710938, + "learning_rate": 9.928917790875519e-06, + "loss": 146.4885, + "step": 18360 + }, + { + "epoch": 0.15196260909128512, + "grad_norm": 926.8894653320312, + "learning_rate": 9.92868062468232e-06, + "loss": 140.2718, + "step": 18370 + }, + { + "epoch": 0.15204533234065434, + "grad_norm": 1093.3875732421875, + "learning_rate": 9.928443066336624e-06, + "loss": 156.4275, + "step": 18380 + }, + { + "epoch": 0.15212805559002357, + "grad_norm": 1011.3756713867188, + "learning_rate": 9.92820511585733e-06, + "loss": 110.9518, + "step": 18390 + }, + { + "epoch": 0.15221077883939282, + "grad_norm": 963.0042114257812, + "learning_rate": 9.927966773263375e-06, + "loss": 126.9806, + "step": 
18400 + }, + { + "epoch": 0.15229350208876205, + "grad_norm": 1338.420654296875, + "learning_rate": 9.92772803857372e-06, + "loss": 151.1979, + "step": 18410 + }, + { + "epoch": 0.15237622533813128, + "grad_norm": 2231.09326171875, + "learning_rate": 9.927488911807359e-06, + "loss": 126.2641, + "step": 18420 + }, + { + "epoch": 0.1524589485875005, + "grad_norm": 740.568603515625, + "learning_rate": 9.927249392983319e-06, + "loss": 161.8315, + "step": 18430 + }, + { + "epoch": 0.15254167183686976, + "grad_norm": 1194.8526611328125, + "learning_rate": 9.927009482120658e-06, + "loss": 147.5258, + "step": 18440 + }, + { + "epoch": 0.15262439508623898, + "grad_norm": 575.281005859375, + "learning_rate": 9.926769179238467e-06, + "loss": 123.2295, + "step": 18450 + }, + { + "epoch": 0.1527071183356082, + "grad_norm": 1439.9266357421875, + "learning_rate": 9.926528484355859e-06, + "loss": 131.7167, + "step": 18460 + }, + { + "epoch": 0.15278984158497746, + "grad_norm": 1190.1434326171875, + "learning_rate": 9.926287397491992e-06, + "loss": 147.6172, + "step": 18470 + }, + { + "epoch": 0.1528725648343467, + "grad_norm": 1017.2939453125, + "learning_rate": 9.926045918666045e-06, + "loss": 144.2414, + "step": 18480 + }, + { + "epoch": 0.15295528808371592, + "grad_norm": 1303.34814453125, + "learning_rate": 9.925804047897231e-06, + "loss": 202.542, + "step": 18490 + }, + { + "epoch": 0.15303801133308517, + "grad_norm": 1302.6015625, + "learning_rate": 9.925561785204797e-06, + "loss": 150.7994, + "step": 18500 + }, + { + "epoch": 0.1531207345824544, + "grad_norm": 1482.454345703125, + "learning_rate": 9.925319130608015e-06, + "loss": 160.8186, + "step": 18510 + }, + { + "epoch": 0.15320345783182363, + "grad_norm": 1089.9215087890625, + "learning_rate": 9.925076084126194e-06, + "loss": 140.3311, + "step": 18520 + }, + { + "epoch": 0.15328618108119288, + "grad_norm": 2586.4873046875, + "learning_rate": 9.924832645778674e-06, + "loss": 105.1053, + "step": 18530 + }, + { + "epoch": 0.1533689043305621, + "grad_norm": 986.44775390625, + "learning_rate": 9.924588815584822e-06, + "loss": 146.6998, + "step": 18540 + }, + { + "epoch": 0.15345162757993133, + "grad_norm": 1076.886474609375, + "learning_rate": 9.924344593564038e-06, + "loss": 167.1004, + "step": 18550 + }, + { + "epoch": 0.1535343508293006, + "grad_norm": 576.464599609375, + "learning_rate": 9.924099979735754e-06, + "loss": 109.8678, + "step": 18560 + }, + { + "epoch": 0.1536170740786698, + "grad_norm": 722.518310546875, + "learning_rate": 9.923854974119434e-06, + "loss": 125.0473, + "step": 18570 + }, + { + "epoch": 0.15369979732803904, + "grad_norm": 1820.7373046875, + "learning_rate": 9.92360957673457e-06, + "loss": 141.7229, + "step": 18580 + }, + { + "epoch": 0.1537825205774083, + "grad_norm": 1114.6781005859375, + "learning_rate": 9.923363787600688e-06, + "loss": 141.2934, + "step": 18590 + }, + { + "epoch": 0.15386524382677752, + "grad_norm": 1381.0604248046875, + "learning_rate": 9.923117606737347e-06, + "loss": 116.5776, + "step": 18600 + }, + { + "epoch": 0.15394796707614675, + "grad_norm": 1086.9346923828125, + "learning_rate": 9.92287103416413e-06, + "loss": 191.1018, + "step": 18610 + }, + { + "epoch": 0.15403069032551597, + "grad_norm": 1572.17529296875, + "learning_rate": 9.922624069900658e-06, + "loss": 155.7499, + "step": 18620 + }, + { + "epoch": 0.15411341357488523, + "grad_norm": 1132.931884765625, + "learning_rate": 9.922376713966581e-06, + "loss": 152.9908, + "step": 18630 + }, + { + "epoch": 0.15419613682425445, + 
"grad_norm": 585.323486328125, + "learning_rate": 9.92212896638158e-06, + "loss": 152.2068, + "step": 18640 + }, + { + "epoch": 0.15427886007362368, + "grad_norm": 595.2325439453125, + "learning_rate": 9.921880827165367e-06, + "loss": 118.7037, + "step": 18650 + }, + { + "epoch": 0.15436158332299293, + "grad_norm": 4941.7626953125, + "learning_rate": 9.921632296337683e-06, + "loss": 153.8302, + "step": 18660 + }, + { + "epoch": 0.15444430657236216, + "grad_norm": 1101.675048828125, + "learning_rate": 9.921383373918305e-06, + "loss": 180.7743, + "step": 18670 + }, + { + "epoch": 0.1545270298217314, + "grad_norm": 1755.6380615234375, + "learning_rate": 9.92113405992704e-06, + "loss": 180.545, + "step": 18680 + }, + { + "epoch": 0.15460975307110064, + "grad_norm": 925.7059326171875, + "learning_rate": 9.92088435438372e-06, + "loss": 134.731, + "step": 18690 + }, + { + "epoch": 0.15469247632046987, + "grad_norm": 1003.4811401367188, + "learning_rate": 9.920634257308217e-06, + "loss": 123.1074, + "step": 18700 + }, + { + "epoch": 0.1547751995698391, + "grad_norm": 853.6227416992188, + "learning_rate": 9.920383768720429e-06, + "loss": 150.079, + "step": 18710 + }, + { + "epoch": 0.15485792281920835, + "grad_norm": 1113.7686767578125, + "learning_rate": 9.920132888640286e-06, + "loss": 155.0464, + "step": 18720 + }, + { + "epoch": 0.15494064606857758, + "grad_norm": 1343.3956298828125, + "learning_rate": 9.91988161708775e-06, + "loss": 158.199, + "step": 18730 + }, + { + "epoch": 0.1550233693179468, + "grad_norm": 704.8764038085938, + "learning_rate": 9.919629954082813e-06, + "loss": 153.2144, + "step": 18740 + }, + { + "epoch": 0.15510609256731606, + "grad_norm": 550.6301879882812, + "learning_rate": 9.919377899645497e-06, + "loss": 141.5231, + "step": 18750 + }, + { + "epoch": 0.15518881581668528, + "grad_norm": 658.1661376953125, + "learning_rate": 9.91912545379586e-06, + "loss": 138.1113, + "step": 18760 + }, + { + "epoch": 0.1552715390660545, + "grad_norm": 1573.263916015625, + "learning_rate": 9.918872616553986e-06, + "loss": 129.6509, + "step": 18770 + }, + { + "epoch": 0.15535426231542376, + "grad_norm": 1778.827392578125, + "learning_rate": 9.918619387939991e-06, + "loss": 155.8357, + "step": 18780 + }, + { + "epoch": 0.155436985564793, + "grad_norm": 3056.092041015625, + "learning_rate": 9.918365767974025e-06, + "loss": 187.3279, + "step": 18790 + }, + { + "epoch": 0.15551970881416222, + "grad_norm": 905.0897827148438, + "learning_rate": 9.91811175667627e-06, + "loss": 198.9183, + "step": 18800 + }, + { + "epoch": 0.15560243206353144, + "grad_norm": 854.0653076171875, + "learning_rate": 9.91785735406693e-06, + "loss": 111.8965, + "step": 18810 + }, + { + "epoch": 0.1556851553129007, + "grad_norm": 1693.5703125, + "learning_rate": 9.917602560166253e-06, + "loss": 138.9856, + "step": 18820 + }, + { + "epoch": 0.15576787856226992, + "grad_norm": 1688.492431640625, + "learning_rate": 9.917347374994507e-06, + "loss": 118.313, + "step": 18830 + }, + { + "epoch": 0.15585060181163915, + "grad_norm": 1094.8780517578125, + "learning_rate": 9.917091798571998e-06, + "loss": 122.0171, + "step": 18840 + }, + { + "epoch": 0.1559333250610084, + "grad_norm": 1383.755859375, + "learning_rate": 9.916835830919062e-06, + "loss": 149.7231, + "step": 18850 + }, + { + "epoch": 0.15601604831037763, + "grad_norm": 4172.68603515625, + "learning_rate": 9.916579472056064e-06, + "loss": 164.1563, + "step": 18860 + }, + { + "epoch": 0.15609877155974686, + "grad_norm": 1112.282958984375, + "learning_rate": 
9.916322722003402e-06, + "loss": 140.6031, + "step": 18870 + }, + { + "epoch": 0.1561814948091161, + "grad_norm": 1334.9544677734375, + "learning_rate": 9.916065580781504e-06, + "loss": 125.8786, + "step": 18880 + }, + { + "epoch": 0.15626421805848534, + "grad_norm": 759.095703125, + "learning_rate": 9.91580804841083e-06, + "loss": 123.02, + "step": 18890 + }, + { + "epoch": 0.15634694130785456, + "grad_norm": 923.3683471679688, + "learning_rate": 9.915550124911866e-06, + "loss": 111.5178, + "step": 18900 + }, + { + "epoch": 0.15642966455722382, + "grad_norm": 1883.232177734375, + "learning_rate": 9.915291810305141e-06, + "loss": 153.0945, + "step": 18910 + }, + { + "epoch": 0.15651238780659305, + "grad_norm": 1571.4327392578125, + "learning_rate": 9.915033104611204e-06, + "loss": 152.6783, + "step": 18920 + }, + { + "epoch": 0.15659511105596227, + "grad_norm": 842.9326171875, + "learning_rate": 9.914774007850641e-06, + "loss": 154.6972, + "step": 18930 + }, + { + "epoch": 0.15667783430533153, + "grad_norm": 1248.7547607421875, + "learning_rate": 9.914514520044065e-06, + "loss": 169.4783, + "step": 18940 + }, + { + "epoch": 0.15676055755470075, + "grad_norm": 976.1325073242188, + "learning_rate": 9.914254641212124e-06, + "loss": 114.911, + "step": 18950 + }, + { + "epoch": 0.15684328080406998, + "grad_norm": 893.714111328125, + "learning_rate": 9.913994371375494e-06, + "loss": 81.8798, + "step": 18960 + }, + { + "epoch": 0.1569260040534392, + "grad_norm": 1223.4085693359375, + "learning_rate": 9.913733710554886e-06, + "loss": 138.9431, + "step": 18970 + }, + { + "epoch": 0.15700872730280846, + "grad_norm": 2076.80712890625, + "learning_rate": 9.913472658771034e-06, + "loss": 113.3516, + "step": 18980 + }, + { + "epoch": 0.15709145055217769, + "grad_norm": 1146.1357421875, + "learning_rate": 9.913211216044715e-06, + "loss": 162.9254, + "step": 18990 + }, + { + "epoch": 0.1571741738015469, + "grad_norm": 1230.30224609375, + "learning_rate": 9.912949382396728e-06, + "loss": 197.952, + "step": 19000 + }, + { + "epoch": 0.15725689705091617, + "grad_norm": 1353.6494140625, + "learning_rate": 9.912687157847905e-06, + "loss": 137.2512, + "step": 19010 + }, + { + "epoch": 0.1573396203002854, + "grad_norm": 1508.077392578125, + "learning_rate": 9.91242454241911e-06, + "loss": 133.2853, + "step": 19020 + }, + { + "epoch": 0.15742234354965462, + "grad_norm": 935.4700927734375, + "learning_rate": 9.912161536131242e-06, + "loss": 126.4163, + "step": 19030 + }, + { + "epoch": 0.15750506679902387, + "grad_norm": 920.0881958007812, + "learning_rate": 9.911898139005222e-06, + "loss": 106.0859, + "step": 19040 + }, + { + "epoch": 0.1575877900483931, + "grad_norm": 1098.602783203125, + "learning_rate": 9.91163435106201e-06, + "loss": 113.0203, + "step": 19050 + }, + { + "epoch": 0.15767051329776233, + "grad_norm": 529.7808227539062, + "learning_rate": 9.911370172322595e-06, + "loss": 100.4977, + "step": 19060 + }, + { + "epoch": 0.15775323654713158, + "grad_norm": 1122.334228515625, + "learning_rate": 9.911105602807996e-06, + "loss": 147.1685, + "step": 19070 + }, + { + "epoch": 0.1578359597965008, + "grad_norm": 1302.458740234375, + "learning_rate": 9.910840642539261e-06, + "loss": 138.237, + "step": 19080 + }, + { + "epoch": 0.15791868304587003, + "grad_norm": 1324.593505859375, + "learning_rate": 9.910575291537476e-06, + "loss": 182.2281, + "step": 19090 + }, + { + "epoch": 0.1580014062952393, + "grad_norm": 746.2387084960938, + "learning_rate": 9.91030954982375e-06, + "loss": 111.4794, + "step": 19100 + 
}, + { + "epoch": 0.15808412954460851, + "grad_norm": 1335.113525390625, + "learning_rate": 9.910043417419228e-06, + "loss": 148.9087, + "step": 19110 + }, + { + "epoch": 0.15816685279397774, + "grad_norm": 688.1320190429688, + "learning_rate": 9.909776894345086e-06, + "loss": 141.3004, + "step": 19120 + }, + { + "epoch": 0.158249576043347, + "grad_norm": 1126.445068359375, + "learning_rate": 9.909509980622532e-06, + "loss": 112.016, + "step": 19130 + }, + { + "epoch": 0.15833229929271622, + "grad_norm": 600.5185546875, + "learning_rate": 9.909242676272797e-06, + "loss": 114.159, + "step": 19140 + }, + { + "epoch": 0.15841502254208545, + "grad_norm": 1174.3468017578125, + "learning_rate": 9.908974981317155e-06, + "loss": 171.2533, + "step": 19150 + }, + { + "epoch": 0.15849774579145468, + "grad_norm": 795.6885986328125, + "learning_rate": 9.9087068957769e-06, + "loss": 136.8247, + "step": 19160 + }, + { + "epoch": 0.15858046904082393, + "grad_norm": 1241.0509033203125, + "learning_rate": 9.908438419673367e-06, + "loss": 137.2768, + "step": 19170 + }, + { + "epoch": 0.15866319229019316, + "grad_norm": 761.6776123046875, + "learning_rate": 9.908169553027916e-06, + "loss": 165.4491, + "step": 19180 + }, + { + "epoch": 0.15874591553956238, + "grad_norm": 1572.6136474609375, + "learning_rate": 9.90790029586194e-06, + "loss": 124.7587, + "step": 19190 + }, + { + "epoch": 0.15882863878893164, + "grad_norm": 732.7517700195312, + "learning_rate": 9.907630648196857e-06, + "loss": 142.462, + "step": 19200 + }, + { + "epoch": 0.15891136203830086, + "grad_norm": 957.8698120117188, + "learning_rate": 9.907360610054132e-06, + "loss": 145.4445, + "step": 19210 + }, + { + "epoch": 0.1589940852876701, + "grad_norm": 1933.4423828125, + "learning_rate": 9.907090181455241e-06, + "loss": 126.4228, + "step": 19220 + }, + { + "epoch": 0.15907680853703934, + "grad_norm": 1341.31591796875, + "learning_rate": 9.906819362421707e-06, + "loss": 127.2506, + "step": 19230 + }, + { + "epoch": 0.15915953178640857, + "grad_norm": 1401.2039794921875, + "learning_rate": 9.906548152975076e-06, + "loss": 142.5762, + "step": 19240 + }, + { + "epoch": 0.1592422550357778, + "grad_norm": 1090.92578125, + "learning_rate": 9.906276553136924e-06, + "loss": 133.9682, + "step": 19250 + }, + { + "epoch": 0.15932497828514705, + "grad_norm": 627.4381713867188, + "learning_rate": 9.906004562928865e-06, + "loss": 123.456, + "step": 19260 + }, + { + "epoch": 0.15940770153451628, + "grad_norm": 1303.6290283203125, + "learning_rate": 9.905732182372538e-06, + "loss": 176.1459, + "step": 19270 + }, + { + "epoch": 0.1594904247838855, + "grad_norm": 1230.2550048828125, + "learning_rate": 9.905459411489617e-06, + "loss": 150.8253, + "step": 19280 + }, + { + "epoch": 0.15957314803325476, + "grad_norm": 557.597900390625, + "learning_rate": 9.905186250301802e-06, + "loss": 128.1924, + "step": 19290 + }, + { + "epoch": 0.15965587128262398, + "grad_norm": 1013.2421875, + "learning_rate": 9.904912698830828e-06, + "loss": 148.6797, + "step": 19300 + }, + { + "epoch": 0.1597385945319932, + "grad_norm": 1238.0384521484375, + "learning_rate": 9.904638757098464e-06, + "loss": 143.5567, + "step": 19310 + }, + { + "epoch": 0.15982131778136247, + "grad_norm": 1117.36962890625, + "learning_rate": 9.9043644251265e-06, + "loss": 129.1499, + "step": 19320 + }, + { + "epoch": 0.1599040410307317, + "grad_norm": 1082.633544921875, + "learning_rate": 9.90408970293677e-06, + "loss": 99.3771, + "step": 19330 + }, + { + "epoch": 0.15998676428010092, + "grad_norm": 
1239.34326171875, + "learning_rate": 9.903814590551127e-06, + "loss": 152.1191, + "step": 19340 + }, + { + "epoch": 0.16006948752947014, + "grad_norm": 1026.2008056640625, + "learning_rate": 9.903539087991462e-06, + "loss": 138.5603, + "step": 19350 + }, + { + "epoch": 0.1601522107788394, + "grad_norm": 716.8160400390625, + "learning_rate": 9.903263195279698e-06, + "loss": 121.6254, + "step": 19360 + }, + { + "epoch": 0.16023493402820863, + "grad_norm": 1355.804931640625, + "learning_rate": 9.902986912437784e-06, + "loss": 121.697, + "step": 19370 + }, + { + "epoch": 0.16031765727757785, + "grad_norm": 1001.9555053710938, + "learning_rate": 9.902710239487702e-06, + "loss": 123.8956, + "step": 19380 + }, + { + "epoch": 0.1604003805269471, + "grad_norm": 929.7130737304688, + "learning_rate": 9.902433176451466e-06, + "loss": 108.8211, + "step": 19390 + }, + { + "epoch": 0.16048310377631633, + "grad_norm": 1794.6314697265625, + "learning_rate": 9.902155723351124e-06, + "loss": 119.6667, + "step": 19400 + }, + { + "epoch": 0.16056582702568556, + "grad_norm": 981.8839721679688, + "learning_rate": 9.901877880208747e-06, + "loss": 123.9001, + "step": 19410 + }, + { + "epoch": 0.1606485502750548, + "grad_norm": 1454.4476318359375, + "learning_rate": 9.901599647046443e-06, + "loss": 131.0193, + "step": 19420 + }, + { + "epoch": 0.16073127352442404, + "grad_norm": 1284.30224609375, + "learning_rate": 9.901321023886351e-06, + "loss": 169.1719, + "step": 19430 + }, + { + "epoch": 0.16081399677379327, + "grad_norm": 1159.77783203125, + "learning_rate": 9.901042010750641e-06, + "loss": 100.9739, + "step": 19440 + }, + { + "epoch": 0.16089672002316252, + "grad_norm": 1844.110107421875, + "learning_rate": 9.900762607661509e-06, + "loss": 153.9659, + "step": 19450 + }, + { + "epoch": 0.16097944327253175, + "grad_norm": 701.9683227539062, + "learning_rate": 9.900482814641188e-06, + "loss": 109.9286, + "step": 19460 + }, + { + "epoch": 0.16106216652190097, + "grad_norm": 1962.1533203125, + "learning_rate": 9.90020263171194e-06, + "loss": 158.6689, + "step": 19470 + }, + { + "epoch": 0.16114488977127023, + "grad_norm": 1527.3931884765625, + "learning_rate": 9.899922058896058e-06, + "loss": 129.6219, + "step": 19480 + }, + { + "epoch": 0.16122761302063945, + "grad_norm": 935.3746337890625, + "learning_rate": 9.899641096215865e-06, + "loss": 187.1026, + "step": 19490 + }, + { + "epoch": 0.16131033627000868, + "grad_norm": 1236.936279296875, + "learning_rate": 9.899359743693715e-06, + "loss": 194.122, + "step": 19500 + }, + { + "epoch": 0.16139305951937793, + "grad_norm": 2321.50439453125, + "learning_rate": 9.899078001351996e-06, + "loss": 164.5937, + "step": 19510 + }, + { + "epoch": 0.16147578276874716, + "grad_norm": 1305.2666015625, + "learning_rate": 9.898795869213125e-06, + "loss": 149.4349, + "step": 19520 + }, + { + "epoch": 0.1615585060181164, + "grad_norm": 1175.187744140625, + "learning_rate": 9.898513347299549e-06, + "loss": 142.7042, + "step": 19530 + }, + { + "epoch": 0.16164122926748561, + "grad_norm": 1031.960693359375, + "learning_rate": 9.898230435633747e-06, + "loss": 151.7943, + "step": 19540 + }, + { + "epoch": 0.16172395251685487, + "grad_norm": 1225.9884033203125, + "learning_rate": 9.897947134238228e-06, + "loss": 162.5945, + "step": 19550 + }, + { + "epoch": 0.1618066757662241, + "grad_norm": 1006.8120727539062, + "learning_rate": 9.897663443135534e-06, + "loss": 127.9761, + "step": 19560 + }, + { + "epoch": 0.16188939901559332, + "grad_norm": 2127.416748046875, + "learning_rate": 
9.897379362348239e-06, + "loss": 171.9894, + "step": 19570 + }, + { + "epoch": 0.16197212226496258, + "grad_norm": 745.5164794921875, + "learning_rate": 9.897094891898942e-06, + "loss": 150.5477, + "step": 19580 + }, + { + "epoch": 0.1620548455143318, + "grad_norm": 2045.43896484375, + "learning_rate": 9.89681003181028e-06, + "loss": 143.4611, + "step": 19590 + }, + { + "epoch": 0.16213756876370103, + "grad_norm": 1801.7623291015625, + "learning_rate": 9.896524782104917e-06, + "loss": 136.5524, + "step": 19600 + }, + { + "epoch": 0.16222029201307028, + "grad_norm": 865.27001953125, + "learning_rate": 9.89623914280555e-06, + "loss": 135.0138, + "step": 19610 + }, + { + "epoch": 0.1623030152624395, + "grad_norm": 1108.943359375, + "learning_rate": 9.895953113934904e-06, + "loss": 131.2855, + "step": 19620 + }, + { + "epoch": 0.16238573851180874, + "grad_norm": 1284.2874755859375, + "learning_rate": 9.895666695515739e-06, + "loss": 158.5307, + "step": 19630 + }, + { + "epoch": 0.162468461761178, + "grad_norm": 1160.2169189453125, + "learning_rate": 9.895379887570842e-06, + "loss": 146.816, + "step": 19640 + }, + { + "epoch": 0.16255118501054722, + "grad_norm": 1387.5469970703125, + "learning_rate": 9.895092690123036e-06, + "loss": 130.8016, + "step": 19650 + }, + { + "epoch": 0.16263390825991644, + "grad_norm": 1327.9295654296875, + "learning_rate": 9.894805103195168e-06, + "loss": 131.4063, + "step": 19660 + }, + { + "epoch": 0.1627166315092857, + "grad_norm": 1514.6529541015625, + "learning_rate": 9.894517126810122e-06, + "loss": 209.5621, + "step": 19670 + }, + { + "epoch": 0.16279935475865492, + "grad_norm": 2022.22021484375, + "learning_rate": 9.894228760990811e-06, + "loss": 152.1554, + "step": 19680 + }, + { + "epoch": 0.16288207800802415, + "grad_norm": 1740.86767578125, + "learning_rate": 9.893940005760181e-06, + "loss": 154.0035, + "step": 19690 + }, + { + "epoch": 0.16296480125739338, + "grad_norm": 1580.342529296875, + "learning_rate": 9.893650861141204e-06, + "loss": 157.6928, + "step": 19700 + }, + { + "epoch": 0.16304752450676263, + "grad_norm": 1006.531494140625, + "learning_rate": 9.893361327156887e-06, + "loss": 127.0846, + "step": 19710 + }, + { + "epoch": 0.16313024775613186, + "grad_norm": 1319.7847900390625, + "learning_rate": 9.893071403830265e-06, + "loss": 136.8425, + "step": 19720 + }, + { + "epoch": 0.16321297100550108, + "grad_norm": 830.5723876953125, + "learning_rate": 9.892781091184409e-06, + "loss": 136.2878, + "step": 19730 + }, + { + "epoch": 0.16329569425487034, + "grad_norm": 759.5004272460938, + "learning_rate": 9.892490389242417e-06, + "loss": 120.3061, + "step": 19740 + }, + { + "epoch": 0.16337841750423956, + "grad_norm": 2213.310546875, + "learning_rate": 9.892199298027416e-06, + "loss": 143.1016, + "step": 19750 + }, + { + "epoch": 0.1634611407536088, + "grad_norm": 1020.21337890625, + "learning_rate": 9.891907817562572e-06, + "loss": 116.1548, + "step": 19760 + }, + { + "epoch": 0.16354386400297805, + "grad_norm": 810.1748657226562, + "learning_rate": 9.891615947871072e-06, + "loss": 141.0581, + "step": 19770 + }, + { + "epoch": 0.16362658725234727, + "grad_norm": 810.9425048828125, + "learning_rate": 9.89132368897614e-06, + "loss": 130.3051, + "step": 19780 + }, + { + "epoch": 0.1637093105017165, + "grad_norm": 1321.05908203125, + "learning_rate": 9.891031040901031e-06, + "loss": 154.2215, + "step": 19790 + }, + { + "epoch": 0.16379203375108575, + "grad_norm": 1216.8099365234375, + "learning_rate": 9.890738003669029e-06, + "loss": 164.4314, + 
"step": 19800 + }, + { + "epoch": 0.16387475700045498, + "grad_norm": 2961.08447265625, + "learning_rate": 9.890444577303448e-06, + "loss": 184.4128, + "step": 19810 + }, + { + "epoch": 0.1639574802498242, + "grad_norm": 702.2813110351562, + "learning_rate": 9.890150761827639e-06, + "loss": 118.5094, + "step": 19820 + }, + { + "epoch": 0.16404020349919346, + "grad_norm": 1224.574951171875, + "learning_rate": 9.889856557264975e-06, + "loss": 164.4189, + "step": 19830 + }, + { + "epoch": 0.1641229267485627, + "grad_norm": 1510.7064208984375, + "learning_rate": 9.889561963638866e-06, + "loss": 168.8556, + "step": 19840 + }, + { + "epoch": 0.1642056499979319, + "grad_norm": 1308.8349609375, + "learning_rate": 9.889266980972752e-06, + "loss": 157.53, + "step": 19850 + }, + { + "epoch": 0.16428837324730117, + "grad_norm": 554.7841796875, + "learning_rate": 9.888971609290103e-06, + "loss": 123.5679, + "step": 19860 + }, + { + "epoch": 0.1643710964966704, + "grad_norm": 1066.4405517578125, + "learning_rate": 9.88867584861442e-06, + "loss": 115.5183, + "step": 19870 + }, + { + "epoch": 0.16445381974603962, + "grad_norm": 823.9727172851562, + "learning_rate": 9.888379698969236e-06, + "loss": 127.0505, + "step": 19880 + }, + { + "epoch": 0.16453654299540885, + "grad_norm": 2073.93017578125, + "learning_rate": 9.888083160378114e-06, + "loss": 142.6533, + "step": 19890 + }, + { + "epoch": 0.1646192662447781, + "grad_norm": 2434.051513671875, + "learning_rate": 9.887786232864648e-06, + "loss": 147.1622, + "step": 19900 + }, + { + "epoch": 0.16470198949414733, + "grad_norm": 724.6605224609375, + "learning_rate": 9.887488916452463e-06, + "loss": 121.8898, + "step": 19910 + }, + { + "epoch": 0.16478471274351655, + "grad_norm": 1281.83203125, + "learning_rate": 9.887191211165217e-06, + "loss": 151.6535, + "step": 19920 + }, + { + "epoch": 0.1648674359928858, + "grad_norm": 1131.1641845703125, + "learning_rate": 9.886893117026593e-06, + "loss": 149.5577, + "step": 19930 + }, + { + "epoch": 0.16495015924225503, + "grad_norm": 1077.4385986328125, + "learning_rate": 9.886594634060314e-06, + "loss": 203.1148, + "step": 19940 + }, + { + "epoch": 0.16503288249162426, + "grad_norm": 1563.59228515625, + "learning_rate": 9.886295762290125e-06, + "loss": 156.8315, + "step": 19950 + }, + { + "epoch": 0.16511560574099352, + "grad_norm": 728.146240234375, + "learning_rate": 9.885996501739808e-06, + "loss": 123.6347, + "step": 19960 + }, + { + "epoch": 0.16519832899036274, + "grad_norm": 1174.09521484375, + "learning_rate": 9.885696852433174e-06, + "loss": 171.4022, + "step": 19970 + }, + { + "epoch": 0.16528105223973197, + "grad_norm": 2437.55908203125, + "learning_rate": 9.885396814394062e-06, + "loss": 166.2973, + "step": 19980 + }, + { + "epoch": 0.16536377548910122, + "grad_norm": 756.368896484375, + "learning_rate": 9.885096387646346e-06, + "loss": 102.5183, + "step": 19990 + }, + { + "epoch": 0.16544649873847045, + "grad_norm": 1208.659423828125, + "learning_rate": 9.88479557221393e-06, + "loss": 146.3919, + "step": 20000 + }, + { + "epoch": 0.16552922198783968, + "grad_norm": 793.578857421875, + "learning_rate": 9.88449436812075e-06, + "loss": 151.3374, + "step": 20010 + }, + { + "epoch": 0.16561194523720893, + "grad_norm": 3023.392333984375, + "learning_rate": 9.88419277539077e-06, + "loss": 147.2389, + "step": 20020 + }, + { + "epoch": 0.16569466848657816, + "grad_norm": 1020.14404296875, + "learning_rate": 9.883890794047985e-06, + "loss": 133.7473, + "step": 20030 + }, + { + "epoch": 0.16577739173594738, + 
"grad_norm": 925.8684692382812, + "learning_rate": 9.883588424116424e-06, + "loss": 145.2095, + "step": 20040 + }, + { + "epoch": 0.16586011498531664, + "grad_norm": 1657.950927734375, + "learning_rate": 9.883285665620145e-06, + "loss": 131.4692, + "step": 20050 + }, + { + "epoch": 0.16594283823468586, + "grad_norm": 1638.5106201171875, + "learning_rate": 9.882982518583238e-06, + "loss": 120.6384, + "step": 20060 + }, + { + "epoch": 0.1660255614840551, + "grad_norm": 994.8275146484375, + "learning_rate": 9.882678983029819e-06, + "loss": 191.7884, + "step": 20070 + }, + { + "epoch": 0.16610828473342432, + "grad_norm": 1858.4609375, + "learning_rate": 9.882375058984044e-06, + "loss": 145.8128, + "step": 20080 + }, + { + "epoch": 0.16619100798279357, + "grad_norm": 640.3125, + "learning_rate": 9.882070746470092e-06, + "loss": 113.2083, + "step": 20090 + }, + { + "epoch": 0.1662737312321628, + "grad_norm": 1469.511474609375, + "learning_rate": 9.881766045512176e-06, + "loss": 189.0106, + "step": 20100 + }, + { + "epoch": 0.16635645448153202, + "grad_norm": 740.4965209960938, + "learning_rate": 9.88146095613454e-06, + "loss": 130.8047, + "step": 20110 + }, + { + "epoch": 0.16643917773090128, + "grad_norm": 683.9896240234375, + "learning_rate": 9.881155478361459e-06, + "loss": 175.5372, + "step": 20120 + }, + { + "epoch": 0.1665219009802705, + "grad_norm": 938.4227905273438, + "learning_rate": 9.880849612217238e-06, + "loss": 108.5235, + "step": 20130 + }, + { + "epoch": 0.16660462422963973, + "grad_norm": 841.4732666015625, + "learning_rate": 9.880543357726214e-06, + "loss": 142.9208, + "step": 20140 + }, + { + "epoch": 0.16668734747900898, + "grad_norm": 764.7952880859375, + "learning_rate": 9.880236714912754e-06, + "loss": 136.3933, + "step": 20150 + }, + { + "epoch": 0.1667700707283782, + "grad_norm": 1391.5673828125, + "learning_rate": 9.879929683801254e-06, + "loss": 138.4007, + "step": 20160 + }, + { + "epoch": 0.16685279397774744, + "grad_norm": 1550.94873046875, + "learning_rate": 9.879622264416147e-06, + "loss": 147.9795, + "step": 20170 + }, + { + "epoch": 0.1669355172271167, + "grad_norm": 879.2140502929688, + "learning_rate": 9.87931445678189e-06, + "loss": 155.5872, + "step": 20180 + }, + { + "epoch": 0.16701824047648592, + "grad_norm": 584.1538696289062, + "learning_rate": 9.879006260922975e-06, + "loss": 98.7441, + "step": 20190 + }, + { + "epoch": 0.16710096372585515, + "grad_norm": 719.3741455078125, + "learning_rate": 9.878697676863922e-06, + "loss": 126.3837, + "step": 20200 + }, + { + "epoch": 0.1671836869752244, + "grad_norm": 1768.941162109375, + "learning_rate": 9.878388704629286e-06, + "loss": 181.439, + "step": 20210 + }, + { + "epoch": 0.16726641022459363, + "grad_norm": 1062.1995849609375, + "learning_rate": 9.87807934424365e-06, + "loss": 155.1683, + "step": 20220 + }, + { + "epoch": 0.16734913347396285, + "grad_norm": 1074.44482421875, + "learning_rate": 9.877769595731629e-06, + "loss": 149.4426, + "step": 20230 + }, + { + "epoch": 0.16743185672333208, + "grad_norm": 766.1312866210938, + "learning_rate": 9.877459459117864e-06, + "loss": 133.7859, + "step": 20240 + }, + { + "epoch": 0.16751457997270133, + "grad_norm": 1180.6207275390625, + "learning_rate": 9.877148934427037e-06, + "loss": 185.559, + "step": 20250 + }, + { + "epoch": 0.16759730322207056, + "grad_norm": 936.2619018554688, + "learning_rate": 9.87683802168385e-06, + "loss": 153.8027, + "step": 20260 + }, + { + "epoch": 0.16768002647143979, + "grad_norm": 1289.442626953125, + "learning_rate": 
9.876526720913045e-06, + "loss": 146.8949, + "step": 20270 + }, + { + "epoch": 0.16776274972080904, + "grad_norm": 1198.4373779296875, + "learning_rate": 9.87621503213939e-06, + "loss": 101.3234, + "step": 20280 + }, + { + "epoch": 0.16784547297017827, + "grad_norm": 1139.7901611328125, + "learning_rate": 9.875902955387682e-06, + "loss": 105.7266, + "step": 20290 + }, + { + "epoch": 0.1679281962195475, + "grad_norm": 885.1135864257812, + "learning_rate": 9.875590490682754e-06, + "loss": 139.6578, + "step": 20300 + }, + { + "epoch": 0.16801091946891675, + "grad_norm": 1269.400146484375, + "learning_rate": 9.875277638049466e-06, + "loss": 148.561, + "step": 20310 + }, + { + "epoch": 0.16809364271828597, + "grad_norm": 1671.2281494140625, + "learning_rate": 9.87496439751271e-06, + "loss": 157.8267, + "step": 20320 + }, + { + "epoch": 0.1681763659676552, + "grad_norm": 913.9154663085938, + "learning_rate": 9.87465076909741e-06, + "loss": 141.2827, + "step": 20330 + }, + { + "epoch": 0.16825908921702445, + "grad_norm": 1082.798583984375, + "learning_rate": 9.874336752828523e-06, + "loss": 150.5321, + "step": 20340 + }, + { + "epoch": 0.16834181246639368, + "grad_norm": 967.8886108398438, + "learning_rate": 9.87402234873103e-06, + "loss": 142.4086, + "step": 20350 + }, + { + "epoch": 0.1684245357157629, + "grad_norm": 1056.4305419921875, + "learning_rate": 9.873707556829945e-06, + "loss": 97.3924, + "step": 20360 + }, + { + "epoch": 0.16850725896513216, + "grad_norm": 837.5071411132812, + "learning_rate": 9.873392377150318e-06, + "loss": 123.7359, + "step": 20370 + }, + { + "epoch": 0.1685899822145014, + "grad_norm": 874.9818115234375, + "learning_rate": 9.873076809717226e-06, + "loss": 109.1823, + "step": 20380 + }, + { + "epoch": 0.16867270546387061, + "grad_norm": 727.0296020507812, + "learning_rate": 9.872760854555776e-06, + "loss": 143.6749, + "step": 20390 + }, + { + "epoch": 0.16875542871323987, + "grad_norm": 1192.26904296875, + "learning_rate": 9.872444511691108e-06, + "loss": 128.2298, + "step": 20400 + }, + { + "epoch": 0.1688381519626091, + "grad_norm": 765.3291015625, + "learning_rate": 9.872127781148392e-06, + "loss": 140.1519, + "step": 20410 + }, + { + "epoch": 0.16892087521197832, + "grad_norm": 1048.41064453125, + "learning_rate": 9.871810662952828e-06, + "loss": 153.647, + "step": 20420 + }, + { + "epoch": 0.16900359846134755, + "grad_norm": 1050.804931640625, + "learning_rate": 9.87149315712965e-06, + "loss": 160.4528, + "step": 20430 + }, + { + "epoch": 0.1690863217107168, + "grad_norm": 1345.5584716796875, + "learning_rate": 9.871175263704116e-06, + "loss": 119.833, + "step": 20440 + }, + { + "epoch": 0.16916904496008603, + "grad_norm": 1076.057861328125, + "learning_rate": 9.870856982701522e-06, + "loss": 123.713, + "step": 20450 + }, + { + "epoch": 0.16925176820945526, + "grad_norm": 1091.8779296875, + "learning_rate": 9.870538314147194e-06, + "loss": 157.7124, + "step": 20460 + }, + { + "epoch": 0.1693344914588245, + "grad_norm": 1105.4384765625, + "learning_rate": 9.870219258066485e-06, + "loss": 165.7277, + "step": 20470 + }, + { + "epoch": 0.16941721470819374, + "grad_norm": 1101.884033203125, + "learning_rate": 9.86989981448478e-06, + "loss": 93.6696, + "step": 20480 + }, + { + "epoch": 0.16949993795756296, + "grad_norm": 1253.4547119140625, + "learning_rate": 9.869579983427497e-06, + "loss": 156.5219, + "step": 20490 + }, + { + "epoch": 0.16958266120693222, + "grad_norm": 571.4030151367188, + "learning_rate": 9.869259764920081e-06, + "loss": 108.441, + "step": 20500 
+ }, + { + "epoch": 0.16966538445630144, + "grad_norm": 1125.4053955078125, + "learning_rate": 9.868939158988016e-06, + "loss": 111.0379, + "step": 20510 + }, + { + "epoch": 0.16974810770567067, + "grad_norm": 1643.4638671875, + "learning_rate": 9.868618165656805e-06, + "loss": 174.5399, + "step": 20520 + }, + { + "epoch": 0.16983083095503992, + "grad_norm": 1219.7093505859375, + "learning_rate": 9.868296784951992e-06, + "loss": 143.0416, + "step": 20530 + }, + { + "epoch": 0.16991355420440915, + "grad_norm": 716.170654296875, + "learning_rate": 9.867975016899145e-06, + "loss": 129.3315, + "step": 20540 + }, + { + "epoch": 0.16999627745377838, + "grad_norm": 722.2046508789062, + "learning_rate": 9.867652861523866e-06, + "loss": 110.3743, + "step": 20550 + }, + { + "epoch": 0.17007900070314763, + "grad_norm": 8400.3681640625, + "learning_rate": 9.86733031885179e-06, + "loss": 293.1446, + "step": 20560 + }, + { + "epoch": 0.17016172395251686, + "grad_norm": 1231.3631591796875, + "learning_rate": 9.867007388908579e-06, + "loss": 158.7573, + "step": 20570 + }, + { + "epoch": 0.17024444720188608, + "grad_norm": 813.4060668945312, + "learning_rate": 9.866684071719926e-06, + "loss": 114.0276, + "step": 20580 + }, + { + "epoch": 0.17032717045125534, + "grad_norm": 691.1693115234375, + "learning_rate": 9.866360367311557e-06, + "loss": 124.2069, + "step": 20590 + }, + { + "epoch": 0.17040989370062457, + "grad_norm": 1228.2000732421875, + "learning_rate": 9.866036275709226e-06, + "loss": 113.3982, + "step": 20600 + }, + { + "epoch": 0.1704926169499938, + "grad_norm": 1183.33935546875, + "learning_rate": 9.86571179693872e-06, + "loss": 124.6813, + "step": 20610 + }, + { + "epoch": 0.17057534019936302, + "grad_norm": 1132.4244384765625, + "learning_rate": 9.865386931025858e-06, + "loss": 109.3566, + "step": 20620 + }, + { + "epoch": 0.17065806344873227, + "grad_norm": 536.39453125, + "learning_rate": 9.865061677996487e-06, + "loss": 94.1924, + "step": 20630 + }, + { + "epoch": 0.1707407866981015, + "grad_norm": 1061.803955078125, + "learning_rate": 9.864736037876487e-06, + "loss": 96.9849, + "step": 20640 + }, + { + "epoch": 0.17082350994747073, + "grad_norm": 1086.843505859375, + "learning_rate": 9.864410010691766e-06, + "loss": 158.8272, + "step": 20650 + }, + { + "epoch": 0.17090623319683998, + "grad_norm": 1095.378662109375, + "learning_rate": 9.864083596468263e-06, + "loss": 165.8036, + "step": 20660 + }, + { + "epoch": 0.1709889564462092, + "grad_norm": 773.133544921875, + "learning_rate": 9.863756795231953e-06, + "loss": 107.4877, + "step": 20670 + }, + { + "epoch": 0.17107167969557843, + "grad_norm": 922.110107421875, + "learning_rate": 9.863429607008837e-06, + "loss": 152.2869, + "step": 20680 + }, + { + "epoch": 0.1711544029449477, + "grad_norm": 828.44677734375, + "learning_rate": 9.863102031824946e-06, + "loss": 103.4225, + "step": 20690 + }, + { + "epoch": 0.1712371261943169, + "grad_norm": 1600.447021484375, + "learning_rate": 9.862774069706346e-06, + "loss": 116.9802, + "step": 20700 + }, + { + "epoch": 0.17131984944368614, + "grad_norm": 1819.4007568359375, + "learning_rate": 9.86244572067913e-06, + "loss": 193.74, + "step": 20710 + }, + { + "epoch": 0.1714025726930554, + "grad_norm": 1109.385009765625, + "learning_rate": 9.862116984769424e-06, + "loss": 155.2172, + "step": 20720 + }, + { + "epoch": 0.17148529594242462, + "grad_norm": 1088.8150634765625, + "learning_rate": 9.861787862003384e-06, + "loss": 102.3083, + "step": 20730 + }, + { + "epoch": 0.17156801919179385, + "grad_norm": 
863.4269409179688, + "learning_rate": 9.861458352407196e-06, + "loss": 163.3116, + "step": 20740 + }, + { + "epoch": 0.1716507424411631, + "grad_norm": 985.338623046875, + "learning_rate": 9.861128456007076e-06, + "loss": 128.2076, + "step": 20750 + }, + { + "epoch": 0.17173346569053233, + "grad_norm": 2616.19189453125, + "learning_rate": 9.860798172829277e-06, + "loss": 128.8503, + "step": 20760 + }, + { + "epoch": 0.17181618893990155, + "grad_norm": 821.7667846679688, + "learning_rate": 9.860467502900076e-06, + "loss": 139.1303, + "step": 20770 + }, + { + "epoch": 0.1718989121892708, + "grad_norm": 1093.295654296875, + "learning_rate": 9.860136446245779e-06, + "loss": 158.2073, + "step": 20780 + }, + { + "epoch": 0.17198163543864003, + "grad_norm": 2161.525390625, + "learning_rate": 9.859805002892733e-06, + "loss": 159.4854, + "step": 20790 + }, + { + "epoch": 0.17206435868800926, + "grad_norm": 936.6251831054688, + "learning_rate": 9.859473172867304e-06, + "loss": 150.7438, + "step": 20800 + }, + { + "epoch": 0.1721470819373785, + "grad_norm": 735.8521118164062, + "learning_rate": 9.859140956195898e-06, + "loss": 166.0925, + "step": 20810 + }, + { + "epoch": 0.17222980518674774, + "grad_norm": 1382.367431640625, + "learning_rate": 9.858808352904946e-06, + "loss": 163.85, + "step": 20820 + }, + { + "epoch": 0.17231252843611697, + "grad_norm": 1131.4239501953125, + "learning_rate": 9.858475363020913e-06, + "loss": 138.0052, + "step": 20830 + }, + { + "epoch": 0.1723952516854862, + "grad_norm": 1062.67431640625, + "learning_rate": 9.858141986570294e-06, + "loss": 127.1865, + "step": 20840 + }, + { + "epoch": 0.17247797493485545, + "grad_norm": 790.3716430664062, + "learning_rate": 9.85780822357961e-06, + "loss": 99.19, + "step": 20850 + }, + { + "epoch": 0.17256069818422468, + "grad_norm": 1042.801025390625, + "learning_rate": 9.857474074075422e-06, + "loss": 152.4688, + "step": 20860 + }, + { + "epoch": 0.1726434214335939, + "grad_norm": 872.380126953125, + "learning_rate": 9.857139538084313e-06, + "loss": 180.6786, + "step": 20870 + }, + { + "epoch": 0.17272614468296316, + "grad_norm": 873.7167358398438, + "learning_rate": 9.856804615632904e-06, + "loss": 153.8926, + "step": 20880 + }, + { + "epoch": 0.17280886793233238, + "grad_norm": 1261.3304443359375, + "learning_rate": 9.85646930674784e-06, + "loss": 138.4761, + "step": 20890 + }, + { + "epoch": 0.1728915911817016, + "grad_norm": 1224.0684814453125, + "learning_rate": 9.856133611455802e-06, + "loss": 116.0446, + "step": 20900 + }, + { + "epoch": 0.17297431443107086, + "grad_norm": 1034.66552734375, + "learning_rate": 9.855797529783499e-06, + "loss": 143.5475, + "step": 20910 + }, + { + "epoch": 0.1730570376804401, + "grad_norm": 1428.3978271484375, + "learning_rate": 9.855461061757673e-06, + "loss": 202.7229, + "step": 20920 + }, + { + "epoch": 0.17313976092980932, + "grad_norm": 1692.571533203125, + "learning_rate": 9.855124207405093e-06, + "loss": 129.3957, + "step": 20930 + }, + { + "epoch": 0.17322248417917857, + "grad_norm": 901.4539184570312, + "learning_rate": 9.854786966752561e-06, + "loss": 114.9998, + "step": 20940 + }, + { + "epoch": 0.1733052074285478, + "grad_norm": 1734.4339599609375, + "learning_rate": 9.854449339826912e-06, + "loss": 126.6563, + "step": 20950 + }, + { + "epoch": 0.17338793067791702, + "grad_norm": 936.6329956054688, + "learning_rate": 9.854111326655006e-06, + "loss": 148.8187, + "step": 20960 + }, + { + "epoch": 0.17347065392728625, + "grad_norm": 792.8075561523438, + "learning_rate": 
9.85377292726374e-06, + "loss": 140.2986, + "step": 20970 + }, + { + "epoch": 0.1735533771766555, + "grad_norm": 852.9043579101562, + "learning_rate": 9.85343414168004e-06, + "loss": 142.9736, + "step": 20980 + }, + { + "epoch": 0.17363610042602473, + "grad_norm": 615.2127685546875, + "learning_rate": 9.853094969930857e-06, + "loss": 131.7546, + "step": 20990 + }, + { + "epoch": 0.17371882367539396, + "grad_norm": 1290.294921875, + "learning_rate": 9.85275541204318e-06, + "loss": 157.3864, + "step": 21000 + }, + { + "epoch": 0.1738015469247632, + "grad_norm": 428.7129821777344, + "learning_rate": 9.852415468044027e-06, + "loss": 117.3043, + "step": 21010 + }, + { + "epoch": 0.17388427017413244, + "grad_norm": 1278.5567626953125, + "learning_rate": 9.852075137960446e-06, + "loss": 136.686, + "step": 21020 + }, + { + "epoch": 0.17396699342350166, + "grad_norm": 1619.6778564453125, + "learning_rate": 9.851734421819511e-06, + "loss": 136.3727, + "step": 21030 + }, + { + "epoch": 0.17404971667287092, + "grad_norm": 1088.0845947265625, + "learning_rate": 9.851393319648338e-06, + "loss": 114.8988, + "step": 21040 + }, + { + "epoch": 0.17413243992224015, + "grad_norm": 738.9354858398438, + "learning_rate": 9.851051831474062e-06, + "loss": 138.9047, + "step": 21050 + }, + { + "epoch": 0.17421516317160937, + "grad_norm": 926.2405395507812, + "learning_rate": 9.850709957323855e-06, + "loss": 127.3761, + "step": 21060 + }, + { + "epoch": 0.17429788642097863, + "grad_norm": 911.6777954101562, + "learning_rate": 9.85036769722492e-06, + "loss": 163.0244, + "step": 21070 + }, + { + "epoch": 0.17438060967034785, + "grad_norm": 868.7709350585938, + "learning_rate": 9.850025051204484e-06, + "loss": 139.7337, + "step": 21080 + }, + { + "epoch": 0.17446333291971708, + "grad_norm": 810.2794189453125, + "learning_rate": 9.849682019289816e-06, + "loss": 129.7191, + "step": 21090 + }, + { + "epoch": 0.17454605616908633, + "grad_norm": 862.4880981445312, + "learning_rate": 9.849338601508204e-06, + "loss": 110.1159, + "step": 21100 + }, + { + "epoch": 0.17462877941845556, + "grad_norm": 1751.4161376953125, + "learning_rate": 9.848994797886978e-06, + "loss": 162.2478, + "step": 21110 + }, + { + "epoch": 0.1747115026678248, + "grad_norm": 961.6451416015625, + "learning_rate": 9.84865060845349e-06, + "loss": 151.0574, + "step": 21120 + }, + { + "epoch": 0.17479422591719404, + "grad_norm": 984.0369873046875, + "learning_rate": 9.848306033235123e-06, + "loss": 114.3529, + "step": 21130 + }, + { + "epoch": 0.17487694916656327, + "grad_norm": 1510.6654052734375, + "learning_rate": 9.847961072259298e-06, + "loss": 142.4745, + "step": 21140 + }, + { + "epoch": 0.1749596724159325, + "grad_norm": 578.0482177734375, + "learning_rate": 9.847615725553457e-06, + "loss": 149.1546, + "step": 21150 + }, + { + "epoch": 0.17504239566530172, + "grad_norm": 2596.45654296875, + "learning_rate": 9.847269993145082e-06, + "loss": 140.9354, + "step": 21160 + }, + { + "epoch": 0.17512511891467097, + "grad_norm": 1056.8643798828125, + "learning_rate": 9.84692387506168e-06, + "loss": 138.6333, + "step": 21170 + }, + { + "epoch": 0.1752078421640402, + "grad_norm": 543.871826171875, + "learning_rate": 9.846577371330788e-06, + "loss": 100.0174, + "step": 21180 + }, + { + "epoch": 0.17529056541340943, + "grad_norm": 505.1315002441406, + "learning_rate": 9.846230481979978e-06, + "loss": 126.8892, + "step": 21190 + }, + { + "epoch": 0.17537328866277868, + "grad_norm": 1152.1531982421875, + "learning_rate": 9.84588320703685e-06, + "loss": 137.2241, + 
"step": 21200 + }, + { + "epoch": 0.1754560119121479, + "grad_norm": 2514.837646484375, + "learning_rate": 9.845535546529036e-06, + "loss": 150.1807, + "step": 21210 + }, + { + "epoch": 0.17553873516151713, + "grad_norm": 1177.4676513671875, + "learning_rate": 9.845187500484194e-06, + "loss": 136.4538, + "step": 21220 + }, + { + "epoch": 0.1756214584108864, + "grad_norm": 1041.1258544921875, + "learning_rate": 9.844839068930021e-06, + "loss": 131.5591, + "step": 21230 + }, + { + "epoch": 0.17570418166025562, + "grad_norm": 907.1217651367188, + "learning_rate": 9.844490251894237e-06, + "loss": 129.923, + "step": 21240 + }, + { + "epoch": 0.17578690490962484, + "grad_norm": 917.8026123046875, + "learning_rate": 9.844141049404598e-06, + "loss": 112.846, + "step": 21250 + }, + { + "epoch": 0.1758696281589941, + "grad_norm": 1224.1368408203125, + "learning_rate": 9.843791461488887e-06, + "loss": 142.4482, + "step": 21260 + }, + { + "epoch": 0.17595235140836332, + "grad_norm": 1229.176513671875, + "learning_rate": 9.843441488174918e-06, + "loss": 103.1861, + "step": 21270 + }, + { + "epoch": 0.17603507465773255, + "grad_norm": 850.4046630859375, + "learning_rate": 9.843091129490539e-06, + "loss": 127.6695, + "step": 21280 + }, + { + "epoch": 0.1761177979071018, + "grad_norm": 842.6237182617188, + "learning_rate": 9.842740385463628e-06, + "loss": 150.564, + "step": 21290 + }, + { + "epoch": 0.17620052115647103, + "grad_norm": 1639.1712646484375, + "learning_rate": 9.842389256122086e-06, + "loss": 154.4747, + "step": 21300 + }, + { + "epoch": 0.17628324440584026, + "grad_norm": 1027.3468017578125, + "learning_rate": 9.842037741493856e-06, + "loss": 138.2844, + "step": 21310 + }, + { + "epoch": 0.1763659676552095, + "grad_norm": 1297.7607421875, + "learning_rate": 9.841685841606905e-06, + "loss": 171.9979, + "step": 21320 + }, + { + "epoch": 0.17644869090457874, + "grad_norm": 650.8685302734375, + "learning_rate": 9.841333556489232e-06, + "loss": 138.841, + "step": 21330 + }, + { + "epoch": 0.17653141415394796, + "grad_norm": 1357.2320556640625, + "learning_rate": 9.840980886168866e-06, + "loss": 151.9759, + "step": 21340 + }, + { + "epoch": 0.1766141374033172, + "grad_norm": 938.8858032226562, + "learning_rate": 9.840627830673867e-06, + "loss": 141.0563, + "step": 21350 + }, + { + "epoch": 0.17669686065268644, + "grad_norm": 1711.847412109375, + "learning_rate": 9.84027439003233e-06, + "loss": 172.4341, + "step": 21360 + }, + { + "epoch": 0.17677958390205567, + "grad_norm": 854.195068359375, + "learning_rate": 9.839920564272372e-06, + "loss": 137.6896, + "step": 21370 + }, + { + "epoch": 0.1768623071514249, + "grad_norm": 1087.6898193359375, + "learning_rate": 9.839566353422148e-06, + "loss": 117.6248, + "step": 21380 + }, + { + "epoch": 0.17694503040079415, + "grad_norm": 2788.3251953125, + "learning_rate": 9.839211757509838e-06, + "loss": 144.8487, + "step": 21390 + }, + { + "epoch": 0.17702775365016338, + "grad_norm": 1420.71875, + "learning_rate": 9.83885677656366e-06, + "loss": 122.336, + "step": 21400 + }, + { + "epoch": 0.1771104768995326, + "grad_norm": 761.9530029296875, + "learning_rate": 9.838501410611852e-06, + "loss": 132.9662, + "step": 21410 + }, + { + "epoch": 0.17719320014890186, + "grad_norm": 2090.32666015625, + "learning_rate": 9.838145659682695e-06, + "loss": 183.7629, + "step": 21420 + }, + { + "epoch": 0.17727592339827108, + "grad_norm": 3058.696044921875, + "learning_rate": 9.837789523804491e-06, + "loss": 142.1503, + "step": 21430 + }, + { + "epoch": 0.1773586466476403, + 
"grad_norm": 1059.2083740234375, + "learning_rate": 9.837433003005578e-06, + "loss": 137.1923, + "step": 21440 + }, + { + "epoch": 0.17744136989700957, + "grad_norm": 1151.251708984375, + "learning_rate": 9.83707609731432e-06, + "loss": 89.3938, + "step": 21450 + }, + { + "epoch": 0.1775240931463788, + "grad_norm": 749.4866333007812, + "learning_rate": 9.836718806759119e-06, + "loss": 145.9112, + "step": 21460 + }, + { + "epoch": 0.17760681639574802, + "grad_norm": 1220.1741943359375, + "learning_rate": 9.836361131368398e-06, + "loss": 179.5797, + "step": 21470 + }, + { + "epoch": 0.17768953964511727, + "grad_norm": 446.74859619140625, + "learning_rate": 9.836003071170617e-06, + "loss": 156.5806, + "step": 21480 + }, + { + "epoch": 0.1777722628944865, + "grad_norm": 872.1563720703125, + "learning_rate": 9.835644626194268e-06, + "loss": 141.7276, + "step": 21490 + }, + { + "epoch": 0.17785498614385573, + "grad_norm": 1689.736328125, + "learning_rate": 9.835285796467867e-06, + "loss": 133.7971, + "step": 21500 + }, + { + "epoch": 0.17793770939322495, + "grad_norm": 1551.1575927734375, + "learning_rate": 9.834926582019968e-06, + "loss": 156.3386, + "step": 21510 + }, + { + "epoch": 0.1780204326425942, + "grad_norm": 1045.95849609375, + "learning_rate": 9.834566982879149e-06, + "loss": 118.8335, + "step": 21520 + }, + { + "epoch": 0.17810315589196343, + "grad_norm": 1034.3304443359375, + "learning_rate": 9.83420699907402e-06, + "loss": 136.8709, + "step": 21530 + }, + { + "epoch": 0.17818587914133266, + "grad_norm": 870.5877075195312, + "learning_rate": 9.83384663063323e-06, + "loss": 125.3036, + "step": 21540 + }, + { + "epoch": 0.1782686023907019, + "grad_norm": 3151.6611328125, + "learning_rate": 9.833485877585447e-06, + "loss": 230.3187, + "step": 21550 + }, + { + "epoch": 0.17835132564007114, + "grad_norm": 1180.687255859375, + "learning_rate": 9.833124739959375e-06, + "loss": 128.1897, + "step": 21560 + }, + { + "epoch": 0.17843404888944037, + "grad_norm": 1331.4376220703125, + "learning_rate": 9.83276321778375e-06, + "loss": 161.2605, + "step": 21570 + }, + { + "epoch": 0.17851677213880962, + "grad_norm": 347.8481750488281, + "learning_rate": 9.832401311087334e-06, + "loss": 137.9425, + "step": 21580 + }, + { + "epoch": 0.17859949538817885, + "grad_norm": 1399.0616455078125, + "learning_rate": 9.832039019898922e-06, + "loss": 141.6175, + "step": 21590 + }, + { + "epoch": 0.17868221863754807, + "grad_norm": 1321.5462646484375, + "learning_rate": 9.831676344247343e-06, + "loss": 145.0367, + "step": 21600 + }, + { + "epoch": 0.17876494188691733, + "grad_norm": 2181.245361328125, + "learning_rate": 9.831313284161452e-06, + "loss": 127.1829, + "step": 21610 + }, + { + "epoch": 0.17884766513628655, + "grad_norm": 980.81201171875, + "learning_rate": 9.830949839670134e-06, + "loss": 160.9928, + "step": 21620 + }, + { + "epoch": 0.17893038838565578, + "grad_norm": 1042.4224853515625, + "learning_rate": 9.83058601080231e-06, + "loss": 143.5391, + "step": 21630 + }, + { + "epoch": 0.17901311163502504, + "grad_norm": 1248.8353271484375, + "learning_rate": 9.830221797586925e-06, + "loss": 125.7723, + "step": 21640 + }, + { + "epoch": 0.17909583488439426, + "grad_norm": 765.4020385742188, + "learning_rate": 9.829857200052961e-06, + "loss": 145.4247, + "step": 21650 + }, + { + "epoch": 0.1791785581337635, + "grad_norm": 1584.0721435546875, + "learning_rate": 9.829492218229426e-06, + "loss": 129.5774, + "step": 21660 + }, + { + "epoch": 0.17926128138313274, + "grad_norm": 640.2371215820312, + 
"learning_rate": 9.829126852145357e-06, + "loss": 175.8682, + "step": 21670 + }, + { + "epoch": 0.17934400463250197, + "grad_norm": 879.1619262695312, + "learning_rate": 9.82876110182983e-06, + "loss": 148.2943, + "step": 21680 + }, + { + "epoch": 0.1794267278818712, + "grad_norm": 1808.1944580078125, + "learning_rate": 9.82839496731194e-06, + "loss": 161.7926, + "step": 21690 + }, + { + "epoch": 0.17950945113124042, + "grad_norm": 1331.4281005859375, + "learning_rate": 9.828028448620824e-06, + "loss": 144.7912, + "step": 21700 + }, + { + "epoch": 0.17959217438060968, + "grad_norm": 916.88134765625, + "learning_rate": 9.827661545785641e-06, + "loss": 111.8015, + "step": 21710 + }, + { + "epoch": 0.1796748976299789, + "grad_norm": 945.744140625, + "learning_rate": 9.827294258835584e-06, + "loss": 135.7884, + "step": 21720 + }, + { + "epoch": 0.17975762087934813, + "grad_norm": 843.4868774414062, + "learning_rate": 9.82692658779988e-06, + "loss": 166.0405, + "step": 21730 + }, + { + "epoch": 0.17984034412871738, + "grad_norm": 866.8135375976562, + "learning_rate": 9.826558532707777e-06, + "loss": 131.8315, + "step": 21740 + }, + { + "epoch": 0.1799230673780866, + "grad_norm": 1258.517822265625, + "learning_rate": 9.826190093588564e-06, + "loss": 192.2693, + "step": 21750 + }, + { + "epoch": 0.18000579062745584, + "grad_norm": 755.1361694335938, + "learning_rate": 9.825821270471555e-06, + "loss": 110.0884, + "step": 21760 + }, + { + "epoch": 0.1800885138768251, + "grad_norm": 505.46856689453125, + "learning_rate": 9.825452063386094e-06, + "loss": 117.8567, + "step": 21770 + }, + { + "epoch": 0.18017123712619432, + "grad_norm": 1369.44287109375, + "learning_rate": 9.825082472361558e-06, + "loss": 135.8278, + "step": 21780 + }, + { + "epoch": 0.18025396037556354, + "grad_norm": 917.852783203125, + "learning_rate": 9.824712497427354e-06, + "loss": 101.3896, + "step": 21790 + }, + { + "epoch": 0.1803366836249328, + "grad_norm": 987.9921264648438, + "learning_rate": 9.824342138612918e-06, + "loss": 140.2833, + "step": 21800 + }, + { + "epoch": 0.18041940687430202, + "grad_norm": 902.6442260742188, + "learning_rate": 9.823971395947723e-06, + "loss": 147.3234, + "step": 21810 + }, + { + "epoch": 0.18050213012367125, + "grad_norm": 767.7576293945312, + "learning_rate": 9.823600269461259e-06, + "loss": 148.7868, + "step": 21820 + }, + { + "epoch": 0.1805848533730405, + "grad_norm": 766.7611694335938, + "learning_rate": 9.823228759183058e-06, + "loss": 124.8973, + "step": 21830 + }, + { + "epoch": 0.18066757662240973, + "grad_norm": 836.1171264648438, + "learning_rate": 9.822856865142683e-06, + "loss": 117.0898, + "step": 21840 + }, + { + "epoch": 0.18075029987177896, + "grad_norm": 1208.75146484375, + "learning_rate": 9.822484587369721e-06, + "loss": 125.5648, + "step": 21850 + }, + { + "epoch": 0.1808330231211482, + "grad_norm": 1158.348388671875, + "learning_rate": 9.822111925893792e-06, + "loss": 139.8755, + "step": 21860 + }, + { + "epoch": 0.18091574637051744, + "grad_norm": 1267.1917724609375, + "learning_rate": 9.821738880744549e-06, + "loss": 135.5524, + "step": 21870 + }, + { + "epoch": 0.18099846961988666, + "grad_norm": 2137.44580078125, + "learning_rate": 9.82136545195167e-06, + "loss": 159.7926, + "step": 21880 + }, + { + "epoch": 0.1810811928692559, + "grad_norm": 955.8012084960938, + "learning_rate": 9.82099163954487e-06, + "loss": 121.1186, + "step": 21890 + }, + { + "epoch": 0.18116391611862515, + "grad_norm": 1042.67724609375, + "learning_rate": 9.820617443553889e-06, + "loss": 
132.7085, + "step": 21900 + }, + { + "epoch": 0.18124663936799437, + "grad_norm": 988.7931518554688, + "learning_rate": 9.820242864008503e-06, + "loss": 141.3183, + "step": 21910 + }, + { + "epoch": 0.1813293626173636, + "grad_norm": 1212.5155029296875, + "learning_rate": 9.819867900938514e-06, + "loss": 139.713, + "step": 21920 + }, + { + "epoch": 0.18141208586673285, + "grad_norm": 714.6464233398438, + "learning_rate": 9.819492554373758e-06, + "loss": 122.3466, + "step": 21930 + }, + { + "epoch": 0.18149480911610208, + "grad_norm": 1552.1221923828125, + "learning_rate": 9.819116824344095e-06, + "loss": 127.3137, + "step": 21940 + }, + { + "epoch": 0.1815775323654713, + "grad_norm": 577.6259155273438, + "learning_rate": 9.818740710879424e-06, + "loss": 87.6241, + "step": 21950 + }, + { + "epoch": 0.18166025561484056, + "grad_norm": 947.9591064453125, + "learning_rate": 9.81836421400967e-06, + "loss": 109.6651, + "step": 21960 + }, + { + "epoch": 0.1817429788642098, + "grad_norm": 915.073486328125, + "learning_rate": 9.81798733376479e-06, + "loss": 112.4667, + "step": 21970 + }, + { + "epoch": 0.181825702113579, + "grad_norm": 971.461181640625, + "learning_rate": 9.817610070174768e-06, + "loss": 159.3875, + "step": 21980 + }, + { + "epoch": 0.18190842536294827, + "grad_norm": 839.3546142578125, + "learning_rate": 9.817232423269622e-06, + "loss": 141.2685, + "step": 21990 + }, + { + "epoch": 0.1819911486123175, + "grad_norm": 901.919189453125, + "learning_rate": 9.816854393079402e-06, + "loss": 150.3302, + "step": 22000 + }, + { + "epoch": 0.18207387186168672, + "grad_norm": 1099.916259765625, + "learning_rate": 9.816475979634183e-06, + "loss": 105.6784, + "step": 22010 + }, + { + "epoch": 0.18215659511105597, + "grad_norm": 1539.3607177734375, + "learning_rate": 9.816097182964076e-06, + "loss": 150.3366, + "step": 22020 + }, + { + "epoch": 0.1822393183604252, + "grad_norm": 1262.76513671875, + "learning_rate": 9.81571800309922e-06, + "loss": 205.2206, + "step": 22030 + }, + { + "epoch": 0.18232204160979443, + "grad_norm": 1119.6072998046875, + "learning_rate": 9.815338440069782e-06, + "loss": 97.6272, + "step": 22040 + }, + { + "epoch": 0.18240476485916368, + "grad_norm": 977.2727661132812, + "learning_rate": 9.814958493905962e-06, + "loss": 154.7452, + "step": 22050 + }, + { + "epoch": 0.1824874881085329, + "grad_norm": 1467.2486572265625, + "learning_rate": 9.814578164637996e-06, + "loss": 116.3554, + "step": 22060 + }, + { + "epoch": 0.18257021135790213, + "grad_norm": 1132.6796875, + "learning_rate": 9.81419745229614e-06, + "loss": 146.8583, + "step": 22070 + }, + { + "epoch": 0.18265293460727136, + "grad_norm": 1301.7706298828125, + "learning_rate": 9.813816356910685e-06, + "loss": 150.6081, + "step": 22080 + }, + { + "epoch": 0.18273565785664062, + "grad_norm": 786.4771728515625, + "learning_rate": 9.813434878511956e-06, + "loss": 98.5915, + "step": 22090 + }, + { + "epoch": 0.18281838110600984, + "grad_norm": 1082.4107666015625, + "learning_rate": 9.813053017130305e-06, + "loss": 106.2249, + "step": 22100 + }, + { + "epoch": 0.18290110435537907, + "grad_norm": 844.4359130859375, + "learning_rate": 9.812670772796113e-06, + "loss": 108.4317, + "step": 22110 + }, + { + "epoch": 0.18298382760474832, + "grad_norm": 1356.7515869140625, + "learning_rate": 9.812288145539796e-06, + "loss": 156.1458, + "step": 22120 + }, + { + "epoch": 0.18306655085411755, + "grad_norm": 571.9811401367188, + "learning_rate": 9.811905135391796e-06, + "loss": 128.8242, + "step": 22130 + }, + { + "epoch": 
0.18314927410348678, + "grad_norm": 996.9983520507812, + "learning_rate": 9.81152174238259e-06, + "loss": 121.8907, + "step": 22140 + }, + { + "epoch": 0.18323199735285603, + "grad_norm": 1005.370361328125, + "learning_rate": 9.81113796654268e-06, + "loss": 130.7526, + "step": 22150 + }, + { + "epoch": 0.18331472060222526, + "grad_norm": 522.5587158203125, + "learning_rate": 9.810753807902603e-06, + "loss": 167.8494, + "step": 22160 + }, + { + "epoch": 0.18339744385159448, + "grad_norm": 1476.099853515625, + "learning_rate": 9.81036926649292e-06, + "loss": 125.2975, + "step": 22170 + }, + { + "epoch": 0.18348016710096374, + "grad_norm": 864.9098510742188, + "learning_rate": 9.809984342344234e-06, + "loss": 114.3755, + "step": 22180 + }, + { + "epoch": 0.18356289035033296, + "grad_norm": 1131.15966796875, + "learning_rate": 9.80959903548717e-06, + "loss": 130.3137, + "step": 22190 + }, + { + "epoch": 0.1836456135997022, + "grad_norm": 2763.490234375, + "learning_rate": 9.80921334595238e-06, + "loss": 159.7871, + "step": 22200 + }, + { + "epoch": 0.18372833684907144, + "grad_norm": 815.5933227539062, + "learning_rate": 9.808827273770558e-06, + "loss": 103.766, + "step": 22210 + }, + { + "epoch": 0.18381106009844067, + "grad_norm": 1680.257568359375, + "learning_rate": 9.80844081897242e-06, + "loss": 162.1204, + "step": 22220 + }, + { + "epoch": 0.1838937833478099, + "grad_norm": 934.7627563476562, + "learning_rate": 9.808053981588712e-06, + "loss": 144.453, + "step": 22230 + }, + { + "epoch": 0.18397650659717912, + "grad_norm": 1144.36767578125, + "learning_rate": 9.807666761650215e-06, + "loss": 121.5437, + "step": 22240 + }, + { + "epoch": 0.18405922984654838, + "grad_norm": 2383.975341796875, + "learning_rate": 9.80727915918774e-06, + "loss": 178.4958, + "step": 22250 + }, + { + "epoch": 0.1841419530959176, + "grad_norm": 1073.094482421875, + "learning_rate": 9.806891174232122e-06, + "loss": 149.8745, + "step": 22260 + }, + { + "epoch": 0.18422467634528683, + "grad_norm": 1233.2926025390625, + "learning_rate": 9.806502806814236e-06, + "loss": 128.9702, + "step": 22270 + }, + { + "epoch": 0.18430739959465609, + "grad_norm": 837.9373168945312, + "learning_rate": 9.806114056964977e-06, + "loss": 139.9306, + "step": 22280 + }, + { + "epoch": 0.1843901228440253, + "grad_norm": 628.1285400390625, + "learning_rate": 9.805724924715283e-06, + "loss": 123.0449, + "step": 22290 + }, + { + "epoch": 0.18447284609339454, + "grad_norm": 561.8240356445312, + "learning_rate": 9.80533541009611e-06, + "loss": 105.3535, + "step": 22300 + }, + { + "epoch": 0.1845555693427638, + "grad_norm": 1421.851806640625, + "learning_rate": 9.804945513138454e-06, + "loss": 219.4902, + "step": 22310 + }, + { + "epoch": 0.18463829259213302, + "grad_norm": 825.07080078125, + "learning_rate": 9.804555233873335e-06, + "loss": 135.7106, + "step": 22320 + }, + { + "epoch": 0.18472101584150225, + "grad_norm": 2163.25439453125, + "learning_rate": 9.804164572331804e-06, + "loss": 173.7582, + "step": 22330 + }, + { + "epoch": 0.1848037390908715, + "grad_norm": 1122.6065673828125, + "learning_rate": 9.80377352854495e-06, + "loss": 91.7703, + "step": 22340 + }, + { + "epoch": 0.18488646234024073, + "grad_norm": 909.8526000976562, + "learning_rate": 9.80338210254388e-06, + "loss": 124.849, + "step": 22350 + }, + { + "epoch": 0.18496918558960995, + "grad_norm": 784.2378540039062, + "learning_rate": 9.80299029435974e-06, + "loss": 148.4874, + "step": 22360 + }, + { + "epoch": 0.1850519088389792, + "grad_norm": 999.297119140625, + 
"learning_rate": 9.802598104023706e-06, + "loss": 159.1145, + "step": 22370 + }, + { + "epoch": 0.18513463208834843, + "grad_norm": 753.5262451171875, + "learning_rate": 9.80220553156698e-06, + "loss": 113.2028, + "step": 22380 + }, + { + "epoch": 0.18521735533771766, + "grad_norm": 1254.95947265625, + "learning_rate": 9.801812577020802e-06, + "loss": 139.2906, + "step": 22390 + }, + { + "epoch": 0.1853000785870869, + "grad_norm": 1023.6962890625, + "learning_rate": 9.801419240416432e-06, + "loss": 144.157, + "step": 22400 + }, + { + "epoch": 0.18538280183645614, + "grad_norm": 937.4168701171875, + "learning_rate": 9.80102552178517e-06, + "loss": 124.0214, + "step": 22410 + }, + { + "epoch": 0.18546552508582537, + "grad_norm": 1038.947265625, + "learning_rate": 9.800631421158341e-06, + "loss": 134.8847, + "step": 22420 + }, + { + "epoch": 0.1855482483351946, + "grad_norm": 839.5892333984375, + "learning_rate": 9.800236938567302e-06, + "loss": 114.9243, + "step": 22430 + }, + { + "epoch": 0.18563097158456385, + "grad_norm": 1064.1595458984375, + "learning_rate": 9.799842074043438e-06, + "loss": 132.5262, + "step": 22440 + }, + { + "epoch": 0.18571369483393307, + "grad_norm": 868.5515747070312, + "learning_rate": 9.799446827618172e-06, + "loss": 123.4177, + "step": 22450 + }, + { + "epoch": 0.1857964180833023, + "grad_norm": 1418.9991455078125, + "learning_rate": 9.799051199322944e-06, + "loss": 134.293, + "step": 22460 + }, + { + "epoch": 0.18587914133267155, + "grad_norm": 1264.387939453125, + "learning_rate": 9.798655189189239e-06, + "loss": 155.6345, + "step": 22470 + }, + { + "epoch": 0.18596186458204078, + "grad_norm": 1341.15185546875, + "learning_rate": 9.798258797248563e-06, + "loss": 151.904, + "step": 22480 + }, + { + "epoch": 0.18604458783141, + "grad_norm": 1191.142578125, + "learning_rate": 9.797862023532457e-06, + "loss": 143.8828, + "step": 22490 + }, + { + "epoch": 0.18612731108077926, + "grad_norm": 1029.2672119140625, + "learning_rate": 9.797464868072489e-06, + "loss": 111.6996, + "step": 22500 + }, + { + "epoch": 0.1862100343301485, + "grad_norm": 898.6533203125, + "learning_rate": 9.797067330900256e-06, + "loss": 123.9605, + "step": 22510 + }, + { + "epoch": 0.18629275757951771, + "grad_norm": 1148.5517578125, + "learning_rate": 9.796669412047392e-06, + "loss": 107.9766, + "step": 22520 + }, + { + "epoch": 0.18637548082888697, + "grad_norm": 1347.3740234375, + "learning_rate": 9.796271111545559e-06, + "loss": 132.5673, + "step": 22530 + }, + { + "epoch": 0.1864582040782562, + "grad_norm": 732.031982421875, + "learning_rate": 9.795872429426443e-06, + "loss": 113.1994, + "step": 22540 + }, + { + "epoch": 0.18654092732762542, + "grad_norm": 847.0791625976562, + "learning_rate": 9.79547336572177e-06, + "loss": 115.746, + "step": 22550 + }, + { + "epoch": 0.18662365057699468, + "grad_norm": 897.2965698242188, + "learning_rate": 9.795073920463289e-06, + "loss": 123.8876, + "step": 22560 + }, + { + "epoch": 0.1867063738263639, + "grad_norm": 1148.21875, + "learning_rate": 9.794674093682781e-06, + "loss": 133.7378, + "step": 22570 + }, + { + "epoch": 0.18678909707573313, + "grad_norm": 1180.834716796875, + "learning_rate": 9.79427388541206e-06, + "loss": 100.9495, + "step": 22580 + }, + { + "epoch": 0.18687182032510238, + "grad_norm": 681.8207397460938, + "learning_rate": 9.79387329568297e-06, + "loss": 108.7203, + "step": 22590 + }, + { + "epoch": 0.1869545435744716, + "grad_norm": 859.6419067382812, + "learning_rate": 9.793472324527383e-06, + "loss": 179.532, + "step": 22600 + 
}, + { + "epoch": 0.18703726682384084, + "grad_norm": 615.40380859375, + "learning_rate": 9.793070971977203e-06, + "loss": 103.2244, + "step": 22610 + }, + { + "epoch": 0.18711999007321006, + "grad_norm": 896.5668334960938, + "learning_rate": 9.79266923806436e-06, + "loss": 135.8797, + "step": 22620 + }, + { + "epoch": 0.18720271332257932, + "grad_norm": 685.52685546875, + "learning_rate": 9.792267122820823e-06, + "loss": 102.7591, + "step": 22630 + }, + { + "epoch": 0.18728543657194854, + "grad_norm": 656.8187255859375, + "learning_rate": 9.791864626278584e-06, + "loss": 134.8737, + "step": 22640 + }, + { + "epoch": 0.18736815982131777, + "grad_norm": 406.96820068359375, + "learning_rate": 9.791461748469669e-06, + "loss": 129.8552, + "step": 22650 + }, + { + "epoch": 0.18745088307068702, + "grad_norm": 1129.7803955078125, + "learning_rate": 9.791058489426134e-06, + "loss": 133.4618, + "step": 22660 + }, + { + "epoch": 0.18753360632005625, + "grad_norm": 1034.3385009765625, + "learning_rate": 9.790654849180059e-06, + "loss": 107.0448, + "step": 22670 + }, + { + "epoch": 0.18761632956942548, + "grad_norm": 1405.0714111328125, + "learning_rate": 9.790250827763566e-06, + "loss": 121.3317, + "step": 22680 + }, + { + "epoch": 0.18769905281879473, + "grad_norm": 1143.3116455078125, + "learning_rate": 9.7898464252088e-06, + "loss": 93.2796, + "step": 22690 + }, + { + "epoch": 0.18778177606816396, + "grad_norm": 1161.796630859375, + "learning_rate": 9.789441641547935e-06, + "loss": 112.5664, + "step": 22700 + }, + { + "epoch": 0.18786449931753318, + "grad_norm": 1665.2811279296875, + "learning_rate": 9.789036476813178e-06, + "loss": 162.8835, + "step": 22710 + }, + { + "epoch": 0.18794722256690244, + "grad_norm": 885.5316772460938, + "learning_rate": 9.788630931036769e-06, + "loss": 127.6729, + "step": 22720 + }, + { + "epoch": 0.18802994581627167, + "grad_norm": 1863.15673828125, + "learning_rate": 9.788225004250974e-06, + "loss": 136.1164, + "step": 22730 + }, + { + "epoch": 0.1881126690656409, + "grad_norm": 770.5360717773438, + "learning_rate": 9.78781869648809e-06, + "loss": 145.4966, + "step": 22740 + }, + { + "epoch": 0.18819539231501015, + "grad_norm": 1846.6016845703125, + "learning_rate": 9.787412007780445e-06, + "loss": 119.8534, + "step": 22750 + }, + { + "epoch": 0.18827811556437937, + "grad_norm": 930.654052734375, + "learning_rate": 9.787004938160398e-06, + "loss": 112.2845, + "step": 22760 + }, + { + "epoch": 0.1883608388137486, + "grad_norm": 3968.6298828125, + "learning_rate": 9.786597487660336e-06, + "loss": 166.1542, + "step": 22770 + }, + { + "epoch": 0.18844356206311783, + "grad_norm": 687.5050659179688, + "learning_rate": 9.78618965631268e-06, + "loss": 118.5364, + "step": 22780 + }, + { + "epoch": 0.18852628531248708, + "grad_norm": 1421.8592529296875, + "learning_rate": 9.785781444149883e-06, + "loss": 102.7248, + "step": 22790 + }, + { + "epoch": 0.1886090085618563, + "grad_norm": 1723.80029296875, + "learning_rate": 9.785372851204415e-06, + "loss": 136.1481, + "step": 22800 + }, + { + "epoch": 0.18869173181122553, + "grad_norm": 1364.1827392578125, + "learning_rate": 9.784963877508794e-06, + "loss": 135.9108, + "step": 22810 + }, + { + "epoch": 0.1887744550605948, + "grad_norm": 2061.134765625, + "learning_rate": 9.784554523095554e-06, + "loss": 201.537, + "step": 22820 + }, + { + "epoch": 0.188857178309964, + "grad_norm": 958.0075073242188, + "learning_rate": 9.784144787997272e-06, + "loss": 96.8345, + "step": 22830 + }, + { + "epoch": 0.18893990155933324, + 
"grad_norm": 717.226318359375, + "learning_rate": 9.783734672246545e-06, + "loss": 130.6322, + "step": 22840 + }, + { + "epoch": 0.1890226248087025, + "grad_norm": 716.5567016601562, + "learning_rate": 9.783324175876004e-06, + "loss": 105.1676, + "step": 22850 + }, + { + "epoch": 0.18910534805807172, + "grad_norm": 1025.28173828125, + "learning_rate": 9.782913298918311e-06, + "loss": 151.713, + "step": 22860 + }, + { + "epoch": 0.18918807130744095, + "grad_norm": 649.5341186523438, + "learning_rate": 9.782502041406157e-06, + "loss": 135.4802, + "step": 22870 + }, + { + "epoch": 0.1892707945568102, + "grad_norm": 1117.214599609375, + "learning_rate": 9.782090403372263e-06, + "loss": 148.4325, + "step": 22880 + }, + { + "epoch": 0.18935351780617943, + "grad_norm": 1014.591552734375, + "learning_rate": 9.781678384849385e-06, + "loss": 111.5626, + "step": 22890 + }, + { + "epoch": 0.18943624105554865, + "grad_norm": 2505.06982421875, + "learning_rate": 9.7812659858703e-06, + "loss": 142.1111, + "step": 22900 + }, + { + "epoch": 0.1895189643049179, + "grad_norm": 1313.8260498046875, + "learning_rate": 9.780853206467826e-06, + "loss": 133.5671, + "step": 22910 + }, + { + "epoch": 0.18960168755428713, + "grad_norm": 984.4515380859375, + "learning_rate": 9.780440046674803e-06, + "loss": 167.4173, + "step": 22920 + }, + { + "epoch": 0.18968441080365636, + "grad_norm": 893.4295043945312, + "learning_rate": 9.780026506524106e-06, + "loss": 159.9201, + "step": 22930 + }, + { + "epoch": 0.18976713405302562, + "grad_norm": 818.2760620117188, + "learning_rate": 9.779612586048635e-06, + "loss": 132.297, + "step": 22940 + }, + { + "epoch": 0.18984985730239484, + "grad_norm": 1028.792236328125, + "learning_rate": 9.779198285281326e-06, + "loss": 175.7389, + "step": 22950 + }, + { + "epoch": 0.18993258055176407, + "grad_norm": 1137.0174560546875, + "learning_rate": 9.778783604255145e-06, + "loss": 120.6092, + "step": 22960 + }, + { + "epoch": 0.1900153038011333, + "grad_norm": 2498.3369140625, + "learning_rate": 9.778368543003083e-06, + "loss": 144.8033, + "step": 22970 + }, + { + "epoch": 0.19009802705050255, + "grad_norm": 1361.688232421875, + "learning_rate": 9.777953101558164e-06, + "loss": 133.7076, + "step": 22980 + }, + { + "epoch": 0.19018075029987178, + "grad_norm": 1047.64453125, + "learning_rate": 9.777537279953448e-06, + "loss": 120.5423, + "step": 22990 + }, + { + "epoch": 0.190263473549241, + "grad_norm": 982.462158203125, + "learning_rate": 9.777121078222015e-06, + "loss": 108.502, + "step": 23000 + }, + { + "epoch": 0.19034619679861026, + "grad_norm": 1133.6990966796875, + "learning_rate": 9.77670449639698e-06, + "loss": 129.8804, + "step": 23010 + }, + { + "epoch": 0.19042892004797948, + "grad_norm": 671.9098510742188, + "learning_rate": 9.776287534511492e-06, + "loss": 130.2723, + "step": 23020 + }, + { + "epoch": 0.1905116432973487, + "grad_norm": 1220.64892578125, + "learning_rate": 9.775870192598726e-06, + "loss": 122.8446, + "step": 23030 + }, + { + "epoch": 0.19059436654671796, + "grad_norm": 2241.37548828125, + "learning_rate": 9.775452470691886e-06, + "loss": 133.7913, + "step": 23040 + }, + { + "epoch": 0.1906770897960872, + "grad_norm": 1148.03662109375, + "learning_rate": 9.77503436882421e-06, + "loss": 138.1318, + "step": 23050 + }, + { + "epoch": 0.19075981304545642, + "grad_norm": 980.452880859375, + "learning_rate": 9.774615887028964e-06, + "loss": 147.464, + "step": 23060 + }, + { + "epoch": 0.19084253629482567, + "grad_norm": 2212.347412109375, + "learning_rate": 
9.774197025339442e-06, + "loss": 102.5226, + "step": 23070 + }, + { + "epoch": 0.1909252595441949, + "grad_norm": 928.7798461914062, + "learning_rate": 9.773777783788976e-06, + "loss": 113.3667, + "step": 23080 + }, + { + "epoch": 0.19100798279356412, + "grad_norm": 1282.0994873046875, + "learning_rate": 9.77335816241092e-06, + "loss": 144.8801, + "step": 23090 + }, + { + "epoch": 0.19109070604293338, + "grad_norm": 837.4090576171875, + "learning_rate": 9.77293816123866e-06, + "loss": 104.0558, + "step": 23100 + }, + { + "epoch": 0.1911734292923026, + "grad_norm": 881.4822998046875, + "learning_rate": 9.772517780305618e-06, + "loss": 121.4086, + "step": 23110 + }, + { + "epoch": 0.19125615254167183, + "grad_norm": 880.7931518554688, + "learning_rate": 9.772097019645236e-06, + "loss": 139.5056, + "step": 23120 + }, + { + "epoch": 0.19133887579104109, + "grad_norm": 776.1262817382812, + "learning_rate": 9.771675879290998e-06, + "loss": 146.6072, + "step": 23130 + }, + { + "epoch": 0.1914215990404103, + "grad_norm": 904.765380859375, + "learning_rate": 9.771254359276407e-06, + "loss": 116.9324, + "step": 23140 + }, + { + "epoch": 0.19150432228977954, + "grad_norm": 730.8990478515625, + "learning_rate": 9.770832459635004e-06, + "loss": 133.2764, + "step": 23150 + }, + { + "epoch": 0.19158704553914876, + "grad_norm": 690.1975708007812, + "learning_rate": 9.77041018040036e-06, + "loss": 175.1648, + "step": 23160 + }, + { + "epoch": 0.19166976878851802, + "grad_norm": 907.7679443359375, + "learning_rate": 9.769987521606068e-06, + "loss": 105.3016, + "step": 23170 + }, + { + "epoch": 0.19175249203788725, + "grad_norm": 693.336181640625, + "learning_rate": 9.769564483285761e-06, + "loss": 192.6149, + "step": 23180 + }, + { + "epoch": 0.19183521528725647, + "grad_norm": 557.3772583007812, + "learning_rate": 9.769141065473099e-06, + "loss": 115.1971, + "step": 23190 + }, + { + "epoch": 0.19191793853662573, + "grad_norm": 858.6890869140625, + "learning_rate": 9.768717268201768e-06, + "loss": 145.7314, + "step": 23200 + }, + { + "epoch": 0.19200066178599495, + "grad_norm": 2752.160888671875, + "learning_rate": 9.768293091505491e-06, + "loss": 128.7382, + "step": 23210 + }, + { + "epoch": 0.19208338503536418, + "grad_norm": 971.1498413085938, + "learning_rate": 9.767868535418014e-06, + "loss": 126.5057, + "step": 23220 + }, + { + "epoch": 0.19216610828473343, + "grad_norm": 793.6380615234375, + "learning_rate": 9.767443599973122e-06, + "loss": 108.5758, + "step": 23230 + }, + { + "epoch": 0.19224883153410266, + "grad_norm": 1140.6827392578125, + "learning_rate": 9.76701828520462e-06, + "loss": 117.5244, + "step": 23240 + }, + { + "epoch": 0.1923315547834719, + "grad_norm": 840.5177001953125, + "learning_rate": 9.766592591146353e-06, + "loss": 107.5938, + "step": 23250 + }, + { + "epoch": 0.19241427803284114, + "grad_norm": 1344.9217529296875, + "learning_rate": 9.766166517832188e-06, + "loss": 120.1126, + "step": 23260 + }, + { + "epoch": 0.19249700128221037, + "grad_norm": 1015.5147705078125, + "learning_rate": 9.765740065296025e-06, + "loss": 131.5013, + "step": 23270 + }, + { + "epoch": 0.1925797245315796, + "grad_norm": 574.64306640625, + "learning_rate": 9.765313233571798e-06, + "loss": 131.5927, + "step": 23280 + }, + { + "epoch": 0.19266244778094885, + "grad_norm": 1062.6119384765625, + "learning_rate": 9.76488602269347e-06, + "loss": 162.6192, + "step": 23290 + }, + { + "epoch": 0.19274517103031807, + "grad_norm": 1031.8822021484375, + "learning_rate": 9.764458432695026e-06, + "loss": 91.6339, + 
"step": 23300 + }, + { + "epoch": 0.1928278942796873, + "grad_norm": 715.5499877929688, + "learning_rate": 9.76403046361049e-06, + "loss": 98.5823, + "step": 23310 + }, + { + "epoch": 0.19291061752905656, + "grad_norm": 707.9616088867188, + "learning_rate": 9.763602115473914e-06, + "loss": 129.4643, + "step": 23320 + }, + { + "epoch": 0.19299334077842578, + "grad_norm": 1023.2615966796875, + "learning_rate": 9.763173388319381e-06, + "loss": 108.6447, + "step": 23330 + }, + { + "epoch": 0.193076064027795, + "grad_norm": 745.7240600585938, + "learning_rate": 9.762744282181e-06, + "loss": 160.1926, + "step": 23340 + }, + { + "epoch": 0.19315878727716423, + "grad_norm": 1169.117431640625, + "learning_rate": 9.762314797092916e-06, + "loss": 136.8101, + "step": 23350 + }, + { + "epoch": 0.1932415105265335, + "grad_norm": 2356.3876953125, + "learning_rate": 9.761884933089301e-06, + "loss": 149.6885, + "step": 23360 + }, + { + "epoch": 0.19332423377590272, + "grad_norm": 748.1530151367188, + "learning_rate": 9.761454690204352e-06, + "loss": 90.2527, + "step": 23370 + }, + { + "epoch": 0.19340695702527194, + "grad_norm": 773.795654296875, + "learning_rate": 9.76102406847231e-06, + "loss": 133.7801, + "step": 23380 + }, + { + "epoch": 0.1934896802746412, + "grad_norm": 785.1251831054688, + "learning_rate": 9.760593067927428e-06, + "loss": 92.1381, + "step": 23390 + }, + { + "epoch": 0.19357240352401042, + "grad_norm": 987.9070434570312, + "learning_rate": 9.760161688604008e-06, + "loss": 152.8994, + "step": 23400 + }, + { + "epoch": 0.19365512677337965, + "grad_norm": 1091.9166259765625, + "learning_rate": 9.759729930536367e-06, + "loss": 106.4156, + "step": 23410 + }, + { + "epoch": 0.1937378500227489, + "grad_norm": 828.226806640625, + "learning_rate": 9.75929779375886e-06, + "loss": 127.5234, + "step": 23420 + }, + { + "epoch": 0.19382057327211813, + "grad_norm": 1164.93359375, + "learning_rate": 9.75886527830587e-06, + "loss": 147.2189, + "step": 23430 + }, + { + "epoch": 0.19390329652148736, + "grad_norm": 1556.861572265625, + "learning_rate": 9.75843238421181e-06, + "loss": 132.9836, + "step": 23440 + }, + { + "epoch": 0.1939860197708566, + "grad_norm": 1154.7764892578125, + "learning_rate": 9.757999111511121e-06, + "loss": 131.7635, + "step": 23450 + }, + { + "epoch": 0.19406874302022584, + "grad_norm": 1170.080078125, + "learning_rate": 9.757565460238281e-06, + "loss": 120.619, + "step": 23460 + }, + { + "epoch": 0.19415146626959506, + "grad_norm": 1463.3924560546875, + "learning_rate": 9.757131430427791e-06, + "loss": 129.6818, + "step": 23470 + }, + { + "epoch": 0.19423418951896432, + "grad_norm": 1100.9061279296875, + "learning_rate": 9.756697022114185e-06, + "loss": 108.3849, + "step": 23480 + }, + { + "epoch": 0.19431691276833354, + "grad_norm": 1876.0504150390625, + "learning_rate": 9.756262235332029e-06, + "loss": 103.7323, + "step": 23490 + }, + { + "epoch": 0.19439963601770277, + "grad_norm": 974.3872680664062, + "learning_rate": 9.755827070115915e-06, + "loss": 105.9175, + "step": 23500 + }, + { + "epoch": 0.194482359267072, + "grad_norm": 888.02099609375, + "learning_rate": 9.755391526500466e-06, + "loss": 108.9482, + "step": 23510 + }, + { + "epoch": 0.19456508251644125, + "grad_norm": 863.0517578125, + "learning_rate": 9.75495560452034e-06, + "loss": 131.4029, + "step": 23520 + }, + { + "epoch": 0.19464780576581048, + "grad_norm": 1131.92431640625, + "learning_rate": 9.754519304210214e-06, + "loss": 122.4951, + "step": 23530 + }, + { + "epoch": 0.1947305290151797, + "grad_norm": 
2271.8134765625, + "learning_rate": 9.754082625604812e-06, + "loss": 121.6546, + "step": 23540 + }, + { + "epoch": 0.19481325226454896, + "grad_norm": 919.2047119140625, + "learning_rate": 9.753645568738872e-06, + "loss": 138.8903, + "step": 23550 + }, + { + "epoch": 0.19489597551391818, + "grad_norm": 840.5191650390625, + "learning_rate": 9.75320813364717e-06, + "loss": 91.0436, + "step": 23560 + }, + { + "epoch": 0.1949786987632874, + "grad_norm": 621.6953125, + "learning_rate": 9.752770320364512e-06, + "loss": 128.6445, + "step": 23570 + }, + { + "epoch": 0.19506142201265667, + "grad_norm": 754.2584838867188, + "learning_rate": 9.752332128925732e-06, + "loss": 111.4495, + "step": 23580 + }, + { + "epoch": 0.1951441452620259, + "grad_norm": 1024.14501953125, + "learning_rate": 9.751893559365693e-06, + "loss": 170.7815, + "step": 23590 + }, + { + "epoch": 0.19522686851139512, + "grad_norm": 1205.4876708984375, + "learning_rate": 9.751454611719294e-06, + "loss": 121.6351, + "step": 23600 + }, + { + "epoch": 0.19530959176076437, + "grad_norm": 1503.17236328125, + "learning_rate": 9.751015286021455e-06, + "loss": 164.3172, + "step": 23610 + }, + { + "epoch": 0.1953923150101336, + "grad_norm": 937.5809936523438, + "learning_rate": 9.750575582307136e-06, + "loss": 160.1191, + "step": 23620 + }, + { + "epoch": 0.19547503825950283, + "grad_norm": 983.7006225585938, + "learning_rate": 9.75013550061132e-06, + "loss": 120.9869, + "step": 23630 + }, + { + "epoch": 0.19555776150887208, + "grad_norm": 1772.2957763671875, + "learning_rate": 9.749695040969022e-06, + "loss": 114.3774, + "step": 23640 + }, + { + "epoch": 0.1956404847582413, + "grad_norm": 924.7174072265625, + "learning_rate": 9.749254203415288e-06, + "loss": 124.1198, + "step": 23650 + }, + { + "epoch": 0.19572320800761053, + "grad_norm": 972.820068359375, + "learning_rate": 9.748812987985193e-06, + "loss": 123.3391, + "step": 23660 + }, + { + "epoch": 0.1958059312569798, + "grad_norm": 1330.291748046875, + "learning_rate": 9.748371394713842e-06, + "loss": 126.0948, + "step": 23670 + }, + { + "epoch": 0.195888654506349, + "grad_norm": 1056.2666015625, + "learning_rate": 9.747929423636372e-06, + "loss": 128.4867, + "step": 23680 + }, + { + "epoch": 0.19597137775571824, + "grad_norm": 670.9091186523438, + "learning_rate": 9.74748707478795e-06, + "loss": 140.6503, + "step": 23690 + }, + { + "epoch": 0.19605410100508747, + "grad_norm": 1226.66162109375, + "learning_rate": 9.747044348203766e-06, + "loss": 130.9208, + "step": 23700 + }, + { + "epoch": 0.19613682425445672, + "grad_norm": 907.8272705078125, + "learning_rate": 9.74660124391905e-06, + "loss": 127.5544, + "step": 23710 + }, + { + "epoch": 0.19621954750382595, + "grad_norm": 898.1268920898438, + "learning_rate": 9.746157761969058e-06, + "loss": 164.2529, + "step": 23720 + }, + { + "epoch": 0.19630227075319517, + "grad_norm": 1068.112060546875, + "learning_rate": 9.745713902389074e-06, + "loss": 143.5591, + "step": 23730 + }, + { + "epoch": 0.19638499400256443, + "grad_norm": 1102.2633056640625, + "learning_rate": 9.745269665214415e-06, + "loss": 122.1263, + "step": 23740 + }, + { + "epoch": 0.19646771725193365, + "grad_norm": 556.4323120117188, + "learning_rate": 9.744825050480425e-06, + "loss": 121.492, + "step": 23750 + }, + { + "epoch": 0.19655044050130288, + "grad_norm": 1045.6256103515625, + "learning_rate": 9.744380058222483e-06, + "loss": 131.9083, + "step": 23760 + }, + { + "epoch": 0.19663316375067214, + "grad_norm": 1136.8719482421875, + "learning_rate": 
9.743934688475994e-06, + "loss": 115.8493, + "step": 23770 + }, + { + "epoch": 0.19671588700004136, + "grad_norm": 962.9896850585938, + "learning_rate": 9.743488941276394e-06, + "loss": 117.902, + "step": 23780 + }, + { + "epoch": 0.1967986102494106, + "grad_norm": 1086.8525390625, + "learning_rate": 9.743042816659147e-06, + "loss": 134.2704, + "step": 23790 + }, + { + "epoch": 0.19688133349877984, + "grad_norm": 1227.1202392578125, + "learning_rate": 9.742596314659751e-06, + "loss": 127.5619, + "step": 23800 + }, + { + "epoch": 0.19696405674814907, + "grad_norm": 1608.360595703125, + "learning_rate": 9.742149435313732e-06, + "loss": 120.5991, + "step": 23810 + }, + { + "epoch": 0.1970467799975183, + "grad_norm": 664.271728515625, + "learning_rate": 9.741702178656647e-06, + "loss": 151.2022, + "step": 23820 + }, + { + "epoch": 0.19712950324688755, + "grad_norm": 1345.0858154296875, + "learning_rate": 9.74125454472408e-06, + "loss": 136.8234, + "step": 23830 + }, + { + "epoch": 0.19721222649625678, + "grad_norm": 732.8429565429688, + "learning_rate": 9.740806533551647e-06, + "loss": 234.7962, + "step": 23840 + }, + { + "epoch": 0.197294949745626, + "grad_norm": 1542.8756103515625, + "learning_rate": 9.740358145174999e-06, + "loss": 130.3346, + "step": 23850 + }, + { + "epoch": 0.19737767299499526, + "grad_norm": 821.8853149414062, + "learning_rate": 9.739909379629805e-06, + "loss": 139.0029, + "step": 23860 + }, + { + "epoch": 0.19746039624436448, + "grad_norm": 812.029541015625, + "learning_rate": 9.739460236951778e-06, + "loss": 127.8701, + "step": 23870 + }, + { + "epoch": 0.1975431194937337, + "grad_norm": 969.9833984375, + "learning_rate": 9.739010717176649e-06, + "loss": 99.5856, + "step": 23880 + }, + { + "epoch": 0.19762584274310294, + "grad_norm": 1117.0540771484375, + "learning_rate": 9.738560820340189e-06, + "loss": 114.7225, + "step": 23890 + }, + { + "epoch": 0.1977085659924722, + "grad_norm": 601.694091796875, + "learning_rate": 9.738110546478188e-06, + "loss": 123.2165, + "step": 23900 + }, + { + "epoch": 0.19779128924184142, + "grad_norm": 1268.921630859375, + "learning_rate": 9.737659895626478e-06, + "loss": 130.8849, + "step": 23910 + }, + { + "epoch": 0.19787401249121064, + "grad_norm": 1429.4453125, + "learning_rate": 9.737208867820914e-06, + "loss": 123.7755, + "step": 23920 + }, + { + "epoch": 0.1979567357405799, + "grad_norm": 654.82080078125, + "learning_rate": 9.736757463097378e-06, + "loss": 152.5086, + "step": 23930 + }, + { + "epoch": 0.19803945898994912, + "grad_norm": 852.7525024414062, + "learning_rate": 9.736305681491792e-06, + "loss": 116.6475, + "step": 23940 + }, + { + "epoch": 0.19812218223931835, + "grad_norm": 1039.8662109375, + "learning_rate": 9.735853523040098e-06, + "loss": 138.9281, + "step": 23950 + }, + { + "epoch": 0.1982049054886876, + "grad_norm": 1178.4395751953125, + "learning_rate": 9.735400987778274e-06, + "loss": 143.172, + "step": 23960 + }, + { + "epoch": 0.19828762873805683, + "grad_norm": 1232.4547119140625, + "learning_rate": 9.734948075742328e-06, + "loss": 121.5333, + "step": 23970 + }, + { + "epoch": 0.19837035198742606, + "grad_norm": 865.7545166015625, + "learning_rate": 9.734494786968293e-06, + "loss": 154.3453, + "step": 23980 + }, + { + "epoch": 0.1984530752367953, + "grad_norm": 669.6997680664062, + "learning_rate": 9.734041121492235e-06, + "loss": 144.5734, + "step": 23990 + }, + { + "epoch": 0.19853579848616454, + "grad_norm": 1144.2825927734375, + "learning_rate": 9.733587079350254e-06, + "loss": 107.7752, + "step": 24000 
+ }, + { + "epoch": 0.19861852173553377, + "grad_norm": 2160.167236328125, + "learning_rate": 9.73313266057847e-06, + "loss": 129.0158, + "step": 24010 + }, + { + "epoch": 0.19870124498490302, + "grad_norm": 1064.593994140625, + "learning_rate": 9.732677865213044e-06, + "loss": 141.8949, + "step": 24020 + }, + { + "epoch": 0.19878396823427225, + "grad_norm": 1279.8077392578125, + "learning_rate": 9.73222269329016e-06, + "loss": 153.0952, + "step": 24030 + }, + { + "epoch": 0.19886669148364147, + "grad_norm": 950.4849853515625, + "learning_rate": 9.731767144846034e-06, + "loss": 146.848, + "step": 24040 + }, + { + "epoch": 0.1989494147330107, + "grad_norm": 1283.0186767578125, + "learning_rate": 9.731311219916912e-06, + "loss": 150.6841, + "step": 24050 + }, + { + "epoch": 0.19903213798237995, + "grad_norm": 704.3133544921875, + "learning_rate": 9.730854918539072e-06, + "loss": 149.4796, + "step": 24060 + }, + { + "epoch": 0.19911486123174918, + "grad_norm": 846.5486450195312, + "learning_rate": 9.730398240748816e-06, + "loss": 138.2696, + "step": 24070 + }, + { + "epoch": 0.1991975844811184, + "grad_norm": 779.280029296875, + "learning_rate": 9.729941186582482e-06, + "loss": 114.9246, + "step": 24080 + }, + { + "epoch": 0.19928030773048766, + "grad_norm": 1280.6629638671875, + "learning_rate": 9.729483756076436e-06, + "loss": 96.8995, + "step": 24090 + }, + { + "epoch": 0.1993630309798569, + "grad_norm": 626.4833374023438, + "learning_rate": 9.729025949267072e-06, + "loss": 117.7161, + "step": 24100 + }, + { + "epoch": 0.1994457542292261, + "grad_norm": 866.7782592773438, + "learning_rate": 9.728567766190817e-06, + "loss": 110.1413, + "step": 24110 + }, + { + "epoch": 0.19952847747859537, + "grad_norm": 1430.4652099609375, + "learning_rate": 9.728109206884125e-06, + "loss": 122.4254, + "step": 24120 + }, + { + "epoch": 0.1996112007279646, + "grad_norm": 1178.0247802734375, + "learning_rate": 9.727650271383485e-06, + "loss": 131.602, + "step": 24130 + }, + { + "epoch": 0.19969392397733382, + "grad_norm": 944.5925903320312, + "learning_rate": 9.727190959725407e-06, + "loss": 91.2385, + "step": 24140 + }, + { + "epoch": 0.19977664722670307, + "grad_norm": 1111.4093017578125, + "learning_rate": 9.72673127194644e-06, + "loss": 185.1948, + "step": 24150 + }, + { + "epoch": 0.1998593704760723, + "grad_norm": 1125.5455322265625, + "learning_rate": 9.72627120808316e-06, + "loss": 157.4787, + "step": 24160 + }, + { + "epoch": 0.19994209372544153, + "grad_norm": 1090.2935791015625, + "learning_rate": 9.725810768172169e-06, + "loss": 106.9974, + "step": 24170 + }, + { + "epoch": 0.20002481697481078, + "grad_norm": 620.5934448242188, + "learning_rate": 9.725349952250105e-06, + "loss": 115.3673, + "step": 24180 + }, + { + "epoch": 0.20010754022418, + "grad_norm": 1263.1217041015625, + "learning_rate": 9.724888760353631e-06, + "loss": 148.2358, + "step": 24190 + }, + { + "epoch": 0.20019026347354923, + "grad_norm": 1106.949951171875, + "learning_rate": 9.72442719251944e-06, + "loss": 170.5474, + "step": 24200 + }, + { + "epoch": 0.2002729867229185, + "grad_norm": 1334.36962890625, + "learning_rate": 9.723965248784264e-06, + "loss": 149.0543, + "step": 24210 + }, + { + "epoch": 0.20035570997228772, + "grad_norm": 1211.2431640625, + "learning_rate": 9.723502929184851e-06, + "loss": 126.5367, + "step": 24220 + }, + { + "epoch": 0.20043843322165694, + "grad_norm": 863.3626708984375, + "learning_rate": 9.723040233757987e-06, + "loss": 142.1387, + "step": 24230 + }, + { + "epoch": 0.20052115647102617, + 
"grad_norm": 1154.0657958984375, + "learning_rate": 9.722577162540489e-06, + "loss": 123.5952, + "step": 24240 + }, + { + "epoch": 0.20060387972039542, + "grad_norm": 1012.69140625, + "learning_rate": 9.7221137155692e-06, + "loss": 110.8877, + "step": 24250 + }, + { + "epoch": 0.20068660296976465, + "grad_norm": 1386.55859375, + "learning_rate": 9.721649892880995e-06, + "loss": 170.2592, + "step": 24260 + }, + { + "epoch": 0.20076932621913388, + "grad_norm": 1342.9095458984375, + "learning_rate": 9.721185694512776e-06, + "loss": 134.4591, + "step": 24270 + }, + { + "epoch": 0.20085204946850313, + "grad_norm": 392.2961120605469, + "learning_rate": 9.720721120501478e-06, + "loss": 134.1603, + "step": 24280 + }, + { + "epoch": 0.20093477271787236, + "grad_norm": 826.9261474609375, + "learning_rate": 9.720256170884066e-06, + "loss": 101.2419, + "step": 24290 + }, + { + "epoch": 0.20101749596724158, + "grad_norm": 791.0903930664062, + "learning_rate": 9.719790845697534e-06, + "loss": 129.4301, + "step": 24300 + }, + { + "epoch": 0.20110021921661084, + "grad_norm": 1188.9549560546875, + "learning_rate": 9.719325144978907e-06, + "loss": 145.1762, + "step": 24310 + }, + { + "epoch": 0.20118294246598006, + "grad_norm": 1141.0123291015625, + "learning_rate": 9.718859068765234e-06, + "loss": 107.9242, + "step": 24320 + }, + { + "epoch": 0.2012656657153493, + "grad_norm": 1065.128173828125, + "learning_rate": 9.718392617093602e-06, + "loss": 113.2393, + "step": 24330 + }, + { + "epoch": 0.20134838896471854, + "grad_norm": 1253.3646240234375, + "learning_rate": 9.717925790001125e-06, + "loss": 77.6727, + "step": 24340 + }, + { + "epoch": 0.20143111221408777, + "grad_norm": 1093.85498046875, + "learning_rate": 9.717458587524946e-06, + "loss": 112.4173, + "step": 24350 + }, + { + "epoch": 0.201513835463457, + "grad_norm": 787.4635009765625, + "learning_rate": 9.716991009702236e-06, + "loss": 114.5591, + "step": 24360 + }, + { + "epoch": 0.20159655871282625, + "grad_norm": 926.3908081054688, + "learning_rate": 9.7165230565702e-06, + "loss": 123.4194, + "step": 24370 + }, + { + "epoch": 0.20167928196219548, + "grad_norm": 792.05859375, + "learning_rate": 9.71605472816607e-06, + "loss": 128.6131, + "step": 24380 + }, + { + "epoch": 0.2017620052115647, + "grad_norm": 835.9586791992188, + "learning_rate": 9.71558602452711e-06, + "loss": 125.1441, + "step": 24390 + }, + { + "epoch": 0.20184472846093396, + "grad_norm": 1247.4454345703125, + "learning_rate": 9.71511694569061e-06, + "loss": 166.359, + "step": 24400 + }, + { + "epoch": 0.20192745171030319, + "grad_norm": 828.7860107421875, + "learning_rate": 9.714647491693897e-06, + "loss": 140.4719, + "step": 24410 + }, + { + "epoch": 0.2020101749596724, + "grad_norm": 843.8282470703125, + "learning_rate": 9.714177662574316e-06, + "loss": 101.7997, + "step": 24420 + }, + { + "epoch": 0.20209289820904164, + "grad_norm": 967.5045776367188, + "learning_rate": 9.713707458369258e-06, + "loss": 124.0601, + "step": 24430 + }, + { + "epoch": 0.2021756214584109, + "grad_norm": 692.8041381835938, + "learning_rate": 9.713236879116127e-06, + "loss": 120.8918, + "step": 24440 + }, + { + "epoch": 0.20225834470778012, + "grad_norm": 1022.600341796875, + "learning_rate": 9.71276592485237e-06, + "loss": 128.2274, + "step": 24450 + }, + { + "epoch": 0.20234106795714935, + "grad_norm": 1309.6890869140625, + "learning_rate": 9.712294595615458e-06, + "loss": 134.2852, + "step": 24460 + }, + { + "epoch": 0.2024237912065186, + "grad_norm": 596.5182495117188, + "learning_rate": 
9.711822891442887e-06, + "loss": 141.3561, + "step": 24470 + }, + { + "epoch": 0.20250651445588783, + "grad_norm": 1456.3428955078125, + "learning_rate": 9.711350812372198e-06, + "loss": 166.3525, + "step": 24480 + }, + { + "epoch": 0.20258923770525705, + "grad_norm": 1364.7294921875, + "learning_rate": 9.710878358440945e-06, + "loss": 178.6096, + "step": 24490 + }, + { + "epoch": 0.2026719609546263, + "grad_norm": 941.8969116210938, + "learning_rate": 9.710405529686722e-06, + "loss": 154.1988, + "step": 24500 + }, + { + "epoch": 0.20275468420399553, + "grad_norm": 1231.3597412109375, + "learning_rate": 9.709932326147147e-06, + "loss": 109.8311, + "step": 24510 + }, + { + "epoch": 0.20283740745336476, + "grad_norm": 981.5293579101562, + "learning_rate": 9.709458747859874e-06, + "loss": 106.3588, + "step": 24520 + }, + { + "epoch": 0.20292013070273401, + "grad_norm": 882.2429809570312, + "learning_rate": 9.708984794862581e-06, + "loss": 106.4644, + "step": 24530 + }, + { + "epoch": 0.20300285395210324, + "grad_norm": 1536.478271484375, + "learning_rate": 9.708510467192981e-06, + "loss": 115.3478, + "step": 24540 + }, + { + "epoch": 0.20308557720147247, + "grad_norm": 480.1463928222656, + "learning_rate": 9.70803576488881e-06, + "loss": 131.8311, + "step": 24550 + }, + { + "epoch": 0.20316830045084172, + "grad_norm": 751.3448486328125, + "learning_rate": 9.707560687987843e-06, + "loss": 100.0164, + "step": 24560 + }, + { + "epoch": 0.20325102370021095, + "grad_norm": 983.8947143554688, + "learning_rate": 9.707085236527873e-06, + "loss": 124.7845, + "step": 24570 + }, + { + "epoch": 0.20333374694958017, + "grad_norm": 778.719970703125, + "learning_rate": 9.706609410546736e-06, + "loss": 70.2131, + "step": 24580 + }, + { + "epoch": 0.20341647019894943, + "grad_norm": 799.94580078125, + "learning_rate": 9.706133210082288e-06, + "loss": 107.5597, + "step": 24590 + }, + { + "epoch": 0.20349919344831865, + "grad_norm": 775.4839477539062, + "learning_rate": 9.705656635172418e-06, + "loss": 123.562, + "step": 24600 + }, + { + "epoch": 0.20358191669768788, + "grad_norm": 906.2435302734375, + "learning_rate": 9.705179685855048e-06, + "loss": 141.3586, + "step": 24610 + }, + { + "epoch": 0.2036646399470571, + "grad_norm": 572.1375732421875, + "learning_rate": 9.704702362168121e-06, + "loss": 174.9464, + "step": 24620 + }, + { + "epoch": 0.20374736319642636, + "grad_norm": 1068.9876708984375, + "learning_rate": 9.704224664149621e-06, + "loss": 122.0626, + "step": 24630 + }, + { + "epoch": 0.2038300864457956, + "grad_norm": 606.3220825195312, + "learning_rate": 9.703746591837552e-06, + "loss": 109.6907, + "step": 24640 + }, + { + "epoch": 0.20391280969516482, + "grad_norm": 1032.2161865234375, + "learning_rate": 9.703268145269957e-06, + "loss": 126.0382, + "step": 24650 + }, + { + "epoch": 0.20399553294453407, + "grad_norm": 1022.0555419921875, + "learning_rate": 9.702789324484898e-06, + "loss": 188.598, + "step": 24660 + }, + { + "epoch": 0.2040782561939033, + "grad_norm": 1047.5244140625, + "learning_rate": 9.702310129520476e-06, + "loss": 122.4435, + "step": 24670 + }, + { + "epoch": 0.20416097944327252, + "grad_norm": 1051.2161865234375, + "learning_rate": 9.701830560414817e-06, + "loss": 144.9207, + "step": 24680 + }, + { + "epoch": 0.20424370269264178, + "grad_norm": 1291.77099609375, + "learning_rate": 9.701350617206081e-06, + "loss": 141.4524, + "step": 24690 + }, + { + "epoch": 0.204326425942011, + "grad_norm": 1048.1988525390625, + "learning_rate": 9.700870299932453e-06, + "loss": 133.1601, + 
"step": 24700 + }, + { + "epoch": 0.20440914919138023, + "grad_norm": 1616.698486328125, + "learning_rate": 9.700389608632146e-06, + "loss": 159.5847, + "step": 24710 + }, + { + "epoch": 0.20449187244074948, + "grad_norm": 863.6107788085938, + "learning_rate": 9.699908543343413e-06, + "loss": 135.6566, + "step": 24720 + }, + { + "epoch": 0.2045745956901187, + "grad_norm": 1772.513916015625, + "learning_rate": 9.699427104104525e-06, + "loss": 202.6929, + "step": 24730 + }, + { + "epoch": 0.20465731893948794, + "grad_norm": 822.7219848632812, + "learning_rate": 9.698945290953789e-06, + "loss": 140.5493, + "step": 24740 + }, + { + "epoch": 0.2047400421888572, + "grad_norm": 811.2693481445312, + "learning_rate": 9.698463103929542e-06, + "loss": 81.8386, + "step": 24750 + }, + { + "epoch": 0.20482276543822642, + "grad_norm": 530.8605346679688, + "learning_rate": 9.69798054307015e-06, + "loss": 106.7644, + "step": 24760 + }, + { + "epoch": 0.20490548868759564, + "grad_norm": 2197.434326171875, + "learning_rate": 9.697497608414007e-06, + "loss": 189.2195, + "step": 24770 + }, + { + "epoch": 0.20498821193696487, + "grad_norm": 1628.6551513671875, + "learning_rate": 9.697014299999536e-06, + "loss": 110.0945, + "step": 24780 + }, + { + "epoch": 0.20507093518633412, + "grad_norm": 988.3732299804688, + "learning_rate": 9.696530617865197e-06, + "loss": 147.2918, + "step": 24790 + }, + { + "epoch": 0.20515365843570335, + "grad_norm": 934.0083618164062, + "learning_rate": 9.696046562049469e-06, + "loss": 124.3846, + "step": 24800 + }, + { + "epoch": 0.20523638168507258, + "grad_norm": 1064.4521484375, + "learning_rate": 9.695562132590865e-06, + "loss": 130.5005, + "step": 24810 + }, + { + "epoch": 0.20531910493444183, + "grad_norm": 936.8753662109375, + "learning_rate": 9.695077329527936e-06, + "loss": 79.4409, + "step": 24820 + }, + { + "epoch": 0.20540182818381106, + "grad_norm": 2023.431396484375, + "learning_rate": 9.694592152899249e-06, + "loss": 135.3671, + "step": 24830 + }, + { + "epoch": 0.20548455143318028, + "grad_norm": 1526.9307861328125, + "learning_rate": 9.694106602743411e-06, + "loss": 141.677, + "step": 24840 + }, + { + "epoch": 0.20556727468254954, + "grad_norm": 1296.6025390625, + "learning_rate": 9.693620679099055e-06, + "loss": 106.9513, + "step": 24850 + }, + { + "epoch": 0.20564999793191877, + "grad_norm": 675.1536865234375, + "learning_rate": 9.693134382004839e-06, + "loss": 122.0934, + "step": 24860 + }, + { + "epoch": 0.205732721181288, + "grad_norm": 1233.307373046875, + "learning_rate": 9.69264771149946e-06, + "loss": 152.0633, + "step": 24870 + }, + { + "epoch": 0.20581544443065725, + "grad_norm": 2084.801025390625, + "learning_rate": 9.692160667621639e-06, + "loss": 191.7889, + "step": 24880 + }, + { + "epoch": 0.20589816768002647, + "grad_norm": 1063.7586669921875, + "learning_rate": 9.69167325041013e-06, + "loss": 98.7458, + "step": 24890 + }, + { + "epoch": 0.2059808909293957, + "grad_norm": 1160.4176025390625, + "learning_rate": 9.69118545990371e-06, + "loss": 136.4698, + "step": 24900 + }, + { + "epoch": 0.20606361417876495, + "grad_norm": 1240.2447509765625, + "learning_rate": 9.690697296141194e-06, + "loss": 131.5934, + "step": 24910 + }, + { + "epoch": 0.20614633742813418, + "grad_norm": 1285.3697509765625, + "learning_rate": 9.690208759161418e-06, + "loss": 143.4328, + "step": 24920 + }, + { + "epoch": 0.2062290606775034, + "grad_norm": 899.1880493164062, + "learning_rate": 9.689719849003261e-06, + "loss": 150.501, + "step": 24930 + }, + { + "epoch": 
0.20631178392687266, + "grad_norm": 1172.1856689453125, + "learning_rate": 9.689230565705617e-06, + "loss": 88.1189, + "step": 24940 + }, + { + "epoch": 0.2063945071762419, + "grad_norm": 940.1471557617188, + "learning_rate": 9.688740909307416e-06, + "loss": 126.9263, + "step": 24950 + }, + { + "epoch": 0.2064772304256111, + "grad_norm": 1031.7501220703125, + "learning_rate": 9.68825087984762e-06, + "loss": 145.9387, + "step": 24960 + }, + { + "epoch": 0.20655995367498034, + "grad_norm": 747.2893676757812, + "learning_rate": 9.687760477365217e-06, + "loss": 111.3116, + "step": 24970 + }, + { + "epoch": 0.2066426769243496, + "grad_norm": 1009.5612182617188, + "learning_rate": 9.687269701899228e-06, + "loss": 122.0988, + "step": 24980 + }, + { + "epoch": 0.20672540017371882, + "grad_norm": 1089.2601318359375, + "learning_rate": 9.6867785534887e-06, + "loss": 118.2881, + "step": 24990 + }, + { + "epoch": 0.20680812342308805, + "grad_norm": 1918.05908203125, + "learning_rate": 9.686287032172712e-06, + "loss": 119.0276, + "step": 25000 + }, + { + "epoch": 0.2068908466724573, + "grad_norm": 1754.23486328125, + "learning_rate": 9.685795137990372e-06, + "loss": 149.8688, + "step": 25010 + }, + { + "epoch": 0.20697356992182653, + "grad_norm": 701.3511352539062, + "learning_rate": 9.685302870980819e-06, + "loss": 111.5625, + "step": 25020 + }, + { + "epoch": 0.20705629317119575, + "grad_norm": 1214.149658203125, + "learning_rate": 9.684810231183218e-06, + "loss": 104.316, + "step": 25030 + }, + { + "epoch": 0.207139016420565, + "grad_norm": 1371.7374267578125, + "learning_rate": 9.684317218636767e-06, + "loss": 137.7989, + "step": 25040 + }, + { + "epoch": 0.20722173966993424, + "grad_norm": 976.9849243164062, + "learning_rate": 9.683823833380692e-06, + "loss": 107.9419, + "step": 25050 + }, + { + "epoch": 0.20730446291930346, + "grad_norm": 2080.160400390625, + "learning_rate": 9.683330075454252e-06, + "loss": 134.3795, + "step": 25060 + }, + { + "epoch": 0.20738718616867272, + "grad_norm": 1066.323974609375, + "learning_rate": 9.68283594489673e-06, + "loss": 113.7562, + "step": 25070 + }, + { + "epoch": 0.20746990941804194, + "grad_norm": 1623.5059814453125, + "learning_rate": 9.682341441747446e-06, + "loss": 124.4294, + "step": 25080 + }, + { + "epoch": 0.20755263266741117, + "grad_norm": 1231.952880859375, + "learning_rate": 9.68184656604574e-06, + "loss": 168.1638, + "step": 25090 + }, + { + "epoch": 0.20763535591678042, + "grad_norm": 604.9308471679688, + "learning_rate": 9.681351317830991e-06, + "loss": 150.3729, + "step": 25100 + }, + { + "epoch": 0.20771807916614965, + "grad_norm": 915.5381469726562, + "learning_rate": 9.680855697142601e-06, + "loss": 89.7146, + "step": 25110 + }, + { + "epoch": 0.20780080241551888, + "grad_norm": 517.771484375, + "learning_rate": 9.680359704020005e-06, + "loss": 110.3232, + "step": 25120 + }, + { + "epoch": 0.20788352566488813, + "grad_norm": 999.2185668945312, + "learning_rate": 9.67986333850267e-06, + "loss": 155.6853, + "step": 25130 + }, + { + "epoch": 0.20796624891425736, + "grad_norm": 837.3745727539062, + "learning_rate": 9.679366600630085e-06, + "loss": 123.4482, + "step": 25140 + }, + { + "epoch": 0.20804897216362658, + "grad_norm": 1082.25146484375, + "learning_rate": 9.678869490441775e-06, + "loss": 92.413, + "step": 25150 + }, + { + "epoch": 0.2081316954129958, + "grad_norm": 1187.2119140625, + "learning_rate": 9.678372007977292e-06, + "loss": 130.2228, + "step": 25160 + }, + { + "epoch": 0.20821441866236506, + "grad_norm": 1067.64306640625, + 
"learning_rate": 9.67787415327622e-06, + "loss": 106.1636, + "step": 25170 + }, + { + "epoch": 0.2082971419117343, + "grad_norm": 1119.31640625, + "learning_rate": 9.67737592637817e-06, + "loss": 125.416, + "step": 25180 + }, + { + "epoch": 0.20837986516110352, + "grad_norm": 1132.6695556640625, + "learning_rate": 9.676877327322785e-06, + "loss": 121.1855, + "step": 25190 + }, + { + "epoch": 0.20846258841047277, + "grad_norm": 921.9434204101562, + "learning_rate": 9.676378356149733e-06, + "loss": 119.0814, + "step": 25200 + }, + { + "epoch": 0.208545311659842, + "grad_norm": 1111.28857421875, + "learning_rate": 9.675879012898719e-06, + "loss": 112.7059, + "step": 25210 + }, + { + "epoch": 0.20862803490921122, + "grad_norm": 952.7010498046875, + "learning_rate": 9.67537929760947e-06, + "loss": 135.5189, + "step": 25220 + }, + { + "epoch": 0.20871075815858048, + "grad_norm": 668.4132080078125, + "learning_rate": 9.674879210321747e-06, + "loss": 126.6339, + "step": 25230 + }, + { + "epoch": 0.2087934814079497, + "grad_norm": 1562.3443603515625, + "learning_rate": 9.67437875107534e-06, + "loss": 157.3701, + "step": 25240 + }, + { + "epoch": 0.20887620465731893, + "grad_norm": 787.549072265625, + "learning_rate": 9.673877919910069e-06, + "loss": 144.3811, + "step": 25250 + }, + { + "epoch": 0.20895892790668819, + "grad_norm": 1168.210693359375, + "learning_rate": 9.673376716865781e-06, + "loss": 147.3342, + "step": 25260 + }, + { + "epoch": 0.2090416511560574, + "grad_norm": 922.6990356445312, + "learning_rate": 9.672875141982358e-06, + "loss": 134.1699, + "step": 25270 + }, + { + "epoch": 0.20912437440542664, + "grad_norm": 920.8440551757812, + "learning_rate": 9.672373195299704e-06, + "loss": 90.3388, + "step": 25280 + }, + { + "epoch": 0.2092070976547959, + "grad_norm": 742.2246704101562, + "learning_rate": 9.67187087685776e-06, + "loss": 127.1734, + "step": 25290 + }, + { + "epoch": 0.20928982090416512, + "grad_norm": 1413.235107421875, + "learning_rate": 9.671368186696488e-06, + "loss": 121.8572, + "step": 25300 + }, + { + "epoch": 0.20937254415353435, + "grad_norm": 1089.089111328125, + "learning_rate": 9.670865124855889e-06, + "loss": 155.726, + "step": 25310 + }, + { + "epoch": 0.2094552674029036, + "grad_norm": 1251.3277587890625, + "learning_rate": 9.67036169137599e-06, + "loss": 134.8554, + "step": 25320 + }, + { + "epoch": 0.20953799065227283, + "grad_norm": 975.4923706054688, + "learning_rate": 9.669857886296842e-06, + "loss": 137.952, + "step": 25330 + }, + { + "epoch": 0.20962071390164205, + "grad_norm": 1131.67822265625, + "learning_rate": 9.669353709658537e-06, + "loss": 119.2049, + "step": 25340 + }, + { + "epoch": 0.20970343715101128, + "grad_norm": 637.6780395507812, + "learning_rate": 9.668849161501186e-06, + "loss": 104.5548, + "step": 25350 + }, + { + "epoch": 0.20978616040038053, + "grad_norm": 1121.5142822265625, + "learning_rate": 9.668344241864934e-06, + "loss": 115.9487, + "step": 25360 + }, + { + "epoch": 0.20986888364974976, + "grad_norm": 937.0314331054688, + "learning_rate": 9.667838950789957e-06, + "loss": 112.8814, + "step": 25370 + }, + { + "epoch": 0.209951606899119, + "grad_norm": 1264.4849853515625, + "learning_rate": 9.667333288316454e-06, + "loss": 135.4153, + "step": 25380 + }, + { + "epoch": 0.21003433014848824, + "grad_norm": 495.7436828613281, + "learning_rate": 9.666827254484663e-06, + "loss": 111.5311, + "step": 25390 + }, + { + "epoch": 0.21011705339785747, + "grad_norm": 853.3590087890625, + "learning_rate": 9.666320849334846e-06, + "loss": 
102.2723, + "step": 25400 + }, + { + "epoch": 0.2101997766472267, + "grad_norm": 1231.4791259765625, + "learning_rate": 9.665814072907293e-06, + "loss": 118.1443, + "step": 25410 + }, + { + "epoch": 0.21028249989659595, + "grad_norm": 688.8372192382812, + "learning_rate": 9.665306925242329e-06, + "loss": 120.3173, + "step": 25420 + }, + { + "epoch": 0.21036522314596517, + "grad_norm": 1288.0115966796875, + "learning_rate": 9.664799406380302e-06, + "loss": 129.2124, + "step": 25430 + }, + { + "epoch": 0.2104479463953344, + "grad_norm": 734.3385009765625, + "learning_rate": 9.664291516361597e-06, + "loss": 141.6067, + "step": 25440 + }, + { + "epoch": 0.21053066964470366, + "grad_norm": 808.9627685546875, + "learning_rate": 9.663783255226622e-06, + "loss": 128.5043, + "step": 25450 + }, + { + "epoch": 0.21061339289407288, + "grad_norm": 2131.609619140625, + "learning_rate": 9.663274623015816e-06, + "loss": 130.3591, + "step": 25460 + }, + { + "epoch": 0.2106961161434421, + "grad_norm": 1364.452880859375, + "learning_rate": 9.662765619769651e-06, + "loss": 186.9397, + "step": 25470 + }, + { + "epoch": 0.21077883939281136, + "grad_norm": 1547.8067626953125, + "learning_rate": 9.662256245528622e-06, + "loss": 130.2646, + "step": 25480 + }, + { + "epoch": 0.2108615626421806, + "grad_norm": 760.1724853515625, + "learning_rate": 9.661746500333265e-06, + "loss": 107.9425, + "step": 25490 + }, + { + "epoch": 0.21094428589154982, + "grad_norm": 691.5214233398438, + "learning_rate": 9.66123638422413e-06, + "loss": 131.0554, + "step": 25500 + }, + { + "epoch": 0.21102700914091904, + "grad_norm": 1053.4617919921875, + "learning_rate": 9.66072589724181e-06, + "loss": 139.2496, + "step": 25510 + }, + { + "epoch": 0.2111097323902883, + "grad_norm": 903.3119506835938, + "learning_rate": 9.66021503942692e-06, + "loss": 103.5156, + "step": 25520 + }, + { + "epoch": 0.21119245563965752, + "grad_norm": 1562.44482421875, + "learning_rate": 9.659703810820105e-06, + "loss": 154.1499, + "step": 25530 + }, + { + "epoch": 0.21127517888902675, + "grad_norm": 724.2481079101562, + "learning_rate": 9.659192211462043e-06, + "loss": 122.7316, + "step": 25540 + }, + { + "epoch": 0.211357902138396, + "grad_norm": 1171.6414794921875, + "learning_rate": 9.658680241393441e-06, + "loss": 112.3694, + "step": 25550 + }, + { + "epoch": 0.21144062538776523, + "grad_norm": 687.9328002929688, + "learning_rate": 9.658167900655032e-06, + "loss": 85.1699, + "step": 25560 + }, + { + "epoch": 0.21152334863713446, + "grad_norm": 1203.814208984375, + "learning_rate": 9.657655189287582e-06, + "loss": 111.8283, + "step": 25570 + }, + { + "epoch": 0.2116060718865037, + "grad_norm": 2333.49365234375, + "learning_rate": 9.657142107331883e-06, + "loss": 168.9521, + "step": 25580 + }, + { + "epoch": 0.21168879513587294, + "grad_norm": 919.4609375, + "learning_rate": 9.65662865482876e-06, + "loss": 110.3846, + "step": 25590 + }, + { + "epoch": 0.21177151838524216, + "grad_norm": 1061.5003662109375, + "learning_rate": 9.656114831819067e-06, + "loss": 133.6754, + "step": 25600 + }, + { + "epoch": 0.21185424163461142, + "grad_norm": 498.1741638183594, + "learning_rate": 9.655600638343685e-06, + "loss": 109.6762, + "step": 25610 + }, + { + "epoch": 0.21193696488398064, + "grad_norm": 946.8280639648438, + "learning_rate": 9.655086074443527e-06, + "loss": 183.0106, + "step": 25620 + }, + { + "epoch": 0.21201968813334987, + "grad_norm": 1837.9036865234375, + "learning_rate": 9.654571140159534e-06, + "loss": 122.9791, + "step": 25630 + }, + { + "epoch": 
0.21210241138271912, + "grad_norm": 1461.47900390625, + "learning_rate": 9.654055835532676e-06, + "loss": 191.4103, + "step": 25640 + }, + { + "epoch": 0.21218513463208835, + "grad_norm": 820.8297119140625, + "learning_rate": 9.653540160603956e-06, + "loss": 115.1019, + "step": 25650 + }, + { + "epoch": 0.21226785788145758, + "grad_norm": 1157.8956298828125, + "learning_rate": 9.653024115414402e-06, + "loss": 142.3736, + "step": 25660 + }, + { + "epoch": 0.21235058113082683, + "grad_norm": 917.7392578125, + "learning_rate": 9.652507700005072e-06, + "loss": 130.4384, + "step": 25670 + }, + { + "epoch": 0.21243330438019606, + "grad_norm": 1544.7633056640625, + "learning_rate": 9.651990914417057e-06, + "loss": 135.7345, + "step": 25680 + }, + { + "epoch": 0.21251602762956529, + "grad_norm": 768.8818359375, + "learning_rate": 9.651473758691477e-06, + "loss": 99.1156, + "step": 25690 + }, + { + "epoch": 0.2125987508789345, + "grad_norm": 1055.4837646484375, + "learning_rate": 9.650956232869475e-06, + "loss": 113.1447, + "step": 25700 + }, + { + "epoch": 0.21268147412830377, + "grad_norm": 1085.100341796875, + "learning_rate": 9.650438336992231e-06, + "loss": 156.4204, + "step": 25710 + }, + { + "epoch": 0.212764197377673, + "grad_norm": 535.8530883789062, + "learning_rate": 9.64992007110095e-06, + "loss": 113.1042, + "step": 25720 + }, + { + "epoch": 0.21284692062704222, + "grad_norm": 1140.1822509765625, + "learning_rate": 9.64940143523687e-06, + "loss": 134.6167, + "step": 25730 + }, + { + "epoch": 0.21292964387641147, + "grad_norm": 1358.8885498046875, + "learning_rate": 9.648882429441258e-06, + "loss": 133.8381, + "step": 25740 + }, + { + "epoch": 0.2130123671257807, + "grad_norm": 1481.0440673828125, + "learning_rate": 9.648363053755406e-06, + "loss": 120.2028, + "step": 25750 + }, + { + "epoch": 0.21309509037514993, + "grad_norm": 1080.2623291015625, + "learning_rate": 9.647843308220636e-06, + "loss": 105.3537, + "step": 25760 + }, + { + "epoch": 0.21317781362451918, + "grad_norm": 712.189453125, + "learning_rate": 9.647323192878306e-06, + "loss": 101.1071, + "step": 25770 + }, + { + "epoch": 0.2132605368738884, + "grad_norm": 1282.9239501953125, + "learning_rate": 9.646802707769798e-06, + "loss": 121.2276, + "step": 25780 + }, + { + "epoch": 0.21334326012325763, + "grad_norm": 558.678955078125, + "learning_rate": 9.646281852936525e-06, + "loss": 92.6775, + "step": 25790 + }, + { + "epoch": 0.2134259833726269, + "grad_norm": 994.920654296875, + "learning_rate": 9.64576062841993e-06, + "loss": 124.3649, + "step": 25800 + }, + { + "epoch": 0.2135087066219961, + "grad_norm": 1420.953857421875, + "learning_rate": 9.64523903426148e-06, + "loss": 122.571, + "step": 25810 + }, + { + "epoch": 0.21359142987136534, + "grad_norm": 859.2244873046875, + "learning_rate": 9.64471707050268e-06, + "loss": 126.6968, + "step": 25820 + }, + { + "epoch": 0.2136741531207346, + "grad_norm": 1234.9310302734375, + "learning_rate": 9.644194737185058e-06, + "loss": 139.6012, + "step": 25830 + }, + { + "epoch": 0.21375687637010382, + "grad_norm": 1804.944091796875, + "learning_rate": 9.643672034350177e-06, + "loss": 150.6442, + "step": 25840 + }, + { + "epoch": 0.21383959961947305, + "grad_norm": 1387.7882080078125, + "learning_rate": 9.643148962039622e-06, + "loss": 93.8409, + "step": 25850 + }, + { + "epoch": 0.2139223228688423, + "grad_norm": 1455.9395751953125, + "learning_rate": 9.642625520295014e-06, + "loss": 174.9808, + "step": 25860 + }, + { + "epoch": 0.21400504611821153, + "grad_norm": 1184.424072265625, 
+ "learning_rate": 9.642101709158001e-06, + "loss": 129.4004, + "step": 25870 + }, + { + "epoch": 0.21408776936758075, + "grad_norm": 1059.5274658203125, + "learning_rate": 9.641577528670257e-06, + "loss": 129.7015, + "step": 25880 + }, + { + "epoch": 0.21417049261694998, + "grad_norm": 753.4532470703125, + "learning_rate": 9.641052978873494e-06, + "loss": 109.8265, + "step": 25890 + }, + { + "epoch": 0.21425321586631924, + "grad_norm": 676.7515258789062, + "learning_rate": 9.640528059809442e-06, + "loss": 74.3417, + "step": 25900 + }, + { + "epoch": 0.21433593911568846, + "grad_norm": 2531.762939453125, + "learning_rate": 9.640002771519872e-06, + "loss": 133.8275, + "step": 25910 + }, + { + "epoch": 0.2144186623650577, + "grad_norm": 1403.41943359375, + "learning_rate": 9.639477114046575e-06, + "loss": 164.8685, + "step": 25920 + }, + { + "epoch": 0.21450138561442694, + "grad_norm": 922.8995971679688, + "learning_rate": 9.638951087431376e-06, + "loss": 114.9469, + "step": 25930 + }, + { + "epoch": 0.21458410886379617, + "grad_norm": 1005.201416015625, + "learning_rate": 9.638424691716129e-06, + "loss": 144.4065, + "step": 25940 + }, + { + "epoch": 0.2146668321131654, + "grad_norm": 917.942626953125, + "learning_rate": 9.637897926942716e-06, + "loss": 140.8008, + "step": 25950 + }, + { + "epoch": 0.21474955536253465, + "grad_norm": 1009.6735229492188, + "learning_rate": 9.637370793153051e-06, + "loss": 132.449, + "step": 25960 + }, + { + "epoch": 0.21483227861190388, + "grad_norm": 904.17529296875, + "learning_rate": 9.636843290389076e-06, + "loss": 114.3653, + "step": 25970 + }, + { + "epoch": 0.2149150018612731, + "grad_norm": 1091.709716796875, + "learning_rate": 9.636315418692759e-06, + "loss": 108.0948, + "step": 25980 + }, + { + "epoch": 0.21499772511064236, + "grad_norm": 2188.771240234375, + "learning_rate": 9.635787178106102e-06, + "loss": 107.5951, + "step": 25990 + }, + { + "epoch": 0.21508044836001158, + "grad_norm": 1075.4591064453125, + "learning_rate": 9.635258568671135e-06, + "loss": 137.4553, + "step": 26000 + }, + { + "epoch": 0.2151631716093808, + "grad_norm": 916.110107421875, + "learning_rate": 9.634729590429917e-06, + "loss": 123.9193, + "step": 26010 + }, + { + "epoch": 0.21524589485875006, + "grad_norm": 1345.30615234375, + "learning_rate": 9.634200243424535e-06, + "loss": 139.9196, + "step": 26020 + }, + { + "epoch": 0.2153286181081193, + "grad_norm": 1103.3697509765625, + "learning_rate": 9.633670527697108e-06, + "loss": 130.9413, + "step": 26030 + }, + { + "epoch": 0.21541134135748852, + "grad_norm": 1355.6485595703125, + "learning_rate": 9.633140443289784e-06, + "loss": 190.8187, + "step": 26040 + }, + { + "epoch": 0.21549406460685774, + "grad_norm": 575.96142578125, + "learning_rate": 9.632609990244737e-06, + "loss": 103.7051, + "step": 26050 + }, + { + "epoch": 0.215576787856227, + "grad_norm": 779.9686889648438, + "learning_rate": 9.632079168604175e-06, + "loss": 109.2936, + "step": 26060 + }, + { + "epoch": 0.21565951110559622, + "grad_norm": 1010.6686401367188, + "learning_rate": 9.63154797841033e-06, + "loss": 159.4392, + "step": 26070 + }, + { + "epoch": 0.21574223435496545, + "grad_norm": 826.314208984375, + "learning_rate": 9.63101641970547e-06, + "loss": 129.3246, + "step": 26080 + }, + { + "epoch": 0.2158249576043347, + "grad_norm": 898.4963989257812, + "learning_rate": 9.630484492531886e-06, + "loss": 129.947, + "step": 26090 + }, + { + "epoch": 0.21590768085370393, + "grad_norm": 843.03857421875, + "learning_rate": 9.629952196931902e-06, + "loss": 
113.0155, + "step": 26100 + }, + { + "epoch": 0.21599040410307316, + "grad_norm": 660.1622314453125, + "learning_rate": 9.629419532947872e-06, + "loss": 156.4598, + "step": 26110 + }, + { + "epoch": 0.2160731273524424, + "grad_norm": 1190.5133056640625, + "learning_rate": 9.628886500622174e-06, + "loss": 128.8638, + "step": 26120 + }, + { + "epoch": 0.21615585060181164, + "grad_norm": 924.6058959960938, + "learning_rate": 9.62835309999722e-06, + "loss": 99.8876, + "step": 26130 + }, + { + "epoch": 0.21623857385118087, + "grad_norm": 1131.3111572265625, + "learning_rate": 9.627819331115453e-06, + "loss": 126.6344, + "step": 26140 + }, + { + "epoch": 0.21632129710055012, + "grad_norm": 784.7423706054688, + "learning_rate": 9.627285194019342e-06, + "loss": 102.5163, + "step": 26150 + }, + { + "epoch": 0.21640402034991935, + "grad_norm": 1271.62890625, + "learning_rate": 9.626750688751382e-06, + "loss": 115.6172, + "step": 26160 + }, + { + "epoch": 0.21648674359928857, + "grad_norm": 1133.4263916015625, + "learning_rate": 9.626215815354104e-06, + "loss": 98.0378, + "step": 26170 + }, + { + "epoch": 0.21656946684865783, + "grad_norm": 822.8490600585938, + "learning_rate": 9.625680573870067e-06, + "loss": 123.6515, + "step": 26180 + }, + { + "epoch": 0.21665219009802705, + "grad_norm": 796.753662109375, + "learning_rate": 9.625144964341853e-06, + "loss": 127.32, + "step": 26190 + }, + { + "epoch": 0.21673491334739628, + "grad_norm": 976.165771484375, + "learning_rate": 9.624608986812082e-06, + "loss": 113.2206, + "step": 26200 + }, + { + "epoch": 0.21681763659676553, + "grad_norm": 1549.4820556640625, + "learning_rate": 9.624072641323398e-06, + "loss": 121.0571, + "step": 26210 + }, + { + "epoch": 0.21690035984613476, + "grad_norm": 1164.861328125, + "learning_rate": 9.623535927918474e-06, + "loss": 168.508, + "step": 26220 + }, + { + "epoch": 0.216983083095504, + "grad_norm": 1254.7081298828125, + "learning_rate": 9.622998846640018e-06, + "loss": 114.7848, + "step": 26230 + }, + { + "epoch": 0.2170658063448732, + "grad_norm": 744.2569580078125, + "learning_rate": 9.62246139753076e-06, + "loss": 125.7201, + "step": 26240 + }, + { + "epoch": 0.21714852959424247, + "grad_norm": 1193.7801513671875, + "learning_rate": 9.621923580633462e-06, + "loss": 109.2309, + "step": 26250 + }, + { + "epoch": 0.2172312528436117, + "grad_norm": 933.6427612304688, + "learning_rate": 9.621385395990915e-06, + "loss": 137.9964, + "step": 26260 + }, + { + "epoch": 0.21731397609298092, + "grad_norm": 1021.1292724609375, + "learning_rate": 9.620846843645944e-06, + "loss": 105.2249, + "step": 26270 + }, + { + "epoch": 0.21739669934235017, + "grad_norm": 1435.4403076171875, + "learning_rate": 9.620307923641395e-06, + "loss": 120.3464, + "step": 26280 + }, + { + "epoch": 0.2174794225917194, + "grad_norm": 1647.5338134765625, + "learning_rate": 9.61976863602015e-06, + "loss": 115.3988, + "step": 26290 + }, + { + "epoch": 0.21756214584108863, + "grad_norm": 729.9489135742188, + "learning_rate": 9.619228980825114e-06, + "loss": 171.7545, + "step": 26300 + }, + { + "epoch": 0.21764486909045788, + "grad_norm": 1271.6209716796875, + "learning_rate": 9.61868895809923e-06, + "loss": 120.3744, + "step": 26310 + }, + { + "epoch": 0.2177275923398271, + "grad_norm": 844.6718139648438, + "learning_rate": 9.618148567885462e-06, + "loss": 142.1199, + "step": 26320 + }, + { + "epoch": 0.21781031558919633, + "grad_norm": 837.2586669921875, + "learning_rate": 9.617607810226806e-06, + "loss": 154.3331, + "step": 26330 + }, + { + "epoch": 
0.2178930388385656, + "grad_norm": 1400.4326171875, + "learning_rate": 9.61706668516629e-06, + "loss": 124.3974, + "step": 26340 + }, + { + "epoch": 0.21797576208793482, + "grad_norm": 1115.8116455078125, + "learning_rate": 9.616525192746965e-06, + "loss": 105.8458, + "step": 26350 + }, + { + "epoch": 0.21805848533730404, + "grad_norm": 1421.916748046875, + "learning_rate": 9.61598333301192e-06, + "loss": 156.9738, + "step": 26360 + }, + { + "epoch": 0.2181412085866733, + "grad_norm": 798.7007446289062, + "learning_rate": 9.615441106004264e-06, + "loss": 111.3176, + "step": 26370 + }, + { + "epoch": 0.21822393183604252, + "grad_norm": 1311.8187255859375, + "learning_rate": 9.614898511767142e-06, + "loss": 112.8957, + "step": 26380 + }, + { + "epoch": 0.21830665508541175, + "grad_norm": 1265.518798828125, + "learning_rate": 9.614355550343724e-06, + "loss": 97.6749, + "step": 26390 + }, + { + "epoch": 0.218389378334781, + "grad_norm": 1181.8995361328125, + "learning_rate": 9.613812221777212e-06, + "loss": 126.788, + "step": 26400 + }, + { + "epoch": 0.21847210158415023, + "grad_norm": 921.5441284179688, + "learning_rate": 9.613268526110838e-06, + "loss": 126.4273, + "step": 26410 + }, + { + "epoch": 0.21855482483351946, + "grad_norm": 2307.983642578125, + "learning_rate": 9.612724463387857e-06, + "loss": 124.0576, + "step": 26420 + }, + { + "epoch": 0.21863754808288868, + "grad_norm": 1391.1617431640625, + "learning_rate": 9.612180033651561e-06, + "loss": 122.5418, + "step": 26430 + }, + { + "epoch": 0.21872027133225794, + "grad_norm": 883.1130981445312, + "learning_rate": 9.611635236945267e-06, + "loss": 105.9835, + "step": 26440 + }, + { + "epoch": 0.21880299458162716, + "grad_norm": 1083.864990234375, + "learning_rate": 9.61109007331232e-06, + "loss": 124.8244, + "step": 26450 + }, + { + "epoch": 0.2188857178309964, + "grad_norm": 1159.39697265625, + "learning_rate": 9.610544542796101e-06, + "loss": 120.3728, + "step": 26460 + }, + { + "epoch": 0.21896844108036564, + "grad_norm": 1433.8590087890625, + "learning_rate": 9.609998645440011e-06, + "loss": 132.6535, + "step": 26470 + }, + { + "epoch": 0.21905116432973487, + "grad_norm": 1183.7196044921875, + "learning_rate": 9.609452381287486e-06, + "loss": 133.8586, + "step": 26480 + }, + { + "epoch": 0.2191338875791041, + "grad_norm": 859.5003662109375, + "learning_rate": 9.608905750381988e-06, + "loss": 101.3727, + "step": 26490 + }, + { + "epoch": 0.21921661082847335, + "grad_norm": 800.9340209960938, + "learning_rate": 9.608358752767013e-06, + "loss": 142.6461, + "step": 26500 + }, + { + "epoch": 0.21929933407784258, + "grad_norm": 1471.0047607421875, + "learning_rate": 9.60781138848608e-06, + "loss": 110.0766, + "step": 26510 + }, + { + "epoch": 0.2193820573272118, + "grad_norm": 954.5092163085938, + "learning_rate": 9.607263657582744e-06, + "loss": 110.3453, + "step": 26520 + }, + { + "epoch": 0.21946478057658106, + "grad_norm": 847.3948974609375, + "learning_rate": 9.60671556010058e-06, + "loss": 119.9163, + "step": 26530 + }, + { + "epoch": 0.21954750382595029, + "grad_norm": 559.4891357421875, + "learning_rate": 9.606167096083205e-06, + "loss": 106.7365, + "step": 26540 + }, + { + "epoch": 0.2196302270753195, + "grad_norm": 1348.5836181640625, + "learning_rate": 9.60561826557425e-06, + "loss": 148.4306, + "step": 26550 + }, + { + "epoch": 0.21971295032468877, + "grad_norm": 912.4396362304688, + "learning_rate": 9.60506906861739e-06, + "loss": 116.1079, + "step": 26560 + }, + { + "epoch": 0.219795673574058, + "grad_norm": 
939.4193115234375, + "learning_rate": 9.604519505256316e-06, + "loss": 127.2096, + "step": 26570 + }, + { + "epoch": 0.21987839682342722, + "grad_norm": 807.13623046875, + "learning_rate": 9.603969575534757e-06, + "loss": 102.2194, + "step": 26580 + }, + { + "epoch": 0.21996112007279647, + "grad_norm": 695.100830078125, + "learning_rate": 9.60341927949647e-06, + "loss": 113.9714, + "step": 26590 + }, + { + "epoch": 0.2200438433221657, + "grad_norm": 456.3763122558594, + "learning_rate": 9.602868617185238e-06, + "loss": 105.249, + "step": 26600 + }, + { + "epoch": 0.22012656657153493, + "grad_norm": 1128.632568359375, + "learning_rate": 9.602317588644872e-06, + "loss": 104.7491, + "step": 26610 + }, + { + "epoch": 0.22020928982090415, + "grad_norm": 948.9160766601562, + "learning_rate": 9.601766193919217e-06, + "loss": 104.4173, + "step": 26620 + }, + { + "epoch": 0.2202920130702734, + "grad_norm": 725.5731201171875, + "learning_rate": 9.601214433052147e-06, + "loss": 103.2853, + "step": 26630 + }, + { + "epoch": 0.22037473631964263, + "grad_norm": 979.1326293945312, + "learning_rate": 9.600662306087562e-06, + "loss": 122.0349, + "step": 26640 + }, + { + "epoch": 0.22045745956901186, + "grad_norm": 772.8959350585938, + "learning_rate": 9.600109813069389e-06, + "loss": 118.9232, + "step": 26650 + }, + { + "epoch": 0.22054018281838111, + "grad_norm": 879.559814453125, + "learning_rate": 9.599556954041591e-06, + "loss": 154.7716, + "step": 26660 + }, + { + "epoch": 0.22062290606775034, + "grad_norm": 852.1553344726562, + "learning_rate": 9.599003729048157e-06, + "loss": 115.1464, + "step": 26670 + }, + { + "epoch": 0.22070562931711957, + "grad_norm": 1091.1187744140625, + "learning_rate": 9.598450138133101e-06, + "loss": 124.5991, + "step": 26680 + }, + { + "epoch": 0.22078835256648882, + "grad_norm": 1003.1347045898438, + "learning_rate": 9.597896181340471e-06, + "loss": 133.6112, + "step": 26690 + }, + { + "epoch": 0.22087107581585805, + "grad_norm": 1310.1240234375, + "learning_rate": 9.597341858714344e-06, + "loss": 120.8151, + "step": 26700 + }, + { + "epoch": 0.22095379906522727, + "grad_norm": 973.50439453125, + "learning_rate": 9.596787170298824e-06, + "loss": 132.3573, + "step": 26710 + }, + { + "epoch": 0.22103652231459653, + "grad_norm": 838.1239013671875, + "learning_rate": 9.596232116138047e-06, + "loss": 135.2263, + "step": 26720 + }, + { + "epoch": 0.22111924556396576, + "grad_norm": 776.5439453125, + "learning_rate": 9.595676696276173e-06, + "loss": 140.2891, + "step": 26730 + }, + { + "epoch": 0.22120196881333498, + "grad_norm": 898.1226806640625, + "learning_rate": 9.595120910757396e-06, + "loss": 156.5662, + "step": 26740 + }, + { + "epoch": 0.22128469206270424, + "grad_norm": 988.5482788085938, + "learning_rate": 9.594564759625936e-06, + "loss": 119.9368, + "step": 26750 + }, + { + "epoch": 0.22136741531207346, + "grad_norm": 2739.469482421875, + "learning_rate": 9.594008242926046e-06, + "loss": 117.0178, + "step": 26760 + }, + { + "epoch": 0.2214501385614427, + "grad_norm": 1172.0687255859375, + "learning_rate": 9.593451360702003e-06, + "loss": 109.8631, + "step": 26770 + }, + { + "epoch": 0.22153286181081192, + "grad_norm": 1231.6884765625, + "learning_rate": 9.592894112998115e-06, + "loss": 123.6822, + "step": 26780 + }, + { + "epoch": 0.22161558506018117, + "grad_norm": 2576.349609375, + "learning_rate": 9.592336499858721e-06, + "loss": 134.2115, + "step": 26790 + }, + { + "epoch": 0.2216983083095504, + "grad_norm": 851.02197265625, + "learning_rate": 
9.59177852132819e-06, + "loss": 130.6942, + "step": 26800 + }, + { + "epoch": 0.22178103155891962, + "grad_norm": 1379.3516845703125, + "learning_rate": 9.591220177450912e-06, + "loss": 148.0982, + "step": 26810 + }, + { + "epoch": 0.22186375480828888, + "grad_norm": 1033.928955078125, + "learning_rate": 9.590661468271319e-06, + "loss": 99.2162, + "step": 26820 + }, + { + "epoch": 0.2219464780576581, + "grad_norm": 987.3099365234375, + "learning_rate": 9.59010239383386e-06, + "loss": 138.9152, + "step": 26830 + }, + { + "epoch": 0.22202920130702733, + "grad_norm": 687.860595703125, + "learning_rate": 9.589542954183018e-06, + "loss": 112.7026, + "step": 26840 + }, + { + "epoch": 0.22211192455639658, + "grad_norm": 732.00537109375, + "learning_rate": 9.588983149363307e-06, + "loss": 123.2144, + "step": 26850 + }, + { + "epoch": 0.2221946478057658, + "grad_norm": 1079.510009765625, + "learning_rate": 9.588422979419267e-06, + "loss": 87.7841, + "step": 26860 + }, + { + "epoch": 0.22227737105513504, + "grad_norm": 1468.6038818359375, + "learning_rate": 9.587862444395471e-06, + "loss": 136.3903, + "step": 26870 + }, + { + "epoch": 0.2223600943045043, + "grad_norm": 889.5414428710938, + "learning_rate": 9.587301544336513e-06, + "loss": 115.5707, + "step": 26880 + }, + { + "epoch": 0.22244281755387352, + "grad_norm": 590.138916015625, + "learning_rate": 9.586740279287024e-06, + "loss": 117.9152, + "step": 26890 + }, + { + "epoch": 0.22252554080324274, + "grad_norm": 1332.5377197265625, + "learning_rate": 9.586178649291664e-06, + "loss": 131.7125, + "step": 26900 + }, + { + "epoch": 0.222608264052612, + "grad_norm": 1022.7705688476562, + "learning_rate": 9.585616654395113e-06, + "loss": 115.9927, + "step": 26910 + }, + { + "epoch": 0.22269098730198122, + "grad_norm": 847.87255859375, + "learning_rate": 9.585054294642093e-06, + "loss": 169.3321, + "step": 26920 + }, + { + "epoch": 0.22277371055135045, + "grad_norm": 1865.4990234375, + "learning_rate": 9.584491570077343e-06, + "loss": 128.1739, + "step": 26930 + }, + { + "epoch": 0.2228564338007197, + "grad_norm": 865.5852661132812, + "learning_rate": 9.58392848074564e-06, + "loss": 119.8757, + "step": 26940 + }, + { + "epoch": 0.22293915705008893, + "grad_norm": 1116.7584228515625, + "learning_rate": 9.583365026691785e-06, + "loss": 111.9066, + "step": 26950 + }, + { + "epoch": 0.22302188029945816, + "grad_norm": 1089.7393798828125, + "learning_rate": 9.58280120796061e-06, + "loss": 123.0073, + "step": 26960 + }, + { + "epoch": 0.22310460354882738, + "grad_norm": 854.9454956054688, + "learning_rate": 9.582237024596974e-06, + "loss": 131.7255, + "step": 26970 + }, + { + "epoch": 0.22318732679819664, + "grad_norm": 840.7254028320312, + "learning_rate": 9.581672476645768e-06, + "loss": 134.3853, + "step": 26980 + }, + { + "epoch": 0.22327005004756587, + "grad_norm": 1240.8424072265625, + "learning_rate": 9.58110756415191e-06, + "loss": 127.8951, + "step": 26990 + }, + { + "epoch": 0.2233527732969351, + "grad_norm": 1180.210205078125, + "learning_rate": 9.580542287160348e-06, + "loss": 127.6457, + "step": 27000 + }, + { + "epoch": 0.22343549654630435, + "grad_norm": 1015.6724853515625, + "learning_rate": 9.579976645716058e-06, + "loss": 130.7046, + "step": 27010 + }, + { + "epoch": 0.22351821979567357, + "grad_norm": 1194.459228515625, + "learning_rate": 9.579410639864046e-06, + "loss": 133.5198, + "step": 27020 + }, + { + "epoch": 0.2236009430450428, + "grad_norm": 1310.887451171875, + "learning_rate": 9.578844269649345e-06, + "loss": 123.0892, + 
"step": 27030 + }, + { + "epoch": 0.22368366629441205, + "grad_norm": 764.6407470703125, + "learning_rate": 9.578277535117022e-06, + "loss": 118.5598, + "step": 27040 + }, + { + "epoch": 0.22376638954378128, + "grad_norm": 1107.6348876953125, + "learning_rate": 9.577710436312164e-06, + "loss": 113.8774, + "step": 27050 + }, + { + "epoch": 0.2238491127931505, + "grad_norm": 1522.5843505859375, + "learning_rate": 9.577142973279896e-06, + "loss": 137.2552, + "step": 27060 + }, + { + "epoch": 0.22393183604251976, + "grad_norm": 984.351318359375, + "learning_rate": 9.576575146065369e-06, + "loss": 128.6748, + "step": 27070 + }, + { + "epoch": 0.224014559291889, + "grad_norm": 642.086181640625, + "learning_rate": 9.576006954713762e-06, + "loss": 117.2645, + "step": 27080 + }, + { + "epoch": 0.2240972825412582, + "grad_norm": 1214.22998046875, + "learning_rate": 9.57543839927028e-06, + "loss": 128.8348, + "step": 27090 + }, + { + "epoch": 0.22418000579062747, + "grad_norm": 1050.501953125, + "learning_rate": 9.574869479780165e-06, + "loss": 147.3516, + "step": 27100 + }, + { + "epoch": 0.2242627290399967, + "grad_norm": 1185.0849609375, + "learning_rate": 9.57430019628868e-06, + "loss": 147.9033, + "step": 27110 + }, + { + "epoch": 0.22434545228936592, + "grad_norm": 1115.831298828125, + "learning_rate": 9.573730548841122e-06, + "loss": 111.629, + "step": 27120 + }, + { + "epoch": 0.22442817553873518, + "grad_norm": 1058.506103515625, + "learning_rate": 9.573160537482816e-06, + "loss": 135.1445, + "step": 27130 + }, + { + "epoch": 0.2245108987881044, + "grad_norm": 1066.4493408203125, + "learning_rate": 9.572590162259112e-06, + "loss": 171.7336, + "step": 27140 + }, + { + "epoch": 0.22459362203747363, + "grad_norm": 735.5531005859375, + "learning_rate": 9.572019423215395e-06, + "loss": 110.0656, + "step": 27150 + }, + { + "epoch": 0.22467634528684285, + "grad_norm": 1075.277587890625, + "learning_rate": 9.571448320397076e-06, + "loss": 102.259, + "step": 27160 + }, + { + "epoch": 0.2247590685362121, + "grad_norm": 677.2155151367188, + "learning_rate": 9.570876853849593e-06, + "loss": 128.2644, + "step": 27170 + }, + { + "epoch": 0.22484179178558134, + "grad_norm": 993.1212768554688, + "learning_rate": 9.570305023618417e-06, + "loss": 185.3893, + "step": 27180 + }, + { + "epoch": 0.22492451503495056, + "grad_norm": 963.5702514648438, + "learning_rate": 9.569732829749045e-06, + "loss": 125.3146, + "step": 27190 + }, + { + "epoch": 0.22500723828431982, + "grad_norm": 950.5331420898438, + "learning_rate": 9.569160272287003e-06, + "loss": 126.4869, + "step": 27200 + }, + { + "epoch": 0.22508996153368904, + "grad_norm": 820.460205078125, + "learning_rate": 9.56858735127785e-06, + "loss": 76.8966, + "step": 27210 + }, + { + "epoch": 0.22517268478305827, + "grad_norm": 572.8544311523438, + "learning_rate": 9.568014066767166e-06, + "loss": 126.8829, + "step": 27220 + }, + { + "epoch": 0.22525540803242752, + "grad_norm": 998.837646484375, + "learning_rate": 9.567440418800569e-06, + "loss": 134.7057, + "step": 27230 + }, + { + "epoch": 0.22533813128179675, + "grad_norm": 956.578857421875, + "learning_rate": 9.566866407423698e-06, + "loss": 143.3908, + "step": 27240 + }, + { + "epoch": 0.22542085453116598, + "grad_norm": 1049.07861328125, + "learning_rate": 9.566292032682228e-06, + "loss": 137.0985, + "step": 27250 + }, + { + "epoch": 0.22550357778053523, + "grad_norm": 870.1298217773438, + "learning_rate": 9.565717294621856e-06, + "loss": 114.6946, + "step": 27260 + }, + { + "epoch": 0.22558630102990446, + 
"grad_norm": 784.1261596679688, + "learning_rate": 9.565142193288313e-06, + "loss": 150.3023, + "step": 27270 + }, + { + "epoch": 0.22566902427927368, + "grad_norm": 721.9214477539062, + "learning_rate": 9.564566728727358e-06, + "loss": 92.1085, + "step": 27280 + }, + { + "epoch": 0.22575174752864294, + "grad_norm": 819.4923706054688, + "learning_rate": 9.563990900984775e-06, + "loss": 103.506, + "step": 27290 + }, + { + "epoch": 0.22583447077801216, + "grad_norm": 2060.950927734375, + "learning_rate": 9.563414710106382e-06, + "loss": 212.3363, + "step": 27300 + }, + { + "epoch": 0.2259171940273814, + "grad_norm": 921.5272216796875, + "learning_rate": 9.562838156138025e-06, + "loss": 142.4072, + "step": 27310 + }, + { + "epoch": 0.22599991727675062, + "grad_norm": 935.8705444335938, + "learning_rate": 9.562261239125575e-06, + "loss": 122.9581, + "step": 27320 + }, + { + "epoch": 0.22608264052611987, + "grad_norm": 925.080810546875, + "learning_rate": 9.561683959114938e-06, + "loss": 127.3665, + "step": 27330 + }, + { + "epoch": 0.2261653637754891, + "grad_norm": 1195.8099365234375, + "learning_rate": 9.561106316152043e-06, + "loss": 133.1869, + "step": 27340 + }, + { + "epoch": 0.22624808702485832, + "grad_norm": 784.7255249023438, + "learning_rate": 9.56052831028285e-06, + "loss": 139.8995, + "step": 27350 + }, + { + "epoch": 0.22633081027422758, + "grad_norm": 1093.8494873046875, + "learning_rate": 9.559949941553351e-06, + "loss": 119.8402, + "step": 27360 + }, + { + "epoch": 0.2264135335235968, + "grad_norm": 973.4978637695312, + "learning_rate": 9.559371210009562e-06, + "loss": 124.6592, + "step": 27370 + }, + { + "epoch": 0.22649625677296603, + "grad_norm": 869.2335815429688, + "learning_rate": 9.55879211569753e-06, + "loss": 94.1615, + "step": 27380 + }, + { + "epoch": 0.22657898002233529, + "grad_norm": 1302.50244140625, + "learning_rate": 9.55821265866333e-06, + "loss": 123.9871, + "step": 27390 + }, + { + "epoch": 0.2266617032717045, + "grad_norm": 1908.8446044921875, + "learning_rate": 9.55763283895307e-06, + "loss": 140.2897, + "step": 27400 + }, + { + "epoch": 0.22674442652107374, + "grad_norm": 553.2281494140625, + "learning_rate": 9.557052656612882e-06, + "loss": 98.0197, + "step": 27410 + }, + { + "epoch": 0.226827149770443, + "grad_norm": 693.7083740234375, + "learning_rate": 9.556472111688928e-06, + "loss": 121.6574, + "step": 27420 + }, + { + "epoch": 0.22690987301981222, + "grad_norm": 1643.4599609375, + "learning_rate": 9.555891204227399e-06, + "loss": 107.1005, + "step": 27430 + }, + { + "epoch": 0.22699259626918145, + "grad_norm": 792.270263671875, + "learning_rate": 9.555309934274515e-06, + "loss": 177.7701, + "step": 27440 + }, + { + "epoch": 0.2270753195185507, + "grad_norm": 456.06939697265625, + "learning_rate": 9.554728301876525e-06, + "loss": 137.5959, + "step": 27450 + }, + { + "epoch": 0.22715804276791993, + "grad_norm": 966.9349365234375, + "learning_rate": 9.554146307079711e-06, + "loss": 99.2951, + "step": 27460 + }, + { + "epoch": 0.22724076601728915, + "grad_norm": 1171.660400390625, + "learning_rate": 9.553563949930374e-06, + "loss": 126.8152, + "step": 27470 + }, + { + "epoch": 0.2273234892666584, + "grad_norm": 631.3641967773438, + "learning_rate": 9.552981230474849e-06, + "loss": 101.0075, + "step": 27480 + }, + { + "epoch": 0.22740621251602763, + "grad_norm": 1808.03466796875, + "learning_rate": 9.552398148759506e-06, + "loss": 105.2634, + "step": 27490 + }, + { + "epoch": 0.22748893576539686, + "grad_norm": 1841.3206787109375, + "learning_rate": 
9.551814704830734e-06, + "loss": 124.4455, + "step": 27500 + }, + { + "epoch": 0.2275716590147661, + "grad_norm": 2790.470947265625, + "learning_rate": 9.551230898734955e-06, + "loss": 143.8618, + "step": 27510 + }, + { + "epoch": 0.22765438226413534, + "grad_norm": 916.5533447265625, + "learning_rate": 9.550646730518623e-06, + "loss": 105.8261, + "step": 27520 + }, + { + "epoch": 0.22773710551350457, + "grad_norm": 1437.979248046875, + "learning_rate": 9.550062200228214e-06, + "loss": 101.92, + "step": 27530 + }, + { + "epoch": 0.2278198287628738, + "grad_norm": 1014.4559326171875, + "learning_rate": 9.549477307910238e-06, + "loss": 126.4266, + "step": 27540 + }, + { + "epoch": 0.22790255201224305, + "grad_norm": 1683.48779296875, + "learning_rate": 9.548892053611232e-06, + "loss": 133.186, + "step": 27550 + }, + { + "epoch": 0.22798527526161227, + "grad_norm": 864.87744140625, + "learning_rate": 9.54830643737776e-06, + "loss": 137.8783, + "step": 27560 + }, + { + "epoch": 0.2280679985109815, + "grad_norm": 1702.0152587890625, + "learning_rate": 9.54772045925642e-06, + "loss": 125.6688, + "step": 27570 + }, + { + "epoch": 0.22815072176035076, + "grad_norm": 1634.5037841796875, + "learning_rate": 9.547134119293835e-06, + "loss": 126.7895, + "step": 27580 + }, + { + "epoch": 0.22823344500971998, + "grad_norm": 1126.988525390625, + "learning_rate": 9.546547417536656e-06, + "loss": 117.5014, + "step": 27590 + }, + { + "epoch": 0.2283161682590892, + "grad_norm": 774.8274536132812, + "learning_rate": 9.545960354031564e-06, + "loss": 110.8326, + "step": 27600 + }, + { + "epoch": 0.22839889150845846, + "grad_norm": 920.1649780273438, + "learning_rate": 9.545372928825271e-06, + "loss": 85.081, + "step": 27610 + }, + { + "epoch": 0.2284816147578277, + "grad_norm": 550.8583374023438, + "learning_rate": 9.544785141964514e-06, + "loss": 82.1225, + "step": 27620 + }, + { + "epoch": 0.22856433800719692, + "grad_norm": 2031.975830078125, + "learning_rate": 9.544196993496062e-06, + "loss": 113.8713, + "step": 27630 + }, + { + "epoch": 0.22864706125656617, + "grad_norm": 660.8304443359375, + "learning_rate": 9.54360848346671e-06, + "loss": 111.5653, + "step": 27640 + }, + { + "epoch": 0.2287297845059354, + "grad_norm": 1158.63916015625, + "learning_rate": 9.543019611923283e-06, + "loss": 105.64, + "step": 27650 + }, + { + "epoch": 0.22881250775530462, + "grad_norm": 1066.19970703125, + "learning_rate": 9.542430378912634e-06, + "loss": 112.1879, + "step": 27660 + }, + { + "epoch": 0.22889523100467388, + "grad_norm": 1097.3199462890625, + "learning_rate": 9.541840784481648e-06, + "loss": 112.1237, + "step": 27670 + }, + { + "epoch": 0.2289779542540431, + "grad_norm": 1081.3756103515625, + "learning_rate": 9.541250828677235e-06, + "loss": 144.3419, + "step": 27680 + }, + { + "epoch": 0.22906067750341233, + "grad_norm": 899.7421264648438, + "learning_rate": 9.540660511546335e-06, + "loss": 129.8028, + "step": 27690 + }, + { + "epoch": 0.22914340075278156, + "grad_norm": 835.4717407226562, + "learning_rate": 9.540069833135917e-06, + "loss": 131.3196, + "step": 27700 + }, + { + "epoch": 0.2292261240021508, + "grad_norm": 603.4605712890625, + "learning_rate": 9.539478793492978e-06, + "loss": 94.5269, + "step": 27710 + }, + { + "epoch": 0.22930884725152004, + "grad_norm": 827.4793701171875, + "learning_rate": 9.538887392664544e-06, + "loss": 97.6406, + "step": 27720 + }, + { + "epoch": 0.22939157050088926, + "grad_norm": 947.7173461914062, + "learning_rate": 9.53829563069767e-06, + "loss": 134.4241, + "step": 
27730 + }, + { + "epoch": 0.22947429375025852, + "grad_norm": 1094.2747802734375, + "learning_rate": 9.537703507639444e-06, + "loss": 102.8811, + "step": 27740 + }, + { + "epoch": 0.22955701699962774, + "grad_norm": 1054.0640869140625, + "learning_rate": 9.537111023536973e-06, + "loss": 110.9093, + "step": 27750 + }, + { + "epoch": 0.22963974024899697, + "grad_norm": 902.435302734375, + "learning_rate": 9.536518178437402e-06, + "loss": 120.4104, + "step": 27760 + }, + { + "epoch": 0.22972246349836623, + "grad_norm": 1348.26025390625, + "learning_rate": 9.535924972387898e-06, + "loss": 109.3034, + "step": 27770 + }, + { + "epoch": 0.22980518674773545, + "grad_norm": 857.2482299804688, + "learning_rate": 9.535331405435662e-06, + "loss": 125.6188, + "step": 27780 + }, + { + "epoch": 0.22988790999710468, + "grad_norm": 1263.4981689453125, + "learning_rate": 9.534737477627918e-06, + "loss": 152.1994, + "step": 27790 + }, + { + "epoch": 0.22997063324647393, + "grad_norm": 1043.9830322265625, + "learning_rate": 9.534143189011928e-06, + "loss": 139.1974, + "step": 27800 + }, + { + "epoch": 0.23005335649584316, + "grad_norm": 1181.6470947265625, + "learning_rate": 9.533548539634971e-06, + "loss": 124.807, + "step": 27810 + }, + { + "epoch": 0.23013607974521239, + "grad_norm": 1061.00244140625, + "learning_rate": 9.532953529544365e-06, + "loss": 127.3019, + "step": 27820 + }, + { + "epoch": 0.23021880299458164, + "grad_norm": 2072.89404296875, + "learning_rate": 9.532358158787446e-06, + "loss": 112.2069, + "step": 27830 + }, + { + "epoch": 0.23030152624395087, + "grad_norm": 726.18505859375, + "learning_rate": 9.531762427411592e-06, + "loss": 126.4197, + "step": 27840 + }, + { + "epoch": 0.2303842494933201, + "grad_norm": 1991.278564453125, + "learning_rate": 9.531166335464198e-06, + "loss": 257.9386, + "step": 27850 + }, + { + "epoch": 0.23046697274268935, + "grad_norm": 648.2035522460938, + "learning_rate": 9.530569882992698e-06, + "loss": 153.0686, + "step": 27860 + }, + { + "epoch": 0.23054969599205857, + "grad_norm": 913.1694946289062, + "learning_rate": 9.52997307004454e-06, + "loss": 122.5074, + "step": 27870 + }, + { + "epoch": 0.2306324192414278, + "grad_norm": 1065.9097900390625, + "learning_rate": 9.529375896667218e-06, + "loss": 95.8606, + "step": 27880 + }, + { + "epoch": 0.23071514249079703, + "grad_norm": 2741.90234375, + "learning_rate": 9.528778362908241e-06, + "loss": 141.1961, + "step": 27890 + }, + { + "epoch": 0.23079786574016628, + "grad_norm": 1098.0302734375, + "learning_rate": 9.528180468815155e-06, + "loss": 103.5171, + "step": 27900 + }, + { + "epoch": 0.2308805889895355, + "grad_norm": 899.1497192382812, + "learning_rate": 9.527582214435531e-06, + "loss": 143.412, + "step": 27910 + }, + { + "epoch": 0.23096331223890473, + "grad_norm": 311.1416320800781, + "learning_rate": 9.526983599816968e-06, + "loss": 91.3562, + "step": 27920 + }, + { + "epoch": 0.231046035488274, + "grad_norm": 1649.770263671875, + "learning_rate": 9.526384625007096e-06, + "loss": 124.463, + "step": 27930 + }, + { + "epoch": 0.23112875873764321, + "grad_norm": 804.8977661132812, + "learning_rate": 9.525785290053573e-06, + "loss": 138.7314, + "step": 27940 + }, + { + "epoch": 0.23121148198701244, + "grad_norm": 859.3971557617188, + "learning_rate": 9.525185595004085e-06, + "loss": 85.416, + "step": 27950 + }, + { + "epoch": 0.2312942052363817, + "grad_norm": 901.3175659179688, + "learning_rate": 9.524585539906345e-06, + "loss": 93.4797, + "step": 27960 + }, + { + "epoch": 0.23137692848575092, + 
"grad_norm": 1204.8818359375, + "learning_rate": 9.523985124808102e-06, + "loss": 140.1107, + "step": 27970 + }, + { + "epoch": 0.23145965173512015, + "grad_norm": 1186.2174072265625, + "learning_rate": 9.523384349757123e-06, + "loss": 111.8204, + "step": 27980 + }, + { + "epoch": 0.2315423749844894, + "grad_norm": 1178.4058837890625, + "learning_rate": 9.522783214801213e-06, + "loss": 164.7376, + "step": 27990 + }, + { + "epoch": 0.23162509823385863, + "grad_norm": 864.9526977539062, + "learning_rate": 9.522181719988196e-06, + "loss": 112.4256, + "step": 28000 + }, + { + "epoch": 0.23170782148322785, + "grad_norm": 1473.5499267578125, + "learning_rate": 9.521579865365935e-06, + "loss": 117.2831, + "step": 28010 + }, + { + "epoch": 0.2317905447325971, + "grad_norm": 525.5335083007812, + "learning_rate": 9.520977650982316e-06, + "loss": 150.6173, + "step": 28020 + }, + { + "epoch": 0.23187326798196634, + "grad_norm": 886.9190063476562, + "learning_rate": 9.520375076885253e-06, + "loss": 146.7385, + "step": 28030 + }, + { + "epoch": 0.23195599123133556, + "grad_norm": 937.6173095703125, + "learning_rate": 9.519772143122691e-06, + "loss": 104.6981, + "step": 28040 + }, + { + "epoch": 0.2320387144807048, + "grad_norm": 834.8984375, + "learning_rate": 9.519168849742603e-06, + "loss": 96.541, + "step": 28050 + }, + { + "epoch": 0.23212143773007404, + "grad_norm": 863.10986328125, + "learning_rate": 9.51856519679299e-06, + "loss": 93.61, + "step": 28060 + }, + { + "epoch": 0.23220416097944327, + "grad_norm": 683.4520263671875, + "learning_rate": 9.517961184321882e-06, + "loss": 131.8342, + "step": 28070 + }, + { + "epoch": 0.2322868842288125, + "grad_norm": 2610.283203125, + "learning_rate": 9.517356812377336e-06, + "loss": 189.3643, + "step": 28080 + }, + { + "epoch": 0.23236960747818175, + "grad_norm": 993.5272827148438, + "learning_rate": 9.516752081007441e-06, + "loss": 133.2566, + "step": 28090 + }, + { + "epoch": 0.23245233072755098, + "grad_norm": 1212.4417724609375, + "learning_rate": 9.51614699026031e-06, + "loss": 129.3865, + "step": 28100 + }, + { + "epoch": 0.2325350539769202, + "grad_norm": 812.443359375, + "learning_rate": 9.515541540184093e-06, + "loss": 95.0065, + "step": 28110 + }, + { + "epoch": 0.23261777722628946, + "grad_norm": 1204.5474853515625, + "learning_rate": 9.514935730826957e-06, + "loss": 145.4519, + "step": 28120 + }, + { + "epoch": 0.23270050047565868, + "grad_norm": 1555.590576171875, + "learning_rate": 9.514329562237107e-06, + "loss": 136.7933, + "step": 28130 + }, + { + "epoch": 0.2327832237250279, + "grad_norm": 870.7056274414062, + "learning_rate": 9.51372303446277e-06, + "loss": 122.1039, + "step": 28140 + }, + { + "epoch": 0.23286594697439716, + "grad_norm": 1071.2955322265625, + "learning_rate": 9.513116147552207e-06, + "loss": 102.6043, + "step": 28150 + }, + { + "epoch": 0.2329486702237664, + "grad_norm": 797.454833984375, + "learning_rate": 9.512508901553703e-06, + "loss": 140.6481, + "step": 28160 + }, + { + "epoch": 0.23303139347313562, + "grad_norm": 585.2443237304688, + "learning_rate": 9.511901296515578e-06, + "loss": 113.4713, + "step": 28170 + }, + { + "epoch": 0.23311411672250487, + "grad_norm": 870.2528076171875, + "learning_rate": 9.511293332486172e-06, + "loss": 130.058, + "step": 28180 + }, + { + "epoch": 0.2331968399718741, + "grad_norm": 1487.830322265625, + "learning_rate": 9.51068500951386e-06, + "loss": 132.6228, + "step": 28190 + }, + { + "epoch": 0.23327956322124332, + "grad_norm": 3511.711181640625, + "learning_rate": 
9.510076327647043e-06, + "loss": 113.2403, + "step": 28200 + }, + { + "epoch": 0.23336228647061258, + "grad_norm": 1605.590087890625, + "learning_rate": 9.509467286934151e-06, + "loss": 150.1105, + "step": 28210 + }, + { + "epoch": 0.2334450097199818, + "grad_norm": 1058.44189453125, + "learning_rate": 9.508857887423644e-06, + "loss": 122.3643, + "step": 28220 + }, + { + "epoch": 0.23352773296935103, + "grad_norm": 820.7523803710938, + "learning_rate": 9.508248129164006e-06, + "loss": 80.4105, + "step": 28230 + }, + { + "epoch": 0.23361045621872026, + "grad_norm": 912.558837890625, + "learning_rate": 9.507638012203755e-06, + "loss": 152.501, + "step": 28240 + }, + { + "epoch": 0.2336931794680895, + "grad_norm": 1337.6898193359375, + "learning_rate": 9.507027536591436e-06, + "loss": 149.5806, + "step": 28250 + }, + { + "epoch": 0.23377590271745874, + "grad_norm": 1526.1043701171875, + "learning_rate": 9.506416702375618e-06, + "loss": 153.4466, + "step": 28260 + }, + { + "epoch": 0.23385862596682797, + "grad_norm": 933.4179077148438, + "learning_rate": 9.505805509604906e-06, + "loss": 106.969, + "step": 28270 + }, + { + "epoch": 0.23394134921619722, + "grad_norm": 1189.154296875, + "learning_rate": 9.505193958327927e-06, + "loss": 129.8097, + "step": 28280 + }, + { + "epoch": 0.23402407246556645, + "grad_norm": 691.6627197265625, + "learning_rate": 9.504582048593343e-06, + "loss": 99.3678, + "step": 28290 + }, + { + "epoch": 0.23410679571493567, + "grad_norm": 1236.778076171875, + "learning_rate": 9.503969780449838e-06, + "loss": 119.0243, + "step": 28300 + }, + { + "epoch": 0.23418951896430493, + "grad_norm": 1243.570068359375, + "learning_rate": 9.503357153946126e-06, + "loss": 104.1002, + "step": 28310 + }, + { + "epoch": 0.23427224221367415, + "grad_norm": 1092.5941162109375, + "learning_rate": 9.502744169130955e-06, + "loss": 97.2079, + "step": 28320 + }, + { + "epoch": 0.23435496546304338, + "grad_norm": 1051.38671875, + "learning_rate": 9.502130826053095e-06, + "loss": 132.0031, + "step": 28330 + }, + { + "epoch": 0.23443768871241263, + "grad_norm": 1343.4345703125, + "learning_rate": 9.501517124761347e-06, + "loss": 112.9695, + "step": 28340 + }, + { + "epoch": 0.23452041196178186, + "grad_norm": 637.9159545898438, + "learning_rate": 9.50090306530454e-06, + "loss": 85.6707, + "step": 28350 + }, + { + "epoch": 0.2346031352111511, + "grad_norm": 976.9581909179688, + "learning_rate": 9.500288647731533e-06, + "loss": 127.3839, + "step": 28360 + }, + { + "epoch": 0.23468585846052034, + "grad_norm": 1000.0506591796875, + "learning_rate": 9.49967387209121e-06, + "loss": 168.1098, + "step": 28370 + }, + { + "epoch": 0.23476858170988957, + "grad_norm": 838.3327026367188, + "learning_rate": 9.499058738432492e-06, + "loss": 112.4135, + "step": 28380 + }, + { + "epoch": 0.2348513049592588, + "grad_norm": 708.0794067382812, + "learning_rate": 9.498443246804314e-06, + "loss": 120.7116, + "step": 28390 + }, + { + "epoch": 0.23493402820862805, + "grad_norm": 1063.5574951171875, + "learning_rate": 9.497827397255655e-06, + "loss": 99.907, + "step": 28400 + }, + { + "epoch": 0.23501675145799727, + "grad_norm": 933.8026123046875, + "learning_rate": 9.49721118983551e-06, + "loss": 153.6268, + "step": 28410 + }, + { + "epoch": 0.2350994747073665, + "grad_norm": 836.0083618164062, + "learning_rate": 9.49659462459291e-06, + "loss": 123.5795, + "step": 28420 + }, + { + "epoch": 0.23518219795673573, + "grad_norm": 1091.86669921875, + "learning_rate": 9.495977701576913e-06, + "loss": 141.2293, + "step": 28430 
+ }, + { + "epoch": 0.23526492120610498, + "grad_norm": 960.7584838867188, + "learning_rate": 9.495360420836603e-06, + "loss": 113.6143, + "step": 28440 + }, + { + "epoch": 0.2353476444554742, + "grad_norm": 1572.40771484375, + "learning_rate": 9.494742782421099e-06, + "loss": 159.3734, + "step": 28450 + }, + { + "epoch": 0.23543036770484344, + "grad_norm": 1193.2608642578125, + "learning_rate": 9.494124786379535e-06, + "loss": 128.3347, + "step": 28460 + }, + { + "epoch": 0.2355130909542127, + "grad_norm": 1346.21630859375, + "learning_rate": 9.49350643276109e-06, + "loss": 105.2375, + "step": 28470 + }, + { + "epoch": 0.23559581420358192, + "grad_norm": 1731.52001953125, + "learning_rate": 9.49288772161496e-06, + "loss": 134.3214, + "step": 28480 + }, + { + "epoch": 0.23567853745295114, + "grad_norm": 982.36279296875, + "learning_rate": 9.492268652990374e-06, + "loss": 120.5295, + "step": 28490 + }, + { + "epoch": 0.2357612607023204, + "grad_norm": 1203.948486328125, + "learning_rate": 9.491649226936586e-06, + "loss": 149.1518, + "step": 28500 + }, + { + "epoch": 0.23584398395168962, + "grad_norm": 1445.0263671875, + "learning_rate": 9.491029443502884e-06, + "loss": 133.0336, + "step": 28510 + }, + { + "epoch": 0.23592670720105885, + "grad_norm": 1120.74462890625, + "learning_rate": 9.490409302738582e-06, + "loss": 104.1838, + "step": 28520 + }, + { + "epoch": 0.2360094304504281, + "grad_norm": 1055.50830078125, + "learning_rate": 9.489788804693017e-06, + "loss": 97.1542, + "step": 28530 + }, + { + "epoch": 0.23609215369979733, + "grad_norm": 645.5867309570312, + "learning_rate": 9.489167949415563e-06, + "loss": 124.6525, + "step": 28540 + }, + { + "epoch": 0.23617487694916656, + "grad_norm": 723.2225952148438, + "learning_rate": 9.48854673695562e-06, + "loss": 117.6365, + "step": 28550 + }, + { + "epoch": 0.2362576001985358, + "grad_norm": 1657.0540771484375, + "learning_rate": 9.48792516736261e-06, + "loss": 131.2895, + "step": 28560 + }, + { + "epoch": 0.23634032344790504, + "grad_norm": 967.8582763671875, + "learning_rate": 9.487303240685992e-06, + "loss": 100.9019, + "step": 28570 + }, + { + "epoch": 0.23642304669727426, + "grad_norm": 2011.5921630859375, + "learning_rate": 9.48668095697525e-06, + "loss": 167.2856, + "step": 28580 + }, + { + "epoch": 0.2365057699466435, + "grad_norm": 991.98291015625, + "learning_rate": 9.486058316279894e-06, + "loss": 158.8021, + "step": 28590 + }, + { + "epoch": 0.23658849319601274, + "grad_norm": 905.6279296875, + "learning_rate": 9.485435318649468e-06, + "loss": 124.5288, + "step": 28600 + }, + { + "epoch": 0.23667121644538197, + "grad_norm": 793.5623779296875, + "learning_rate": 9.484811964133537e-06, + "loss": 138.7439, + "step": 28610 + }, + { + "epoch": 0.2367539396947512, + "grad_norm": 1176.435791015625, + "learning_rate": 9.484188252781701e-06, + "loss": 109.5845, + "step": 28620 + }, + { + "epoch": 0.23683666294412045, + "grad_norm": 840.8705444335938, + "learning_rate": 9.483564184643586e-06, + "loss": 90.2001, + "step": 28630 + }, + { + "epoch": 0.23691938619348968, + "grad_norm": 705.6690673828125, + "learning_rate": 9.482939759768845e-06, + "loss": 145.6554, + "step": 28640 + }, + { + "epoch": 0.2370021094428589, + "grad_norm": 1252.253662109375, + "learning_rate": 9.48231497820716e-06, + "loss": 129.5605, + "step": 28650 + }, + { + "epoch": 0.23708483269222816, + "grad_norm": 1174.8831787109375, + "learning_rate": 9.481689840008246e-06, + "loss": 121.5843, + "step": 28660 + }, + { + "epoch": 0.23716755594159739, + "grad_norm": 
1164.958740234375, + "learning_rate": 9.481064345221838e-06, + "loss": 130.8124, + "step": 28670 + }, + { + "epoch": 0.2372502791909666, + "grad_norm": 1460.279052734375, + "learning_rate": 9.480438493897707e-06, + "loss": 186.2501, + "step": 28680 + }, + { + "epoch": 0.23733300244033587, + "grad_norm": 1548.7734375, + "learning_rate": 9.479812286085645e-06, + "loss": 122.9342, + "step": 28690 + }, + { + "epoch": 0.2374157256897051, + "grad_norm": 485.3841857910156, + "learning_rate": 9.47918572183548e-06, + "loss": 116.2105, + "step": 28700 + }, + { + "epoch": 0.23749844893907432, + "grad_norm": 2227.711181640625, + "learning_rate": 9.478558801197065e-06, + "loss": 108.795, + "step": 28710 + }, + { + "epoch": 0.23758117218844357, + "grad_norm": 1480.4208984375, + "learning_rate": 9.47793152422028e-06, + "loss": 103.7922, + "step": 28720 + }, + { + "epoch": 0.2376638954378128, + "grad_norm": 845.8829956054688, + "learning_rate": 9.477303890955032e-06, + "loss": 112.599, + "step": 28730 + }, + { + "epoch": 0.23774661868718203, + "grad_norm": 593.84619140625, + "learning_rate": 9.476675901451264e-06, + "loss": 124.1586, + "step": 28740 + }, + { + "epoch": 0.23782934193655128, + "grad_norm": 636.6671752929688, + "learning_rate": 9.476047555758938e-06, + "loss": 172.5131, + "step": 28750 + }, + { + "epoch": 0.2379120651859205, + "grad_norm": 4360.0341796875, + "learning_rate": 9.475418853928051e-06, + "loss": 191.1747, + "step": 28760 + }, + { + "epoch": 0.23799478843528973, + "grad_norm": 483.2803649902344, + "learning_rate": 9.474789796008625e-06, + "loss": 138.2722, + "step": 28770 + }, + { + "epoch": 0.23807751168465896, + "grad_norm": 1149.00048828125, + "learning_rate": 9.474160382050711e-06, + "loss": 126.3032, + "step": 28780 + }, + { + "epoch": 0.23816023493402821, + "grad_norm": 1440.6688232421875, + "learning_rate": 9.47353061210439e-06, + "loss": 96.6904, + "step": 28790 + }, + { + "epoch": 0.23824295818339744, + "grad_norm": 2017.08837890625, + "learning_rate": 9.47290048621977e-06, + "loss": 116.2174, + "step": 28800 + }, + { + "epoch": 0.23832568143276667, + "grad_norm": 900.5809326171875, + "learning_rate": 9.472270004446984e-06, + "loss": 110.8572, + "step": 28810 + }, + { + "epoch": 0.23840840468213592, + "grad_norm": 853.006591796875, + "learning_rate": 9.4716391668362e-06, + "loss": 115.7237, + "step": 28820 + }, + { + "epoch": 0.23849112793150515, + "grad_norm": 1115.9259033203125, + "learning_rate": 9.471007973437607e-06, + "loss": 108.8435, + "step": 28830 + }, + { + "epoch": 0.23857385118087437, + "grad_norm": 1762.0460205078125, + "learning_rate": 9.470376424301432e-06, + "loss": 148.8191, + "step": 28840 + }, + { + "epoch": 0.23865657443024363, + "grad_norm": 1468.8228759765625, + "learning_rate": 9.46974451947792e-06, + "loss": 154.2129, + "step": 28850 + }, + { + "epoch": 0.23873929767961286, + "grad_norm": 1209.044921875, + "learning_rate": 9.469112259017349e-06, + "loss": 107.4766, + "step": 28860 + }, + { + "epoch": 0.23882202092898208, + "grad_norm": 1850.443603515625, + "learning_rate": 9.468479642970027e-06, + "loss": 117.6253, + "step": 28870 + }, + { + "epoch": 0.23890474417835134, + "grad_norm": 1324.356689453125, + "learning_rate": 9.467846671386287e-06, + "loss": 178.3749, + "step": 28880 + }, + { + "epoch": 0.23898746742772056, + "grad_norm": 1060.7711181640625, + "learning_rate": 9.467213344316493e-06, + "loss": 101.0151, + "step": 28890 + }, + { + "epoch": 0.2390701906770898, + "grad_norm": 2596.78076171875, + "learning_rate": 9.466579661811032e-06, + 
"loss": 149.5662, + "step": 28900 + }, + { + "epoch": 0.23915291392645904, + "grad_norm": 1160.0654296875, + "learning_rate": 9.46594562392033e-06, + "loss": 114.8278, + "step": 28910 + }, + { + "epoch": 0.23923563717582827, + "grad_norm": 1474.780029296875, + "learning_rate": 9.465311230694828e-06, + "loss": 94.9893, + "step": 28920 + }, + { + "epoch": 0.2393183604251975, + "grad_norm": 827.9722900390625, + "learning_rate": 9.464676482185005e-06, + "loss": 82.4494, + "step": 28930 + }, + { + "epoch": 0.23940108367456675, + "grad_norm": 490.88360595703125, + "learning_rate": 9.464041378441365e-06, + "loss": 160.122, + "step": 28940 + }, + { + "epoch": 0.23948380692393598, + "grad_norm": 2539.133544921875, + "learning_rate": 9.46340591951444e-06, + "loss": 135.2263, + "step": 28950 + }, + { + "epoch": 0.2395665301733052, + "grad_norm": 620.4109497070312, + "learning_rate": 9.462770105454789e-06, + "loss": 130.8018, + "step": 28960 + }, + { + "epoch": 0.23964925342267443, + "grad_norm": 575.00146484375, + "learning_rate": 9.462133936313002e-06, + "loss": 123.9707, + "step": 28970 + }, + { + "epoch": 0.23973197667204368, + "grad_norm": 764.5715942382812, + "learning_rate": 9.461497412139697e-06, + "loss": 103.3378, + "step": 28980 + }, + { + "epoch": 0.2398146999214129, + "grad_norm": 1179.3966064453125, + "learning_rate": 9.46086053298552e-06, + "loss": 139.9406, + "step": 28990 + }, + { + "epoch": 0.23989742317078214, + "grad_norm": 1064.7276611328125, + "learning_rate": 9.460223298901138e-06, + "loss": 100.375, + "step": 29000 + }, + { + "epoch": 0.2399801464201514, + "grad_norm": 833.2089233398438, + "learning_rate": 9.459585709937262e-06, + "loss": 120.6056, + "step": 29010 + }, + { + "epoch": 0.24006286966952062, + "grad_norm": 1361.6549072265625, + "learning_rate": 9.458947766144617e-06, + "loss": 129.4685, + "step": 29020 + }, + { + "epoch": 0.24014559291888984, + "grad_norm": 1131.559326171875, + "learning_rate": 9.458309467573963e-06, + "loss": 90.7656, + "step": 29030 + }, + { + "epoch": 0.2402283161682591, + "grad_norm": 1216.7392578125, + "learning_rate": 9.457670814276083e-06, + "loss": 105.4316, + "step": 29040 + }, + { + "epoch": 0.24031103941762832, + "grad_norm": 853.8714599609375, + "learning_rate": 9.457031806301795e-06, + "loss": 94.2898, + "step": 29050 + }, + { + "epoch": 0.24039376266699755, + "grad_norm": 826.33837890625, + "learning_rate": 9.456392443701943e-06, + "loss": 118.5048, + "step": 29060 + }, + { + "epoch": 0.2404764859163668, + "grad_norm": 969.9588012695312, + "learning_rate": 9.455752726527395e-06, + "loss": 158.1088, + "step": 29070 + }, + { + "epoch": 0.24055920916573603, + "grad_norm": 591.8674926757812, + "learning_rate": 9.45511265482905e-06, + "loss": 106.0139, + "step": 29080 + }, + { + "epoch": 0.24064193241510526, + "grad_norm": 1056.0914306640625, + "learning_rate": 9.454472228657841e-06, + "loss": 148.1635, + "step": 29090 + }, + { + "epoch": 0.2407246556644745, + "grad_norm": 1000.041259765625, + "learning_rate": 9.453831448064717e-06, + "loss": 119.1304, + "step": 29100 + }, + { + "epoch": 0.24080737891384374, + "grad_norm": 765.8133544921875, + "learning_rate": 9.453190313100666e-06, + "loss": 83.0749, + "step": 29110 + }, + { + "epoch": 0.24089010216321297, + "grad_norm": 963.5242919921875, + "learning_rate": 9.4525488238167e-06, + "loss": 153.7064, + "step": 29120 + }, + { + "epoch": 0.24097282541258222, + "grad_norm": 809.49267578125, + "learning_rate": 9.451906980263857e-06, + "loss": 122.1319, + "step": 29130 + }, + { + "epoch": 
0.24105554866195145, + "grad_norm": 800.9232788085938, + "learning_rate": 9.451264782493208e-06, + "loss": 101.5012, + "step": 29140 + }, + { + "epoch": 0.24113827191132067, + "grad_norm": 1233.4398193359375, + "learning_rate": 9.450622230555849e-06, + "loss": 144.6246, + "step": 29150 + }, + { + "epoch": 0.2412209951606899, + "grad_norm": 692.1824951171875, + "learning_rate": 9.449979324502905e-06, + "loss": 160.0062, + "step": 29160 + }, + { + "epoch": 0.24130371841005915, + "grad_norm": 946.5017700195312, + "learning_rate": 9.449336064385529e-06, + "loss": 105.0953, + "step": 29170 + }, + { + "epoch": 0.24138644165942838, + "grad_norm": 1230.076416015625, + "learning_rate": 9.4486924502549e-06, + "loss": 120.0082, + "step": 29180 + }, + { + "epoch": 0.2414691649087976, + "grad_norm": 839.8562622070312, + "learning_rate": 9.448048482162231e-06, + "loss": 137.8697, + "step": 29190 + }, + { + "epoch": 0.24155188815816686, + "grad_norm": 882.9497680664062, + "learning_rate": 9.447404160158758e-06, + "loss": 119.5869, + "step": 29200 + }, + { + "epoch": 0.2416346114075361, + "grad_norm": 1333.1221923828125, + "learning_rate": 9.446759484295745e-06, + "loss": 116.6337, + "step": 29210 + }, + { + "epoch": 0.2417173346569053, + "grad_norm": 955.5294799804688, + "learning_rate": 9.44611445462449e-06, + "loss": 134.2271, + "step": 29220 + }, + { + "epoch": 0.24180005790627457, + "grad_norm": 1251.0631103515625, + "learning_rate": 9.445469071196312e-06, + "loss": 124.4641, + "step": 29230 + }, + { + "epoch": 0.2418827811556438, + "grad_norm": 834.0491943359375, + "learning_rate": 9.444823334062562e-06, + "loss": 116.2968, + "step": 29240 + }, + { + "epoch": 0.24196550440501302, + "grad_norm": 1349.356201171875, + "learning_rate": 9.444177243274619e-06, + "loss": 131.4607, + "step": 29250 + }, + { + "epoch": 0.24204822765438228, + "grad_norm": 1118.03173828125, + "learning_rate": 9.443530798883887e-06, + "loss": 107.2266, + "step": 29260 + }, + { + "epoch": 0.2421309509037515, + "grad_norm": 1354.8619384765625, + "learning_rate": 9.442884000941803e-06, + "loss": 129.6626, + "step": 29270 + }, + { + "epoch": 0.24221367415312073, + "grad_norm": 1301.5723876953125, + "learning_rate": 9.44223684949983e-06, + "loss": 126.9563, + "step": 29280 + }, + { + "epoch": 0.24229639740248998, + "grad_norm": 639.9192504882812, + "learning_rate": 9.441589344609457e-06, + "loss": 97.4439, + "step": 29290 + }, + { + "epoch": 0.2423791206518592, + "grad_norm": 1386.3795166015625, + "learning_rate": 9.440941486322205e-06, + "loss": 150.2773, + "step": 29300 + }, + { + "epoch": 0.24246184390122844, + "grad_norm": 1358.3714599609375, + "learning_rate": 9.44029327468962e-06, + "loss": 110.78, + "step": 29310 + }, + { + "epoch": 0.24254456715059766, + "grad_norm": 757.8045043945312, + "learning_rate": 9.439644709763276e-06, + "loss": 112.67, + "step": 29320 + }, + { + "epoch": 0.24262729039996692, + "grad_norm": 930.5925903320312, + "learning_rate": 9.43899579159478e-06, + "loss": 121.9743, + "step": 29330 + }, + { + "epoch": 0.24271001364933614, + "grad_norm": 1049.4073486328125, + "learning_rate": 9.438346520235759e-06, + "loss": 100.5406, + "step": 29340 + }, + { + "epoch": 0.24279273689870537, + "grad_norm": 1115.40966796875, + "learning_rate": 9.437696895737876e-06, + "loss": 121.6903, + "step": 29350 + }, + { + "epoch": 0.24287546014807462, + "grad_norm": 698.0397338867188, + "learning_rate": 9.437046918152817e-06, + "loss": 88.6896, + "step": 29360 + }, + { + "epoch": 0.24295818339744385, + "grad_norm": 
824.7769165039062, + "learning_rate": 9.436396587532297e-06, + "loss": 126.8226, + "step": 29370 + }, + { + "epoch": 0.24304090664681308, + "grad_norm": 1229.65869140625, + "learning_rate": 9.435745903928062e-06, + "loss": 113.3302, + "step": 29380 + }, + { + "epoch": 0.24312362989618233, + "grad_norm": 845.088623046875, + "learning_rate": 9.435094867391881e-06, + "loss": 154.009, + "step": 29390 + }, + { + "epoch": 0.24320635314555156, + "grad_norm": 961.1011962890625, + "learning_rate": 9.434443477975557e-06, + "loss": 103.9956, + "step": 29400 + }, + { + "epoch": 0.24328907639492078, + "grad_norm": 757.7135009765625, + "learning_rate": 9.433791735730917e-06, + "loss": 98.1805, + "step": 29410 + }, + { + "epoch": 0.24337179964429004, + "grad_norm": 1421.3348388671875, + "learning_rate": 9.433139640709817e-06, + "loss": 132.9433, + "step": 29420 + }, + { + "epoch": 0.24345452289365926, + "grad_norm": 608.5304565429688, + "learning_rate": 9.432487192964142e-06, + "loss": 122.0067, + "step": 29430 + }, + { + "epoch": 0.2435372461430285, + "grad_norm": 1166.598876953125, + "learning_rate": 9.431834392545803e-06, + "loss": 127.8436, + "step": 29440 + }, + { + "epoch": 0.24361996939239774, + "grad_norm": 1254.7440185546875, + "learning_rate": 9.43118123950674e-06, + "loss": 124.5958, + "step": 29450 + }, + { + "epoch": 0.24370269264176697, + "grad_norm": 924.8063354492188, + "learning_rate": 9.430527733898922e-06, + "loss": 102.3073, + "step": 29460 + }, + { + "epoch": 0.2437854158911362, + "grad_norm": 957.7825927734375, + "learning_rate": 9.429873875774344e-06, + "loss": 112.2574, + "step": 29470 + }, + { + "epoch": 0.24386813914050545, + "grad_norm": 867.5020141601562, + "learning_rate": 9.429219665185034e-06, + "loss": 109.0799, + "step": 29480 + }, + { + "epoch": 0.24395086238987468, + "grad_norm": 1606.21435546875, + "learning_rate": 9.428565102183043e-06, + "loss": 114.3639, + "step": 29490 + }, + { + "epoch": 0.2440335856392439, + "grad_norm": 782.0588989257812, + "learning_rate": 9.42791018682045e-06, + "loss": 102.8368, + "step": 29500 + }, + { + "epoch": 0.24411630888861313, + "grad_norm": 1499.0159912109375, + "learning_rate": 9.427254919149367e-06, + "loss": 129.8493, + "step": 29510 + }, + { + "epoch": 0.24419903213798239, + "grad_norm": 1734.2025146484375, + "learning_rate": 9.426599299221925e-06, + "loss": 118.0028, + "step": 29520 + }, + { + "epoch": 0.2442817553873516, + "grad_norm": 1104.4652099609375, + "learning_rate": 9.425943327090295e-06, + "loss": 133.7769, + "step": 29530 + }, + { + "epoch": 0.24436447863672084, + "grad_norm": 809.5218505859375, + "learning_rate": 9.425287002806666e-06, + "loss": 101.5154, + "step": 29540 + }, + { + "epoch": 0.2444472018860901, + "grad_norm": 860.76123046875, + "learning_rate": 9.42463032642326e-06, + "loss": 126.5965, + "step": 29550 + }, + { + "epoch": 0.24452992513545932, + "grad_norm": 1226.3048095703125, + "learning_rate": 9.423973297992324e-06, + "loss": 133.2678, + "step": 29560 + }, + { + "epoch": 0.24461264838482855, + "grad_norm": 638.6930541992188, + "learning_rate": 9.423315917566137e-06, + "loss": 153.0996, + "step": 29570 + }, + { + "epoch": 0.2446953716341978, + "grad_norm": 598.7249755859375, + "learning_rate": 9.422658185197002e-06, + "loss": 122.7943, + "step": 29580 + }, + { + "epoch": 0.24477809488356703, + "grad_norm": 711.6947021484375, + "learning_rate": 9.422000100937253e-06, + "loss": 93.9475, + "step": 29590 + }, + { + "epoch": 0.24486081813293625, + "grad_norm": 1122.1689453125, + "learning_rate": 
9.42134166483925e-06, + "loss": 97.7506, + "step": 29600 + }, + { + "epoch": 0.2449435413823055, + "grad_norm": 1915.1302490234375, + "learning_rate": 9.420682876955382e-06, + "loss": 115.1031, + "step": 29610 + }, + { + "epoch": 0.24502626463167473, + "grad_norm": 659.7858276367188, + "learning_rate": 9.420023737338065e-06, + "loss": 120.2869, + "step": 29620 + }, + { + "epoch": 0.24510898788104396, + "grad_norm": 834.023193359375, + "learning_rate": 9.419364246039745e-06, + "loss": 125.4224, + "step": 29630 + }, + { + "epoch": 0.24519171113041321, + "grad_norm": 1009.0372924804688, + "learning_rate": 9.418704403112894e-06, + "loss": 109.2442, + "step": 29640 + }, + { + "epoch": 0.24527443437978244, + "grad_norm": 2674.98193359375, + "learning_rate": 9.418044208610013e-06, + "loss": 156.5225, + "step": 29650 + }, + { + "epoch": 0.24535715762915167, + "grad_norm": 1011.0217895507812, + "learning_rate": 9.41738366258363e-06, + "loss": 126.1811, + "step": 29660 + }, + { + "epoch": 0.24543988087852092, + "grad_norm": 610.7017211914062, + "learning_rate": 9.416722765086304e-06, + "loss": 144.7449, + "step": 29670 + }, + { + "epoch": 0.24552260412789015, + "grad_norm": 1031.1539306640625, + "learning_rate": 9.416061516170615e-06, + "loss": 108.1692, + "step": 29680 + }, + { + "epoch": 0.24560532737725937, + "grad_norm": 1801.7032470703125, + "learning_rate": 9.415399915889179e-06, + "loss": 121.3443, + "step": 29690 + }, + { + "epoch": 0.2456880506266286, + "grad_norm": 1428.9144287109375, + "learning_rate": 9.414737964294636e-06, + "loss": 116.9526, + "step": 29700 + }, + { + "epoch": 0.24577077387599786, + "grad_norm": 937.070556640625, + "learning_rate": 9.414075661439653e-06, + "loss": 111.9231, + "step": 29710 + }, + { + "epoch": 0.24585349712536708, + "grad_norm": 1381.5968017578125, + "learning_rate": 9.413413007376928e-06, + "loss": 163.5947, + "step": 29720 + }, + { + "epoch": 0.2459362203747363, + "grad_norm": 1236.41552734375, + "learning_rate": 9.412750002159186e-06, + "loss": 110.7294, + "step": 29730 + }, + { + "epoch": 0.24601894362410556, + "grad_norm": 757.4229125976562, + "learning_rate": 9.412086645839177e-06, + "loss": 88.9742, + "step": 29740 + }, + { + "epoch": 0.2461016668734748, + "grad_norm": 1002.5419311523438, + "learning_rate": 9.411422938469683e-06, + "loss": 137.723, + "step": 29750 + }, + { + "epoch": 0.24618439012284402, + "grad_norm": 636.2584228515625, + "learning_rate": 9.41075888010351e-06, + "loss": 108.3714, + "step": 29760 + }, + { + "epoch": 0.24626711337221327, + "grad_norm": 737.2359008789062, + "learning_rate": 9.410094470793497e-06, + "loss": 135.6444, + "step": 29770 + }, + { + "epoch": 0.2463498366215825, + "grad_norm": 1887.1973876953125, + "learning_rate": 9.409429710592505e-06, + "loss": 126.0426, + "step": 29780 + }, + { + "epoch": 0.24643255987095172, + "grad_norm": 1311.354248046875, + "learning_rate": 9.408764599553429e-06, + "loss": 156.2838, + "step": 29790 + }, + { + "epoch": 0.24651528312032098, + "grad_norm": 1149.238525390625, + "learning_rate": 9.408099137729188e-06, + "loss": 130.7976, + "step": 29800 + }, + { + "epoch": 0.2465980063696902, + "grad_norm": 1126.7828369140625, + "learning_rate": 9.407433325172727e-06, + "loss": 153.4184, + "step": 29810 + }, + { + "epoch": 0.24668072961905943, + "grad_norm": 1250.0152587890625, + "learning_rate": 9.406767161937025e-06, + "loss": 142.3581, + "step": 29820 + }, + { + "epoch": 0.24676345286842868, + "grad_norm": 1395.9898681640625, + "learning_rate": 9.406100648075084e-06, + "loss": 
122.4098, + "step": 29830 + }, + { + "epoch": 0.2468461761177979, + "grad_norm": 1577.0543212890625, + "learning_rate": 9.405433783639936e-06, + "loss": 112.6034, + "step": 29840 + }, + { + "epoch": 0.24692889936716714, + "grad_norm": 1184.2935791015625, + "learning_rate": 9.40476656868464e-06, + "loss": 148.0747, + "step": 29850 + }, + { + "epoch": 0.24701162261653636, + "grad_norm": 805.5813598632812, + "learning_rate": 9.404099003262282e-06, + "loss": 155.5525, + "step": 29860 + }, + { + "epoch": 0.24709434586590562, + "grad_norm": 1295.2099609375, + "learning_rate": 9.40343108742598e-06, + "loss": 149.4768, + "step": 29870 + }, + { + "epoch": 0.24717706911527484, + "grad_norm": 995.8720092773438, + "learning_rate": 9.402762821228875e-06, + "loss": 140.2816, + "step": 29880 + }, + { + "epoch": 0.24725979236464407, + "grad_norm": 866.3977661132812, + "learning_rate": 9.402094204724138e-06, + "loss": 129.4959, + "step": 29890 + }, + { + "epoch": 0.24734251561401333, + "grad_norm": 1852.4481201171875, + "learning_rate": 9.401425237964966e-06, + "loss": 102.9619, + "step": 29900 + }, + { + "epoch": 0.24742523886338255, + "grad_norm": 743.556640625, + "learning_rate": 9.400755921004592e-06, + "loss": 85.2109, + "step": 29910 + }, + { + "epoch": 0.24750796211275178, + "grad_norm": 809.3905639648438, + "learning_rate": 9.400086253896264e-06, + "loss": 106.4736, + "step": 29920 + }, + { + "epoch": 0.24759068536212103, + "grad_norm": 783.1993408203125, + "learning_rate": 9.399416236693264e-06, + "loss": 125.8943, + "step": 29930 + }, + { + "epoch": 0.24767340861149026, + "grad_norm": 772.568115234375, + "learning_rate": 9.398745869448909e-06, + "loss": 123.8559, + "step": 29940 + }, + { + "epoch": 0.24775613186085949, + "grad_norm": 1002.6859130859375, + "learning_rate": 9.39807515221653e-06, + "loss": 101.8871, + "step": 29950 + }, + { + "epoch": 0.24783885511022874, + "grad_norm": 1258.36572265625, + "learning_rate": 9.397404085049496e-06, + "loss": 98.4138, + "step": 29960 + }, + { + "epoch": 0.24792157835959797, + "grad_norm": 1145.0703125, + "learning_rate": 9.3967326680012e-06, + "loss": 118.3927, + "step": 29970 + }, + { + "epoch": 0.2480043016089672, + "grad_norm": 1031.3804931640625, + "learning_rate": 9.396060901125064e-06, + "loss": 105.3649, + "step": 29980 + }, + { + "epoch": 0.24808702485833645, + "grad_norm": 2366.289794921875, + "learning_rate": 9.395388784474538e-06, + "loss": 168.2479, + "step": 29990 + }, + { + "epoch": 0.24816974810770567, + "grad_norm": 369.7084045410156, + "learning_rate": 9.394716318103098e-06, + "loss": 121.3149, + "step": 30000 + }, + { + "epoch": 0.2482524713570749, + "grad_norm": 1409.655029296875, + "learning_rate": 9.394043502064249e-06, + "loss": 105.2097, + "step": 30010 + }, + { + "epoch": 0.24833519460644415, + "grad_norm": 1403.9825439453125, + "learning_rate": 9.393370336411527e-06, + "loss": 147.3934, + "step": 30020 + }, + { + "epoch": 0.24841791785581338, + "grad_norm": 929.0065307617188, + "learning_rate": 9.392696821198488e-06, + "loss": 124.7842, + "step": 30030 + }, + { + "epoch": 0.2485006411051826, + "grad_norm": 828.625244140625, + "learning_rate": 9.392022956478724e-06, + "loss": 112.9368, + "step": 30040 + }, + { + "epoch": 0.24858336435455183, + "grad_norm": 3000.644287109375, + "learning_rate": 9.391348742305849e-06, + "loss": 148.1125, + "step": 30050 + }, + { + "epoch": 0.2486660876039211, + "grad_norm": 908.979248046875, + "learning_rate": 9.390674178733508e-06, + "loss": 109.6535, + "step": 30060 + }, + { + "epoch": 
0.24874881085329031, + "grad_norm": 1394.7421875, + "learning_rate": 9.389999265815373e-06, + "loss": 112.9092, + "step": 30070 + }, + { + "epoch": 0.24883153410265954, + "grad_norm": 1016.4574584960938, + "learning_rate": 9.389324003605144e-06, + "loss": 168.127, + "step": 30080 + }, + { + "epoch": 0.2489142573520288, + "grad_norm": 1174.79443359375, + "learning_rate": 9.388648392156547e-06, + "loss": 112.0588, + "step": 30090 + }, + { + "epoch": 0.24899698060139802, + "grad_norm": 2049.481689453125, + "learning_rate": 9.387972431523341e-06, + "loss": 127.4066, + "step": 30100 + }, + { + "epoch": 0.24907970385076725, + "grad_norm": 712.5939331054688, + "learning_rate": 9.387296121759305e-06, + "loss": 98.8517, + "step": 30110 + }, + { + "epoch": 0.2491624271001365, + "grad_norm": 872.814208984375, + "learning_rate": 9.386619462918254e-06, + "loss": 100.3602, + "step": 30120 + }, + { + "epoch": 0.24924515034950573, + "grad_norm": 649.6997680664062, + "learning_rate": 9.385942455054022e-06, + "loss": 119.4873, + "step": 30130 + }, + { + "epoch": 0.24932787359887496, + "grad_norm": 655.9243774414062, + "learning_rate": 9.385265098220478e-06, + "loss": 124.5341, + "step": 30140 + }, + { + "epoch": 0.2494105968482442, + "grad_norm": 822.2200927734375, + "learning_rate": 9.384587392471516e-06, + "loss": 162.9077, + "step": 30150 + }, + { + "epoch": 0.24949332009761344, + "grad_norm": 940.898193359375, + "learning_rate": 9.383909337861058e-06, + "loss": 118.583, + "step": 30160 + }, + { + "epoch": 0.24957604334698266, + "grad_norm": 1128.41943359375, + "learning_rate": 9.383230934443053e-06, + "loss": 136.6669, + "step": 30170 + }, + { + "epoch": 0.24965876659635192, + "grad_norm": 631.8690795898438, + "learning_rate": 9.382552182271478e-06, + "loss": 97.5566, + "step": 30180 + }, + { + "epoch": 0.24974148984572114, + "grad_norm": 1021.1989135742188, + "learning_rate": 9.38187308140034e-06, + "loss": 146.495, + "step": 30190 + }, + { + "epoch": 0.24982421309509037, + "grad_norm": 1181.3828125, + "learning_rate": 9.381193631883672e-06, + "loss": 150.6252, + "step": 30200 + }, + { + "epoch": 0.24990693634445962, + "grad_norm": 814.0835571289062, + "learning_rate": 9.380513833775531e-06, + "loss": 114.7124, + "step": 30210 + }, + { + "epoch": 0.24998965959382885, + "grad_norm": 1297.4193115234375, + "learning_rate": 9.37983368713001e-06, + "loss": 97.1973, + "step": 30220 + }, + { + "epoch": 0.2500723828431981, + "grad_norm": 800.78564453125, + "learning_rate": 9.379153192001223e-06, + "loss": 98.411, + "step": 30230 + }, + { + "epoch": 0.2501551060925673, + "grad_norm": 1123.505859375, + "learning_rate": 9.378472348443315e-06, + "loss": 119.3296, + "step": 30240 + }, + { + "epoch": 0.25023782934193656, + "grad_norm": 888.609375, + "learning_rate": 9.377791156510456e-06, + "loss": 74.0182, + "step": 30250 + }, + { + "epoch": 0.2503205525913058, + "grad_norm": 713.3021240234375, + "learning_rate": 9.377109616256846e-06, + "loss": 147.7178, + "step": 30260 + }, + { + "epoch": 0.250403275840675, + "grad_norm": 1878.27880859375, + "learning_rate": 9.37642772773671e-06, + "loss": 154.91, + "step": 30270 + }, + { + "epoch": 0.25048599909004426, + "grad_norm": 625.2662353515625, + "learning_rate": 9.375745491004307e-06, + "loss": 90.2972, + "step": 30280 + }, + { + "epoch": 0.2505687223394135, + "grad_norm": 621.6907958984375, + "learning_rate": 9.375062906113916e-06, + "loss": 126.9956, + "step": 30290 + }, + { + "epoch": 0.2506514455887827, + "grad_norm": 1250.3077392578125, + "learning_rate": 
9.37437997311985e-06, + "loss": 107.0872, + "step": 30300 + }, + { + "epoch": 0.25073416883815197, + "grad_norm": 720.9534912109375, + "learning_rate": 9.373696692076446e-06, + "loss": 105.0815, + "step": 30310 + }, + { + "epoch": 0.25081689208752117, + "grad_norm": 684.220703125, + "learning_rate": 9.373013063038066e-06, + "loss": 129.8487, + "step": 30320 + }, + { + "epoch": 0.2508996153368904, + "grad_norm": 1063.6759033203125, + "learning_rate": 9.372329086059108e-06, + "loss": 135.9542, + "step": 30330 + }, + { + "epoch": 0.2509823385862597, + "grad_norm": 1607.3919677734375, + "learning_rate": 9.37164476119399e-06, + "loss": 142.6617, + "step": 30340 + }, + { + "epoch": 0.2510650618356289, + "grad_norm": 542.721435546875, + "learning_rate": 9.370960088497162e-06, + "loss": 106.839, + "step": 30350 + }, + { + "epoch": 0.25114778508499813, + "grad_norm": 549.24560546875, + "learning_rate": 9.370275068023097e-06, + "loss": 129.0447, + "step": 30360 + }, + { + "epoch": 0.2512305083343674, + "grad_norm": 960.9791259765625, + "learning_rate": 9.369589699826306e-06, + "loss": 140.4398, + "step": 30370 + }, + { + "epoch": 0.2513132315837366, + "grad_norm": 1057.4302978515625, + "learning_rate": 9.368903983961315e-06, + "loss": 138.126, + "step": 30380 + }, + { + "epoch": 0.25139595483310584, + "grad_norm": 1365.5726318359375, + "learning_rate": 9.368217920482684e-06, + "loss": 139.174, + "step": 30390 + }, + { + "epoch": 0.2514786780824751, + "grad_norm": 887.7735595703125, + "learning_rate": 9.367531509445001e-06, + "loss": 129.102, + "step": 30400 + }, + { + "epoch": 0.2515614013318443, + "grad_norm": 1251.561767578125, + "learning_rate": 9.366844750902878e-06, + "loss": 121.9665, + "step": 30410 + }, + { + "epoch": 0.25164412458121355, + "grad_norm": 881.740234375, + "learning_rate": 9.36615764491096e-06, + "loss": 82.5869, + "step": 30420 + }, + { + "epoch": 0.2517268478305828, + "grad_norm": 821.780029296875, + "learning_rate": 9.365470191523917e-06, + "loss": 146.2663, + "step": 30430 + }, + { + "epoch": 0.251809571079952, + "grad_norm": 626.3407592773438, + "learning_rate": 9.364782390796446e-06, + "loss": 86.4238, + "step": 30440 + }, + { + "epoch": 0.25189229432932125, + "grad_norm": 1124.6002197265625, + "learning_rate": 9.364094242783272e-06, + "loss": 146.8187, + "step": 30450 + }, + { + "epoch": 0.2519750175786905, + "grad_norm": 631.0712890625, + "learning_rate": 9.363405747539147e-06, + "loss": 98.5037, + "step": 30460 + }, + { + "epoch": 0.2520577408280597, + "grad_norm": 949.3443603515625, + "learning_rate": 9.362716905118851e-06, + "loss": 139.6968, + "step": 30470 + }, + { + "epoch": 0.25214046407742896, + "grad_norm": 513.1497802734375, + "learning_rate": 9.362027715577195e-06, + "loss": 118.3806, + "step": 30480 + }, + { + "epoch": 0.2522231873267982, + "grad_norm": 1057.8067626953125, + "learning_rate": 9.361338178969012e-06, + "loss": 108.9348, + "step": 30490 + }, + { + "epoch": 0.2523059105761674, + "grad_norm": 903.6969604492188, + "learning_rate": 9.360648295349165e-06, + "loss": 105.4085, + "step": 30500 + }, + { + "epoch": 0.25238863382553667, + "grad_norm": 2535.44189453125, + "learning_rate": 9.359958064772547e-06, + "loss": 161.6714, + "step": 30510 + }, + { + "epoch": 0.2524713570749059, + "grad_norm": 1677.7100830078125, + "learning_rate": 9.359267487294075e-06, + "loss": 128.2102, + "step": 30520 + }, + { + "epoch": 0.2525540803242751, + "grad_norm": 1912.9716796875, + "learning_rate": 9.358576562968695e-06, + "loss": 118.4899, + "step": 30530 + }, + { + 
"epoch": 0.2526368035736444, + "grad_norm": 763.476318359375, + "learning_rate": 9.357885291851382e-06, + "loss": 124.9722, + "step": 30540 + }, + { + "epoch": 0.25271952682301363, + "grad_norm": 1949.4473876953125, + "learning_rate": 9.357193673997133e-06, + "loss": 104.1943, + "step": 30550 + }, + { + "epoch": 0.25280225007238283, + "grad_norm": 1901.4537353515625, + "learning_rate": 9.356501709460984e-06, + "loss": 108.5047, + "step": 30560 + }, + { + "epoch": 0.2528849733217521, + "grad_norm": 773.7173461914062, + "learning_rate": 9.355809398297986e-06, + "loss": 95.4959, + "step": 30570 + }, + { + "epoch": 0.25296769657112134, + "grad_norm": 1136.826171875, + "learning_rate": 9.355116740563225e-06, + "loss": 136.653, + "step": 30580 + }, + { + "epoch": 0.25305041982049054, + "grad_norm": 826.9710693359375, + "learning_rate": 9.354423736311813e-06, + "loss": 119.1377, + "step": 30590 + }, + { + "epoch": 0.2531331430698598, + "grad_norm": 1087.69677734375, + "learning_rate": 9.353730385598887e-06, + "loss": 101.276, + "step": 30600 + }, + { + "epoch": 0.25321586631922904, + "grad_norm": 567.2242431640625, + "learning_rate": 9.353036688479615e-06, + "loss": 116.7849, + "step": 30610 + }, + { + "epoch": 0.25329858956859824, + "grad_norm": 1647.7808837890625, + "learning_rate": 9.352342645009193e-06, + "loss": 142.3532, + "step": 30620 + }, + { + "epoch": 0.2533813128179675, + "grad_norm": 1223.5712890625, + "learning_rate": 9.35164825524284e-06, + "loss": 106.8768, + "step": 30630 + }, + { + "epoch": 0.25346403606733675, + "grad_norm": 1215.446044921875, + "learning_rate": 9.350953519235807e-06, + "loss": 142.7279, + "step": 30640 + }, + { + "epoch": 0.25354675931670595, + "grad_norm": 1093.0865478515625, + "learning_rate": 9.35025843704337e-06, + "loss": 133.1846, + "step": 30650 + }, + { + "epoch": 0.2536294825660752, + "grad_norm": 603.0365600585938, + "learning_rate": 9.349563008720836e-06, + "loss": 143.9578, + "step": 30660 + }, + { + "epoch": 0.25371220581544446, + "grad_norm": 926.9697265625, + "learning_rate": 9.348867234323534e-06, + "loss": 115.379, + "step": 30670 + }, + { + "epoch": 0.25379492906481366, + "grad_norm": 1196.4434814453125, + "learning_rate": 9.348171113906826e-06, + "loss": 128.1764, + "step": 30680 + }, + { + "epoch": 0.2538776523141829, + "grad_norm": 750.9150390625, + "learning_rate": 9.347474647526095e-06, + "loss": 194.924, + "step": 30690 + }, + { + "epoch": 0.2539603755635521, + "grad_norm": 1341.235595703125, + "learning_rate": 9.34677783523676e-06, + "loss": 137.8295, + "step": 30700 + }, + { + "epoch": 0.25404309881292136, + "grad_norm": 1126.3736572265625, + "learning_rate": 9.346080677094262e-06, + "loss": 132.7227, + "step": 30710 + }, + { + "epoch": 0.2541258220622906, + "grad_norm": 824.2003784179688, + "learning_rate": 9.345383173154072e-06, + "loss": 133.4808, + "step": 30720 + }, + { + "epoch": 0.2542085453116598, + "grad_norm": 929.5851440429688, + "learning_rate": 9.344685323471682e-06, + "loss": 109.8865, + "step": 30730 + }, + { + "epoch": 0.25429126856102907, + "grad_norm": 763.9591674804688, + "learning_rate": 9.343987128102624e-06, + "loss": 114.478, + "step": 30740 + }, + { + "epoch": 0.2543739918103983, + "grad_norm": 896.4277954101562, + "learning_rate": 9.343288587102444e-06, + "loss": 139.7716, + "step": 30750 + }, + { + "epoch": 0.2544567150597675, + "grad_norm": 890.5599975585938, + "learning_rate": 9.342589700526725e-06, + "loss": 119.8424, + "step": 30760 + }, + { + "epoch": 0.2545394383091368, + "grad_norm": 954.55322265625, + 
"learning_rate": 9.341890468431072e-06, + "loss": 197.9463, + "step": 30770 + }, + { + "epoch": 0.25462216155850603, + "grad_norm": 962.0372314453125, + "learning_rate": 9.341190890871123e-06, + "loss": 173.233, + "step": 30780 + }, + { + "epoch": 0.25470488480787523, + "grad_norm": 1072.42724609375, + "learning_rate": 9.340490967902535e-06, + "loss": 114.3112, + "step": 30790 + }, + { + "epoch": 0.2547876080572445, + "grad_norm": 927.2454223632812, + "learning_rate": 9.339790699581004e-06, + "loss": 98.9923, + "step": 30800 + }, + { + "epoch": 0.25487033130661374, + "grad_norm": 485.0035400390625, + "learning_rate": 9.339090085962244e-06, + "loss": 109.2545, + "step": 30810 + }, + { + "epoch": 0.25495305455598294, + "grad_norm": 1289.7406005859375, + "learning_rate": 9.338389127101998e-06, + "loss": 137.8362, + "step": 30820 + }, + { + "epoch": 0.2550357778053522, + "grad_norm": 655.8922119140625, + "learning_rate": 9.337687823056041e-06, + "loss": 101.9889, + "step": 30830 + }, + { + "epoch": 0.25511850105472145, + "grad_norm": 1242.1337890625, + "learning_rate": 9.336986173880169e-06, + "loss": 106.3836, + "step": 30840 + }, + { + "epoch": 0.25520122430409065, + "grad_norm": 580.3970947265625, + "learning_rate": 9.336284179630215e-06, + "loss": 94.8493, + "step": 30850 + }, + { + "epoch": 0.2552839475534599, + "grad_norm": 789.4066162109375, + "learning_rate": 9.335581840362026e-06, + "loss": 74.5354, + "step": 30860 + }, + { + "epoch": 0.25536667080282915, + "grad_norm": 1426.3404541015625, + "learning_rate": 9.33487915613149e-06, + "loss": 108.0914, + "step": 30870 + }, + { + "epoch": 0.25544939405219835, + "grad_norm": 826.1908569335938, + "learning_rate": 9.334176126994512e-06, + "loss": 109.946, + "step": 30880 + }, + { + "epoch": 0.2555321173015676, + "grad_norm": 755.2938232421875, + "learning_rate": 9.333472753007031e-06, + "loss": 111.354, + "step": 30890 + }, + { + "epoch": 0.25561484055093686, + "grad_norm": 780.1597290039062, + "learning_rate": 9.332769034225012e-06, + "loss": 142.2512, + "step": 30900 + }, + { + "epoch": 0.25569756380030606, + "grad_norm": 731.4649047851562, + "learning_rate": 9.332064970704445e-06, + "loss": 156.2841, + "step": 30910 + }, + { + "epoch": 0.2557802870496753, + "grad_norm": 1020.5748291015625, + "learning_rate": 9.33136056250135e-06, + "loss": 127.7295, + "step": 30920 + }, + { + "epoch": 0.25586301029904457, + "grad_norm": 698.0821533203125, + "learning_rate": 9.330655809671773e-06, + "loss": 92.2535, + "step": 30930 + }, + { + "epoch": 0.25594573354841377, + "grad_norm": 698.9208374023438, + "learning_rate": 9.32995071227179e-06, + "loss": 124.9385, + "step": 30940 + }, + { + "epoch": 0.256028456797783, + "grad_norm": 1320.0196533203125, + "learning_rate": 9.3292452703575e-06, + "loss": 128.1827, + "step": 30950 + }, + { + "epoch": 0.2561111800471523, + "grad_norm": 2012.84033203125, + "learning_rate": 9.328539483985031e-06, + "loss": 155.1285, + "step": 30960 + }, + { + "epoch": 0.2561939032965215, + "grad_norm": 1151.90625, + "learning_rate": 9.327833353210541e-06, + "loss": 111.3364, + "step": 30970 + }, + { + "epoch": 0.25627662654589073, + "grad_norm": 1791.881103515625, + "learning_rate": 9.327126878090214e-06, + "loss": 118.88, + "step": 30980 + }, + { + "epoch": 0.25635934979526, + "grad_norm": 741.9896850585938, + "learning_rate": 9.32642005868026e-06, + "loss": 122.3429, + "step": 30990 + }, + { + "epoch": 0.2564420730446292, + "grad_norm": 712.4248657226562, + "learning_rate": 9.325712895036916e-06, + "loss": 125.5105, + "step": 
31000 + }, + { + "epoch": 0.25652479629399844, + "grad_norm": 1130.374267578125, + "learning_rate": 9.32500538721645e-06, + "loss": 86.682, + "step": 31010 + }, + { + "epoch": 0.2566075195433677, + "grad_norm": 1284.844970703125, + "learning_rate": 9.324297535275156e-06, + "loss": 114.092, + "step": 31020 + }, + { + "epoch": 0.2566902427927369, + "grad_norm": 911.753173828125, + "learning_rate": 9.323589339269352e-06, + "loss": 106.8176, + "step": 31030 + }, + { + "epoch": 0.25677296604210614, + "grad_norm": 3918.962646484375, + "learning_rate": 9.322880799255385e-06, + "loss": 160.4931, + "step": 31040 + }, + { + "epoch": 0.25685568929147534, + "grad_norm": 835.4883422851562, + "learning_rate": 9.322171915289635e-06, + "loss": 108.218, + "step": 31050 + }, + { + "epoch": 0.2569384125408446, + "grad_norm": 745.3687133789062, + "learning_rate": 9.321462687428499e-06, + "loss": 103.9572, + "step": 31060 + }, + { + "epoch": 0.25702113579021385, + "grad_norm": 2105.262939453125, + "learning_rate": 9.320753115728413e-06, + "loss": 137.9624, + "step": 31070 + }, + { + "epoch": 0.25710385903958305, + "grad_norm": 1638.893798828125, + "learning_rate": 9.320043200245829e-06, + "loss": 76.6734, + "step": 31080 + }, + { + "epoch": 0.2571865822889523, + "grad_norm": 1251.9464111328125, + "learning_rate": 9.319332941037235e-06, + "loss": 128.9104, + "step": 31090 + }, + { + "epoch": 0.25726930553832156, + "grad_norm": 946.0052490234375, + "learning_rate": 9.31862233815914e-06, + "loss": 83.5885, + "step": 31100 + }, + { + "epoch": 0.25735202878769076, + "grad_norm": 613.86181640625, + "learning_rate": 9.317911391668087e-06, + "loss": 88.8766, + "step": 31110 + }, + { + "epoch": 0.25743475203706, + "grad_norm": 980.6512451171875, + "learning_rate": 9.317200101620641e-06, + "loss": 111.9333, + "step": 31120 + }, + { + "epoch": 0.25751747528642926, + "grad_norm": 959.6453857421875, + "learning_rate": 9.316488468073397e-06, + "loss": 87.5497, + "step": 31130 + }, + { + "epoch": 0.25760019853579846, + "grad_norm": 784.59033203125, + "learning_rate": 9.315776491082973e-06, + "loss": 140.7631, + "step": 31140 + }, + { + "epoch": 0.2576829217851677, + "grad_norm": 1294.2275390625, + "learning_rate": 9.315064170706023e-06, + "loss": 114.7354, + "step": 31150 + }, + { + "epoch": 0.25776564503453697, + "grad_norm": 1613.03857421875, + "learning_rate": 9.31435150699922e-06, + "loss": 123.3567, + "step": 31160 + }, + { + "epoch": 0.25784836828390617, + "grad_norm": 1174.7305908203125, + "learning_rate": 9.313638500019267e-06, + "loss": 133.3073, + "step": 31170 + }, + { + "epoch": 0.2579310915332754, + "grad_norm": 587.86572265625, + "learning_rate": 9.312925149822895e-06, + "loss": 90.9177, + "step": 31180 + }, + { + "epoch": 0.2580138147826447, + "grad_norm": 934.0054931640625, + "learning_rate": 9.312211456466862e-06, + "loss": 127.4864, + "step": 31190 + }, + { + "epoch": 0.2580965380320139, + "grad_norm": 712.6873779296875, + "learning_rate": 9.311497420007955e-06, + "loss": 111.8241, + "step": 31200 + }, + { + "epoch": 0.25817926128138313, + "grad_norm": 1259.027587890625, + "learning_rate": 9.310783040502987e-06, + "loss": 120.1594, + "step": 31210 + }, + { + "epoch": 0.2582619845307524, + "grad_norm": 1388.41162109375, + "learning_rate": 9.310068318008794e-06, + "loss": 121.605, + "step": 31220 + }, + { + "epoch": 0.2583447077801216, + "grad_norm": 1037.8282470703125, + "learning_rate": 9.309353252582246e-06, + "loss": 138.8729, + "step": 31230 + }, + { + "epoch": 0.25842743102949084, + "grad_norm": 
943.52490234375, + "learning_rate": 9.308637844280236e-06, + "loss": 132.2363, + "step": 31240 + }, + { + "epoch": 0.2585101542788601, + "grad_norm": 1297.0338134765625, + "learning_rate": 9.307922093159688e-06, + "loss": 113.9879, + "step": 31250 + }, + { + "epoch": 0.2585928775282293, + "grad_norm": 739.4756469726562, + "learning_rate": 9.30720599927755e-06, + "loss": 79.2995, + "step": 31260 + }, + { + "epoch": 0.25867560077759855, + "grad_norm": 1136.6614990234375, + "learning_rate": 9.306489562690797e-06, + "loss": 148.8123, + "step": 31270 + }, + { + "epoch": 0.2587583240269678, + "grad_norm": 1102.057861328125, + "learning_rate": 9.305772783456435e-06, + "loss": 126.115, + "step": 31280 + }, + { + "epoch": 0.258841047276337, + "grad_norm": 1000.919677734375, + "learning_rate": 9.305055661631493e-06, + "loss": 128.0628, + "step": 31290 + }, + { + "epoch": 0.25892377052570625, + "grad_norm": 1486.086669921875, + "learning_rate": 9.304338197273029e-06, + "loss": 141.9742, + "step": 31300 + }, + { + "epoch": 0.2590064937750755, + "grad_norm": 1208.7861328125, + "learning_rate": 9.303620390438128e-06, + "loss": 119.3574, + "step": 31310 + }, + { + "epoch": 0.2590892170244447, + "grad_norm": 1793.4461669921875, + "learning_rate": 9.302902241183905e-06, + "loss": 115.7504, + "step": 31320 + }, + { + "epoch": 0.25917194027381396, + "grad_norm": 1034.2620849609375, + "learning_rate": 9.302183749567498e-06, + "loss": 104.3807, + "step": 31330 + }, + { + "epoch": 0.2592546635231832, + "grad_norm": 1191.4996337890625, + "learning_rate": 9.301464915646074e-06, + "loss": 95.0326, + "step": 31340 + }, + { + "epoch": 0.2593373867725524, + "grad_norm": 663.1774291992188, + "learning_rate": 9.30074573947683e-06, + "loss": 105.2758, + "step": 31350 + }, + { + "epoch": 0.25942011002192167, + "grad_norm": 1111.489501953125, + "learning_rate": 9.30002622111698e-06, + "loss": 120.7122, + "step": 31360 + }, + { + "epoch": 0.2595028332712909, + "grad_norm": 1140.2496337890625, + "learning_rate": 9.299306360623782e-06, + "loss": 111.4868, + "step": 31370 + }, + { + "epoch": 0.2595855565206601, + "grad_norm": 1038.2596435546875, + "learning_rate": 9.298586158054508e-06, + "loss": 119.8149, + "step": 31380 + }, + { + "epoch": 0.2596682797700294, + "grad_norm": 1008.992431640625, + "learning_rate": 9.297865613466459e-06, + "loss": 145.3494, + "step": 31390 + }, + { + "epoch": 0.25975100301939863, + "grad_norm": 1030.713623046875, + "learning_rate": 9.29714472691697e-06, + "loss": 116.2307, + "step": 31400 + }, + { + "epoch": 0.25983372626876783, + "grad_norm": 1515.673095703125, + "learning_rate": 9.296423498463396e-06, + "loss": 125.331, + "step": 31410 + }, + { + "epoch": 0.2599164495181371, + "grad_norm": 1273.73486328125, + "learning_rate": 9.29570192816312e-06, + "loss": 140.6214, + "step": 31420 + }, + { + "epoch": 0.2599991727675063, + "grad_norm": 710.17236328125, + "learning_rate": 9.29498001607356e-06, + "loss": 108.083, + "step": 31430 + }, + { + "epoch": 0.26008189601687554, + "grad_norm": 900.10107421875, + "learning_rate": 9.294257762252148e-06, + "loss": 98.9134, + "step": 31440 + }, + { + "epoch": 0.2601646192662448, + "grad_norm": 875.4248657226562, + "learning_rate": 9.293535166756356e-06, + "loss": 174.0914, + "step": 31450 + }, + { + "epoch": 0.260247342515614, + "grad_norm": 1023.4577026367188, + "learning_rate": 9.292812229643674e-06, + "loss": 96.2018, + "step": 31460 + }, + { + "epoch": 0.26033006576498324, + "grad_norm": 1871.7161865234375, + "learning_rate": 9.292088950971624e-06, + 
"loss": 135.2347, + "step": 31470 + }, + { + "epoch": 0.2604127890143525, + "grad_norm": 1158.376953125, + "learning_rate": 9.291365330797755e-06, + "loss": 131.9809, + "step": 31480 + }, + { + "epoch": 0.2604955122637217, + "grad_norm": 962.1968383789062, + "learning_rate": 9.290641369179643e-06, + "loss": 109.7965, + "step": 31490 + }, + { + "epoch": 0.26057823551309095, + "grad_norm": 4627.09521484375, + "learning_rate": 9.289917066174887e-06, + "loss": 133.032, + "step": 31500 + }, + { + "epoch": 0.2606609587624602, + "grad_norm": 1069.57177734375, + "learning_rate": 9.289192421841116e-06, + "loss": 114.866, + "step": 31510 + }, + { + "epoch": 0.2607436820118294, + "grad_norm": 1533.0517578125, + "learning_rate": 9.288467436235992e-06, + "loss": 135.5069, + "step": 31520 + }, + { + "epoch": 0.26082640526119866, + "grad_norm": 876.2843627929688, + "learning_rate": 9.287742109417194e-06, + "loss": 165.9743, + "step": 31530 + }, + { + "epoch": 0.2609091285105679, + "grad_norm": 849.820556640625, + "learning_rate": 9.287016441442435e-06, + "loss": 113.8865, + "step": 31540 + }, + { + "epoch": 0.2609918517599371, + "grad_norm": 1214.044189453125, + "learning_rate": 9.28629043236945e-06, + "loss": 129.5201, + "step": 31550 + }, + { + "epoch": 0.26107457500930636, + "grad_norm": 974.0680541992188, + "learning_rate": 9.285564082256011e-06, + "loss": 106.2931, + "step": 31560 + }, + { + "epoch": 0.2611572982586756, + "grad_norm": 591.6702270507812, + "learning_rate": 9.284837391159904e-06, + "loss": 77.5611, + "step": 31570 + }, + { + "epoch": 0.2612400215080448, + "grad_norm": 995.6359252929688, + "learning_rate": 9.284110359138951e-06, + "loss": 169.7267, + "step": 31580 + }, + { + "epoch": 0.26132274475741407, + "grad_norm": 993.3253784179688, + "learning_rate": 9.283382986250997e-06, + "loss": 117.8098, + "step": 31590 + }, + { + "epoch": 0.2614054680067833, + "grad_norm": 720.0477905273438, + "learning_rate": 9.282655272553917e-06, + "loss": 164.0745, + "step": 31600 + }, + { + "epoch": 0.2614881912561525, + "grad_norm": 1000.5869750976562, + "learning_rate": 9.281927218105613e-06, + "loss": 103.8817, + "step": 31610 + }, + { + "epoch": 0.2615709145055218, + "grad_norm": 701.7263793945312, + "learning_rate": 9.281198822964011e-06, + "loss": 115.0276, + "step": 31620 + }, + { + "epoch": 0.26165363775489103, + "grad_norm": 1523.617919921875, + "learning_rate": 9.280470087187066e-06, + "loss": 150.8629, + "step": 31630 + }, + { + "epoch": 0.26173636100426023, + "grad_norm": 1352.073486328125, + "learning_rate": 9.279741010832761e-06, + "loss": 111.8819, + "step": 31640 + }, + { + "epoch": 0.2618190842536295, + "grad_norm": 784.3294677734375, + "learning_rate": 9.279011593959107e-06, + "loss": 134.8354, + "step": 31650 + }, + { + "epoch": 0.26190180750299874, + "grad_norm": 518.8139038085938, + "learning_rate": 9.278281836624137e-06, + "loss": 109.1452, + "step": 31660 + }, + { + "epoch": 0.26198453075236794, + "grad_norm": 738.8187255859375, + "learning_rate": 9.277551738885915e-06, + "loss": 137.3162, + "step": 31670 + }, + { + "epoch": 0.2620672540017372, + "grad_norm": 1180.13037109375, + "learning_rate": 9.276821300802535e-06, + "loss": 101.0768, + "step": 31680 + }, + { + "epoch": 0.26214997725110645, + "grad_norm": 1431.0516357421875, + "learning_rate": 9.276090522432109e-06, + "loss": 99.368, + "step": 31690 + }, + { + "epoch": 0.26223270050047565, + "grad_norm": 688.6452026367188, + "learning_rate": 9.275359403832787e-06, + "loss": 123.8677, + "step": 31700 + }, + { + "epoch": 
0.2623154237498449, + "grad_norm": 854.5988159179688, + "learning_rate": 9.274627945062738e-06, + "loss": 111.6149, + "step": 31710 + }, + { + "epoch": 0.26239814699921415, + "grad_norm": 1075.212158203125, + "learning_rate": 9.27389614618016e-06, + "loss": 138.4687, + "step": 31720 + }, + { + "epoch": 0.26248087024858335, + "grad_norm": 573.2298583984375, + "learning_rate": 9.273164007243281e-06, + "loss": 108.3331, + "step": 31730 + }, + { + "epoch": 0.2625635934979526, + "grad_norm": 990.3570556640625, + "learning_rate": 9.272431528310354e-06, + "loss": 112.5479, + "step": 31740 + }, + { + "epoch": 0.26264631674732186, + "grad_norm": 1721.805419921875, + "learning_rate": 9.271698709439658e-06, + "loss": 140.3488, + "step": 31750 + }, + { + "epoch": 0.26272903999669106, + "grad_norm": 1286.5728759765625, + "learning_rate": 9.2709655506895e-06, + "loss": 168.1867, + "step": 31760 + }, + { + "epoch": 0.2628117632460603, + "grad_norm": 582.025146484375, + "learning_rate": 9.270232052118214e-06, + "loss": 119.0196, + "step": 31770 + }, + { + "epoch": 0.2628944864954295, + "grad_norm": 1799.970703125, + "learning_rate": 9.26949821378416e-06, + "loss": 130.6446, + "step": 31780 + }, + { + "epoch": 0.26297720974479877, + "grad_norm": 810.4905395507812, + "learning_rate": 9.268764035745727e-06, + "loss": 123.3437, + "step": 31790 + }, + { + "epoch": 0.263059932994168, + "grad_norm": 1139.095947265625, + "learning_rate": 9.268029518061335e-06, + "loss": 138.0163, + "step": 31800 + }, + { + "epoch": 0.2631426562435372, + "grad_norm": 728.7420654296875, + "learning_rate": 9.267294660789417e-06, + "loss": 118.9001, + "step": 31810 + }, + { + "epoch": 0.2632253794929065, + "grad_norm": 937.4639282226562, + "learning_rate": 9.26655946398845e-06, + "loss": 101.5111, + "step": 31820 + }, + { + "epoch": 0.26330810274227573, + "grad_norm": 951.8093872070312, + "learning_rate": 9.265823927716927e-06, + "loss": 114.6193, + "step": 31830 + }, + { + "epoch": 0.26339082599164493, + "grad_norm": 1131.0379638671875, + "learning_rate": 9.26508805203337e-06, + "loss": 164.4967, + "step": 31840 + }, + { + "epoch": 0.2634735492410142, + "grad_norm": 916.825439453125, + "learning_rate": 9.264351836996332e-06, + "loss": 99.9893, + "step": 31850 + }, + { + "epoch": 0.26355627249038344, + "grad_norm": 781.8618774414062, + "learning_rate": 9.26361528266439e-06, + "loss": 147.4806, + "step": 31860 + }, + { + "epoch": 0.26363899573975264, + "grad_norm": 1081.302001953125, + "learning_rate": 9.262878389096147e-06, + "loss": 107.9612, + "step": 31870 + }, + { + "epoch": 0.2637217189891219, + "grad_norm": 1674.4130859375, + "learning_rate": 9.262141156350233e-06, + "loss": 120.8496, + "step": 31880 + }, + { + "epoch": 0.26380444223849114, + "grad_norm": 599.2373046875, + "learning_rate": 9.261403584485308e-06, + "loss": 130.4039, + "step": 31890 + }, + { + "epoch": 0.26388716548786034, + "grad_norm": 931.727783203125, + "learning_rate": 9.260665673560058e-06, + "loss": 94.2291, + "step": 31900 + }, + { + "epoch": 0.2639698887372296, + "grad_norm": 695.3705444335938, + "learning_rate": 9.259927423633193e-06, + "loss": 173.953, + "step": 31910 + }, + { + "epoch": 0.26405261198659885, + "grad_norm": 643.5379638671875, + "learning_rate": 9.259188834763455e-06, + "loss": 91.6798, + "step": 31920 + }, + { + "epoch": 0.26413533523596805, + "grad_norm": 1540.7181396484375, + "learning_rate": 9.258449907009607e-06, + "loss": 126.6724, + "step": 31930 + }, + { + "epoch": 0.2642180584853373, + "grad_norm": 1498.8092041015625, + 
"learning_rate": 9.257710640430444e-06, + "loss": 110.8607, + "step": 31940 + }, + { + "epoch": 0.26430078173470656, + "grad_norm": 626.6985473632812, + "learning_rate": 9.256971035084786e-06, + "loss": 85.7513, + "step": 31950 + }, + { + "epoch": 0.26438350498407576, + "grad_norm": 1363.011962890625, + "learning_rate": 9.256231091031477e-06, + "loss": 101.1794, + "step": 31960 + }, + { + "epoch": 0.264466228233445, + "grad_norm": 1134.742919921875, + "learning_rate": 9.255490808329397e-06, + "loss": 212.7933, + "step": 31970 + }, + { + "epoch": 0.26454895148281427, + "grad_norm": 1176.7347412109375, + "learning_rate": 9.254750187037443e-06, + "loss": 122.8415, + "step": 31980 + }, + { + "epoch": 0.26463167473218346, + "grad_norm": 843.20458984375, + "learning_rate": 9.254009227214543e-06, + "loss": 115.774, + "step": 31990 + }, + { + "epoch": 0.2647143979815527, + "grad_norm": 990.8587646484375, + "learning_rate": 9.253267928919652e-06, + "loss": 141.8495, + "step": 32000 + }, + { + "epoch": 0.264797121230922, + "grad_norm": 1063.32763671875, + "learning_rate": 9.25252629221175e-06, + "loss": 118.555, + "step": 32010 + }, + { + "epoch": 0.26487984448029117, + "grad_norm": 1893.1072998046875, + "learning_rate": 9.251784317149848e-06, + "loss": 122.1342, + "step": 32020 + }, + { + "epoch": 0.2649625677296604, + "grad_norm": 1173.83349609375, + "learning_rate": 9.251042003792983e-06, + "loss": 161.0942, + "step": 32030 + }, + { + "epoch": 0.2650452909790297, + "grad_norm": 927.9155883789062, + "learning_rate": 9.250299352200214e-06, + "loss": 78.0564, + "step": 32040 + }, + { + "epoch": 0.2651280142283989, + "grad_norm": 1478.896484375, + "learning_rate": 9.249556362430631e-06, + "loss": 129.6906, + "step": 32050 + }, + { + "epoch": 0.26521073747776813, + "grad_norm": 772.536865234375, + "learning_rate": 9.248813034543353e-06, + "loss": 102.5596, + "step": 32060 + }, + { + "epoch": 0.2652934607271374, + "grad_norm": 728.9833984375, + "learning_rate": 9.24806936859752e-06, + "loss": 111.6626, + "step": 32070 + }, + { + "epoch": 0.2653761839765066, + "grad_norm": 1042.043701171875, + "learning_rate": 9.247325364652304e-06, + "loss": 132.3886, + "step": 32080 + }, + { + "epoch": 0.26545890722587584, + "grad_norm": 490.30419921875, + "learning_rate": 9.2465810227669e-06, + "loss": 128.4408, + "step": 32090 + }, + { + "epoch": 0.2655416304752451, + "grad_norm": 1035.492919921875, + "learning_rate": 9.245836343000534e-06, + "loss": 102.5217, + "step": 32100 + }, + { + "epoch": 0.2656243537246143, + "grad_norm": 1005.1189575195312, + "learning_rate": 9.245091325412456e-06, + "loss": 112.0046, + "step": 32110 + }, + { + "epoch": 0.26570707697398355, + "grad_norm": 864.4721069335938, + "learning_rate": 9.244345970061944e-06, + "loss": 109.3595, + "step": 32120 + }, + { + "epoch": 0.26578980022335275, + "grad_norm": 803.18896484375, + "learning_rate": 9.243600277008301e-06, + "loss": 123.4932, + "step": 32130 + }, + { + "epoch": 0.265872523472722, + "grad_norm": 1339.6492919921875, + "learning_rate": 9.24285424631086e-06, + "loss": 131.0302, + "step": 32140 + }, + { + "epoch": 0.26595524672209125, + "grad_norm": 331.6441650390625, + "learning_rate": 9.242107878028978e-06, + "loss": 84.0776, + "step": 32150 + }, + { + "epoch": 0.26603796997146045, + "grad_norm": 1174.8720703125, + "learning_rate": 9.241361172222043e-06, + "loss": 144.0584, + "step": 32160 + }, + { + "epoch": 0.2661206932208297, + "grad_norm": 955.9444580078125, + "learning_rate": 9.240614128949463e-06, + "loss": 79.2642, + "step": 
32170 + }, + { + "epoch": 0.26620341647019896, + "grad_norm": 408.5090637207031, + "learning_rate": 9.239866748270679e-06, + "loss": 80.5909, + "step": 32180 + }, + { + "epoch": 0.26628613971956816, + "grad_norm": 1054.4498291015625, + "learning_rate": 9.239119030245156e-06, + "loss": 105.1165, + "step": 32190 + }, + { + "epoch": 0.2663688629689374, + "grad_norm": 1652.845703125, + "learning_rate": 9.238370974932387e-06, + "loss": 144.1475, + "step": 32200 + }, + { + "epoch": 0.26645158621830667, + "grad_norm": 897.6002807617188, + "learning_rate": 9.23762258239189e-06, + "loss": 85.8102, + "step": 32210 + }, + { + "epoch": 0.26653430946767587, + "grad_norm": 848.9139404296875, + "learning_rate": 9.236873852683213e-06, + "loss": 107.4219, + "step": 32220 + }, + { + "epoch": 0.2666170327170451, + "grad_norm": 900.8207397460938, + "learning_rate": 9.23612478586593e-06, + "loss": 77.9687, + "step": 32230 + }, + { + "epoch": 0.2666997559664144, + "grad_norm": 1326.0458984375, + "learning_rate": 9.235375381999636e-06, + "loss": 106.4991, + "step": 32240 + }, + { + "epoch": 0.2667824792157836, + "grad_norm": 2422.508056640625, + "learning_rate": 9.234625641143962e-06, + "loss": 138.341, + "step": 32250 + }, + { + "epoch": 0.26686520246515283, + "grad_norm": 964.7281494140625, + "learning_rate": 9.233875563358559e-06, + "loss": 122.4212, + "step": 32260 + }, + { + "epoch": 0.2669479257145221, + "grad_norm": 1445.0108642578125, + "learning_rate": 9.23312514870311e-06, + "loss": 107.632, + "step": 32270 + }, + { + "epoch": 0.2670306489638913, + "grad_norm": 628.1731567382812, + "learning_rate": 9.232374397237318e-06, + "loss": 118.4748, + "step": 32280 + }, + { + "epoch": 0.26711337221326054, + "grad_norm": 1020.5704345703125, + "learning_rate": 9.231623309020922e-06, + "loss": 132.8099, + "step": 32290 + }, + { + "epoch": 0.2671960954626298, + "grad_norm": 949.1611938476562, + "learning_rate": 9.230871884113679e-06, + "loss": 128.9596, + "step": 32300 + }, + { + "epoch": 0.267278818711999, + "grad_norm": 853.702880859375, + "learning_rate": 9.230120122575376e-06, + "loss": 117.7804, + "step": 32310 + }, + { + "epoch": 0.26736154196136824, + "grad_norm": 856.49462890625, + "learning_rate": 9.22936802446583e-06, + "loss": 179.9062, + "step": 32320 + }, + { + "epoch": 0.2674442652107375, + "grad_norm": 847.5012817382812, + "learning_rate": 9.228615589844879e-06, + "loss": 84.1749, + "step": 32330 + }, + { + "epoch": 0.2675269884601067, + "grad_norm": 1034.2694091796875, + "learning_rate": 9.227862818772392e-06, + "loss": 131.7186, + "step": 32340 + }, + { + "epoch": 0.26760971170947595, + "grad_norm": 1057.1470947265625, + "learning_rate": 9.227109711308265e-06, + "loss": 94.973, + "step": 32350 + }, + { + "epoch": 0.2676924349588452, + "grad_norm": 374.79473876953125, + "learning_rate": 9.226356267512417e-06, + "loss": 107.7693, + "step": 32360 + }, + { + "epoch": 0.2677751582082144, + "grad_norm": 817.5911865234375, + "learning_rate": 9.225602487444799e-06, + "loss": 107.2883, + "step": 32370 + }, + { + "epoch": 0.26785788145758366, + "grad_norm": 1157.15234375, + "learning_rate": 9.224848371165382e-06, + "loss": 170.9429, + "step": 32380 + }, + { + "epoch": 0.2679406047069529, + "grad_norm": 1658.9010009765625, + "learning_rate": 9.224093918734172e-06, + "loss": 202.5666, + "step": 32390 + }, + { + "epoch": 0.2680233279563221, + "grad_norm": 1403.2574462890625, + "learning_rate": 9.223339130211194e-06, + "loss": 113.1494, + "step": 32400 + }, + { + "epoch": 0.26810605120569136, + "grad_norm": 
939.0480346679688, + "learning_rate": 9.222584005656501e-06, + "loss": 114.9759, + "step": 32410 + }, + { + "epoch": 0.2681887744550606, + "grad_norm": 578.2918701171875, + "learning_rate": 9.22182854513018e-06, + "loss": 94.9374, + "step": 32420 + }, + { + "epoch": 0.2682714977044298, + "grad_norm": 2604.21484375, + "learning_rate": 9.221072748692336e-06, + "loss": 109.7514, + "step": 32430 + }, + { + "epoch": 0.26835422095379907, + "grad_norm": 962.8878173828125, + "learning_rate": 9.220316616403109e-06, + "loss": 104.4484, + "step": 32440 + }, + { + "epoch": 0.2684369442031683, + "grad_norm": 759.3588256835938, + "learning_rate": 9.219560148322655e-06, + "loss": 101.3538, + "step": 32450 + }, + { + "epoch": 0.2685196674525375, + "grad_norm": 1004.7863159179688, + "learning_rate": 9.218803344511165e-06, + "loss": 134.46, + "step": 32460 + }, + { + "epoch": 0.2686023907019068, + "grad_norm": 1546.0279541015625, + "learning_rate": 9.218046205028854e-06, + "loss": 102.795, + "step": 32470 + }, + { + "epoch": 0.26868511395127603, + "grad_norm": 422.1183776855469, + "learning_rate": 9.217288729935966e-06, + "loss": 100.5324, + "step": 32480 + }, + { + "epoch": 0.26876783720064523, + "grad_norm": 953.4854736328125, + "learning_rate": 9.216530919292768e-06, + "loss": 147.8428, + "step": 32490 + }, + { + "epoch": 0.2688505604500145, + "grad_norm": 910.6680297851562, + "learning_rate": 9.215772773159556e-06, + "loss": 138.0076, + "step": 32500 + }, + { + "epoch": 0.2689332836993837, + "grad_norm": 1041.43505859375, + "learning_rate": 9.215014291596653e-06, + "loss": 120.8348, + "step": 32510 + }, + { + "epoch": 0.26901600694875294, + "grad_norm": 1151.9285888671875, + "learning_rate": 9.214255474664405e-06, + "loss": 150.9121, + "step": 32520 + }, + { + "epoch": 0.2690987301981222, + "grad_norm": 921.2622680664062, + "learning_rate": 9.213496322423193e-06, + "loss": 83.8476, + "step": 32530 + }, + { + "epoch": 0.2691814534474914, + "grad_norm": 411.46826171875, + "learning_rate": 9.212736834933413e-06, + "loss": 129.1243, + "step": 32540 + }, + { + "epoch": 0.26926417669686065, + "grad_norm": 3256.450927734375, + "learning_rate": 9.211977012255497e-06, + "loss": 87.7072, + "step": 32550 + }, + { + "epoch": 0.2693468999462299, + "grad_norm": 1524.965087890625, + "learning_rate": 9.211216854449903e-06, + "loss": 128.8632, + "step": 32560 + }, + { + "epoch": 0.2694296231955991, + "grad_norm": 953.8908081054688, + "learning_rate": 9.210456361577109e-06, + "loss": 113.7588, + "step": 32570 + }, + { + "epoch": 0.26951234644496835, + "grad_norm": 1008.9974975585938, + "learning_rate": 9.209695533697624e-06, + "loss": 122.1501, + "step": 32580 + }, + { + "epoch": 0.2695950696943376, + "grad_norm": 805.004150390625, + "learning_rate": 9.208934370871989e-06, + "loss": 113.7434, + "step": 32590 + }, + { + "epoch": 0.2696777929437068, + "grad_norm": 636.7761840820312, + "learning_rate": 9.20817287316076e-06, + "loss": 113.1953, + "step": 32600 + }, + { + "epoch": 0.26976051619307606, + "grad_norm": 763.083984375, + "learning_rate": 9.20741104062453e-06, + "loss": 101.9306, + "step": 32610 + }, + { + "epoch": 0.2698432394424453, + "grad_norm": 1067.39208984375, + "learning_rate": 9.206648873323912e-06, + "loss": 104.6595, + "step": 32620 + }, + { + "epoch": 0.2699259626918145, + "grad_norm": 1282.15576171875, + "learning_rate": 9.205886371319548e-06, + "loss": 112.2921, + "step": 32630 + }, + { + "epoch": 0.27000868594118377, + "grad_norm": 1269.3165283203125, + "learning_rate": 9.20512353467211e-06, + 
"loss": 101.4175, + "step": 32640 + }, + { + "epoch": 0.270091409190553, + "grad_norm": 1416.7222900390625, + "learning_rate": 9.204360363442288e-06, + "loss": 94.3014, + "step": 32650 + }, + { + "epoch": 0.2701741324399222, + "grad_norm": 577.2730712890625, + "learning_rate": 9.20359685769081e-06, + "loss": 178.5912, + "step": 32660 + }, + { + "epoch": 0.2702568556892915, + "grad_norm": 976.02880859375, + "learning_rate": 9.202833017478421e-06, + "loss": 142.0586, + "step": 32670 + }, + { + "epoch": 0.27033957893866073, + "grad_norm": 1126.535888671875, + "learning_rate": 9.2020688428659e-06, + "loss": 100.6238, + "step": 32680 + }, + { + "epoch": 0.27042230218802993, + "grad_norm": 945.1123657226562, + "learning_rate": 9.201304333914042e-06, + "loss": 124.4394, + "step": 32690 + }, + { + "epoch": 0.2705050254373992, + "grad_norm": 600.9089965820312, + "learning_rate": 9.200539490683682e-06, + "loss": 93.1028, + "step": 32700 + }, + { + "epoch": 0.27058774868676844, + "grad_norm": 704.4324951171875, + "learning_rate": 9.19977431323567e-06, + "loss": 124.6708, + "step": 32710 + }, + { + "epoch": 0.27067047193613764, + "grad_norm": 938.8729858398438, + "learning_rate": 9.199008801630893e-06, + "loss": 119.1146, + "step": 32720 + }, + { + "epoch": 0.2707531951855069, + "grad_norm": 1016.2319946289062, + "learning_rate": 9.198242955930257e-06, + "loss": 126.7218, + "step": 32730 + }, + { + "epoch": 0.27083591843487614, + "grad_norm": 832.0853881835938, + "learning_rate": 9.197476776194693e-06, + "loss": 102.6724, + "step": 32740 + }, + { + "epoch": 0.27091864168424534, + "grad_norm": 1260.37548828125, + "learning_rate": 9.196710262485168e-06, + "loss": 107.5099, + "step": 32750 + }, + { + "epoch": 0.2710013649336146, + "grad_norm": 840.14990234375, + "learning_rate": 9.195943414862667e-06, + "loss": 124.9764, + "step": 32760 + }, + { + "epoch": 0.27108408818298385, + "grad_norm": 486.6063537597656, + "learning_rate": 9.195176233388206e-06, + "loss": 92.0499, + "step": 32770 + }, + { + "epoch": 0.27116681143235305, + "grad_norm": 907.8724975585938, + "learning_rate": 9.194408718122825e-06, + "loss": 120.6719, + "step": 32780 + }, + { + "epoch": 0.2712495346817223, + "grad_norm": 572.4683837890625, + "learning_rate": 9.193640869127592e-06, + "loss": 124.2721, + "step": 32790 + }, + { + "epoch": 0.27133225793109156, + "grad_norm": 957.0466918945312, + "learning_rate": 9.192872686463601e-06, + "loss": 131.9941, + "step": 32800 + }, + { + "epoch": 0.27141498118046076, + "grad_norm": 691.4140014648438, + "learning_rate": 9.192104170191973e-06, + "loss": 101.0082, + "step": 32810 + }, + { + "epoch": 0.27149770442983, + "grad_norm": 791.294677734375, + "learning_rate": 9.191335320373856e-06, + "loss": 138.9451, + "step": 32820 + }, + { + "epoch": 0.27158042767919927, + "grad_norm": 1051.594482421875, + "learning_rate": 9.190566137070422e-06, + "loss": 107.2597, + "step": 32830 + }, + { + "epoch": 0.27166315092856846, + "grad_norm": 1549.462158203125, + "learning_rate": 9.189796620342875e-06, + "loss": 153.74, + "step": 32840 + }, + { + "epoch": 0.2717458741779377, + "grad_norm": 751.6979370117188, + "learning_rate": 9.189026770252437e-06, + "loss": 108.3263, + "step": 32850 + }, + { + "epoch": 0.2718285974273069, + "grad_norm": 4171.58203125, + "learning_rate": 9.188256586860365e-06, + "loss": 177.8506, + "step": 32860 + }, + { + "epoch": 0.27191132067667617, + "grad_norm": 880.1036987304688, + "learning_rate": 9.187486070227938e-06, + "loss": 124.1478, + "step": 32870 + }, + { + "epoch": 
0.2719940439260454, + "grad_norm": 875.8194580078125, + "learning_rate": 9.186715220416463e-06, + "loss": 80.9601, + "step": 32880 + }, + { + "epoch": 0.2720767671754146, + "grad_norm": 517.209716796875, + "learning_rate": 9.185944037487271e-06, + "loss": 105.6458, + "step": 32890 + }, + { + "epoch": 0.2721594904247839, + "grad_norm": 783.9718627929688, + "learning_rate": 9.185172521501723e-06, + "loss": 103.928, + "step": 32900 + }, + { + "epoch": 0.27224221367415313, + "grad_norm": 809.2305297851562, + "learning_rate": 9.184400672521204e-06, + "loss": 119.6438, + "step": 32910 + }, + { + "epoch": 0.27232493692352233, + "grad_norm": 847.0447998046875, + "learning_rate": 9.183628490607129e-06, + "loss": 118.6409, + "step": 32920 + }, + { + "epoch": 0.2724076601728916, + "grad_norm": 803.6466674804688, + "learning_rate": 9.182855975820934e-06, + "loss": 86.8706, + "step": 32930 + }, + { + "epoch": 0.27249038342226084, + "grad_norm": 1112.55322265625, + "learning_rate": 9.182083128224086e-06, + "loss": 108.3938, + "step": 32940 + }, + { + "epoch": 0.27257310667163004, + "grad_norm": 758.2708740234375, + "learning_rate": 9.181309947878077e-06, + "loss": 136.1542, + "step": 32950 + }, + { + "epoch": 0.2726558299209993, + "grad_norm": 655.5614013671875, + "learning_rate": 9.180536434844426e-06, + "loss": 93.3358, + "step": 32960 + }, + { + "epoch": 0.27273855317036855, + "grad_norm": 746.388671875, + "learning_rate": 9.179762589184676e-06, + "loss": 137.545, + "step": 32970 + }, + { + "epoch": 0.27282127641973775, + "grad_norm": 919.367431640625, + "learning_rate": 9.1789884109604e-06, + "loss": 105.9232, + "step": 32980 + }, + { + "epoch": 0.272903999669107, + "grad_norm": 1165.487548828125, + "learning_rate": 9.178213900233193e-06, + "loss": 119.8975, + "step": 32990 + }, + { + "epoch": 0.27298672291847625, + "grad_norm": 1274.4398193359375, + "learning_rate": 9.177439057064684e-06, + "loss": 108.2796, + "step": 33000 + }, + { + "epoch": 0.27306944616784545, + "grad_norm": 2099.3134765625, + "learning_rate": 9.17666388151652e-06, + "loss": 88.9919, + "step": 33010 + }, + { + "epoch": 0.2731521694172147, + "grad_norm": 1373.3428955078125, + "learning_rate": 9.175888373650377e-06, + "loss": 109.9396, + "step": 33020 + }, + { + "epoch": 0.27323489266658396, + "grad_norm": 1429.390869140625, + "learning_rate": 9.175112533527963e-06, + "loss": 94.565, + "step": 33030 + }, + { + "epoch": 0.27331761591595316, + "grad_norm": 1118.853271484375, + "learning_rate": 9.174336361211007e-06, + "loss": 101.014, + "step": 33040 + }, + { + "epoch": 0.2734003391653224, + "grad_norm": 2152.85107421875, + "learning_rate": 9.173559856761262e-06, + "loss": 153.9467, + "step": 33050 + }, + { + "epoch": 0.27348306241469167, + "grad_norm": 909.071533203125, + "learning_rate": 9.172783020240514e-06, + "loss": 102.0454, + "step": 33060 + }, + { + "epoch": 0.27356578566406087, + "grad_norm": 890.9006958007812, + "learning_rate": 9.172005851710573e-06, + "loss": 130.9717, + "step": 33070 + }, + { + "epoch": 0.2736485089134301, + "grad_norm": 1000.61279296875, + "learning_rate": 9.171228351233272e-06, + "loss": 150.3027, + "step": 33080 + }, + { + "epoch": 0.2737312321627994, + "grad_norm": 1430.8470458984375, + "learning_rate": 9.170450518870475e-06, + "loss": 149.3742, + "step": 33090 + }, + { + "epoch": 0.2738139554121686, + "grad_norm": 1026.9654541015625, + "learning_rate": 9.169672354684069e-06, + "loss": 123.5882, + "step": 33100 + }, + { + "epoch": 0.27389667866153783, + "grad_norm": 841.4974975585938, + 
"learning_rate": 9.168893858735972e-06, + "loss": 92.7002, + "step": 33110 + }, + { + "epoch": 0.2739794019109071, + "grad_norm": 1173.48681640625, + "learning_rate": 9.168115031088122e-06, + "loss": 89.6682, + "step": 33120 + }, + { + "epoch": 0.2740621251602763, + "grad_norm": 733.5807495117188, + "learning_rate": 9.167335871802488e-06, + "loss": 86.3547, + "step": 33130 + }, + { + "epoch": 0.27414484840964554, + "grad_norm": 1434.056640625, + "learning_rate": 9.166556380941063e-06, + "loss": 125.5328, + "step": 33140 + }, + { + "epoch": 0.2742275716590148, + "grad_norm": 820.1240844726562, + "learning_rate": 9.16577655856587e-06, + "loss": 130.4958, + "step": 33150 + }, + { + "epoch": 0.274310294908384, + "grad_norm": 840.7511596679688, + "learning_rate": 9.164996404738955e-06, + "loss": 160.6511, + "step": 33160 + }, + { + "epoch": 0.27439301815775324, + "grad_norm": 915.5698852539062, + "learning_rate": 9.16421591952239e-06, + "loss": 127.9161, + "step": 33170 + }, + { + "epoch": 0.2744757414071225, + "grad_norm": 1336.1126708984375, + "learning_rate": 9.163435102978276e-06, + "loss": 122.7304, + "step": 33180 + }, + { + "epoch": 0.2745584646564917, + "grad_norm": 982.8937377929688, + "learning_rate": 9.162653955168739e-06, + "loss": 118.9783, + "step": 33190 + }, + { + "epoch": 0.27464118790586095, + "grad_norm": 756.2153930664062, + "learning_rate": 9.161872476155929e-06, + "loss": 101.5269, + "step": 33200 + }, + { + "epoch": 0.2747239111552302, + "grad_norm": 732.1403198242188, + "learning_rate": 9.161090666002029e-06, + "loss": 124.4535, + "step": 33210 + }, + { + "epoch": 0.2748066344045994, + "grad_norm": 753.6777954101562, + "learning_rate": 9.16030852476924e-06, + "loss": 109.3473, + "step": 33220 + }, + { + "epoch": 0.27488935765396866, + "grad_norm": 822.023681640625, + "learning_rate": 9.159526052519794e-06, + "loss": 120.7444, + "step": 33230 + }, + { + "epoch": 0.27497208090333786, + "grad_norm": 925.0752563476562, + "learning_rate": 9.15874324931595e-06, + "loss": 100.8344, + "step": 33240 + }, + { + "epoch": 0.2750548041527071, + "grad_norm": 686.9509887695312, + "learning_rate": 9.157960115219993e-06, + "loss": 113.704, + "step": 33250 + }, + { + "epoch": 0.27513752740207637, + "grad_norm": 996.56787109375, + "learning_rate": 9.157176650294231e-06, + "loss": 133.5279, + "step": 33260 + }, + { + "epoch": 0.27522025065144556, + "grad_norm": 728.7750244140625, + "learning_rate": 9.156392854601001e-06, + "loss": 133.3526, + "step": 33270 + }, + { + "epoch": 0.2753029739008148, + "grad_norm": 1302.504638671875, + "learning_rate": 9.155608728202669e-06, + "loss": 113.5402, + "step": 33280 + }, + { + "epoch": 0.2753856971501841, + "grad_norm": 857.5109252929688, + "learning_rate": 9.154824271161621e-06, + "loss": 83.4826, + "step": 33290 + }, + { + "epoch": 0.27546842039955327, + "grad_norm": 910.7625122070312, + "learning_rate": 9.154039483540273e-06, + "loss": 110.0397, + "step": 33300 + }, + { + "epoch": 0.2755511436489225, + "grad_norm": 529.6849365234375, + "learning_rate": 9.153254365401069e-06, + "loss": 125.4888, + "step": 33310 + }, + { + "epoch": 0.2756338668982918, + "grad_norm": 879.7425537109375, + "learning_rate": 9.152468916806477e-06, + "loss": 103.2796, + "step": 33320 + }, + { + "epoch": 0.275716590147661, + "grad_norm": 1299.931884765625, + "learning_rate": 9.151683137818989e-06, + "loss": 119.4664, + "step": 33330 + }, + { + "epoch": 0.27579931339703023, + "grad_norm": 698.8023681640625, + "learning_rate": 9.150897028501126e-06, + "loss": 100.8363, + 
"step": 33340 + }, + { + "epoch": 0.2758820366463995, + "grad_norm": 1195.03466796875, + "learning_rate": 9.15011058891544e-06, + "loss": 119.996, + "step": 33350 + }, + { + "epoch": 0.2759647598957687, + "grad_norm": 1072.2655029296875, + "learning_rate": 9.149323819124498e-06, + "loss": 113.2403, + "step": 33360 + }, + { + "epoch": 0.27604748314513794, + "grad_norm": 897.378662109375, + "learning_rate": 9.148536719190904e-06, + "loss": 131.1827, + "step": 33370 + }, + { + "epoch": 0.2761302063945072, + "grad_norm": 900.53955078125, + "learning_rate": 9.147749289177282e-06, + "loss": 141.6734, + "step": 33380 + }, + { + "epoch": 0.2762129296438764, + "grad_norm": 809.6741333007812, + "learning_rate": 9.146961529146285e-06, + "loss": 99.829, + "step": 33390 + }, + { + "epoch": 0.27629565289324565, + "grad_norm": 757.1586303710938, + "learning_rate": 9.146173439160591e-06, + "loss": 117.2545, + "step": 33400 + }, + { + "epoch": 0.2763783761426149, + "grad_norm": 763.6544799804688, + "learning_rate": 9.145385019282904e-06, + "loss": 135.9243, + "step": 33410 + }, + { + "epoch": 0.2764610993919841, + "grad_norm": 1427.904296875, + "learning_rate": 9.144596269575957e-06, + "loss": 125.229, + "step": 33420 + }, + { + "epoch": 0.27654382264135335, + "grad_norm": 1012.0225219726562, + "learning_rate": 9.143807190102504e-06, + "loss": 126.2279, + "step": 33430 + }, + { + "epoch": 0.2766265458907226, + "grad_norm": 2056.858154296875, + "learning_rate": 9.143017780925331e-06, + "loss": 153.6504, + "step": 33440 + }, + { + "epoch": 0.2767092691400918, + "grad_norm": 1273.417236328125, + "learning_rate": 9.142228042107248e-06, + "loss": 109.7093, + "step": 33450 + }, + { + "epoch": 0.27679199238946106, + "grad_norm": 1077.05126953125, + "learning_rate": 9.141437973711092e-06, + "loss": 106.108, + "step": 33460 + }, + { + "epoch": 0.2768747156388303, + "grad_norm": 1240.419189453125, + "learning_rate": 9.14064757579972e-06, + "loss": 95.5216, + "step": 33470 + }, + { + "epoch": 0.2769574388881995, + "grad_norm": 1002.42724609375, + "learning_rate": 9.139856848436023e-06, + "loss": 117.1653, + "step": 33480 + }, + { + "epoch": 0.27704016213756877, + "grad_norm": 1525.692626953125, + "learning_rate": 9.139065791682916e-06, + "loss": 161.3095, + "step": 33490 + }, + { + "epoch": 0.277122885386938, + "grad_norm": 773.1032104492188, + "learning_rate": 9.138274405603342e-06, + "loss": 168.1776, + "step": 33500 + }, + { + "epoch": 0.2772056086363072, + "grad_norm": 768.6693115234375, + "learning_rate": 9.137482690260265e-06, + "loss": 156.8843, + "step": 33510 + }, + { + "epoch": 0.2772883318856765, + "grad_norm": 307.3286437988281, + "learning_rate": 9.13669064571668e-06, + "loss": 109.4442, + "step": 33520 + }, + { + "epoch": 0.27737105513504573, + "grad_norm": 3717.694580078125, + "learning_rate": 9.135898272035601e-06, + "loss": 167.7826, + "step": 33530 + }, + { + "epoch": 0.27745377838441493, + "grad_norm": 986.0308227539062, + "learning_rate": 9.13510556928008e-06, + "loss": 87.5478, + "step": 33540 + }, + { + "epoch": 0.2775365016337842, + "grad_norm": 652.1424560546875, + "learning_rate": 9.134312537513188e-06, + "loss": 106.2764, + "step": 33550 + }, + { + "epoch": 0.27761922488315344, + "grad_norm": 720.5858154296875, + "learning_rate": 9.133519176798021e-06, + "loss": 152.1906, + "step": 33560 + }, + { + "epoch": 0.27770194813252264, + "grad_norm": 1833.9097900390625, + "learning_rate": 9.132725487197701e-06, + "loss": 116.5092, + "step": 33570 + }, + { + "epoch": 0.2777846713818919, + 
"grad_norm": 1374.428955078125, + "learning_rate": 9.131931468775382e-06, + "loss": 132.6865, + "step": 33580 + }, + { + "epoch": 0.2778673946312611, + "grad_norm": 724.4324340820312, + "learning_rate": 9.131137121594239e-06, + "loss": 127.3931, + "step": 33590 + }, + { + "epoch": 0.27795011788063034, + "grad_norm": 594.5848999023438, + "learning_rate": 9.130342445717474e-06, + "loss": 117.9793, + "step": 33600 + }, + { + "epoch": 0.2780328411299996, + "grad_norm": 1347.4599609375, + "learning_rate": 9.129547441208317e-06, + "loss": 123.5553, + "step": 33610 + }, + { + "epoch": 0.2781155643793688, + "grad_norm": 825.2183837890625, + "learning_rate": 9.128752108130022e-06, + "loss": 109.409, + "step": 33620 + }, + { + "epoch": 0.27819828762873805, + "grad_norm": 952.2738037109375, + "learning_rate": 9.12795644654587e-06, + "loss": 149.2746, + "step": 33630 + }, + { + "epoch": 0.2782810108781073, + "grad_norm": 6804.1904296875, + "learning_rate": 9.127160456519168e-06, + "loss": 132.9842, + "step": 33640 + }, + { + "epoch": 0.2783637341274765, + "grad_norm": 953.0177612304688, + "learning_rate": 9.126364138113251e-06, + "loss": 119.1077, + "step": 33650 + }, + { + "epoch": 0.27844645737684576, + "grad_norm": 771.2464599609375, + "learning_rate": 9.125567491391476e-06, + "loss": 117.123, + "step": 33660 + }, + { + "epoch": 0.278529180626215, + "grad_norm": 1822.6439208984375, + "learning_rate": 9.12477051641723e-06, + "loss": 151.1186, + "step": 33670 + }, + { + "epoch": 0.2786119038755842, + "grad_norm": 872.4586791992188, + "learning_rate": 9.123973213253923e-06, + "loss": 112.0873, + "step": 33680 + }, + { + "epoch": 0.27869462712495346, + "grad_norm": 494.63134765625, + "learning_rate": 9.123175581964995e-06, + "loss": 100.9059, + "step": 33690 + }, + { + "epoch": 0.2787773503743227, + "grad_norm": 959.332275390625, + "learning_rate": 9.122377622613909e-06, + "loss": 106.5335, + "step": 33700 + }, + { + "epoch": 0.2788600736236919, + "grad_norm": 886.007568359375, + "learning_rate": 9.121579335264155e-06, + "loss": 128.5588, + "step": 33710 + }, + { + "epoch": 0.27894279687306117, + "grad_norm": 1174.72265625, + "learning_rate": 9.120780719979248e-06, + "loss": 92.5889, + "step": 33720 + }, + { + "epoch": 0.2790255201224304, + "grad_norm": 2207.4111328125, + "learning_rate": 9.11998177682273e-06, + "loss": 132.5646, + "step": 33730 + }, + { + "epoch": 0.2791082433717996, + "grad_norm": 730.7549438476562, + "learning_rate": 9.11918250585817e-06, + "loss": 82.5129, + "step": 33740 + }, + { + "epoch": 0.2791909666211689, + "grad_norm": 876.688232421875, + "learning_rate": 9.118382907149164e-06, + "loss": 109.9412, + "step": 33750 + }, + { + "epoch": 0.27927368987053813, + "grad_norm": 831.474609375, + "learning_rate": 9.117582980759332e-06, + "loss": 124.5468, + "step": 33760 + }, + { + "epoch": 0.27935641311990733, + "grad_norm": 820.8474731445312, + "learning_rate": 9.116782726752317e-06, + "loss": 126.4644, + "step": 33770 + }, + { + "epoch": 0.2794391363692766, + "grad_norm": 921.0737915039062, + "learning_rate": 9.115982145191796e-06, + "loss": 116.8273, + "step": 33780 + }, + { + "epoch": 0.27952185961864584, + "grad_norm": 1426.6785888671875, + "learning_rate": 9.115181236141463e-06, + "loss": 127.0457, + "step": 33790 + }, + { + "epoch": 0.27960458286801504, + "grad_norm": 1093.164794921875, + "learning_rate": 9.114379999665047e-06, + "loss": 85.9147, + "step": 33800 + }, + { + "epoch": 0.2796873061173843, + "grad_norm": 1163.352783203125, + "learning_rate": 9.113578435826295e-06, + 
"loss": 140.9147, + "step": 33810 + }, + { + "epoch": 0.27977002936675355, + "grad_norm": 412.13787841796875, + "learning_rate": 9.112776544688988e-06, + "loss": 127.3203, + "step": 33820 + }, + { + "epoch": 0.27985275261612275, + "grad_norm": 785.0323486328125, + "learning_rate": 9.111974326316926e-06, + "loss": 103.5417, + "step": 33830 + }, + { + "epoch": 0.279935475865492, + "grad_norm": 1045.812255859375, + "learning_rate": 9.111171780773938e-06, + "loss": 120.1187, + "step": 33840 + }, + { + "epoch": 0.28001819911486125, + "grad_norm": 916.0906372070312, + "learning_rate": 9.110368908123878e-06, + "loss": 139.0241, + "step": 33850 + }, + { + "epoch": 0.28010092236423045, + "grad_norm": 1398.31982421875, + "learning_rate": 9.10956570843063e-06, + "loss": 132.5252, + "step": 33860 + }, + { + "epoch": 0.2801836456135997, + "grad_norm": 1527.2587890625, + "learning_rate": 9.108762181758096e-06, + "loss": 130.4855, + "step": 33870 + }, + { + "epoch": 0.28026636886296896, + "grad_norm": 751.3243408203125, + "learning_rate": 9.107958328170215e-06, + "loss": 106.5942, + "step": 33880 + }, + { + "epoch": 0.28034909211233816, + "grad_norm": 1024.8853759765625, + "learning_rate": 9.10715414773094e-06, + "loss": 109.6669, + "step": 33890 + }, + { + "epoch": 0.2804318153617074, + "grad_norm": 1099.5294189453125, + "learning_rate": 9.10634964050426e-06, + "loss": 119.2499, + "step": 33900 + }, + { + "epoch": 0.28051453861107667, + "grad_norm": 775.4506225585938, + "learning_rate": 9.105544806554184e-06, + "loss": 157.9587, + "step": 33910 + }, + { + "epoch": 0.28059726186044587, + "grad_norm": 1175.566162109375, + "learning_rate": 9.104739645944752e-06, + "loss": 179.6702, + "step": 33920 + }, + { + "epoch": 0.2806799851098151, + "grad_norm": 1368.4671630859375, + "learning_rate": 9.103934158740023e-06, + "loss": 129.1513, + "step": 33930 + }, + { + "epoch": 0.2807627083591844, + "grad_norm": 1385.238037109375, + "learning_rate": 9.10312834500409e-06, + "loss": 150.3397, + "step": 33940 + }, + { + "epoch": 0.2808454316085536, + "grad_norm": 1050.946044921875, + "learning_rate": 9.102322204801062e-06, + "loss": 118.2614, + "step": 33950 + }, + { + "epoch": 0.28092815485792283, + "grad_norm": 2881.64013671875, + "learning_rate": 9.101515738195084e-06, + "loss": 100.6495, + "step": 33960 + }, + { + "epoch": 0.28101087810729203, + "grad_norm": 394.7975769042969, + "learning_rate": 9.100708945250322e-06, + "loss": 81.3734, + "step": 33970 + }, + { + "epoch": 0.2810936013566613, + "grad_norm": 1190.456298828125, + "learning_rate": 9.099901826030969e-06, + "loss": 130.64, + "step": 33980 + }, + { + "epoch": 0.28117632460603054, + "grad_norm": 1374.1256103515625, + "learning_rate": 9.099094380601244e-06, + "loss": 119.4305, + "step": 33990 + }, + { + "epoch": 0.28125904785539974, + "grad_norm": 1412.7530517578125, + "learning_rate": 9.098286609025392e-06, + "loss": 106.0938, + "step": 34000 + }, + { + "epoch": 0.281341771104769, + "grad_norm": 617.6422119140625, + "learning_rate": 9.097478511367682e-06, + "loss": 119.852, + "step": 34010 + }, + { + "epoch": 0.28142449435413824, + "grad_norm": 857.2781982421875, + "learning_rate": 9.096670087692413e-06, + "loss": 168.8287, + "step": 34020 + }, + { + "epoch": 0.28150721760350744, + "grad_norm": 0.0, + "learning_rate": 9.095861338063906e-06, + "loss": 99.5146, + "step": 34030 + }, + { + "epoch": 0.2815899408528767, + "grad_norm": 1164.4888916015625, + "learning_rate": 9.09505226254651e-06, + "loss": 139.5455, + "step": 34040 + }, + { + "epoch": 
0.28167266410224595, + "grad_norm": 1813.4522705078125, + "learning_rate": 9.094242861204598e-06, + "loss": 153.4502, + "step": 34050 + }, + { + "epoch": 0.28175538735161515, + "grad_norm": 918.6724243164062, + "learning_rate": 9.093433134102572e-06, + "loss": 95.0513, + "step": 34060 + }, + { + "epoch": 0.2818381106009844, + "grad_norm": 758.8914794921875, + "learning_rate": 9.09262308130486e-06, + "loss": 127.6234, + "step": 34070 + }, + { + "epoch": 0.28192083385035366, + "grad_norm": 1214.897705078125, + "learning_rate": 9.091812702875908e-06, + "loss": 135.7131, + "step": 34080 + }, + { + "epoch": 0.28200355709972286, + "grad_norm": 1762.4208984375, + "learning_rate": 9.0910019988802e-06, + "loss": 116.575, + "step": 34090 + }, + { + "epoch": 0.2820862803490921, + "grad_norm": 1027.57958984375, + "learning_rate": 9.09019096938224e-06, + "loss": 115.0406, + "step": 34100 + }, + { + "epoch": 0.28216900359846137, + "grad_norm": 549.1436157226562, + "learning_rate": 9.089379614446554e-06, + "loss": 101.1465, + "step": 34110 + }, + { + "epoch": 0.28225172684783056, + "grad_norm": 1149.4486083984375, + "learning_rate": 9.0885679341377e-06, + "loss": 111.7543, + "step": 34120 + }, + { + "epoch": 0.2823344500971998, + "grad_norm": 827.5548095703125, + "learning_rate": 9.08775592852026e-06, + "loss": 84.8386, + "step": 34130 + }, + { + "epoch": 0.2824171733465691, + "grad_norm": 2282.6884765625, + "learning_rate": 9.08694359765884e-06, + "loss": 122.2059, + "step": 34140 + }, + { + "epoch": 0.28249989659593827, + "grad_norm": 1563.5760498046875, + "learning_rate": 9.086130941618075e-06, + "loss": 127.1692, + "step": 34150 + }, + { + "epoch": 0.2825826198453075, + "grad_norm": 546.21337890625, + "learning_rate": 9.085317960462625e-06, + "loss": 91.0043, + "step": 34160 + }, + { + "epoch": 0.2826653430946768, + "grad_norm": 1092.4716796875, + "learning_rate": 9.084504654257173e-06, + "loss": 126.0462, + "step": 34170 + }, + { + "epoch": 0.282748066344046, + "grad_norm": 1006.257568359375, + "learning_rate": 9.08369102306643e-06, + "loss": 104.315, + "step": 34180 + }, + { + "epoch": 0.28283078959341523, + "grad_norm": 1649.3040771484375, + "learning_rate": 9.082877066955135e-06, + "loss": 101.4608, + "step": 34190 + }, + { + "epoch": 0.2829135128427845, + "grad_norm": 1023.9491577148438, + "learning_rate": 9.08206278598805e-06, + "loss": 133.0885, + "step": 34200 + }, + { + "epoch": 0.2829962360921537, + "grad_norm": 1013.92041015625, + "learning_rate": 9.081248180229963e-06, + "loss": 93.8945, + "step": 34210 + }, + { + "epoch": 0.28307895934152294, + "grad_norm": 655.7772827148438, + "learning_rate": 9.080433249745688e-06, + "loss": 104.3141, + "step": 34220 + }, + { + "epoch": 0.2831616825908922, + "grad_norm": 1191.8131103515625, + "learning_rate": 9.079617994600066e-06, + "loss": 147.7726, + "step": 34230 + }, + { + "epoch": 0.2832444058402614, + "grad_norm": 628.782470703125, + "learning_rate": 9.078802414857963e-06, + "loss": 96.9786, + "step": 34240 + }, + { + "epoch": 0.28332712908963065, + "grad_norm": 695.0119018554688, + "learning_rate": 9.077986510584273e-06, + "loss": 111.2695, + "step": 34250 + }, + { + "epoch": 0.2834098523389999, + "grad_norm": 1148.8243408203125, + "learning_rate": 9.07717028184391e-06, + "loss": 104.8876, + "step": 34260 + }, + { + "epoch": 0.2834925755883691, + "grad_norm": 766.49072265625, + "learning_rate": 9.07635372870182e-06, + "loss": 84.2609, + "step": 34270 + }, + { + "epoch": 0.28357529883773835, + "grad_norm": 1150.672119140625, + "learning_rate": 
9.07553685122297e-06, + "loss": 146.0944, + "step": 34280 + }, + { + "epoch": 0.2836580220871076, + "grad_norm": 1328.8944091796875, + "learning_rate": 9.074719649472358e-06, + "loss": 115.0404, + "step": 34290 + }, + { + "epoch": 0.2837407453364768, + "grad_norm": 1080.066650390625, + "learning_rate": 9.073902123515005e-06, + "loss": 144.9194, + "step": 34300 + }, + { + "epoch": 0.28382346858584606, + "grad_norm": 676.450439453125, + "learning_rate": 9.073084273415956e-06, + "loss": 79.431, + "step": 34310 + }, + { + "epoch": 0.28390619183521526, + "grad_norm": 1445.7745361328125, + "learning_rate": 9.072266099240286e-06, + "loss": 99.8113, + "step": 34320 + }, + { + "epoch": 0.2839889150845845, + "grad_norm": 739.0885009765625, + "learning_rate": 9.07144760105309e-06, + "loss": 142.6431, + "step": 34330 + }, + { + "epoch": 0.28407163833395377, + "grad_norm": 920.0739135742188, + "learning_rate": 9.070628778919493e-06, + "loss": 93.1577, + "step": 34340 + }, + { + "epoch": 0.28415436158332297, + "grad_norm": 1188.843017578125, + "learning_rate": 9.069809632904647e-06, + "loss": 111.0898, + "step": 34350 + }, + { + "epoch": 0.2842370848326922, + "grad_norm": 991.0109252929688, + "learning_rate": 9.068990163073726e-06, + "loss": 87.4462, + "step": 34360 + }, + { + "epoch": 0.2843198080820615, + "grad_norm": 845.0187377929688, + "learning_rate": 9.068170369491932e-06, + "loss": 95.4965, + "step": 34370 + }, + { + "epoch": 0.2844025313314307, + "grad_norm": 861.9247436523438, + "learning_rate": 9.067350252224491e-06, + "loss": 76.6258, + "step": 34380 + }, + { + "epoch": 0.28448525458079993, + "grad_norm": 1371.8582763671875, + "learning_rate": 9.066529811336658e-06, + "loss": 117.6796, + "step": 34390 + }, + { + "epoch": 0.2845679778301692, + "grad_norm": 909.2398681640625, + "learning_rate": 9.06570904689371e-06, + "loss": 96.8476, + "step": 34400 + }, + { + "epoch": 0.2846507010795384, + "grad_norm": 941.552978515625, + "learning_rate": 9.064887958960953e-06, + "loss": 89.4982, + "step": 34410 + }, + { + "epoch": 0.28473342432890764, + "grad_norm": 2689.89404296875, + "learning_rate": 9.064066547603716e-06, + "loss": 116.0555, + "step": 34420 + }, + { + "epoch": 0.2848161475782769, + "grad_norm": 1277.172607421875, + "learning_rate": 9.063244812887357e-06, + "loss": 111.9663, + "step": 34430 + }, + { + "epoch": 0.2848988708276461, + "grad_norm": 872.6428833007812, + "learning_rate": 9.062422754877253e-06, + "loss": 124.443, + "step": 34440 + }, + { + "epoch": 0.28498159407701534, + "grad_norm": 698.310546875, + "learning_rate": 9.061600373638816e-06, + "loss": 127.376, + "step": 34450 + }, + { + "epoch": 0.2850643173263846, + "grad_norm": 914.8101806640625, + "learning_rate": 9.06077766923748e-06, + "loss": 113.9157, + "step": 34460 + }, + { + "epoch": 0.2851470405757538, + "grad_norm": 1122.4351806640625, + "learning_rate": 9.059954641738697e-06, + "loss": 126.3374, + "step": 34470 + }, + { + "epoch": 0.28522976382512305, + "grad_norm": 1123.8736572265625, + "learning_rate": 9.059131291207958e-06, + "loss": 105.3611, + "step": 34480 + }, + { + "epoch": 0.2853124870744923, + "grad_norm": 656.4722290039062, + "learning_rate": 9.058307617710771e-06, + "loss": 142.5772, + "step": 34490 + }, + { + "epoch": 0.2853952103238615, + "grad_norm": 608.4116821289062, + "learning_rate": 9.057483621312671e-06, + "loss": 115.3732, + "step": 34500 + }, + { + "epoch": 0.28547793357323076, + "grad_norm": 581.153564453125, + "learning_rate": 9.056659302079222e-06, + "loss": 118.1142, + "step": 34510 + }, + { 
+ "epoch": 0.2855606568226, + "grad_norm": 1226.16552734375, + "learning_rate": 9.055834660076008e-06, + "loss": 110.4029, + "step": 34520 + }, + { + "epoch": 0.2856433800719692, + "grad_norm": 1013.7431640625, + "learning_rate": 9.055009695368646e-06, + "loss": 135.0033, + "step": 34530 + }, + { + "epoch": 0.28572610332133846, + "grad_norm": 749.7244262695312, + "learning_rate": 9.054184408022772e-06, + "loss": 157.7157, + "step": 34540 + }, + { + "epoch": 0.2858088265707077, + "grad_norm": 456.33026123046875, + "learning_rate": 9.05335879810405e-06, + "loss": 145.5047, + "step": 34550 + }, + { + "epoch": 0.2858915498200769, + "grad_norm": 622.3046264648438, + "learning_rate": 9.052532865678171e-06, + "loss": 123.2549, + "step": 34560 + }, + { + "epoch": 0.28597427306944617, + "grad_norm": 1478.7266845703125, + "learning_rate": 9.05170661081085e-06, + "loss": 127.2245, + "step": 34570 + }, + { + "epoch": 0.2860569963188154, + "grad_norm": 619.5127563476562, + "learning_rate": 9.050880033567831e-06, + "loss": 132.0401, + "step": 34580 + }, + { + "epoch": 0.2861397195681846, + "grad_norm": 1351.7064208984375, + "learning_rate": 9.050053134014878e-06, + "loss": 100.6227, + "step": 34590 + }, + { + "epoch": 0.2862224428175539, + "grad_norm": 1285.1185302734375, + "learning_rate": 9.049225912217782e-06, + "loss": 102.2231, + "step": 34600 + }, + { + "epoch": 0.28630516606692313, + "grad_norm": 1479.285888671875, + "learning_rate": 9.048398368242365e-06, + "loss": 93.523, + "step": 34610 + }, + { + "epoch": 0.28638788931629233, + "grad_norm": 845.21826171875, + "learning_rate": 9.047570502154471e-06, + "loss": 97.2673, + "step": 34620 + }, + { + "epoch": 0.2864706125656616, + "grad_norm": 1318.1756591796875, + "learning_rate": 9.046742314019968e-06, + "loss": 136.1264, + "step": 34630 + }, + { + "epoch": 0.28655333581503084, + "grad_norm": 1689.4859619140625, + "learning_rate": 9.045913803904748e-06, + "loss": 127.8525, + "step": 34640 + }, + { + "epoch": 0.28663605906440004, + "grad_norm": 759.2603149414062, + "learning_rate": 9.045084971874738e-06, + "loss": 119.6263, + "step": 34650 + }, + { + "epoch": 0.2867187823137693, + "grad_norm": 1472.703857421875, + "learning_rate": 9.04425581799588e-06, + "loss": 122.0518, + "step": 34660 + }, + { + "epoch": 0.2868015055631385, + "grad_norm": 903.3499755859375, + "learning_rate": 9.043426342334147e-06, + "loss": 104.4996, + "step": 34670 + }, + { + "epoch": 0.28688422881250775, + "grad_norm": 726.6737670898438, + "learning_rate": 9.042596544955538e-06, + "loss": 93.7296, + "step": 34680 + }, + { + "epoch": 0.286966952061877, + "grad_norm": 1467.8472900390625, + "learning_rate": 9.041766425926073e-06, + "loss": 109.8673, + "step": 34690 + }, + { + "epoch": 0.2870496753112462, + "grad_norm": 1312.5286865234375, + "learning_rate": 9.040935985311804e-06, + "loss": 83.5798, + "step": 34700 + }, + { + "epoch": 0.28713239856061545, + "grad_norm": 1040.613525390625, + "learning_rate": 9.040105223178803e-06, + "loss": 105.7686, + "step": 34710 + }, + { + "epoch": 0.2872151218099847, + "grad_norm": 737.3902587890625, + "learning_rate": 9.039274139593173e-06, + "loss": 100.6217, + "step": 34720 + }, + { + "epoch": 0.2872978450593539, + "grad_norm": 1900.810302734375, + "learning_rate": 9.038442734621034e-06, + "loss": 108.4988, + "step": 34730 + }, + { + "epoch": 0.28738056830872316, + "grad_norm": 823.1213989257812, + "learning_rate": 9.037611008328544e-06, + "loss": 87.0847, + "step": 34740 + }, + { + "epoch": 0.2874632915580924, + "grad_norm": 
1027.710693359375, + "learning_rate": 9.036778960781874e-06, + "loss": 96.1623, + "step": 34750 + }, + { + "epoch": 0.2875460148074616, + "grad_norm": 1012.5924682617188, + "learning_rate": 9.03594659204723e-06, + "loss": 99.4886, + "step": 34760 + }, + { + "epoch": 0.28762873805683087, + "grad_norm": 1981.74609375, + "learning_rate": 9.035113902190838e-06, + "loss": 131.8146, + "step": 34770 + }, + { + "epoch": 0.2877114613062001, + "grad_norm": 838.7272338867188, + "learning_rate": 9.03428089127895e-06, + "loss": 138.8772, + "step": 34780 + }, + { + "epoch": 0.2877941845555693, + "grad_norm": 614.944091796875, + "learning_rate": 9.033447559377847e-06, + "loss": 102.8104, + "step": 34790 + }, + { + "epoch": 0.2878769078049386, + "grad_norm": 1577.7645263671875, + "learning_rate": 9.032613906553833e-06, + "loss": 128.1048, + "step": 34800 + }, + { + "epoch": 0.28795963105430783, + "grad_norm": 1021.2303466796875, + "learning_rate": 9.031779932873238e-06, + "loss": 107.6142, + "step": 34810 + }, + { + "epoch": 0.28804235430367703, + "grad_norm": 644.7843017578125, + "learning_rate": 9.030945638402415e-06, + "loss": 103.4213, + "step": 34820 + }, + { + "epoch": 0.2881250775530463, + "grad_norm": 873.067138671875, + "learning_rate": 9.030111023207751e-06, + "loss": 93.0126, + "step": 34830 + }, + { + "epoch": 0.28820780080241554, + "grad_norm": 1150.903564453125, + "learning_rate": 9.029276087355646e-06, + "loss": 121.5422, + "step": 34840 + }, + { + "epoch": 0.28829052405178474, + "grad_norm": 573.4194946289062, + "learning_rate": 9.028440830912536e-06, + "loss": 120.833, + "step": 34850 + }, + { + "epoch": 0.288373247301154, + "grad_norm": 951.9305419921875, + "learning_rate": 9.027605253944874e-06, + "loss": 146.5991, + "step": 34860 + }, + { + "epoch": 0.28845597055052324, + "grad_norm": 646.8169555664062, + "learning_rate": 9.026769356519149e-06, + "loss": 73.8264, + "step": 34870 + }, + { + "epoch": 0.28853869379989244, + "grad_norm": 2599.574462890625, + "learning_rate": 9.025933138701865e-06, + "loss": 121.7481, + "step": 34880 + }, + { + "epoch": 0.2886214170492617, + "grad_norm": 964.8515625, + "learning_rate": 9.02509660055956e-06, + "loss": 98.844, + "step": 34890 + }, + { + "epoch": 0.28870414029863095, + "grad_norm": 665.4266357421875, + "learning_rate": 9.02425974215879e-06, + "loss": 126.3558, + "step": 34900 + }, + { + "epoch": 0.28878686354800015, + "grad_norm": 747.7141723632812, + "learning_rate": 9.02342256356614e-06, + "loss": 94.1504, + "step": 34910 + }, + { + "epoch": 0.2888695867973694, + "grad_norm": 1255.60302734375, + "learning_rate": 9.022585064848222e-06, + "loss": 101.1276, + "step": 34920 + }, + { + "epoch": 0.28895231004673866, + "grad_norm": 1393.7789306640625, + "learning_rate": 9.021747246071673e-06, + "loss": 106.2699, + "step": 34930 + }, + { + "epoch": 0.28903503329610786, + "grad_norm": 860.56689453125, + "learning_rate": 9.020909107303152e-06, + "loss": 124.7073, + "step": 34940 + }, + { + "epoch": 0.2891177565454771, + "grad_norm": 982.3703002929688, + "learning_rate": 9.020070648609347e-06, + "loss": 112.9233, + "step": 34950 + }, + { + "epoch": 0.28920047979484637, + "grad_norm": 1855.6322021484375, + "learning_rate": 9.01923187005697e-06, + "loss": 108.5488, + "step": 34960 + }, + { + "epoch": 0.28928320304421556, + "grad_norm": 904.826416015625, + "learning_rate": 9.018392771712758e-06, + "loss": 125.191, + "step": 34970 + }, + { + "epoch": 0.2893659262935848, + "grad_norm": 704.2764892578125, + "learning_rate": 9.017553353643479e-06, + "loss": 
126.2956, + "step": 34980 + }, + { + "epoch": 0.2894486495429541, + "grad_norm": 956.4945678710938, + "learning_rate": 9.016713615915913e-06, + "loss": 81.5747, + "step": 34990 + }, + { + "epoch": 0.28953137279232327, + "grad_norm": 783.7576293945312, + "learning_rate": 9.01587355859688e-06, + "loss": 92.5522, + "step": 35000 + }, + { + "epoch": 0.2896140960416925, + "grad_norm": 1208.607421875, + "learning_rate": 9.015033181753219e-06, + "loss": 106.08, + "step": 35010 + }, + { + "epoch": 0.2896968192910618, + "grad_norm": 852.6314697265625, + "learning_rate": 9.014192485451794e-06, + "loss": 116.7134, + "step": 35020 + }, + { + "epoch": 0.289779542540431, + "grad_norm": 1022.0242309570312, + "learning_rate": 9.013351469759497e-06, + "loss": 144.6814, + "step": 35030 + }, + { + "epoch": 0.28986226578980023, + "grad_norm": 1243.4373779296875, + "learning_rate": 9.01251013474324e-06, + "loss": 104.1561, + "step": 35040 + }, + { + "epoch": 0.28994498903916943, + "grad_norm": 960.918701171875, + "learning_rate": 9.011668480469969e-06, + "loss": 105.9182, + "step": 35050 + }, + { + "epoch": 0.2900277122885387, + "grad_norm": 601.8880004882812, + "learning_rate": 9.010826507006644e-06, + "loss": 94.2774, + "step": 35060 + }, + { + "epoch": 0.29011043553790794, + "grad_norm": 642.360107421875, + "learning_rate": 9.009984214420265e-06, + "loss": 142.8863, + "step": 35070 + }, + { + "epoch": 0.29019315878727714, + "grad_norm": 1095.9666748046875, + "learning_rate": 9.009141602777845e-06, + "loss": 128.7609, + "step": 35080 + }, + { + "epoch": 0.2902758820366464, + "grad_norm": 1230.556396484375, + "learning_rate": 9.008298672146425e-06, + "loss": 111.0792, + "step": 35090 + }, + { + "epoch": 0.29035860528601565, + "grad_norm": 1034.6533203125, + "learning_rate": 9.007455422593077e-06, + "loss": 111.8609, + "step": 35100 + }, + { + "epoch": 0.29044132853538485, + "grad_norm": 509.1571960449219, + "learning_rate": 9.006611854184893e-06, + "loss": 88.8575, + "step": 35110 + }, + { + "epoch": 0.2905240517847541, + "grad_norm": 832.6845703125, + "learning_rate": 9.00576796698899e-06, + "loss": 113.1765, + "step": 35120 + }, + { + "epoch": 0.29060677503412335, + "grad_norm": 1436.23388671875, + "learning_rate": 9.004923761072515e-06, + "loss": 103.5024, + "step": 35130 + }, + { + "epoch": 0.29068949828349255, + "grad_norm": 1610.103759765625, + "learning_rate": 9.004079236502636e-06, + "loss": 113.4215, + "step": 35140 + }, + { + "epoch": 0.2907722215328618, + "grad_norm": 881.2527465820312, + "learning_rate": 9.00323439334655e-06, + "loss": 123.1393, + "step": 35150 + }, + { + "epoch": 0.29085494478223106, + "grad_norm": 734.708740234375, + "learning_rate": 9.002389231671474e-06, + "loss": 121.4382, + "step": 35160 + }, + { + "epoch": 0.29093766803160026, + "grad_norm": 787.5501708984375, + "learning_rate": 9.001543751544654e-06, + "loss": 107.295, + "step": 35170 + }, + { + "epoch": 0.2910203912809695, + "grad_norm": 1131.2176513671875, + "learning_rate": 9.000697953033364e-06, + "loss": 107.077, + "step": 35180 + }, + { + "epoch": 0.29110311453033877, + "grad_norm": 1214.8603515625, + "learning_rate": 8.999851836204901e-06, + "loss": 103.6586, + "step": 35190 + }, + { + "epoch": 0.29118583777970797, + "grad_norm": 895.4462280273438, + "learning_rate": 8.99900540112658e-06, + "loss": 83.9514, + "step": 35200 + }, + { + "epoch": 0.2912685610290772, + "grad_norm": 543.6385498046875, + "learning_rate": 8.998158647865753e-06, + "loss": 107.0998, + "step": 35210 + }, + { + "epoch": 0.2913512842784465, + 
"grad_norm": 1422.5147705078125, + "learning_rate": 8.997311576489793e-06, + "loss": 127.3802, + "step": 35220 + }, + { + "epoch": 0.2914340075278157, + "grad_norm": 1031.476806640625, + "learning_rate": 8.996464187066096e-06, + "loss": 132.6045, + "step": 35230 + }, + { + "epoch": 0.29151673077718493, + "grad_norm": 962.57568359375, + "learning_rate": 8.995616479662084e-06, + "loss": 76.9806, + "step": 35240 + }, + { + "epoch": 0.2915994540265542, + "grad_norm": 888.2576293945312, + "learning_rate": 8.994768454345207e-06, + "loss": 88.927, + "step": 35250 + }, + { + "epoch": 0.2916821772759234, + "grad_norm": 991.271484375, + "learning_rate": 8.993920111182937e-06, + "loss": 116.7842, + "step": 35260 + }, + { + "epoch": 0.29176490052529264, + "grad_norm": 2302.41064453125, + "learning_rate": 8.993071450242775e-06, + "loss": 99.9801, + "step": 35270 + }, + { + "epoch": 0.2918476237746619, + "grad_norm": 729.2933349609375, + "learning_rate": 8.99222247159224e-06, + "loss": 108.2754, + "step": 35280 + }, + { + "epoch": 0.2919303470240311, + "grad_norm": 940.2220458984375, + "learning_rate": 8.991373175298887e-06, + "loss": 104.497, + "step": 35290 + }, + { + "epoch": 0.29201307027340034, + "grad_norm": 893.2850952148438, + "learning_rate": 8.99052356143029e-06, + "loss": 104.8119, + "step": 35300 + }, + { + "epoch": 0.2920957935227696, + "grad_norm": 1792.8408203125, + "learning_rate": 8.989673630054044e-06, + "loss": 134.8155, + "step": 35310 + }, + { + "epoch": 0.2921785167721388, + "grad_norm": 823.2114868164062, + "learning_rate": 8.988823381237778e-06, + "loss": 91.5063, + "step": 35320 + }, + { + "epoch": 0.29226124002150805, + "grad_norm": 1062.8973388671875, + "learning_rate": 8.987972815049144e-06, + "loss": 130.8569, + "step": 35330 + }, + { + "epoch": 0.2923439632708773, + "grad_norm": 676.7745361328125, + "learning_rate": 8.987121931555814e-06, + "loss": 124.0575, + "step": 35340 + }, + { + "epoch": 0.2924266865202465, + "grad_norm": 1002.80029296875, + "learning_rate": 8.986270730825489e-06, + "loss": 101.8287, + "step": 35350 + }, + { + "epoch": 0.29250940976961576, + "grad_norm": 1145.8223876953125, + "learning_rate": 8.985419212925898e-06, + "loss": 110.8668, + "step": 35360 + }, + { + "epoch": 0.292592133018985, + "grad_norm": 992.4735717773438, + "learning_rate": 8.98456737792479e-06, + "loss": 80.9995, + "step": 35370 + }, + { + "epoch": 0.2926748562683542, + "grad_norm": 488.52618408203125, + "learning_rate": 8.983715225889942e-06, + "loss": 111.5234, + "step": 35380 + }, + { + "epoch": 0.29275757951772347, + "grad_norm": 1136.89794921875, + "learning_rate": 8.982862756889158e-06, + "loss": 162.2252, + "step": 35390 + }, + { + "epoch": 0.29284030276709266, + "grad_norm": 1113.3115234375, + "learning_rate": 8.982009970990262e-06, + "loss": 89.9033, + "step": 35400 + }, + { + "epoch": 0.2929230260164619, + "grad_norm": 1392.5189208984375, + "learning_rate": 8.98115686826111e-06, + "loss": 131.7283, + "step": 35410 + }, + { + "epoch": 0.2930057492658312, + "grad_norm": 617.0336303710938, + "learning_rate": 8.980303448769574e-06, + "loss": 99.9001, + "step": 35420 + }, + { + "epoch": 0.29308847251520037, + "grad_norm": 2442.5810546875, + "learning_rate": 8.979449712583562e-06, + "loss": 112.8064, + "step": 35430 + }, + { + "epoch": 0.2931711957645696, + "grad_norm": 544.6372680664062, + "learning_rate": 8.978595659770997e-06, + "loss": 109.6494, + "step": 35440 + }, + { + "epoch": 0.2932539190139389, + "grad_norm": 860.6021118164062, + "learning_rate": 8.977741290399836e-06, 
+ "loss": 106.3515, + "step": 35450 + }, + { + "epoch": 0.2933366422633081, + "grad_norm": 1385.670654296875, + "learning_rate": 8.976886604538055e-06, + "loss": 117.2203, + "step": 35460 + }, + { + "epoch": 0.29341936551267733, + "grad_norm": 653.8483276367188, + "learning_rate": 8.976031602253661e-06, + "loss": 91.5749, + "step": 35470 + }, + { + "epoch": 0.2935020887620466, + "grad_norm": 707.4151000976562, + "learning_rate": 8.975176283614677e-06, + "loss": 137.304, + "step": 35480 + }, + { + "epoch": 0.2935848120114158, + "grad_norm": 724.526123046875, + "learning_rate": 8.97432064868916e-06, + "loss": 117.3225, + "step": 35490 + }, + { + "epoch": 0.29366753526078504, + "grad_norm": 684.9421997070312, + "learning_rate": 8.973464697545191e-06, + "loss": 118.9565, + "step": 35500 + }, + { + "epoch": 0.2937502585101543, + "grad_norm": 1034.4615478515625, + "learning_rate": 8.97260843025087e-06, + "loss": 117.1274, + "step": 35510 + }, + { + "epoch": 0.2938329817595235, + "grad_norm": 808.4039916992188, + "learning_rate": 8.971751846874329e-06, + "loss": 109.1277, + "step": 35520 + }, + { + "epoch": 0.29391570500889275, + "grad_norm": 793.2233276367188, + "learning_rate": 8.97089494748372e-06, + "loss": 150.5428, + "step": 35530 + }, + { + "epoch": 0.293998428258262, + "grad_norm": 1373.2613525390625, + "learning_rate": 8.970037732147226e-06, + "loss": 144.7835, + "step": 35540 + }, + { + "epoch": 0.2940811515076312, + "grad_norm": 876.5359497070312, + "learning_rate": 8.969180200933048e-06, + "loss": 175.0752, + "step": 35550 + }, + { + "epoch": 0.29416387475700045, + "grad_norm": 1215.04443359375, + "learning_rate": 8.968322353909417e-06, + "loss": 105.4971, + "step": 35560 + }, + { + "epoch": 0.2942465980063697, + "grad_norm": 1001.4893188476562, + "learning_rate": 8.96746419114459e-06, + "loss": 109.2281, + "step": 35570 + }, + { + "epoch": 0.2943293212557389, + "grad_norm": 1082.677001953125, + "learning_rate": 8.966605712706844e-06, + "loss": 90.3954, + "step": 35580 + }, + { + "epoch": 0.29441204450510816, + "grad_norm": 962.15380859375, + "learning_rate": 8.965746918664486e-06, + "loss": 96.3644, + "step": 35590 + }, + { + "epoch": 0.2944947677544774, + "grad_norm": 1004.3434448242188, + "learning_rate": 8.964887809085846e-06, + "loss": 128.4367, + "step": 35600 + }, + { + "epoch": 0.2945774910038466, + "grad_norm": 535.0782470703125, + "learning_rate": 8.96402838403928e-06, + "loss": 101.0775, + "step": 35610 + }, + { + "epoch": 0.29466021425321587, + "grad_norm": 451.95941162109375, + "learning_rate": 8.96316864359317e-06, + "loss": 103.9834, + "step": 35620 + }, + { + "epoch": 0.2947429375025851, + "grad_norm": 861.1315307617188, + "learning_rate": 8.962308587815916e-06, + "loss": 95.3887, + "step": 35630 + }, + { + "epoch": 0.2948256607519543, + "grad_norm": 873.6254272460938, + "learning_rate": 8.961448216775955e-06, + "loss": 106.6663, + "step": 35640 + }, + { + "epoch": 0.2949083840013236, + "grad_norm": 647.5992431640625, + "learning_rate": 8.960587530541737e-06, + "loss": 113.9285, + "step": 35650 + }, + { + "epoch": 0.29499110725069283, + "grad_norm": 818.12744140625, + "learning_rate": 8.959726529181748e-06, + "loss": 93.2586, + "step": 35660 + }, + { + "epoch": 0.29507383050006203, + "grad_norm": 2233.892578125, + "learning_rate": 8.95886521276449e-06, + "loss": 99.4201, + "step": 35670 + }, + { + "epoch": 0.2951565537494313, + "grad_norm": 1236.3218994140625, + "learning_rate": 8.958003581358498e-06, + "loss": 122.5037, + "step": 35680 + }, + { + "epoch": 
0.29523927699880054, + "grad_norm": 1387.99462890625, + "learning_rate": 8.957141635032325e-06, + "loss": 103.0061, + "step": 35690 + }, + { + "epoch": 0.29532200024816974, + "grad_norm": 363.5679626464844, + "learning_rate": 8.956279373854553e-06, + "loss": 297.6621, + "step": 35700 + }, + { + "epoch": 0.295404723497539, + "grad_norm": 848.06884765625, + "learning_rate": 8.955416797893787e-06, + "loss": 133.0075, + "step": 35710 + }, + { + "epoch": 0.29548744674690824, + "grad_norm": 767.8993530273438, + "learning_rate": 8.95455390721866e-06, + "loss": 86.9913, + "step": 35720 + }, + { + "epoch": 0.29557016999627744, + "grad_norm": 947.8824462890625, + "learning_rate": 8.953690701897827e-06, + "loss": 126.0984, + "step": 35730 + }, + { + "epoch": 0.2956528932456467, + "grad_norm": 866.94970703125, + "learning_rate": 8.952827181999973e-06, + "loss": 100.0804, + "step": 35740 + }, + { + "epoch": 0.29573561649501595, + "grad_norm": 997.9763793945312, + "learning_rate": 8.951963347593797e-06, + "loss": 158.5358, + "step": 35750 + }, + { + "epoch": 0.29581833974438515, + "grad_norm": 682.8516235351562, + "learning_rate": 8.951099198748036e-06, + "loss": 101.2486, + "step": 35760 + }, + { + "epoch": 0.2959010629937544, + "grad_norm": 804.0764770507812, + "learning_rate": 8.950234735531445e-06, + "loss": 98.5626, + "step": 35770 + }, + { + "epoch": 0.2959837862431236, + "grad_norm": 10388.48046875, + "learning_rate": 8.949369958012806e-06, + "loss": 155.4089, + "step": 35780 + }, + { + "epoch": 0.29606650949249286, + "grad_norm": 886.81298828125, + "learning_rate": 8.948504866260924e-06, + "loss": 113.0202, + "step": 35790 + }, + { + "epoch": 0.2961492327418621, + "grad_norm": 897.130859375, + "learning_rate": 8.94763946034463e-06, + "loss": 142.2092, + "step": 35800 + }, + { + "epoch": 0.2962319559912313, + "grad_norm": 966.3784790039062, + "learning_rate": 8.946773740332781e-06, + "loss": 157.32, + "step": 35810 + }, + { + "epoch": 0.29631467924060056, + "grad_norm": 1265.8228759765625, + "learning_rate": 8.945907706294262e-06, + "loss": 87.6832, + "step": 35820 + }, + { + "epoch": 0.2963974024899698, + "grad_norm": 434.8976135253906, + "learning_rate": 8.945041358297973e-06, + "loss": 115.3741, + "step": 35830 + }, + { + "epoch": 0.296480125739339, + "grad_norm": 1037.9794921875, + "learning_rate": 8.94417469641285e-06, + "loss": 101.735, + "step": 35840 + }, + { + "epoch": 0.29656284898870827, + "grad_norm": 1543.5999755859375, + "learning_rate": 8.943307720707846e-06, + "loss": 149.339, + "step": 35850 + }, + { + "epoch": 0.2966455722380775, + "grad_norm": 851.0203247070312, + "learning_rate": 8.942440431251947e-06, + "loss": 126.8035, + "step": 35860 + }, + { + "epoch": 0.2967282954874467, + "grad_norm": 1087.814697265625, + "learning_rate": 8.941572828114154e-06, + "loss": 154.3589, + "step": 35870 + }, + { + "epoch": 0.296811018736816, + "grad_norm": 1085.237548828125, + "learning_rate": 8.9407049113635e-06, + "loss": 133.6983, + "step": 35880 + }, + { + "epoch": 0.29689374198618523, + "grad_norm": 657.667724609375, + "learning_rate": 8.939836681069042e-06, + "loss": 77.88, + "step": 35890 + }, + { + "epoch": 0.29697646523555443, + "grad_norm": 937.2635498046875, + "learning_rate": 8.938968137299861e-06, + "loss": 121.5767, + "step": 35900 + }, + { + "epoch": 0.2970591884849237, + "grad_norm": 692.5191650390625, + "learning_rate": 8.938099280125064e-06, + "loss": 110.6443, + "step": 35910 + }, + { + "epoch": 0.29714191173429294, + "grad_norm": 1191.0335693359375, + "learning_rate": 
8.937230109613778e-06, + "loss": 125.6926, + "step": 35920 + }, + { + "epoch": 0.29722463498366214, + "grad_norm": 600.857177734375, + "learning_rate": 8.936360625835164e-06, + "loss": 114.9589, + "step": 35930 + }, + { + "epoch": 0.2973073582330314, + "grad_norm": 786.0075073242188, + "learning_rate": 8.935490828858399e-06, + "loss": 156.1116, + "step": 35940 + }, + { + "epoch": 0.29739008148240065, + "grad_norm": 923.1871948242188, + "learning_rate": 8.934620718752691e-06, + "loss": 102.9856, + "step": 35950 + }, + { + "epoch": 0.29747280473176985, + "grad_norm": 785.8792114257812, + "learning_rate": 8.933750295587269e-06, + "loss": 121.2862, + "step": 35960 + }, + { + "epoch": 0.2975555279811391, + "grad_norm": 1262.131591796875, + "learning_rate": 8.932879559431392e-06, + "loss": 144.9996, + "step": 35970 + }, + { + "epoch": 0.29763825123050835, + "grad_norm": 1094.29296875, + "learning_rate": 8.932008510354336e-06, + "loss": 99.4907, + "step": 35980 + }, + { + "epoch": 0.29772097447987755, + "grad_norm": 1515.66748046875, + "learning_rate": 8.931137148425407e-06, + "loss": 117.0325, + "step": 35990 + }, + { + "epoch": 0.2978036977292468, + "grad_norm": 702.0181274414062, + "learning_rate": 8.930265473713939e-06, + "loss": 89.8551, + "step": 36000 + }, + { + "epoch": 0.29788642097861606, + "grad_norm": 924.1445922851562, + "learning_rate": 8.929393486289283e-06, + "loss": 91.7574, + "step": 36010 + }, + { + "epoch": 0.29796914422798526, + "grad_norm": 886.2630615234375, + "learning_rate": 8.928521186220822e-06, + "loss": 134.8864, + "step": 36020 + }, + { + "epoch": 0.2980518674773545, + "grad_norm": 571.9080200195312, + "learning_rate": 8.92764857357796e-06, + "loss": 166.6288, + "step": 36030 + }, + { + "epoch": 0.29813459072672377, + "grad_norm": 1017.0745239257812, + "learning_rate": 8.926775648430124e-06, + "loss": 97.3446, + "step": 36040 + }, + { + "epoch": 0.29821731397609297, + "grad_norm": 1384.15869140625, + "learning_rate": 8.925902410846774e-06, + "loss": 102.3454, + "step": 36050 + }, + { + "epoch": 0.2983000372254622, + "grad_norm": 627.1011962890625, + "learning_rate": 8.925028860897384e-06, + "loss": 99.8053, + "step": 36060 + }, + { + "epoch": 0.2983827604748315, + "grad_norm": 1164.1707763671875, + "learning_rate": 8.924154998651461e-06, + "loss": 150.2465, + "step": 36070 + }, + { + "epoch": 0.2984654837242007, + "grad_norm": 806.7046508789062, + "learning_rate": 8.923280824178538e-06, + "loss": 127.2189, + "step": 36080 + }, + { + "epoch": 0.29854820697356993, + "grad_norm": 936.9692993164062, + "learning_rate": 8.922406337548162e-06, + "loss": 106.9401, + "step": 36090 + }, + { + "epoch": 0.2986309302229392, + "grad_norm": 861.8311157226562, + "learning_rate": 8.921531538829917e-06, + "loss": 92.577, + "step": 36100 + }, + { + "epoch": 0.2987136534723084, + "grad_norm": 901.859375, + "learning_rate": 8.920656428093403e-06, + "loss": 83.8378, + "step": 36110 + }, + { + "epoch": 0.29879637672167764, + "grad_norm": 1293.734375, + "learning_rate": 8.919781005408251e-06, + "loss": 114.6592, + "step": 36120 + }, + { + "epoch": 0.29887909997104684, + "grad_norm": 697.5694580078125, + "learning_rate": 8.918905270844113e-06, + "loss": 141.8754, + "step": 36130 + }, + { + "epoch": 0.2989618232204161, + "grad_norm": 822.4385986328125, + "learning_rate": 8.918029224470671e-06, + "loss": 101.8231, + "step": 36140 + }, + { + "epoch": 0.29904454646978534, + "grad_norm": 886.5821533203125, + "learning_rate": 8.917152866357621e-06, + "loss": 97.08, + "step": 36150 + }, + { + 
"epoch": 0.29912726971915454, + "grad_norm": 1727.11279296875, + "learning_rate": 8.916276196574698e-06, + "loss": 124.1994, + "step": 36160 + }, + { + "epoch": 0.2992099929685238, + "grad_norm": 649.4053955078125, + "learning_rate": 8.91539921519165e-06, + "loss": 113.0979, + "step": 36170 + }, + { + "epoch": 0.29929271621789305, + "grad_norm": 762.2130737304688, + "learning_rate": 8.914521922278255e-06, + "loss": 118.0666, + "step": 36180 + }, + { + "epoch": 0.29937543946726225, + "grad_norm": 995.956787109375, + "learning_rate": 8.913644317904317e-06, + "loss": 125.7407, + "step": 36190 + }, + { + "epoch": 0.2994581627166315, + "grad_norm": 1194.2001953125, + "learning_rate": 8.912766402139662e-06, + "loss": 124.1992, + "step": 36200 + }, + { + "epoch": 0.29954088596600076, + "grad_norm": 1277.1484375, + "learning_rate": 8.91188817505414e-06, + "loss": 146.9492, + "step": 36210 + }, + { + "epoch": 0.29962360921536996, + "grad_norm": 992.379638671875, + "learning_rate": 8.91100963671763e-06, + "loss": 107.3992, + "step": 36220 + }, + { + "epoch": 0.2997063324647392, + "grad_norm": 820.5222778320312, + "learning_rate": 8.910130787200032e-06, + "loss": 93.3464, + "step": 36230 + }, + { + "epoch": 0.29978905571410847, + "grad_norm": 2073.3251953125, + "learning_rate": 8.909251626571273e-06, + "loss": 101.3619, + "step": 36240 + }, + { + "epoch": 0.29987177896347766, + "grad_norm": 767.80615234375, + "learning_rate": 8.908372154901302e-06, + "loss": 89.5982, + "step": 36250 + }, + { + "epoch": 0.2999545022128469, + "grad_norm": 1339.1114501953125, + "learning_rate": 8.907492372260096e-06, + "loss": 118.8273, + "step": 36260 + }, + { + "epoch": 0.3000372254622162, + "grad_norm": 1395.037353515625, + "learning_rate": 8.906612278717657e-06, + "loss": 114.5038, + "step": 36270 + }, + { + "epoch": 0.30011994871158537, + "grad_norm": 1279.881591796875, + "learning_rate": 8.905731874344005e-06, + "loss": 110.8277, + "step": 36280 + }, + { + "epoch": 0.3002026719609546, + "grad_norm": 1153.6810302734375, + "learning_rate": 8.904851159209193e-06, + "loss": 111.3379, + "step": 36290 + }, + { + "epoch": 0.3002853952103239, + "grad_norm": 910.88623046875, + "learning_rate": 8.903970133383297e-06, + "loss": 83.3806, + "step": 36300 + }, + { + "epoch": 0.3003681184596931, + "grad_norm": 993.2054443359375, + "learning_rate": 8.903088796936414e-06, + "loss": 117.134, + "step": 36310 + }, + { + "epoch": 0.30045084170906233, + "grad_norm": 1444.7095947265625, + "learning_rate": 8.902207149938667e-06, + "loss": 118.84, + "step": 36320 + }, + { + "epoch": 0.3005335649584316, + "grad_norm": 722.3483276367188, + "learning_rate": 8.901325192460206e-06, + "loss": 100.5878, + "step": 36330 + }, + { + "epoch": 0.3006162882078008, + "grad_norm": 2218.97119140625, + "learning_rate": 8.900442924571204e-06, + "loss": 125.5503, + "step": 36340 + }, + { + "epoch": 0.30069901145717004, + "grad_norm": 933.83984375, + "learning_rate": 8.89956034634186e-06, + "loss": 100.4451, + "step": 36350 + }, + { + "epoch": 0.3007817347065393, + "grad_norm": 1003.083251953125, + "learning_rate": 8.898677457842394e-06, + "loss": 97.8874, + "step": 36360 + }, + { + "epoch": 0.3008644579559085, + "grad_norm": 1543.9967041015625, + "learning_rate": 8.897794259143057e-06, + "loss": 144.2935, + "step": 36370 + }, + { + "epoch": 0.30094718120527775, + "grad_norm": 1334.9381103515625, + "learning_rate": 8.896910750314118e-06, + "loss": 91.8307, + "step": 36380 + }, + { + "epoch": 0.301029904454647, + "grad_norm": 1186.411865234375, + 
"learning_rate": 8.896026931425876e-06, + "loss": 131.1232, + "step": 36390 + }, + { + "epoch": 0.3011126277040162, + "grad_norm": 1868.9034423828125, + "learning_rate": 8.895142802548653e-06, + "loss": 124.4849, + "step": 36400 + }, + { + "epoch": 0.30119535095338545, + "grad_norm": 840.4161987304688, + "learning_rate": 8.89425836375279e-06, + "loss": 81.0873, + "step": 36410 + }, + { + "epoch": 0.3012780742027547, + "grad_norm": 1163.7984619140625, + "learning_rate": 8.893373615108663e-06, + "loss": 136.477, + "step": 36420 + }, + { + "epoch": 0.3013607974521239, + "grad_norm": 1086.72216796875, + "learning_rate": 8.892488556686665e-06, + "loss": 126.1113, + "step": 36430 + }, + { + "epoch": 0.30144352070149316, + "grad_norm": 879.4309692382812, + "learning_rate": 8.891603188557218e-06, + "loss": 96.8926, + "step": 36440 + }, + { + "epoch": 0.3015262439508624, + "grad_norm": 1197.5047607421875, + "learning_rate": 8.890717510790763e-06, + "loss": 110.1916, + "step": 36450 + }, + { + "epoch": 0.3016089672002316, + "grad_norm": 349.7181701660156, + "learning_rate": 8.889831523457773e-06, + "loss": 154.8354, + "step": 36460 + }, + { + "epoch": 0.30169169044960087, + "grad_norm": 1460.5762939453125, + "learning_rate": 8.888945226628742e-06, + "loss": 140.0803, + "step": 36470 + }, + { + "epoch": 0.3017744136989701, + "grad_norm": 665.8645629882812, + "learning_rate": 8.888058620374185e-06, + "loss": 92.3529, + "step": 36480 + }, + { + "epoch": 0.3018571369483393, + "grad_norm": 879.9156494140625, + "learning_rate": 8.887171704764647e-06, + "loss": 129.1003, + "step": 36490 + }, + { + "epoch": 0.3019398601977086, + "grad_norm": 815.3402709960938, + "learning_rate": 8.8862844798707e-06, + "loss": 118.2584, + "step": 36500 + }, + { + "epoch": 0.3020225834470778, + "grad_norm": 1314.0321044921875, + "learning_rate": 8.885396945762928e-06, + "loss": 126.3761, + "step": 36510 + }, + { + "epoch": 0.30210530669644703, + "grad_norm": 858.8338012695312, + "learning_rate": 8.884509102511956e-06, + "loss": 98.5212, + "step": 36520 + }, + { + "epoch": 0.3021880299458163, + "grad_norm": 1700.220458984375, + "learning_rate": 8.883620950188422e-06, + "loss": 106.6714, + "step": 36530 + }, + { + "epoch": 0.3022707531951855, + "grad_norm": 696.0690307617188, + "learning_rate": 8.882732488862988e-06, + "loss": 118.2453, + "step": 36540 + }, + { + "epoch": 0.30235347644455474, + "grad_norm": 793.1904907226562, + "learning_rate": 8.881843718606353e-06, + "loss": 125.0236, + "step": 36550 + }, + { + "epoch": 0.302436199693924, + "grad_norm": 970.5441284179688, + "learning_rate": 8.880954639489227e-06, + "loss": 108.3671, + "step": 36560 + }, + { + "epoch": 0.3025189229432932, + "grad_norm": 1777.4910888671875, + "learning_rate": 8.880065251582354e-06, + "loss": 160.0988, + "step": 36570 + }, + { + "epoch": 0.30260164619266244, + "grad_norm": 1037.1177978515625, + "learning_rate": 8.879175554956495e-06, + "loss": 104.0029, + "step": 36580 + }, + { + "epoch": 0.3026843694420317, + "grad_norm": 806.9476928710938, + "learning_rate": 8.87828554968244e-06, + "loss": 88.7504, + "step": 36590 + }, + { + "epoch": 0.3027670926914009, + "grad_norm": 1243.7362060546875, + "learning_rate": 8.877395235831002e-06, + "loss": 111.0507, + "step": 36600 + }, + { + "epoch": 0.30284981594077015, + "grad_norm": 1789.92529296875, + "learning_rate": 8.876504613473019e-06, + "loss": 154.0786, + "step": 36610 + }, + { + "epoch": 0.3029325391901394, + "grad_norm": 718.7664184570312, + "learning_rate": 8.875613682679356e-06, + "loss": 
108.9329, + "step": 36620 + }, + { + "epoch": 0.3030152624395086, + "grad_norm": 748.9815063476562, + "learning_rate": 8.874722443520898e-06, + "loss": 123.7988, + "step": 36630 + }, + { + "epoch": 0.30309798568887786, + "grad_norm": 606.9013671875, + "learning_rate": 8.873830896068559e-06, + "loss": 107.3505, + "step": 36640 + }, + { + "epoch": 0.3031807089382471, + "grad_norm": 878.5547485351562, + "learning_rate": 8.872939040393274e-06, + "loss": 113.0779, + "step": 36650 + }, + { + "epoch": 0.3032634321876163, + "grad_norm": 1300.362060546875, + "learning_rate": 8.872046876566003e-06, + "loss": 130.3682, + "step": 36660 + }, + { + "epoch": 0.30334615543698557, + "grad_norm": 886.26025390625, + "learning_rate": 8.871154404657734e-06, + "loss": 106.1408, + "step": 36670 + }, + { + "epoch": 0.3034288786863548, + "grad_norm": 1735.70947265625, + "learning_rate": 8.870261624739474e-06, + "loss": 120.6958, + "step": 36680 + }, + { + "epoch": 0.303511601935724, + "grad_norm": 886.688232421875, + "learning_rate": 8.869368536882258e-06, + "loss": 102.5698, + "step": 36690 + }, + { + "epoch": 0.3035943251850933, + "grad_norm": 632.3603515625, + "learning_rate": 8.868475141157146e-06, + "loss": 88.5606, + "step": 36700 + }, + { + "epoch": 0.3036770484344625, + "grad_norm": 764.1319580078125, + "learning_rate": 8.867581437635221e-06, + "loss": 107.0108, + "step": 36710 + }, + { + "epoch": 0.3037597716838317, + "grad_norm": 1273.7620849609375, + "learning_rate": 8.866687426387592e-06, + "loss": 159.5809, + "step": 36720 + }, + { + "epoch": 0.303842494933201, + "grad_norm": 685.6810302734375, + "learning_rate": 8.86579310748539e-06, + "loss": 99.7657, + "step": 36730 + }, + { + "epoch": 0.30392521818257023, + "grad_norm": 777.0698852539062, + "learning_rate": 8.86489848099977e-06, + "loss": 120.2746, + "step": 36740 + }, + { + "epoch": 0.30400794143193943, + "grad_norm": 746.861572265625, + "learning_rate": 8.864003547001916e-06, + "loss": 106.8211, + "step": 36750 + }, + { + "epoch": 0.3040906646813087, + "grad_norm": 853.1625366210938, + "learning_rate": 8.863108305563035e-06, + "loss": 91.7284, + "step": 36760 + }, + { + "epoch": 0.30417338793067794, + "grad_norm": 1231.1922607421875, + "learning_rate": 8.862212756754354e-06, + "loss": 155.4766, + "step": 36770 + }, + { + "epoch": 0.30425611118004714, + "grad_norm": 1215.257568359375, + "learning_rate": 8.861316900647129e-06, + "loss": 149.8827, + "step": 36780 + }, + { + "epoch": 0.3043388344294164, + "grad_norm": 3567.228515625, + "learning_rate": 8.860420737312638e-06, + "loss": 121.5637, + "step": 36790 + }, + { + "epoch": 0.30442155767878565, + "grad_norm": 694.6704711914062, + "learning_rate": 8.859524266822188e-06, + "loss": 101.825, + "step": 36800 + }, + { + "epoch": 0.30450428092815485, + "grad_norm": 1621.055908203125, + "learning_rate": 8.858627489247105e-06, + "loss": 128.4847, + "step": 36810 + }, + { + "epoch": 0.3045870041775241, + "grad_norm": 974.5038452148438, + "learning_rate": 8.85773040465874e-06, + "loss": 157.9853, + "step": 36820 + }, + { + "epoch": 0.30466972742689336, + "grad_norm": 1332.6234130859375, + "learning_rate": 8.856833013128472e-06, + "loss": 107.964, + "step": 36830 + }, + { + "epoch": 0.30475245067626255, + "grad_norm": 1167.7923583984375, + "learning_rate": 8.855935314727702e-06, + "loss": 109.8553, + "step": 36840 + }, + { + "epoch": 0.3048351739256318, + "grad_norm": 704.8397216796875, + "learning_rate": 8.855037309527854e-06, + "loss": 115.8193, + "step": 36850 + }, + { + "epoch": 0.304917897175001, + 
"grad_norm": 883.3186645507812, + "learning_rate": 8.854138997600382e-06, + "loss": 91.2245, + "step": 36860 + }, + { + "epoch": 0.30500062042437026, + "grad_norm": 660.802001953125, + "learning_rate": 8.853240379016757e-06, + "loss": 88.3629, + "step": 36870 + }, + { + "epoch": 0.3050833436737395, + "grad_norm": 1194.5704345703125, + "learning_rate": 8.852341453848477e-06, + "loss": 113.7338, + "step": 36880 + }, + { + "epoch": 0.3051660669231087, + "grad_norm": 1196.6671142578125, + "learning_rate": 8.851442222167068e-06, + "loss": 96.392, + "step": 36890 + }, + { + "epoch": 0.30524879017247797, + "grad_norm": 1096.315673828125, + "learning_rate": 8.850542684044078e-06, + "loss": 132.9981, + "step": 36900 + }, + { + "epoch": 0.3053315134218472, + "grad_norm": 843.752685546875, + "learning_rate": 8.849642839551079e-06, + "loss": 120.9463, + "step": 36910 + }, + { + "epoch": 0.3054142366712164, + "grad_norm": 1700.408203125, + "learning_rate": 8.848742688759666e-06, + "loss": 106.4173, + "step": 36920 + }, + { + "epoch": 0.3054969599205857, + "grad_norm": 1714.5902099609375, + "learning_rate": 8.847842231741462e-06, + "loss": 119.7494, + "step": 36930 + }, + { + "epoch": 0.30557968316995493, + "grad_norm": 998.3058471679688, + "learning_rate": 8.846941468568108e-06, + "loss": 104.204, + "step": 36940 + }, + { + "epoch": 0.30566240641932413, + "grad_norm": 1132.1826171875, + "learning_rate": 8.846040399311278e-06, + "loss": 112.0986, + "step": 36950 + }, + { + "epoch": 0.3057451296686934, + "grad_norm": 661.6875, + "learning_rate": 8.845139024042664e-06, + "loss": 81.744, + "step": 36960 + }, + { + "epoch": 0.30582785291806264, + "grad_norm": 932.6510009765625, + "learning_rate": 8.844237342833985e-06, + "loss": 127.1373, + "step": 36970 + }, + { + "epoch": 0.30591057616743184, + "grad_norm": 780.3368530273438, + "learning_rate": 8.843335355756983e-06, + "loss": 75.523, + "step": 36980 + }, + { + "epoch": 0.3059932994168011, + "grad_norm": 754.1832275390625, + "learning_rate": 8.842433062883427e-06, + "loss": 89.4337, + "step": 36990 + }, + { + "epoch": 0.30607602266617034, + "grad_norm": 764.4425659179688, + "learning_rate": 8.841530464285105e-06, + "loss": 129.0274, + "step": 37000 + }, + { + "epoch": 0.30615874591553954, + "grad_norm": 861.7986450195312, + "learning_rate": 8.840627560033833e-06, + "loss": 82.3673, + "step": 37010 + }, + { + "epoch": 0.3062414691649088, + "grad_norm": 988.38916015625, + "learning_rate": 8.839724350201452e-06, + "loss": 114.8146, + "step": 37020 + }, + { + "epoch": 0.30632419241427805, + "grad_norm": 530.607421875, + "learning_rate": 8.838820834859829e-06, + "loss": 103.93, + "step": 37030 + }, + { + "epoch": 0.30640691566364725, + "grad_norm": 941.2070922851562, + "learning_rate": 8.837917014080849e-06, + "loss": 89.9074, + "step": 37040 + }, + { + "epoch": 0.3064896389130165, + "grad_norm": 688.3717651367188, + "learning_rate": 8.837012887936426e-06, + "loss": 111.2304, + "step": 37050 + }, + { + "epoch": 0.30657236216238576, + "grad_norm": 1092.7115478515625, + "learning_rate": 8.836108456498497e-06, + "loss": 93.2177, + "step": 37060 + }, + { + "epoch": 0.30665508541175496, + "grad_norm": 970.1156616210938, + "learning_rate": 8.835203719839024e-06, + "loss": 113.6382, + "step": 37070 + }, + { + "epoch": 0.3067378086611242, + "grad_norm": 1119.5540771484375, + "learning_rate": 8.834298678029988e-06, + "loss": 120.2044, + "step": 37080 + }, + { + "epoch": 0.30682053191049347, + "grad_norm": 840.0438842773438, + "learning_rate": 8.833393331143409e-06, + 
"loss": 103.9367, + "step": 37090 + }, + { + "epoch": 0.30690325515986266, + "grad_norm": 1232.6182861328125, + "learning_rate": 8.832487679251311e-06, + "loss": 107.9942, + "step": 37100 + }, + { + "epoch": 0.3069859784092319, + "grad_norm": 702.6771850585938, + "learning_rate": 8.831581722425761e-06, + "loss": 107.5534, + "step": 37110 + }, + { + "epoch": 0.3070687016586012, + "grad_norm": 933.55615234375, + "learning_rate": 8.830675460738835e-06, + "loss": 92.3703, + "step": 37120 + }, + { + "epoch": 0.30715142490797037, + "grad_norm": 1156.6966552734375, + "learning_rate": 8.829768894262644e-06, + "loss": 118.0975, + "step": 37130 + }, + { + "epoch": 0.3072341481573396, + "grad_norm": 766.1238403320312, + "learning_rate": 8.82886202306932e-06, + "loss": 91.8914, + "step": 37140 + }, + { + "epoch": 0.3073168714067089, + "grad_norm": 1076.5308837890625, + "learning_rate": 8.827954847231016e-06, + "loss": 115.0902, + "step": 37150 + }, + { + "epoch": 0.3073995946560781, + "grad_norm": 867.0706176757812, + "learning_rate": 8.82704736681991e-06, + "loss": 128.4827, + "step": 37160 + }, + { + "epoch": 0.30748231790544733, + "grad_norm": 963.2921752929688, + "learning_rate": 8.826139581908211e-06, + "loss": 112.9323, + "step": 37170 + }, + { + "epoch": 0.3075650411548166, + "grad_norm": 735.8433227539062, + "learning_rate": 8.825231492568146e-06, + "loss": 114.1932, + "step": 37180 + }, + { + "epoch": 0.3076477644041858, + "grad_norm": 1258.63818359375, + "learning_rate": 8.824323098871966e-06, + "loss": 136.7632, + "step": 37190 + }, + { + "epoch": 0.30773048765355504, + "grad_norm": 1213.376220703125, + "learning_rate": 8.823414400891948e-06, + "loss": 140.9363, + "step": 37200 + }, + { + "epoch": 0.30781321090292424, + "grad_norm": 816.0035400390625, + "learning_rate": 8.822505398700395e-06, + "loss": 97.0494, + "step": 37210 + }, + { + "epoch": 0.3078959341522935, + "grad_norm": 908.9609375, + "learning_rate": 8.821596092369627e-06, + "loss": 112.1852, + "step": 37220 + }, + { + "epoch": 0.30797865740166275, + "grad_norm": 1214.3897705078125, + "learning_rate": 8.820686481971998e-06, + "loss": 133.1782, + "step": 37230 + }, + { + "epoch": 0.30806138065103195, + "grad_norm": 1155.935546875, + "learning_rate": 8.81977656757988e-06, + "loss": 91.5354, + "step": 37240 + }, + { + "epoch": 0.3081441039004012, + "grad_norm": 1258.32275390625, + "learning_rate": 8.81886634926567e-06, + "loss": 108.436, + "step": 37250 + }, + { + "epoch": 0.30822682714977045, + "grad_norm": 1130.4833984375, + "learning_rate": 8.817955827101794e-06, + "loss": 133.1508, + "step": 37260 + }, + { + "epoch": 0.30830955039913965, + "grad_norm": 738.8419799804688, + "learning_rate": 8.817045001160693e-06, + "loss": 122.5803, + "step": 37270 + }, + { + "epoch": 0.3083922736485089, + "grad_norm": 913.4530639648438, + "learning_rate": 8.816133871514838e-06, + "loss": 108.7282, + "step": 37280 + }, + { + "epoch": 0.30847499689787816, + "grad_norm": 600.1754150390625, + "learning_rate": 8.815222438236726e-06, + "loss": 91.7117, + "step": 37290 + }, + { + "epoch": 0.30855772014724736, + "grad_norm": 987.5612182617188, + "learning_rate": 8.814310701398873e-06, + "loss": 111.6003, + "step": 37300 + }, + { + "epoch": 0.3086404433966166, + "grad_norm": 718.8853759765625, + "learning_rate": 8.813398661073823e-06, + "loss": 120.6641, + "step": 37310 + }, + { + "epoch": 0.30872316664598587, + "grad_norm": 969.3789672851562, + "learning_rate": 8.812486317334145e-06, + "loss": 112.9521, + "step": 37320 + }, + { + "epoch": 
0.30880588989535507, + "grad_norm": 1069.70849609375, + "learning_rate": 8.811573670252426e-06, + "loss": 102.9678, + "step": 37330 + }, + { + "epoch": 0.3088886131447243, + "grad_norm": 1160.0357666015625, + "learning_rate": 8.810660719901283e-06, + "loss": 142.8662, + "step": 37340 + }, + { + "epoch": 0.3089713363940936, + "grad_norm": 923.1897583007812, + "learning_rate": 8.809747466353356e-06, + "loss": 134.5727, + "step": 37350 + }, + { + "epoch": 0.3090540596434628, + "grad_norm": 1227.94580078125, + "learning_rate": 8.808833909681305e-06, + "loss": 144.0661, + "step": 37360 + }, + { + "epoch": 0.30913678289283203, + "grad_norm": 636.2891235351562, + "learning_rate": 8.80792004995782e-06, + "loss": 120.0885, + "step": 37370 + }, + { + "epoch": 0.3092195061422013, + "grad_norm": 1097.7366943359375, + "learning_rate": 8.807005887255615e-06, + "loss": 138.4748, + "step": 37380 + }, + { + "epoch": 0.3093022293915705, + "grad_norm": 814.5217895507812, + "learning_rate": 8.806091421647423e-06, + "loss": 113.6995, + "step": 37390 + }, + { + "epoch": 0.30938495264093974, + "grad_norm": 737.350341796875, + "learning_rate": 8.805176653206004e-06, + "loss": 116.3498, + "step": 37400 + }, + { + "epoch": 0.309467675890309, + "grad_norm": 2945.936279296875, + "learning_rate": 8.80426158200414e-06, + "loss": 109.3581, + "step": 37410 + }, + { + "epoch": 0.3095503991396782, + "grad_norm": 1349.6761474609375, + "learning_rate": 8.803346208114643e-06, + "loss": 117.0218, + "step": 37420 + }, + { + "epoch": 0.30963312238904744, + "grad_norm": 744.024658203125, + "learning_rate": 8.802430531610344e-06, + "loss": 101.151, + "step": 37430 + }, + { + "epoch": 0.3097158456384167, + "grad_norm": 1006.9130859375, + "learning_rate": 8.801514552564097e-06, + "loss": 87.0184, + "step": 37440 + }, + { + "epoch": 0.3097985688877859, + "grad_norm": 1106.60693359375, + "learning_rate": 8.800598271048784e-06, + "loss": 159.6884, + "step": 37450 + }, + { + "epoch": 0.30988129213715515, + "grad_norm": 1054.00244140625, + "learning_rate": 8.799681687137309e-06, + "loss": 97.845, + "step": 37460 + }, + { + "epoch": 0.3099640153865244, + "grad_norm": 551.2701416015625, + "learning_rate": 8.7987648009026e-06, + "loss": 87.5821, + "step": 37470 + }, + { + "epoch": 0.3100467386358936, + "grad_norm": 2352.765869140625, + "learning_rate": 8.79784761241761e-06, + "loss": 144.6846, + "step": 37480 + }, + { + "epoch": 0.31012946188526286, + "grad_norm": 899.6213989257812, + "learning_rate": 8.796930121755315e-06, + "loss": 110.2097, + "step": 37490 + }, + { + "epoch": 0.3102121851346321, + "grad_norm": 527.784912109375, + "learning_rate": 8.796012328988716e-06, + "loss": 98.5166, + "step": 37500 + }, + { + "epoch": 0.3102949083840013, + "grad_norm": 1046.642822265625, + "learning_rate": 8.795094234190837e-06, + "loss": 94.5682, + "step": 37510 + }, + { + "epoch": 0.31037763163337057, + "grad_norm": 1084.1600341796875, + "learning_rate": 8.794175837434729e-06, + "loss": 141.1946, + "step": 37520 + }, + { + "epoch": 0.3104603548827398, + "grad_norm": 761.6773071289062, + "learning_rate": 8.79325713879346e-06, + "loss": 87.328, + "step": 37530 + }, + { + "epoch": 0.310543078132109, + "grad_norm": 1374.055908203125, + "learning_rate": 8.792338138340131e-06, + "loss": 102.905, + "step": 37540 + }, + { + "epoch": 0.3106258013814783, + "grad_norm": 935.9447631835938, + "learning_rate": 8.791418836147858e-06, + "loss": 146.4921, + "step": 37550 + }, + { + "epoch": 0.3107085246308475, + "grad_norm": 1152.4400634765625, + "learning_rate": 
8.790499232289793e-06, + "loss": 128.4351, + "step": 37560 + }, + { + "epoch": 0.3107912478802167, + "grad_norm": 658.3630981445312, + "learning_rate": 8.789579326839097e-06, + "loss": 121.7294, + "step": 37570 + }, + { + "epoch": 0.310873971129586, + "grad_norm": 830.0960083007812, + "learning_rate": 8.788659119868966e-06, + "loss": 133.9257, + "step": 37580 + }, + { + "epoch": 0.3109566943789552, + "grad_norm": 866.0687255859375, + "learning_rate": 8.787738611452616e-06, + "loss": 116.4662, + "step": 37590 + }, + { + "epoch": 0.31103941762832443, + "grad_norm": 1266.32177734375, + "learning_rate": 8.78681780166329e-06, + "loss": 120.4589, + "step": 37600 + }, + { + "epoch": 0.3111221408776937, + "grad_norm": 876.0301513671875, + "learning_rate": 8.785896690574248e-06, + "loss": 103.7038, + "step": 37610 + }, + { + "epoch": 0.3112048641270629, + "grad_norm": 822.327880859375, + "learning_rate": 8.784975278258783e-06, + "loss": 146.5088, + "step": 37620 + }, + { + "epoch": 0.31128758737643214, + "grad_norm": 976.1931762695312, + "learning_rate": 8.784053564790205e-06, + "loss": 110.9248, + "step": 37630 + }, + { + "epoch": 0.3113703106258014, + "grad_norm": 1754.4827880859375, + "learning_rate": 8.783131550241853e-06, + "loss": 132.1888, + "step": 37640 + }, + { + "epoch": 0.3114530338751706, + "grad_norm": 889.1416015625, + "learning_rate": 8.782209234687083e-06, + "loss": 98.5607, + "step": 37650 + }, + { + "epoch": 0.31153575712453985, + "grad_norm": 578.06689453125, + "learning_rate": 8.781286618199285e-06, + "loss": 93.0681, + "step": 37660 + }, + { + "epoch": 0.3116184803739091, + "grad_norm": 625.0341796875, + "learning_rate": 8.780363700851863e-06, + "loss": 84.4234, + "step": 37670 + }, + { + "epoch": 0.3117012036232783, + "grad_norm": 1180.2471923828125, + "learning_rate": 8.779440482718251e-06, + "loss": 126.0896, + "step": 37680 + }, + { + "epoch": 0.31178392687264755, + "grad_norm": 1036.988525390625, + "learning_rate": 8.778516963871904e-06, + "loss": 109.445, + "step": 37690 + }, + { + "epoch": 0.3118666501220168, + "grad_norm": 771.7061157226562, + "learning_rate": 8.777593144386305e-06, + "loss": 106.4233, + "step": 37700 + }, + { + "epoch": 0.311949373371386, + "grad_norm": 853.5861206054688, + "learning_rate": 8.776669024334955e-06, + "loss": 149.2146, + "step": 37710 + }, + { + "epoch": 0.31203209662075526, + "grad_norm": 1038.280029296875, + "learning_rate": 8.775744603791385e-06, + "loss": 87.6942, + "step": 37720 + }, + { + "epoch": 0.3121148198701245, + "grad_norm": 785.39892578125, + "learning_rate": 8.774819882829144e-06, + "loss": 101.3138, + "step": 37730 + }, + { + "epoch": 0.3121975431194937, + "grad_norm": 810.301513671875, + "learning_rate": 8.77389486152181e-06, + "loss": 104.7605, + "step": 37740 + }, + { + "epoch": 0.31228026636886297, + "grad_norm": 907.918212890625, + "learning_rate": 8.772969539942981e-06, + "loss": 93.2778, + "step": 37750 + }, + { + "epoch": 0.3123629896182322, + "grad_norm": 1630.78955078125, + "learning_rate": 8.772043918166282e-06, + "loss": 118.6197, + "step": 37760 + }, + { + "epoch": 0.3124457128676014, + "grad_norm": 792.9736328125, + "learning_rate": 8.771117996265358e-06, + "loss": 110.336, + "step": 37770 + }, + { + "epoch": 0.3125284361169707, + "grad_norm": 515.8519287109375, + "learning_rate": 8.770191774313883e-06, + "loss": 98.6496, + "step": 37780 + }, + { + "epoch": 0.31261115936633993, + "grad_norm": 889.776611328125, + "learning_rate": 8.769265252385552e-06, + "loss": 165.6621, + "step": 37790 + }, + { + "epoch": 
0.31269388261570913, + "grad_norm": 780.7835693359375, + "learning_rate": 8.768338430554083e-06, + "loss": 91.5842, + "step": 37800 + }, + { + "epoch": 0.3127766058650784, + "grad_norm": 1138.0804443359375, + "learning_rate": 8.76741130889322e-06, + "loss": 100.7134, + "step": 37810 + }, + { + "epoch": 0.31285932911444764, + "grad_norm": 942.5914916992188, + "learning_rate": 8.766483887476727e-06, + "loss": 105.6115, + "step": 37820 + }, + { + "epoch": 0.31294205236381684, + "grad_norm": 618.5468139648438, + "learning_rate": 8.7655561663784e-06, + "loss": 75.4478, + "step": 37830 + }, + { + "epoch": 0.3130247756131861, + "grad_norm": 1693.069580078125, + "learning_rate": 8.764628145672048e-06, + "loss": 127.7167, + "step": 37840 + }, + { + "epoch": 0.31310749886255534, + "grad_norm": 1026.7965087890625, + "learning_rate": 8.763699825431513e-06, + "loss": 138.911, + "step": 37850 + }, + { + "epoch": 0.31319022211192454, + "grad_norm": 1648.09765625, + "learning_rate": 8.762771205730656e-06, + "loss": 117.1393, + "step": 37860 + }, + { + "epoch": 0.3132729453612938, + "grad_norm": 933.2096557617188, + "learning_rate": 8.761842286643362e-06, + "loss": 96.2922, + "step": 37870 + }, + { + "epoch": 0.31335566861066305, + "grad_norm": 802.694580078125, + "learning_rate": 8.760913068243542e-06, + "loss": 144.4842, + "step": 37880 + }, + { + "epoch": 0.31343839186003225, + "grad_norm": 1243.0850830078125, + "learning_rate": 8.759983550605132e-06, + "loss": 128.3055, + "step": 37890 + }, + { + "epoch": 0.3135211151094015, + "grad_norm": 1257.254638671875, + "learning_rate": 8.759053733802083e-06, + "loss": 100.2831, + "step": 37900 + }, + { + "epoch": 0.31360383835877076, + "grad_norm": 934.783935546875, + "learning_rate": 8.758123617908383e-06, + "loss": 100.1143, + "step": 37910 + }, + { + "epoch": 0.31368656160813996, + "grad_norm": 989.8101196289062, + "learning_rate": 8.757193202998033e-06, + "loss": 127.9963, + "step": 37920 + }, + { + "epoch": 0.3137692848575092, + "grad_norm": 1011.3324584960938, + "learning_rate": 8.756262489145061e-06, + "loss": 112.5696, + "step": 37930 + }, + { + "epoch": 0.3138520081068784, + "grad_norm": 925.4760131835938, + "learning_rate": 8.755331476423526e-06, + "loss": 79.9976, + "step": 37940 + }, + { + "epoch": 0.31393473135624766, + "grad_norm": 932.2153930664062, + "learning_rate": 8.754400164907496e-06, + "loss": 118.1142, + "step": 37950 + }, + { + "epoch": 0.3140174546056169, + "grad_norm": 869.6834106445312, + "learning_rate": 8.753468554671078e-06, + "loss": 122.9429, + "step": 37960 + }, + { + "epoch": 0.3141001778549861, + "grad_norm": 1097.6431884765625, + "learning_rate": 8.752536645788391e-06, + "loss": 116.1235, + "step": 37970 + }, + { + "epoch": 0.31418290110435537, + "grad_norm": 943.837158203125, + "learning_rate": 8.751604438333587e-06, + "loss": 120.1827, + "step": 37980 + }, + { + "epoch": 0.3142656243537246, + "grad_norm": 965.2254028320312, + "learning_rate": 8.750671932380834e-06, + "loss": 111.8385, + "step": 37990 + }, + { + "epoch": 0.3143483476030938, + "grad_norm": 2054.935546875, + "learning_rate": 8.749739128004329e-06, + "loss": 105.5353, + "step": 38000 + }, + { + "epoch": 0.3144310708524631, + "grad_norm": 885.7263793945312, + "learning_rate": 8.748806025278292e-06, + "loss": 113.4429, + "step": 38010 + }, + { + "epoch": 0.31451379410183233, + "grad_norm": 1407.1341552734375, + "learning_rate": 8.747872624276963e-06, + "loss": 99.107, + "step": 38020 + }, + { + "epoch": 0.31459651735120153, + "grad_norm": 1092.1849365234375, + 
"learning_rate": 8.746938925074609e-06, + "loss": 130.1728, + "step": 38030 + }, + { + "epoch": 0.3146792406005708, + "grad_norm": 977.42578125, + "learning_rate": 8.746004927745522e-06, + "loss": 116.9955, + "step": 38040 + }, + { + "epoch": 0.31476196384994004, + "grad_norm": 813.2716064453125, + "learning_rate": 8.745070632364014e-06, + "loss": 103.2874, + "step": 38050 + }, + { + "epoch": 0.31484468709930924, + "grad_norm": 823.4459838867188, + "learning_rate": 8.744136039004422e-06, + "loss": 122.7185, + "step": 38060 + }, + { + "epoch": 0.3149274103486785, + "grad_norm": 675.5857543945312, + "learning_rate": 8.743201147741112e-06, + "loss": 117.528, + "step": 38070 + }, + { + "epoch": 0.31501013359804775, + "grad_norm": 1719.105224609375, + "learning_rate": 8.742265958648464e-06, + "loss": 110.0581, + "step": 38080 + }, + { + "epoch": 0.31509285684741695, + "grad_norm": 1066.7659912109375, + "learning_rate": 8.741330471800888e-06, + "loss": 89.2473, + "step": 38090 + }, + { + "epoch": 0.3151755800967862, + "grad_norm": 776.9163818359375, + "learning_rate": 8.740394687272817e-06, + "loss": 124.1178, + "step": 38100 + }, + { + "epoch": 0.31525830334615546, + "grad_norm": 760.9815063476562, + "learning_rate": 8.739458605138706e-06, + "loss": 119.1256, + "step": 38110 + }, + { + "epoch": 0.31534102659552465, + "grad_norm": 974.4657592773438, + "learning_rate": 8.738522225473036e-06, + "loss": 105.6252, + "step": 38120 + }, + { + "epoch": 0.3154237498448939, + "grad_norm": 634.0014038085938, + "learning_rate": 8.737585548350312e-06, + "loss": 119.6853, + "step": 38130 + }, + { + "epoch": 0.31550647309426316, + "grad_norm": 765.5140380859375, + "learning_rate": 8.736648573845057e-06, + "loss": 99.3297, + "step": 38140 + }, + { + "epoch": 0.31558919634363236, + "grad_norm": 1316.4825439453125, + "learning_rate": 8.735711302031824e-06, + "loss": 123.088, + "step": 38150 + }, + { + "epoch": 0.3156719195930016, + "grad_norm": 1433.5438232421875, + "learning_rate": 8.734773732985186e-06, + "loss": 116.7357, + "step": 38160 + }, + { + "epoch": 0.31575464284237087, + "grad_norm": 826.6362915039062, + "learning_rate": 8.733835866779745e-06, + "loss": 93.5203, + "step": 38170 + }, + { + "epoch": 0.31583736609174007, + "grad_norm": 1368.983642578125, + "learning_rate": 8.73289770349012e-06, + "loss": 90.895, + "step": 38180 + }, + { + "epoch": 0.3159200893411093, + "grad_norm": 764.3739013671875, + "learning_rate": 8.731959243190955e-06, + "loss": 115.1358, + "step": 38190 + }, + { + "epoch": 0.3160028125904786, + "grad_norm": 1166.0986328125, + "learning_rate": 8.73102048595692e-06, + "loss": 110.7284, + "step": 38200 + }, + { + "epoch": 0.3160855358398478, + "grad_norm": 900.78759765625, + "learning_rate": 8.730081431862709e-06, + "loss": 114.7286, + "step": 38210 + }, + { + "epoch": 0.31616825908921703, + "grad_norm": 694.7711791992188, + "learning_rate": 8.729142080983037e-06, + "loss": 99.7621, + "step": 38220 + }, + { + "epoch": 0.3162509823385863, + "grad_norm": 1478.10546875, + "learning_rate": 8.728202433392645e-06, + "loss": 103.1368, + "step": 38230 + }, + { + "epoch": 0.3163337055879555, + "grad_norm": 739.2174682617188, + "learning_rate": 8.727262489166295e-06, + "loss": 91.7107, + "step": 38240 + }, + { + "epoch": 0.31641642883732474, + "grad_norm": 1041.38525390625, + "learning_rate": 8.726322248378775e-06, + "loss": 133.2948, + "step": 38250 + }, + { + "epoch": 0.316499152086694, + "grad_norm": 709.4502563476562, + "learning_rate": 8.725381711104894e-06, + "loss": 134.3007, + "step": 
38260 + }, + { + "epoch": 0.3165818753360632, + "grad_norm": 1081.0482177734375, + "learning_rate": 8.724440877419487e-06, + "loss": 107.7189, + "step": 38270 + }, + { + "epoch": 0.31666459858543244, + "grad_norm": 1248.0484619140625, + "learning_rate": 8.723499747397415e-06, + "loss": 105.8039, + "step": 38280 + }, + { + "epoch": 0.3167473218348017, + "grad_norm": 1431.8619384765625, + "learning_rate": 8.722558321113555e-06, + "loss": 108.0174, + "step": 38290 + }, + { + "epoch": 0.3168300450841709, + "grad_norm": 972.2454223632812, + "learning_rate": 8.721616598642812e-06, + "loss": 124.3465, + "step": 38300 + }, + { + "epoch": 0.31691276833354015, + "grad_norm": 1106.1824951171875, + "learning_rate": 8.720674580060117e-06, + "loss": 92.7966, + "step": 38310 + }, + { + "epoch": 0.31699549158290935, + "grad_norm": 1259.550048828125, + "learning_rate": 8.719732265440423e-06, + "loss": 91.808, + "step": 38320 + }, + { + "epoch": 0.3170782148322786, + "grad_norm": 1003.4591064453125, + "learning_rate": 8.718789654858702e-06, + "loss": 97.9086, + "step": 38330 + }, + { + "epoch": 0.31716093808164786, + "grad_norm": 1087.510498046875, + "learning_rate": 8.717846748389956e-06, + "loss": 190.7288, + "step": 38340 + }, + { + "epoch": 0.31724366133101706, + "grad_norm": 1307.7303466796875, + "learning_rate": 8.716903546109208e-06, + "loss": 100.8898, + "step": 38350 + }, + { + "epoch": 0.3173263845803863, + "grad_norm": 389.17926025390625, + "learning_rate": 8.715960048091502e-06, + "loss": 105.3628, + "step": 38360 + }, + { + "epoch": 0.31740910782975557, + "grad_norm": 634.9244995117188, + "learning_rate": 8.715016254411908e-06, + "loss": 93.6207, + "step": 38370 + }, + { + "epoch": 0.31749183107912476, + "grad_norm": 1184.2578125, + "learning_rate": 8.714072165145521e-06, + "loss": 135.4022, + "step": 38380 + }, + { + "epoch": 0.317574554328494, + "grad_norm": 619.5433349609375, + "learning_rate": 8.713127780367458e-06, + "loss": 82.4095, + "step": 38390 + }, + { + "epoch": 0.3176572775778633, + "grad_norm": 1421.2760009765625, + "learning_rate": 8.712183100152858e-06, + "loss": 104.6683, + "step": 38400 + }, + { + "epoch": 0.31774000082723247, + "grad_norm": 569.2383422851562, + "learning_rate": 8.711238124576884e-06, + "loss": 100.9128, + "step": 38410 + }, + { + "epoch": 0.3178227240766017, + "grad_norm": 486.7202453613281, + "learning_rate": 8.710292853714726e-06, + "loss": 102.4042, + "step": 38420 + }, + { + "epoch": 0.317905447325971, + "grad_norm": 1279.0054931640625, + "learning_rate": 8.709347287641593e-06, + "loss": 107.4948, + "step": 38430 + }, + { + "epoch": 0.3179881705753402, + "grad_norm": 777.2363891601562, + "learning_rate": 8.70840142643272e-06, + "loss": 126.2411, + "step": 38440 + }, + { + "epoch": 0.31807089382470943, + "grad_norm": 680.6844482421875, + "learning_rate": 8.707455270163365e-06, + "loss": 98.6142, + "step": 38450 + }, + { + "epoch": 0.3181536170740787, + "grad_norm": 1288.6944580078125, + "learning_rate": 8.70650881890881e-06, + "loss": 125.0557, + "step": 38460 + }, + { + "epoch": 0.3182363403234479, + "grad_norm": 760.6031494140625, + "learning_rate": 8.705562072744358e-06, + "loss": 138.251, + "step": 38470 + }, + { + "epoch": 0.31831906357281714, + "grad_norm": 883.0706176757812, + "learning_rate": 8.704615031745337e-06, + "loss": 111.4153, + "step": 38480 + }, + { + "epoch": 0.3184017868221864, + "grad_norm": 1131.5177001953125, + "learning_rate": 8.703667695987102e-06, + "loss": 113.8998, + "step": 38490 + }, + { + "epoch": 0.3184845100715556, + 
"grad_norm": 973.4132690429688, + "learning_rate": 8.702720065545024e-06, + "loss": 106.239, + "step": 38500 + }, + { + "epoch": 0.31856723332092485, + "grad_norm": 1401.8857421875, + "learning_rate": 8.701772140494504e-06, + "loss": 131.2176, + "step": 38510 + }, + { + "epoch": 0.3186499565702941, + "grad_norm": 735.4816284179688, + "learning_rate": 8.700823920910964e-06, + "loss": 124.5568, + "step": 38520 + }, + { + "epoch": 0.3187326798196633, + "grad_norm": 1129.6258544921875, + "learning_rate": 8.699875406869848e-06, + "loss": 103.7197, + "step": 38530 + }, + { + "epoch": 0.31881540306903255, + "grad_norm": 802.9367065429688, + "learning_rate": 8.69892659844663e-06, + "loss": 129.3652, + "step": 38540 + }, + { + "epoch": 0.3188981263184018, + "grad_norm": 1019.4291381835938, + "learning_rate": 8.697977495716794e-06, + "loss": 113.1963, + "step": 38550 + }, + { + "epoch": 0.318980849567771, + "grad_norm": 1141.768798828125, + "learning_rate": 8.697028098755863e-06, + "loss": 75.0446, + "step": 38560 + }, + { + "epoch": 0.31906357281714026, + "grad_norm": 561.0872192382812, + "learning_rate": 8.69607840763937e-06, + "loss": 107.2292, + "step": 38570 + }, + { + "epoch": 0.3191462960665095, + "grad_norm": 567.6842651367188, + "learning_rate": 8.695128422442882e-06, + "loss": 105.8062, + "step": 38580 + }, + { + "epoch": 0.3192290193158787, + "grad_norm": 1812.7880859375, + "learning_rate": 8.694178143241984e-06, + "loss": 116.8599, + "step": 38590 + }, + { + "epoch": 0.31931174256524797, + "grad_norm": 821.3114013671875, + "learning_rate": 8.693227570112285e-06, + "loss": 113.9192, + "step": 38600 + }, + { + "epoch": 0.3193944658146172, + "grad_norm": 1383.909423828125, + "learning_rate": 8.692276703129421e-06, + "loss": 123.0948, + "step": 38610 + }, + { + "epoch": 0.3194771890639864, + "grad_norm": 559.3286743164062, + "learning_rate": 8.691325542369041e-06, + "loss": 81.4486, + "step": 38620 + }, + { + "epoch": 0.3195599123133557, + "grad_norm": 1106.412109375, + "learning_rate": 8.69037408790683e-06, + "loss": 119.6945, + "step": 38630 + }, + { + "epoch": 0.31964263556272493, + "grad_norm": 1364.677490234375, + "learning_rate": 8.689422339818489e-06, + "loss": 140.9282, + "step": 38640 + }, + { + "epoch": 0.31972535881209413, + "grad_norm": 0.0, + "learning_rate": 8.688470298179746e-06, + "loss": 140.5661, + "step": 38650 + }, + { + "epoch": 0.3198080820614634, + "grad_norm": 859.6204833984375, + "learning_rate": 8.687517963066347e-06, + "loss": 110.4718, + "step": 38660 + }, + { + "epoch": 0.3198908053108326, + "grad_norm": 1258.269775390625, + "learning_rate": 8.686565334554069e-06, + "loss": 126.0004, + "step": 38670 + }, + { + "epoch": 0.31997352856020184, + "grad_norm": 1039.6004638671875, + "learning_rate": 8.685612412718704e-06, + "loss": 119.8658, + "step": 38680 + }, + { + "epoch": 0.3200562518095711, + "grad_norm": 923.8244018554688, + "learning_rate": 8.684659197636076e-06, + "loss": 124.2017, + "step": 38690 + }, + { + "epoch": 0.3201389750589403, + "grad_norm": 1117.4451904296875, + "learning_rate": 8.683705689382025e-06, + "loss": 107.7295, + "step": 38700 + }, + { + "epoch": 0.32022169830830954, + "grad_norm": 1363.1929931640625, + "learning_rate": 8.682751888032419e-06, + "loss": 99.3945, + "step": 38710 + }, + { + "epoch": 0.3203044215576788, + "grad_norm": 701.418212890625, + "learning_rate": 8.681797793663147e-06, + "loss": 120.6914, + "step": 38720 + }, + { + "epoch": 0.320387144807048, + "grad_norm": 1962.092041015625, + "learning_rate": 8.680843406350122e-06, + 
"loss": 105.0907, + "step": 38730 + }, + { + "epoch": 0.32046986805641725, + "grad_norm": 1574.8798828125, + "learning_rate": 8.679888726169277e-06, + "loss": 123.2075, + "step": 38740 + }, + { + "epoch": 0.3205525913057865, + "grad_norm": 979.2647094726562, + "learning_rate": 8.678933753196577e-06, + "loss": 117.9523, + "step": 38750 + }, + { + "epoch": 0.3206353145551557, + "grad_norm": 961.4496459960938, + "learning_rate": 8.677978487508002e-06, + "loss": 130.495, + "step": 38760 + }, + { + "epoch": 0.32071803780452496, + "grad_norm": 1239.4903564453125, + "learning_rate": 8.677022929179558e-06, + "loss": 116.15, + "step": 38770 + }, + { + "epoch": 0.3208007610538942, + "grad_norm": 936.8592529296875, + "learning_rate": 8.676067078287276e-06, + "loss": 102.5058, + "step": 38780 + }, + { + "epoch": 0.3208834843032634, + "grad_norm": 1102.019775390625, + "learning_rate": 8.675110934907206e-06, + "loss": 105.1739, + "step": 38790 + }, + { + "epoch": 0.32096620755263267, + "grad_norm": 1137.979736328125, + "learning_rate": 8.674154499115426e-06, + "loss": 103.8995, + "step": 38800 + }, + { + "epoch": 0.3210489308020019, + "grad_norm": 983.4441528320312, + "learning_rate": 8.673197770988034e-06, + "loss": 100.5983, + "step": 38810 + }, + { + "epoch": 0.3211316540513711, + "grad_norm": 601.077392578125, + "learning_rate": 8.672240750601152e-06, + "loss": 100.5274, + "step": 38820 + }, + { + "epoch": 0.3212143773007404, + "grad_norm": 897.4487915039062, + "learning_rate": 8.67128343803093e-06, + "loss": 99.9841, + "step": 38830 + }, + { + "epoch": 0.3212971005501096, + "grad_norm": 1025.739013671875, + "learning_rate": 8.670325833353532e-06, + "loss": 89.5816, + "step": 38840 + }, + { + "epoch": 0.3213798237994788, + "grad_norm": 677.2406005859375, + "learning_rate": 8.669367936645152e-06, + "loss": 105.6764, + "step": 38850 + }, + { + "epoch": 0.3214625470488481, + "grad_norm": 912.070068359375, + "learning_rate": 8.668409747982005e-06, + "loss": 129.2276, + "step": 38860 + }, + { + "epoch": 0.32154527029821733, + "grad_norm": 909.7315673828125, + "learning_rate": 8.667451267440332e-06, + "loss": 98.6507, + "step": 38870 + }, + { + "epoch": 0.32162799354758653, + "grad_norm": 934.35400390625, + "learning_rate": 8.666492495096391e-06, + "loss": 121.8479, + "step": 38880 + }, + { + "epoch": 0.3217107167969558, + "grad_norm": 1550.7830810546875, + "learning_rate": 8.66553343102647e-06, + "loss": 124.7869, + "step": 38890 + }, + { + "epoch": 0.32179344004632504, + "grad_norm": 1913.998291015625, + "learning_rate": 8.664574075306876e-06, + "loss": 109.4713, + "step": 38900 + }, + { + "epoch": 0.32187616329569424, + "grad_norm": 665.229736328125, + "learning_rate": 8.66361442801394e-06, + "loss": 94.5389, + "step": 38910 + }, + { + "epoch": 0.3219588865450635, + "grad_norm": 762.2039794921875, + "learning_rate": 8.662654489224018e-06, + "loss": 94.5307, + "step": 38920 + }, + { + "epoch": 0.32204160979443275, + "grad_norm": 1010.405029296875, + "learning_rate": 8.661694259013489e-06, + "loss": 143.722, + "step": 38930 + }, + { + "epoch": 0.32212433304380195, + "grad_norm": 808.5379638671875, + "learning_rate": 8.660733737458751e-06, + "loss": 134.8724, + "step": 38940 + }, + { + "epoch": 0.3222070562931712, + "grad_norm": 964.7113037109375, + "learning_rate": 8.659772924636232e-06, + "loss": 122.8288, + "step": 38950 + }, + { + "epoch": 0.32228977954254046, + "grad_norm": 908.0009765625, + "learning_rate": 8.658811820622376e-06, + "loss": 90.802, + "step": 38960 + }, + { + "epoch": 
0.32237250279190965, + "grad_norm": 1564.2470703125, + "learning_rate": 8.657850425493656e-06, + "loss": 129.7668, + "step": 38970 + }, + { + "epoch": 0.3224552260412789, + "grad_norm": 1255.0084228515625, + "learning_rate": 8.656888739326564e-06, + "loss": 96.6529, + "step": 38980 + }, + { + "epoch": 0.32253794929064816, + "grad_norm": 960.5479125976562, + "learning_rate": 8.65592676219762e-06, + "loss": 103.2335, + "step": 38990 + }, + { + "epoch": 0.32262067254001736, + "grad_norm": 815.9246826171875, + "learning_rate": 8.65496449418336e-06, + "loss": 94.21, + "step": 39000 + }, + { + "epoch": 0.3227033957893866, + "grad_norm": 942.2803344726562, + "learning_rate": 8.654001935360349e-06, + "loss": 108.4447, + "step": 39010 + }, + { + "epoch": 0.32278611903875587, + "grad_norm": 1212.7076416015625, + "learning_rate": 8.653039085805174e-06, + "loss": 97.4576, + "step": 39020 + }, + { + "epoch": 0.32286884228812507, + "grad_norm": 1259.3065185546875, + "learning_rate": 8.652075945594444e-06, + "loss": 96.3901, + "step": 39030 + }, + { + "epoch": 0.3229515655374943, + "grad_norm": 673.8052368164062, + "learning_rate": 8.651112514804793e-06, + "loss": 94.6694, + "step": 39040 + }, + { + "epoch": 0.3230342887868635, + "grad_norm": 1086.0980224609375, + "learning_rate": 8.650148793512874e-06, + "loss": 161.4135, + "step": 39050 + }, + { + "epoch": 0.3231170120362328, + "grad_norm": 1190.9241943359375, + "learning_rate": 8.649184781795367e-06, + "loss": 122.2091, + "step": 39060 + }, + { + "epoch": 0.32319973528560203, + "grad_norm": 750.9871215820312, + "learning_rate": 8.648220479728976e-06, + "loss": 129.1647, + "step": 39070 + }, + { + "epoch": 0.32328245853497123, + "grad_norm": 675.1224365234375, + "learning_rate": 8.647255887390425e-06, + "loss": 87.7561, + "step": 39080 + }, + { + "epoch": 0.3233651817843405, + "grad_norm": 960.1796875, + "learning_rate": 8.64629100485646e-06, + "loss": 107.4958, + "step": 39090 + }, + { + "epoch": 0.32344790503370974, + "grad_norm": 1386.735595703125, + "learning_rate": 8.645325832203855e-06, + "loss": 135.6421, + "step": 39100 + }, + { + "epoch": 0.32353062828307894, + "grad_norm": 589.93896484375, + "learning_rate": 8.644360369509403e-06, + "loss": 102.7022, + "step": 39110 + }, + { + "epoch": 0.3236133515324482, + "grad_norm": 1394.318603515625, + "learning_rate": 8.64339461684992e-06, + "loss": 109.0257, + "step": 39120 + }, + { + "epoch": 0.32369607478181744, + "grad_norm": 1033.558349609375, + "learning_rate": 8.64242857430225e-06, + "loss": 120.0752, + "step": 39130 + }, + { + "epoch": 0.32377879803118664, + "grad_norm": 877.220947265625, + "learning_rate": 8.641462241943255e-06, + "loss": 149.2554, + "step": 39140 + }, + { + "epoch": 0.3238615212805559, + "grad_norm": 1799.2706298828125, + "learning_rate": 8.640495619849821e-06, + "loss": 106.8699, + "step": 39150 + }, + { + "epoch": 0.32394424452992515, + "grad_norm": 354.8163146972656, + "learning_rate": 8.639528708098858e-06, + "loss": 138.8774, + "step": 39160 + }, + { + "epoch": 0.32402696777929435, + "grad_norm": 734.3604125976562, + "learning_rate": 8.6385615067673e-06, + "loss": 113.2349, + "step": 39170 + }, + { + "epoch": 0.3241096910286636, + "grad_norm": 1038.3038330078125, + "learning_rate": 8.6375940159321e-06, + "loss": 80.891, + "step": 39180 + }, + { + "epoch": 0.32419241427803286, + "grad_norm": 1265.29638671875, + "learning_rate": 8.63662623567024e-06, + "loss": 100.5664, + "step": 39190 + }, + { + "epoch": 0.32427513752740206, + "grad_norm": 975.9793701171875, + 
"learning_rate": 8.63565816605872e-06, + "loss": 98.3975, + "step": 39200 + }, + { + "epoch": 0.3243578607767713, + "grad_norm": 1260.0736083984375, + "learning_rate": 8.634689807174564e-06, + "loss": 122.2016, + "step": 39210 + }, + { + "epoch": 0.32444058402614057, + "grad_norm": 745.4339599609375, + "learning_rate": 8.633721159094823e-06, + "loss": 118.936, + "step": 39220 + }, + { + "epoch": 0.32452330727550976, + "grad_norm": 803.7348022460938, + "learning_rate": 8.632752221896562e-06, + "loss": 76.6836, + "step": 39230 + }, + { + "epoch": 0.324606030524879, + "grad_norm": 901.3794555664062, + "learning_rate": 8.631782995656884e-06, + "loss": 114.3698, + "step": 39240 + }, + { + "epoch": 0.3246887537742483, + "grad_norm": 2110.507080078125, + "learning_rate": 8.630813480452898e-06, + "loss": 109.549, + "step": 39250 + }, + { + "epoch": 0.32477147702361747, + "grad_norm": 868.3207397460938, + "learning_rate": 8.629843676361747e-06, + "loss": 147.7418, + "step": 39260 + }, + { + "epoch": 0.3248542002729867, + "grad_norm": 776.3412475585938, + "learning_rate": 8.628873583460593e-06, + "loss": 126.7695, + "step": 39270 + }, + { + "epoch": 0.324936923522356, + "grad_norm": 1359.6387939453125, + "learning_rate": 8.627903201826622e-06, + "loss": 120.8187, + "step": 39280 + }, + { + "epoch": 0.3250196467717252, + "grad_norm": 1282.950439453125, + "learning_rate": 8.626932531537042e-06, + "loss": 123.0786, + "step": 39290 + }, + { + "epoch": 0.32510237002109443, + "grad_norm": 530.9898071289062, + "learning_rate": 8.625961572669087e-06, + "loss": 120.1885, + "step": 39300 + }, + { + "epoch": 0.3251850932704637, + "grad_norm": 997.0206298828125, + "learning_rate": 8.62499032530001e-06, + "loss": 120.9154, + "step": 39310 + }, + { + "epoch": 0.3252678165198329, + "grad_norm": 676.1962890625, + "learning_rate": 8.624018789507091e-06, + "loss": 97.3104, + "step": 39320 + }, + { + "epoch": 0.32535053976920214, + "grad_norm": 692.87255859375, + "learning_rate": 8.62304696536763e-06, + "loss": 118.6817, + "step": 39330 + }, + { + "epoch": 0.3254332630185714, + "grad_norm": 980.8681030273438, + "learning_rate": 8.622074852958946e-06, + "loss": 112.1015, + "step": 39340 + }, + { + "epoch": 0.3255159862679406, + "grad_norm": 1021.6939697265625, + "learning_rate": 8.621102452358393e-06, + "loss": 190.7402, + "step": 39350 + }, + { + "epoch": 0.32559870951730985, + "grad_norm": 1223.068115234375, + "learning_rate": 8.620129763643333e-06, + "loss": 128.2917, + "step": 39360 + }, + { + "epoch": 0.3256814327666791, + "grad_norm": 1219.437744140625, + "learning_rate": 8.619156786891162e-06, + "loss": 136.7339, + "step": 39370 + }, + { + "epoch": 0.3257641560160483, + "grad_norm": 1412.034912109375, + "learning_rate": 8.618183522179295e-06, + "loss": 131.6702, + "step": 39380 + }, + { + "epoch": 0.32584687926541755, + "grad_norm": 1203.2330322265625, + "learning_rate": 8.617209969585171e-06, + "loss": 88.7958, + "step": 39390 + }, + { + "epoch": 0.32592960251478675, + "grad_norm": 809.6597290039062, + "learning_rate": 8.616236129186252e-06, + "loss": 102.6644, + "step": 39400 + }, + { + "epoch": 0.326012325764156, + "grad_norm": 3100.604736328125, + "learning_rate": 8.615262001060019e-06, + "loss": 176.8819, + "step": 39410 + }, + { + "epoch": 0.32609504901352526, + "grad_norm": 1474.5286865234375, + "learning_rate": 8.61428758528398e-06, + "loss": 123.6517, + "step": 39420 + }, + { + "epoch": 0.32617777226289446, + "grad_norm": 893.0943603515625, + "learning_rate": 8.613312881935667e-06, + "loss": 118.6461, + 
"step": 39430 + }, + { + "epoch": 0.3262604955122637, + "grad_norm": 1108.7327880859375, + "learning_rate": 8.61233789109263e-06, + "loss": 129.676, + "step": 39440 + }, + { + "epoch": 0.32634321876163297, + "grad_norm": 799.1575317382812, + "learning_rate": 8.611362612832445e-06, + "loss": 109.5865, + "step": 39450 + }, + { + "epoch": 0.32642594201100217, + "grad_norm": 1402.5484619140625, + "learning_rate": 8.610387047232711e-06, + "loss": 103.1031, + "step": 39460 + }, + { + "epoch": 0.3265086652603714, + "grad_norm": 1590.6834716796875, + "learning_rate": 8.609411194371049e-06, + "loss": 114.6393, + "step": 39470 + }, + { + "epoch": 0.3265913885097407, + "grad_norm": 1389.5260009765625, + "learning_rate": 8.608435054325103e-06, + "loss": 100.0405, + "step": 39480 + }, + { + "epoch": 0.3266741117591099, + "grad_norm": 1510.9293212890625, + "learning_rate": 8.60745862717254e-06, + "loss": 78.9952, + "step": 39490 + }, + { + "epoch": 0.32675683500847913, + "grad_norm": 2515.405029296875, + "learning_rate": 8.606481912991052e-06, + "loss": 125.8343, + "step": 39500 + }, + { + "epoch": 0.3268395582578484, + "grad_norm": 1044.6246337890625, + "learning_rate": 8.605504911858347e-06, + "loss": 95.9947, + "step": 39510 + }, + { + "epoch": 0.3269222815072176, + "grad_norm": 742.9393920898438, + "learning_rate": 8.604527623852165e-06, + "loss": 129.3403, + "step": 39520 + }, + { + "epoch": 0.32700500475658684, + "grad_norm": 843.3123779296875, + "learning_rate": 8.603550049050262e-06, + "loss": 124.6452, + "step": 39530 + }, + { + "epoch": 0.3270877280059561, + "grad_norm": 629.9082641601562, + "learning_rate": 8.602572187530421e-06, + "loss": 103.7542, + "step": 39540 + }, + { + "epoch": 0.3271704512553253, + "grad_norm": 1079.95556640625, + "learning_rate": 8.601594039370441e-06, + "loss": 117.5058, + "step": 39550 + }, + { + "epoch": 0.32725317450469454, + "grad_norm": 785.5013427734375, + "learning_rate": 8.600615604648155e-06, + "loss": 67.8067, + "step": 39560 + }, + { + "epoch": 0.3273358977540638, + "grad_norm": 831.0355224609375, + "learning_rate": 8.599636883441408e-06, + "loss": 123.9131, + "step": 39570 + }, + { + "epoch": 0.327418621003433, + "grad_norm": 2647.583251953125, + "learning_rate": 8.598657875828078e-06, + "loss": 107.162, + "step": 39580 + }, + { + "epoch": 0.32750134425280225, + "grad_norm": 2026.9219970703125, + "learning_rate": 8.597678581886055e-06, + "loss": 130.6936, + "step": 39590 + }, + { + "epoch": 0.3275840675021715, + "grad_norm": 991.1618041992188, + "learning_rate": 8.596699001693257e-06, + "loss": 107.1374, + "step": 39600 + }, + { + "epoch": 0.3276667907515407, + "grad_norm": 1521.5604248046875, + "learning_rate": 8.595719135327627e-06, + "loss": 98.5977, + "step": 39610 + }, + { + "epoch": 0.32774951400090996, + "grad_norm": 613.502685546875, + "learning_rate": 8.594738982867126e-06, + "loss": 100.9653, + "step": 39620 + }, + { + "epoch": 0.3278322372502792, + "grad_norm": 1424.407470703125, + "learning_rate": 8.593758544389743e-06, + "loss": 119.1369, + "step": 39630 + }, + { + "epoch": 0.3279149604996484, + "grad_norm": 519.1179809570312, + "learning_rate": 8.592777819973486e-06, + "loss": 121.2218, + "step": 39640 + }, + { + "epoch": 0.32799768374901767, + "grad_norm": 1449.661865234375, + "learning_rate": 8.591796809696386e-06, + "loss": 114.1455, + "step": 39650 + }, + { + "epoch": 0.3280804069983869, + "grad_norm": 986.8948364257812, + "learning_rate": 8.590815513636498e-06, + "loss": 111.6402, + "step": 39660 + }, + { + "epoch": 0.3281631302477561, 
+ "grad_norm": 839.0859375, + "learning_rate": 8.5898339318719e-06, + "loss": 85.4794, + "step": 39670 + }, + { + "epoch": 0.3282458534971254, + "grad_norm": 1216.8238525390625, + "learning_rate": 8.58885206448069e-06, + "loss": 126.5229, + "step": 39680 + }, + { + "epoch": 0.3283285767464946, + "grad_norm": 1210.4658203125, + "learning_rate": 8.587869911540993e-06, + "loss": 131.2425, + "step": 39690 + }, + { + "epoch": 0.3284112999958638, + "grad_norm": 638.7323608398438, + "learning_rate": 8.586887473130951e-06, + "loss": 117.0074, + "step": 39700 + }, + { + "epoch": 0.3284940232452331, + "grad_norm": 1674.9326171875, + "learning_rate": 8.585904749328736e-06, + "loss": 101.3178, + "step": 39710 + }, + { + "epoch": 0.32857674649460233, + "grad_norm": 730.9718627929688, + "learning_rate": 8.584921740212537e-06, + "loss": 79.7682, + "step": 39720 + }, + { + "epoch": 0.32865946974397153, + "grad_norm": 849.2908325195312, + "learning_rate": 8.583938445860569e-06, + "loss": 134.0528, + "step": 39730 + }, + { + "epoch": 0.3287421929933408, + "grad_norm": 1976.713134765625, + "learning_rate": 8.582954866351065e-06, + "loss": 109.2086, + "step": 39740 + }, + { + "epoch": 0.32882491624271004, + "grad_norm": 1140.69140625, + "learning_rate": 8.581971001762287e-06, + "loss": 115.7576, + "step": 39750 + }, + { + "epoch": 0.32890763949207924, + "grad_norm": 1253.4771728515625, + "learning_rate": 8.580986852172514e-06, + "loss": 99.3701, + "step": 39760 + }, + { + "epoch": 0.3289903627414485, + "grad_norm": 1577.8370361328125, + "learning_rate": 8.580002417660054e-06, + "loss": 137.5488, + "step": 39770 + }, + { + "epoch": 0.3290730859908177, + "grad_norm": 759.9320068359375, + "learning_rate": 8.579017698303228e-06, + "loss": 110.6118, + "step": 39780 + }, + { + "epoch": 0.32915580924018695, + "grad_norm": 807.0444946289062, + "learning_rate": 8.578032694180394e-06, + "loss": 108.5404, + "step": 39790 + }, + { + "epoch": 0.3292385324895562, + "grad_norm": 901.5609741210938, + "learning_rate": 8.577047405369916e-06, + "loss": 92.3528, + "step": 39800 + }, + { + "epoch": 0.3293212557389254, + "grad_norm": 815.9768676757812, + "learning_rate": 8.576061831950193e-06, + "loss": 116.8808, + "step": 39810 + }, + { + "epoch": 0.32940397898829465, + "grad_norm": 676.6227416992188, + "learning_rate": 8.575075973999642e-06, + "loss": 104.0332, + "step": 39820 + }, + { + "epoch": 0.3294867022376639, + "grad_norm": 655.098876953125, + "learning_rate": 8.574089831596703e-06, + "loss": 114.4098, + "step": 39830 + }, + { + "epoch": 0.3295694254870331, + "grad_norm": 1169.68359375, + "learning_rate": 8.57310340481984e-06, + "loss": 88.758, + "step": 39840 + }, + { + "epoch": 0.32965214873640236, + "grad_norm": 635.2750244140625, + "learning_rate": 8.572116693747537e-06, + "loss": 98.1875, + "step": 39850 + }, + { + "epoch": 0.3297348719857716, + "grad_norm": 797.3588256835938, + "learning_rate": 8.571129698458302e-06, + "loss": 101.033, + "step": 39860 + }, + { + "epoch": 0.3298175952351408, + "grad_norm": 1276.1683349609375, + "learning_rate": 8.570142419030668e-06, + "loss": 111.7359, + "step": 39870 + }, + { + "epoch": 0.32990031848451007, + "grad_norm": 700.5169677734375, + "learning_rate": 8.569154855543184e-06, + "loss": 101.829, + "step": 39880 + }, + { + "epoch": 0.3299830417338793, + "grad_norm": 1665.83984375, + "learning_rate": 8.56816700807443e-06, + "loss": 120.2223, + "step": 39890 + }, + { + "epoch": 0.3300657649832485, + "grad_norm": 619.46435546875, + "learning_rate": 8.567178876703002e-06, + "loss": 
101.4117, + "step": 39900 + }, + { + "epoch": 0.3301484882326178, + "grad_norm": 1413.1790771484375, + "learning_rate": 8.566190461507521e-06, + "loss": 108.3938, + "step": 39910 + }, + { + "epoch": 0.33023121148198703, + "grad_norm": 917.2999877929688, + "learning_rate": 8.565201762566632e-06, + "loss": 80.6623, + "step": 39920 + }, + { + "epoch": 0.33031393473135623, + "grad_norm": 787.8756713867188, + "learning_rate": 8.564212779959003e-06, + "loss": 130.3724, + "step": 39930 + }, + { + "epoch": 0.3303966579807255, + "grad_norm": 649.1900634765625, + "learning_rate": 8.563223513763319e-06, + "loss": 107.5673, + "step": 39940 + }, + { + "epoch": 0.33047938123009474, + "grad_norm": 656.0354614257812, + "learning_rate": 8.562233964058294e-06, + "loss": 138.9998, + "step": 39950 + }, + { + "epoch": 0.33056210447946394, + "grad_norm": 1630.0479736328125, + "learning_rate": 8.561244130922658e-06, + "loss": 79.6873, + "step": 39960 + }, + { + "epoch": 0.3306448277288332, + "grad_norm": 908.9981689453125, + "learning_rate": 8.560254014435172e-06, + "loss": 124.0382, + "step": 39970 + }, + { + "epoch": 0.33072755097820244, + "grad_norm": 1071.929931640625, + "learning_rate": 8.559263614674615e-06, + "loss": 102.3747, + "step": 39980 + }, + { + "epoch": 0.33081027422757164, + "grad_norm": 827.8428344726562, + "learning_rate": 8.558272931719785e-06, + "loss": 100.2324, + "step": 39990 + }, + { + "epoch": 0.3308929974769409, + "grad_norm": 2593.23779296875, + "learning_rate": 8.557281965649508e-06, + "loss": 107.4415, + "step": 40000 + }, + { + "epoch": 0.33097572072631015, + "grad_norm": 1010.6577758789062, + "learning_rate": 8.556290716542632e-06, + "loss": 84.2611, + "step": 40010 + }, + { + "epoch": 0.33105844397567935, + "grad_norm": 787.0457153320312, + "learning_rate": 8.555299184478026e-06, + "loss": 129.8781, + "step": 40020 + }, + { + "epoch": 0.3311411672250486, + "grad_norm": 953.2777099609375, + "learning_rate": 8.554307369534577e-06, + "loss": 103.5916, + "step": 40030 + }, + { + "epoch": 0.33122389047441786, + "grad_norm": 1413.6817626953125, + "learning_rate": 8.553315271791207e-06, + "loss": 92.5186, + "step": 40040 + }, + { + "epoch": 0.33130661372378706, + "grad_norm": 1188.9342041015625, + "learning_rate": 8.552322891326846e-06, + "loss": 98.2379, + "step": 40050 + }, + { + "epoch": 0.3313893369731563, + "grad_norm": 1236.0517578125, + "learning_rate": 8.551330228220454e-06, + "loss": 107.0516, + "step": 40060 + }, + { + "epoch": 0.33147206022252557, + "grad_norm": 919.5791015625, + "learning_rate": 8.550337282551016e-06, + "loss": 101.3186, + "step": 40070 + }, + { + "epoch": 0.33155478347189477, + "grad_norm": 1503.684326171875, + "learning_rate": 8.549344054397533e-06, + "loss": 108.6517, + "step": 40080 + }, + { + "epoch": 0.331637506721264, + "grad_norm": 419.35028076171875, + "learning_rate": 8.548350543839034e-06, + "loss": 85.9801, + "step": 40090 + }, + { + "epoch": 0.3317202299706333, + "grad_norm": 1511.6007080078125, + "learning_rate": 8.547356750954568e-06, + "loss": 109.9655, + "step": 40100 + }, + { + "epoch": 0.3318029532200025, + "grad_norm": 811.8706665039062, + "learning_rate": 8.546362675823204e-06, + "loss": 186.2005, + "step": 40110 + }, + { + "epoch": 0.3318856764693717, + "grad_norm": 910.4181518554688, + "learning_rate": 8.545368318524036e-06, + "loss": 113.1569, + "step": 40120 + }, + { + "epoch": 0.3319683997187409, + "grad_norm": 841.266357421875, + "learning_rate": 8.544373679136184e-06, + "loss": 102.7385, + "step": 40130 + }, + { + "epoch": 
0.3320511229681102, + "grad_norm": 1087.2705078125, + "learning_rate": 8.543378757738785e-06, + "loss": 83.0132, + "step": 40140 + }, + { + "epoch": 0.33213384621747943, + "grad_norm": 844.1511840820312, + "learning_rate": 8.542383554411e-06, + "loss": 97.4727, + "step": 40150 + }, + { + "epoch": 0.33221656946684863, + "grad_norm": 1138.1463623046875, + "learning_rate": 8.541388069232012e-06, + "loss": 96.1207, + "step": 40160 + }, + { + "epoch": 0.3322992927162179, + "grad_norm": 1386.313232421875, + "learning_rate": 8.54039230228103e-06, + "loss": 121.9133, + "step": 40170 + }, + { + "epoch": 0.33238201596558714, + "grad_norm": 1644.196044921875, + "learning_rate": 8.53939625363728e-06, + "loss": 110.2846, + "step": 40180 + }, + { + "epoch": 0.33246473921495634, + "grad_norm": 663.9628295898438, + "learning_rate": 8.538399923380011e-06, + "loss": 147.1378, + "step": 40190 + }, + { + "epoch": 0.3325474624643256, + "grad_norm": 921.0543212890625, + "learning_rate": 8.537403311588502e-06, + "loss": 94.7127, + "step": 40200 + }, + { + "epoch": 0.33263018571369485, + "grad_norm": 1512.567626953125, + "learning_rate": 8.536406418342044e-06, + "loss": 87.8837, + "step": 40210 + }, + { + "epoch": 0.33271290896306405, + "grad_norm": 1098.78369140625, + "learning_rate": 8.53540924371996e-06, + "loss": 108.6445, + "step": 40220 + }, + { + "epoch": 0.3327956322124333, + "grad_norm": 1846.2921142578125, + "learning_rate": 8.534411787801586e-06, + "loss": 95.6519, + "step": 40230 + }, + { + "epoch": 0.33287835546180256, + "grad_norm": 1103.0107421875, + "learning_rate": 8.533414050666287e-06, + "loss": 109.1561, + "step": 40240 + }, + { + "epoch": 0.33296107871117175, + "grad_norm": 903.0642700195312, + "learning_rate": 8.532416032393447e-06, + "loss": 99.2833, + "step": 40250 + }, + { + "epoch": 0.333043801960541, + "grad_norm": 1340.9583740234375, + "learning_rate": 8.531417733062476e-06, + "loss": 116.9413, + "step": 40260 + }, + { + "epoch": 0.33312652520991026, + "grad_norm": 1119.4525146484375, + "learning_rate": 8.530419152752804e-06, + "loss": 124.4811, + "step": 40270 + }, + { + "epoch": 0.33320924845927946, + "grad_norm": 1566.3560791015625, + "learning_rate": 8.529420291543882e-06, + "loss": 127.6215, + "step": 40280 + }, + { + "epoch": 0.3332919717086487, + "grad_norm": 1316.3895263671875, + "learning_rate": 8.528421149515185e-06, + "loss": 107.4906, + "step": 40290 + }, + { + "epoch": 0.33337469495801797, + "grad_norm": 825.5098266601562, + "learning_rate": 8.52742172674621e-06, + "loss": 129.04, + "step": 40300 + }, + { + "epoch": 0.33345741820738717, + "grad_norm": 1096.2099609375, + "learning_rate": 8.526422023316478e-06, + "loss": 91.2496, + "step": 40310 + }, + { + "epoch": 0.3335401414567564, + "grad_norm": 781.985107421875, + "learning_rate": 8.525422039305529e-06, + "loss": 124.4936, + "step": 40320 + }, + { + "epoch": 0.3336228647061257, + "grad_norm": 1158.1458740234375, + "learning_rate": 8.524421774792926e-06, + "loss": 84.0795, + "step": 40330 + }, + { + "epoch": 0.3337055879554949, + "grad_norm": 1207.7098388671875, + "learning_rate": 8.52342122985826e-06, + "loss": 91.0112, + "step": 40340 + }, + { + "epoch": 0.33378831120486413, + "grad_norm": 847.1415405273438, + "learning_rate": 8.522420404581135e-06, + "loss": 101.6437, + "step": 40350 + }, + { + "epoch": 0.3338710344542334, + "grad_norm": 995.0086669921875, + "learning_rate": 8.521419299041185e-06, + "loss": 93.0817, + "step": 40360 + }, + { + "epoch": 0.3339537577036026, + "grad_norm": 892.8756103515625, + 
"learning_rate": 8.520417913318065e-06, + "loss": 110.2036, + "step": 40370 + }, + { + "epoch": 0.33403648095297184, + "grad_norm": 1183.2650146484375, + "learning_rate": 8.519416247491445e-06, + "loss": 126.0844, + "step": 40380 + }, + { + "epoch": 0.3341192042023411, + "grad_norm": 605.9612426757812, + "learning_rate": 8.518414301641027e-06, + "loss": 69.7784, + "step": 40390 + }, + { + "epoch": 0.3342019274517103, + "grad_norm": 1190.9295654296875, + "learning_rate": 8.517412075846529e-06, + "loss": 138.7514, + "step": 40400 + }, + { + "epoch": 0.33428465070107954, + "grad_norm": 792.2731323242188, + "learning_rate": 8.516409570187698e-06, + "loss": 98.212, + "step": 40410 + }, + { + "epoch": 0.3343673739504488, + "grad_norm": 820.3512573242188, + "learning_rate": 8.515406784744294e-06, + "loss": 87.1192, + "step": 40420 + }, + { + "epoch": 0.334450097199818, + "grad_norm": 1030.4779052734375, + "learning_rate": 8.514403719596104e-06, + "loss": 112.4568, + "step": 40430 + }, + { + "epoch": 0.33453282044918725, + "grad_norm": 649.7733764648438, + "learning_rate": 8.513400374822942e-06, + "loss": 129.4392, + "step": 40440 + }, + { + "epoch": 0.3346155436985565, + "grad_norm": 1169.7542724609375, + "learning_rate": 8.512396750504635e-06, + "loss": 96.2116, + "step": 40450 + }, + { + "epoch": 0.3346982669479257, + "grad_norm": 1340.5599365234375, + "learning_rate": 8.511392846721037e-06, + "loss": 130.6511, + "step": 40460 + }, + { + "epoch": 0.33478099019729496, + "grad_norm": 1084.3248291015625, + "learning_rate": 8.510388663552027e-06, + "loss": 96.2522, + "step": 40470 + }, + { + "epoch": 0.33486371344666416, + "grad_norm": 940.4993896484375, + "learning_rate": 8.509384201077502e-06, + "loss": 182.6661, + "step": 40480 + }, + { + "epoch": 0.3349464366960334, + "grad_norm": 473.4619445800781, + "learning_rate": 8.508379459377381e-06, + "loss": 98.2326, + "step": 40490 + }, + { + "epoch": 0.33502915994540267, + "grad_norm": 915.9678344726562, + "learning_rate": 8.507374438531606e-06, + "loss": 90.029, + "step": 40500 + }, + { + "epoch": 0.33511188319477186, + "grad_norm": 644.4666748046875, + "learning_rate": 8.506369138620148e-06, + "loss": 160.186, + "step": 40510 + }, + { + "epoch": 0.3351946064441411, + "grad_norm": 771.171875, + "learning_rate": 8.505363559722985e-06, + "loss": 96.5032, + "step": 40520 + }, + { + "epoch": 0.3352773296935104, + "grad_norm": 575.1885375976562, + "learning_rate": 8.504357701920134e-06, + "loss": 78.7146, + "step": 40530 + }, + { + "epoch": 0.33536005294287957, + "grad_norm": 728.8525390625, + "learning_rate": 8.503351565291622e-06, + "loss": 130.1776, + "step": 40540 + }, + { + "epoch": 0.3354427761922488, + "grad_norm": 1049.326416015625, + "learning_rate": 8.502345149917506e-06, + "loss": 91.4142, + "step": 40550 + }, + { + "epoch": 0.3355254994416181, + "grad_norm": 1567.1068115234375, + "learning_rate": 8.501338455877859e-06, + "loss": 128.1109, + "step": 40560 + }, + { + "epoch": 0.3356082226909873, + "grad_norm": 639.6390380859375, + "learning_rate": 8.50033148325278e-06, + "loss": 89.2162, + "step": 40570 + }, + { + "epoch": 0.33569094594035653, + "grad_norm": 1309.4464111328125, + "learning_rate": 8.499324232122389e-06, + "loss": 119.5868, + "step": 40580 + }, + { + "epoch": 0.3357736691897258, + "grad_norm": 961.0704345703125, + "learning_rate": 8.498316702566828e-06, + "loss": 108.5671, + "step": 40590 + }, + { + "epoch": 0.335856392439095, + "grad_norm": 1294.3653564453125, + "learning_rate": 8.497308894666263e-06, + "loss": 114.0025, + 
"step": 40600 + }, + { + "epoch": 0.33593911568846424, + "grad_norm": 1210.564697265625, + "learning_rate": 8.496300808500878e-06, + "loss": 122.3642, + "step": 40610 + }, + { + "epoch": 0.3360218389378335, + "grad_norm": 1512.3568115234375, + "learning_rate": 8.495292444150887e-06, + "loss": 146.3031, + "step": 40620 + }, + { + "epoch": 0.3361045621872027, + "grad_norm": 956.16162109375, + "learning_rate": 8.494283801696514e-06, + "loss": 140.3855, + "step": 40630 + }, + { + "epoch": 0.33618728543657195, + "grad_norm": 862.5502319335938, + "learning_rate": 8.493274881218017e-06, + "loss": 92.3681, + "step": 40640 + }, + { + "epoch": 0.3362700086859412, + "grad_norm": 715.0096435546875, + "learning_rate": 8.49226568279567e-06, + "loss": 93.8356, + "step": 40650 + }, + { + "epoch": 0.3363527319353104, + "grad_norm": 858.4218139648438, + "learning_rate": 8.49125620650977e-06, + "loss": 89.4689, + "step": 40660 + }, + { + "epoch": 0.33643545518467965, + "grad_norm": 797.36328125, + "learning_rate": 8.490246452440636e-06, + "loss": 131.3191, + "step": 40670 + }, + { + "epoch": 0.3365181784340489, + "grad_norm": 887.1189575195312, + "learning_rate": 8.48923642066861e-06, + "loss": 143.5478, + "step": 40680 + }, + { + "epoch": 0.3366009016834181, + "grad_norm": 688.75732421875, + "learning_rate": 8.488226111274055e-06, + "loss": 129.7013, + "step": 40690 + }, + { + "epoch": 0.33668362493278736, + "grad_norm": 770.3273315429688, + "learning_rate": 8.487215524337357e-06, + "loss": 77.0376, + "step": 40700 + }, + { + "epoch": 0.3367663481821566, + "grad_norm": 960.6439819335938, + "learning_rate": 8.486204659938924e-06, + "loss": 127.0703, + "step": 40710 + }, + { + "epoch": 0.3368490714315258, + "grad_norm": 764.961669921875, + "learning_rate": 8.485193518159186e-06, + "loss": 111.9176, + "step": 40720 + }, + { + "epoch": 0.33693179468089507, + "grad_norm": 900.38134765625, + "learning_rate": 8.484182099078596e-06, + "loss": 112.1536, + "step": 40730 + }, + { + "epoch": 0.3370145179302643, + "grad_norm": 960.113525390625, + "learning_rate": 8.483170402777624e-06, + "loss": 127.3206, + "step": 40740 + }, + { + "epoch": 0.3370972411796335, + "grad_norm": 1646.762939453125, + "learning_rate": 8.482158429336769e-06, + "loss": 143.5467, + "step": 40750 + }, + { + "epoch": 0.3371799644290028, + "grad_norm": 985.1450805664062, + "learning_rate": 8.48114617883655e-06, + "loss": 114.6298, + "step": 40760 + }, + { + "epoch": 0.33726268767837203, + "grad_norm": 759.6780395507812, + "learning_rate": 8.480133651357507e-06, + "loss": 116.8154, + "step": 40770 + }, + { + "epoch": 0.33734541092774123, + "grad_norm": 885.9656982421875, + "learning_rate": 8.479120846980197e-06, + "loss": 108.7685, + "step": 40780 + }, + { + "epoch": 0.3374281341771105, + "grad_norm": 575.7235717773438, + "learning_rate": 8.478107765785212e-06, + "loss": 88.1911, + "step": 40790 + }, + { + "epoch": 0.33751085742647974, + "grad_norm": 666.4598388671875, + "learning_rate": 8.477094407853153e-06, + "loss": 69.0146, + "step": 40800 + }, + { + "epoch": 0.33759358067584894, + "grad_norm": 645.81201171875, + "learning_rate": 8.47608077326465e-06, + "loss": 94.4932, + "step": 40810 + }, + { + "epoch": 0.3376763039252182, + "grad_norm": 994.3779907226562, + "learning_rate": 8.475066862100352e-06, + "loss": 88.9415, + "step": 40820 + }, + { + "epoch": 0.33775902717458745, + "grad_norm": 759.6505737304688, + "learning_rate": 8.474052674440934e-06, + "loss": 117.1598, + "step": 40830 + }, + { + "epoch": 0.33784175042395664, + "grad_norm": 
869.6111450195312, + "learning_rate": 8.473038210367086e-06, + "loss": 78.2449, + "step": 40840 + }, + { + "epoch": 0.3379244736733259, + "grad_norm": 947.123046875, + "learning_rate": 8.47202346995953e-06, + "loss": 93.2359, + "step": 40850 + }, + { + "epoch": 0.3380071969226951, + "grad_norm": 1361.4940185546875, + "learning_rate": 8.471008453298998e-06, + "loss": 134.8301, + "step": 40860 + }, + { + "epoch": 0.33808992017206435, + "grad_norm": 729.060546875, + "learning_rate": 8.469993160466254e-06, + "loss": 94.2659, + "step": 40870 + }, + { + "epoch": 0.3381726434214336, + "grad_norm": 643.9381713867188, + "learning_rate": 8.46897759154208e-06, + "loss": 88.6729, + "step": 40880 + }, + { + "epoch": 0.3382553666708028, + "grad_norm": 927.8340454101562, + "learning_rate": 8.467961746607279e-06, + "loss": 127.7991, + "step": 40890 + }, + { + "epoch": 0.33833808992017206, + "grad_norm": 604.8875122070312, + "learning_rate": 8.466945625742678e-06, + "loss": 76.509, + "step": 40900 + }, + { + "epoch": 0.3384208131695413, + "grad_norm": 672.4412841796875, + "learning_rate": 8.465929229029124e-06, + "loss": 165.3088, + "step": 40910 + }, + { + "epoch": 0.3385035364189105, + "grad_norm": 649.953369140625, + "learning_rate": 8.464912556547486e-06, + "loss": 112.9105, + "step": 40920 + }, + { + "epoch": 0.33858625966827977, + "grad_norm": 1930.8785400390625, + "learning_rate": 8.46389560837866e-06, + "loss": 108.5322, + "step": 40930 + }, + { + "epoch": 0.338668982917649, + "grad_norm": 622.740966796875, + "learning_rate": 8.462878384603558e-06, + "loss": 117.5824, + "step": 40940 + }, + { + "epoch": 0.3387517061670182, + "grad_norm": 474.0765686035156, + "learning_rate": 8.461860885303116e-06, + "loss": 99.5456, + "step": 40950 + }, + { + "epoch": 0.3388344294163875, + "grad_norm": 952.2894287109375, + "learning_rate": 8.460843110558287e-06, + "loss": 124.9169, + "step": 40960 + }, + { + "epoch": 0.3389171526657567, + "grad_norm": 654.9668579101562, + "learning_rate": 8.459825060450058e-06, + "loss": 90.4174, + "step": 40970 + }, + { + "epoch": 0.3389998759151259, + "grad_norm": 1069.80029296875, + "learning_rate": 8.458806735059428e-06, + "loss": 134.0334, + "step": 40980 + }, + { + "epoch": 0.3390825991644952, + "grad_norm": 657.5958862304688, + "learning_rate": 8.45778813446742e-06, + "loss": 97.927, + "step": 40990 + }, + { + "epoch": 0.33916532241386443, + "grad_norm": 1148.0079345703125, + "learning_rate": 8.456769258755078e-06, + "loss": 111.504, + "step": 41000 + }, + { + "epoch": 0.33924804566323363, + "grad_norm": 1073.7718505859375, + "learning_rate": 8.455750108003468e-06, + "loss": 78.7796, + "step": 41010 + }, + { + "epoch": 0.3393307689126029, + "grad_norm": 530.935302734375, + "learning_rate": 8.454730682293686e-06, + "loss": 76.4729, + "step": 41020 + }, + { + "epoch": 0.33941349216197214, + "grad_norm": 584.3358764648438, + "learning_rate": 8.453710981706838e-06, + "loss": 100.3047, + "step": 41030 + }, + { + "epoch": 0.33949621541134134, + "grad_norm": 1019.848388671875, + "learning_rate": 8.452691006324055e-06, + "loss": 101.324, + "step": 41040 + }, + { + "epoch": 0.3395789386607106, + "grad_norm": 991.2681274414062, + "learning_rate": 8.451670756226496e-06, + "loss": 75.0817, + "step": 41050 + }, + { + "epoch": 0.33966166191007985, + "grad_norm": 730.0400390625, + "learning_rate": 8.450650231495336e-06, + "loss": 85.305, + "step": 41060 + }, + { + "epoch": 0.33974438515944905, + "grad_norm": 929.1596069335938, + "learning_rate": 8.449629432211774e-06, + "loss": 92.8536, + 
"step": 41070 + }, + { + "epoch": 0.3398271084088183, + "grad_norm": 1377.75146484375, + "learning_rate": 8.44860835845703e-06, + "loss": 103.3764, + "step": 41080 + }, + { + "epoch": 0.33990983165818756, + "grad_norm": 522.6357421875, + "learning_rate": 8.447587010312343e-06, + "loss": 107.57, + "step": 41090 + }, + { + "epoch": 0.33999255490755675, + "grad_norm": 2292.6142578125, + "learning_rate": 8.44656538785898e-06, + "loss": 135.6061, + "step": 41100 + }, + { + "epoch": 0.340075278156926, + "grad_norm": 419.8533630371094, + "learning_rate": 8.44554349117823e-06, + "loss": 80.6014, + "step": 41110 + }, + { + "epoch": 0.34015800140629526, + "grad_norm": 1163.7928466796875, + "learning_rate": 8.444521320351397e-06, + "loss": 110.4075, + "step": 41120 + }, + { + "epoch": 0.34024072465566446, + "grad_norm": 942.8880615234375, + "learning_rate": 8.44349887545981e-06, + "loss": 118.3985, + "step": 41130 + }, + { + "epoch": 0.3403234479050337, + "grad_norm": 509.7681884765625, + "learning_rate": 8.442476156584818e-06, + "loss": 133.2833, + "step": 41140 + }, + { + "epoch": 0.34040617115440297, + "grad_norm": 1343.72705078125, + "learning_rate": 8.4414531638078e-06, + "loss": 134.3201, + "step": 41150 + }, + { + "epoch": 0.34048889440377217, + "grad_norm": 1334.9111328125, + "learning_rate": 8.440429897210148e-06, + "loss": 94.3114, + "step": 41160 + }, + { + "epoch": 0.3405716176531414, + "grad_norm": 918.8824462890625, + "learning_rate": 8.439406356873279e-06, + "loss": 105.6756, + "step": 41170 + }, + { + "epoch": 0.3406543409025107, + "grad_norm": 435.05010986328125, + "learning_rate": 8.43838254287863e-06, + "loss": 86.3829, + "step": 41180 + }, + { + "epoch": 0.3407370641518799, + "grad_norm": 934.824462890625, + "learning_rate": 8.43735845530766e-06, + "loss": 122.4491, + "step": 41190 + }, + { + "epoch": 0.34081978740124913, + "grad_norm": 1095.854248046875, + "learning_rate": 8.436334094241855e-06, + "loss": 110.3371, + "step": 41200 + }, + { + "epoch": 0.34090251065061833, + "grad_norm": 1048.58154296875, + "learning_rate": 8.435309459762718e-06, + "loss": 135.8438, + "step": 41210 + }, + { + "epoch": 0.3409852338999876, + "grad_norm": 722.0227661132812, + "learning_rate": 8.434284551951772e-06, + "loss": 86.2307, + "step": 41220 + }, + { + "epoch": 0.34106795714935684, + "grad_norm": 835.3677368164062, + "learning_rate": 8.433259370890565e-06, + "loss": 79.5151, + "step": 41230 + }, + { + "epoch": 0.34115068039872604, + "grad_norm": 1341.2940673828125, + "learning_rate": 8.432233916660669e-06, + "loss": 102.6455, + "step": 41240 + }, + { + "epoch": 0.3412334036480953, + "grad_norm": 972.4404907226562, + "learning_rate": 8.43120818934367e-06, + "loss": 105.9913, + "step": 41250 + }, + { + "epoch": 0.34131612689746454, + "grad_norm": 752.3978881835938, + "learning_rate": 8.43018218902118e-06, + "loss": 97.6527, + "step": 41260 + }, + { + "epoch": 0.34139885014683374, + "grad_norm": 744.5557250976562, + "learning_rate": 8.429155915774839e-06, + "loss": 98.6538, + "step": 41270 + }, + { + "epoch": 0.341481573396203, + "grad_norm": 1084.3350830078125, + "learning_rate": 8.428129369686299e-06, + "loss": 96.4803, + "step": 41280 + }, + { + "epoch": 0.34156429664557225, + "grad_norm": 887.330078125, + "learning_rate": 8.427102550837238e-06, + "loss": 111.9868, + "step": 41290 + }, + { + "epoch": 0.34164701989494145, + "grad_norm": 869.30908203125, + "learning_rate": 8.426075459309356e-06, + "loss": 120.3469, + "step": 41300 + }, + { + "epoch": 0.3417297431443107, + "grad_norm": 
2174.565673828125, + "learning_rate": 8.42504809518437e-06, + "loss": 115.0281, + "step": 41310 + }, + { + "epoch": 0.34181246639367996, + "grad_norm": 939.2781372070312, + "learning_rate": 8.42402045854403e-06, + "loss": 118.3146, + "step": 41320 + }, + { + "epoch": 0.34189518964304916, + "grad_norm": 962.0004272460938, + "learning_rate": 8.422992549470094e-06, + "loss": 111.6336, + "step": 41330 + }, + { + "epoch": 0.3419779128924184, + "grad_norm": 856.4796142578125, + "learning_rate": 8.42196436804435e-06, + "loss": 89.8677, + "step": 41340 + }, + { + "epoch": 0.34206063614178767, + "grad_norm": 882.47509765625, + "learning_rate": 8.420935914348607e-06, + "loss": 109.2613, + "step": 41350 + }, + { + "epoch": 0.34214335939115686, + "grad_norm": 1152.5184326171875, + "learning_rate": 8.419907188464691e-06, + "loss": 83.5429, + "step": 41360 + }, + { + "epoch": 0.3422260826405261, + "grad_norm": 1086.84521484375, + "learning_rate": 8.418878190474459e-06, + "loss": 107.8546, + "step": 41370 + }, + { + "epoch": 0.3423088058898954, + "grad_norm": 1217.004150390625, + "learning_rate": 8.417848920459778e-06, + "loss": 137.2482, + "step": 41380 + }, + { + "epoch": 0.34239152913926457, + "grad_norm": 706.2305297851562, + "learning_rate": 8.416819378502543e-06, + "loss": 74.3434, + "step": 41390 + }, + { + "epoch": 0.3424742523886338, + "grad_norm": 1614.1566162109375, + "learning_rate": 8.415789564684673e-06, + "loss": 142.1887, + "step": 41400 + }, + { + "epoch": 0.3425569756380031, + "grad_norm": 875.3898315429688, + "learning_rate": 8.414759479088102e-06, + "loss": 97.1488, + "step": 41410 + }, + { + "epoch": 0.3426396988873723, + "grad_norm": 1026.1566162109375, + "learning_rate": 8.413729121794794e-06, + "loss": 130.0628, + "step": 41420 + }, + { + "epoch": 0.34272242213674153, + "grad_norm": 977.263427734375, + "learning_rate": 8.412698492886723e-06, + "loss": 78.8849, + "step": 41430 + }, + { + "epoch": 0.3428051453861108, + "grad_norm": 1244.2081298828125, + "learning_rate": 8.411667592445898e-06, + "loss": 142.179, + "step": 41440 + }, + { + "epoch": 0.34288786863548, + "grad_norm": 751.5814819335938, + "learning_rate": 8.410636420554337e-06, + "loss": 95.5533, + "step": 41450 + }, + { + "epoch": 0.34297059188484924, + "grad_norm": 786.3610229492188, + "learning_rate": 8.409604977294093e-06, + "loss": 103.3417, + "step": 41460 + }, + { + "epoch": 0.3430533151342185, + "grad_norm": 931.486572265625, + "learning_rate": 8.408573262747225e-06, + "loss": 141.6174, + "step": 41470 + }, + { + "epoch": 0.3431360383835877, + "grad_norm": 838.9139404296875, + "learning_rate": 8.407541276995828e-06, + "loss": 105.8545, + "step": 41480 + }, + { + "epoch": 0.34321876163295695, + "grad_norm": 654.8321533203125, + "learning_rate": 8.40650902012201e-06, + "loss": 107.5087, + "step": 41490 + }, + { + "epoch": 0.3433014848823262, + "grad_norm": 766.4340209960938, + "learning_rate": 8.405476492207902e-06, + "loss": 113.7733, + "step": 41500 + }, + { + "epoch": 0.3433842081316954, + "grad_norm": 1260.985595703125, + "learning_rate": 8.404443693335658e-06, + "loss": 108.2389, + "step": 41510 + }, + { + "epoch": 0.34346693138106466, + "grad_norm": 777.0344848632812, + "learning_rate": 8.403410623587454e-06, + "loss": 112.1793, + "step": 41520 + }, + { + "epoch": 0.3435496546304339, + "grad_norm": 836.8817749023438, + "learning_rate": 8.402377283045487e-06, + "loss": 121.241, + "step": 41530 + }, + { + "epoch": 0.3436323778798031, + "grad_norm": 675.0514526367188, + "learning_rate": 8.401343671791974e-06, + 
"loss": 121.9953, + "step": 41540 + }, + { + "epoch": 0.34371510112917236, + "grad_norm": 1098.513916015625, + "learning_rate": 8.400309789909155e-06, + "loss": 129.7842, + "step": 41550 + }, + { + "epoch": 0.3437978243785416, + "grad_norm": 728.864013671875, + "learning_rate": 8.399275637479291e-06, + "loss": 99.4059, + "step": 41560 + }, + { + "epoch": 0.3438805476279108, + "grad_norm": 744.5280151367188, + "learning_rate": 8.398241214584666e-06, + "loss": 115.1609, + "step": 41570 + }, + { + "epoch": 0.34396327087728007, + "grad_norm": 938.07080078125, + "learning_rate": 8.397206521307584e-06, + "loss": 102.9427, + "step": 41580 + }, + { + "epoch": 0.34404599412664927, + "grad_norm": 1658.468994140625, + "learning_rate": 8.396171557730369e-06, + "loss": 115.574, + "step": 41590 + }, + { + "epoch": 0.3441287173760185, + "grad_norm": 634.4130859375, + "learning_rate": 8.39513632393537e-06, + "loss": 93.7718, + "step": 41600 + }, + { + "epoch": 0.3442114406253878, + "grad_norm": 579.6549072265625, + "learning_rate": 8.394100820004954e-06, + "loss": 92.9008, + "step": 41610 + }, + { + "epoch": 0.344294163874757, + "grad_norm": 450.8374938964844, + "learning_rate": 8.393065046021513e-06, + "loss": 90.1996, + "step": 41620 + }, + { + "epoch": 0.34437688712412623, + "grad_norm": 572.2847290039062, + "learning_rate": 8.39202900206746e-06, + "loss": 147.2724, + "step": 41630 + }, + { + "epoch": 0.3444596103734955, + "grad_norm": 2148.787353515625, + "learning_rate": 8.390992688225226e-06, + "loss": 132.4656, + "step": 41640 + }, + { + "epoch": 0.3445423336228647, + "grad_norm": 707.6249389648438, + "learning_rate": 8.389956104577265e-06, + "loss": 90.7987, + "step": 41650 + }, + { + "epoch": 0.34462505687223394, + "grad_norm": 1235.2393798828125, + "learning_rate": 8.388919251206054e-06, + "loss": 120.5997, + "step": 41660 + }, + { + "epoch": 0.3447077801216032, + "grad_norm": 802.1920166015625, + "learning_rate": 8.387882128194094e-06, + "loss": 127.8519, + "step": 41670 + }, + { + "epoch": 0.3447905033709724, + "grad_norm": 1075.386962890625, + "learning_rate": 8.3868447356239e-06, + "loss": 95.1565, + "step": 41680 + }, + { + "epoch": 0.34487322662034164, + "grad_norm": 1524.40283203125, + "learning_rate": 8.385807073578014e-06, + "loss": 125.0543, + "step": 41690 + }, + { + "epoch": 0.3449559498697109, + "grad_norm": 887.3607177734375, + "learning_rate": 8.384769142138998e-06, + "loss": 86.6021, + "step": 41700 + }, + { + "epoch": 0.3450386731190801, + "grad_norm": 932.5927124023438, + "learning_rate": 8.383730941389434e-06, + "loss": 96.8807, + "step": 41710 + }, + { + "epoch": 0.34512139636844935, + "grad_norm": 956.2900390625, + "learning_rate": 8.382692471411931e-06, + "loss": 97.0625, + "step": 41720 + }, + { + "epoch": 0.3452041196178186, + "grad_norm": 646.645263671875, + "learning_rate": 8.38165373228911e-06, + "loss": 95.7504, + "step": 41730 + }, + { + "epoch": 0.3452868428671878, + "grad_norm": 1114.9278564453125, + "learning_rate": 8.380614724103622e-06, + "loss": 120.9078, + "step": 41740 + }, + { + "epoch": 0.34536956611655706, + "grad_norm": 693.3104248046875, + "learning_rate": 8.379575446938136e-06, + "loss": 106.318, + "step": 41750 + }, + { + "epoch": 0.3454522893659263, + "grad_norm": 8694.8857421875, + "learning_rate": 8.37853590087534e-06, + "loss": 133.577, + "step": 41760 + }, + { + "epoch": 0.3455350126152955, + "grad_norm": 645.8015747070312, + "learning_rate": 8.377496085997949e-06, + "loss": 124.9107, + "step": 41770 + }, + { + "epoch": 0.34561773586466477, + 
"grad_norm": 2443.57421875, + "learning_rate": 8.376456002388695e-06, + "loss": 115.6421, + "step": 41780 + }, + { + "epoch": 0.345700459114034, + "grad_norm": 1185.3404541015625, + "learning_rate": 8.375415650130332e-06, + "loss": 103.3099, + "step": 41790 + }, + { + "epoch": 0.3457831823634032, + "grad_norm": 1205.232666015625, + "learning_rate": 8.37437502930564e-06, + "loss": 100.6235, + "step": 41800 + }, + { + "epoch": 0.3458659056127725, + "grad_norm": 1242.7322998046875, + "learning_rate": 8.373334139997409e-06, + "loss": 131.52, + "step": 41810 + }, + { + "epoch": 0.3459486288621417, + "grad_norm": 747.3458862304688, + "learning_rate": 8.372292982288463e-06, + "loss": 118.5125, + "step": 41820 + }, + { + "epoch": 0.3460313521115109, + "grad_norm": 1096.74462890625, + "learning_rate": 8.371251556261642e-06, + "loss": 136.9112, + "step": 41830 + }, + { + "epoch": 0.3461140753608802, + "grad_norm": 801.9876708984375, + "learning_rate": 8.370209861999807e-06, + "loss": 106.0218, + "step": 41840 + }, + { + "epoch": 0.34619679861024943, + "grad_norm": 909.0194091796875, + "learning_rate": 8.36916789958584e-06, + "loss": 103.4507, + "step": 41850 + }, + { + "epoch": 0.34627952185961863, + "grad_norm": 1055.92041015625, + "learning_rate": 8.368125669102645e-06, + "loss": 94.5659, + "step": 41860 + }, + { + "epoch": 0.3463622451089879, + "grad_norm": 867.0262451171875, + "learning_rate": 8.36708317063315e-06, + "loss": 135.4051, + "step": 41870 + }, + { + "epoch": 0.34644496835835714, + "grad_norm": 871.2109985351562, + "learning_rate": 8.366040404260298e-06, + "loss": 99.1085, + "step": 41880 + }, + { + "epoch": 0.34652769160772634, + "grad_norm": 830.19384765625, + "learning_rate": 8.36499737006706e-06, + "loss": 76.3817, + "step": 41890 + }, + { + "epoch": 0.3466104148570956, + "grad_norm": 951.84326171875, + "learning_rate": 8.363954068136424e-06, + "loss": 119.5681, + "step": 41900 + }, + { + "epoch": 0.34669313810646485, + "grad_norm": 731.0087890625, + "learning_rate": 8.362910498551402e-06, + "loss": 114.4868, + "step": 41910 + }, + { + "epoch": 0.34677586135583405, + "grad_norm": 635.1656494140625, + "learning_rate": 8.361866661395024e-06, + "loss": 116.8612, + "step": 41920 + }, + { + "epoch": 0.3468585846052033, + "grad_norm": 1068.5445556640625, + "learning_rate": 8.360822556750345e-06, + "loss": 91.3164, + "step": 41930 + }, + { + "epoch": 0.3469413078545725, + "grad_norm": 949.19873046875, + "learning_rate": 8.35977818470044e-06, + "loss": 104.9618, + "step": 41940 + }, + { + "epoch": 0.34702403110394175, + "grad_norm": 429.62677001953125, + "learning_rate": 8.358733545328404e-06, + "loss": 93.7747, + "step": 41950 + }, + { + "epoch": 0.347106754353311, + "grad_norm": 893.44189453125, + "learning_rate": 8.357688638717354e-06, + "loss": 106.5521, + "step": 41960 + }, + { + "epoch": 0.3471894776026802, + "grad_norm": 585.1188354492188, + "learning_rate": 8.356643464950428e-06, + "loss": 80.4151, + "step": 41970 + }, + { + "epoch": 0.34727220085204946, + "grad_norm": 1215.0146484375, + "learning_rate": 8.355598024110789e-06, + "loss": 181.5081, + "step": 41980 + }, + { + "epoch": 0.3473549241014187, + "grad_norm": 1458.9388427734375, + "learning_rate": 8.354552316281613e-06, + "loss": 141.7899, + "step": 41990 + }, + { + "epoch": 0.3474376473507879, + "grad_norm": 915.29443359375, + "learning_rate": 8.353506341546106e-06, + "loss": 108.375, + "step": 42000 + }, + { + "epoch": 0.34752037060015717, + "grad_norm": 1018.5140991210938, + "learning_rate": 8.352460099987488e-06, + 
"loss": 118.4601, + "step": 42010 + }, + { + "epoch": 0.3476030938495264, + "grad_norm": 1653.6846923828125, + "learning_rate": 8.351413591689007e-06, + "loss": 127.7061, + "step": 42020 + }, + { + "epoch": 0.3476858170988956, + "grad_norm": 1303.524169921875, + "learning_rate": 8.350366816733927e-06, + "loss": 109.428, + "step": 42030 + }, + { + "epoch": 0.3477685403482649, + "grad_norm": 1039.2083740234375, + "learning_rate": 8.349319775205536e-06, + "loss": 120.8401, + "step": 42040 + }, + { + "epoch": 0.34785126359763413, + "grad_norm": 1214.731689453125, + "learning_rate": 8.34827246718714e-06, + "loss": 157.5093, + "step": 42050 + }, + { + "epoch": 0.34793398684700333, + "grad_norm": 832.408203125, + "learning_rate": 8.347224892762072e-06, + "loss": 106.25, + "step": 42060 + }, + { + "epoch": 0.3480167100963726, + "grad_norm": 1899.1883544921875, + "learning_rate": 8.346177052013681e-06, + "loss": 128.2392, + "step": 42070 + }, + { + "epoch": 0.34809943334574184, + "grad_norm": 475.4049072265625, + "learning_rate": 8.345128945025338e-06, + "loss": 128.0041, + "step": 42080 + }, + { + "epoch": 0.34818215659511104, + "grad_norm": 483.9809875488281, + "learning_rate": 8.344080571880438e-06, + "loss": 92.6426, + "step": 42090 + }, + { + "epoch": 0.3482648798444803, + "grad_norm": 1139.35302734375, + "learning_rate": 8.343031932662394e-06, + "loss": 89.6336, + "step": 42100 + }, + { + "epoch": 0.34834760309384954, + "grad_norm": 998.1423950195312, + "learning_rate": 8.341983027454641e-06, + "loss": 148.1835, + "step": 42110 + }, + { + "epoch": 0.34843032634321874, + "grad_norm": 1257.747802734375, + "learning_rate": 8.340933856340637e-06, + "loss": 116.7931, + "step": 42120 + }, + { + "epoch": 0.348513049592588, + "grad_norm": 1050.6527099609375, + "learning_rate": 8.339884419403857e-06, + "loss": 124.4426, + "step": 42130 + }, + { + "epoch": 0.34859577284195725, + "grad_norm": 935.9532470703125, + "learning_rate": 8.338834716727801e-06, + "loss": 100.9498, + "step": 42140 + }, + { + "epoch": 0.34867849609132645, + "grad_norm": 1319.49365234375, + "learning_rate": 8.337784748395992e-06, + "loss": 89.4219, + "step": 42150 + }, + { + "epoch": 0.3487612193406957, + "grad_norm": 583.6215209960938, + "learning_rate": 8.336734514491968e-06, + "loss": 101.7242, + "step": 42160 + }, + { + "epoch": 0.34884394259006496, + "grad_norm": 1093.5703125, + "learning_rate": 8.335684015099294e-06, + "loss": 100.3031, + "step": 42170 + }, + { + "epoch": 0.34892666583943416, + "grad_norm": 7167.025390625, + "learning_rate": 8.33463325030155e-06, + "loss": 142.965, + "step": 42180 + }, + { + "epoch": 0.3490093890888034, + "grad_norm": 1053.3721923828125, + "learning_rate": 8.333582220182344e-06, + "loss": 118.1564, + "step": 42190 + }, + { + "epoch": 0.34909211233817267, + "grad_norm": 1960.47705078125, + "learning_rate": 8.332530924825297e-06, + "loss": 119.301, + "step": 42200 + }, + { + "epoch": 0.34917483558754187, + "grad_norm": 973.1529541015625, + "learning_rate": 8.33147936431406e-06, + "loss": 131.6417, + "step": 42210 + }, + { + "epoch": 0.3492575588369111, + "grad_norm": 925.6248168945312, + "learning_rate": 8.3304275387323e-06, + "loss": 116.9732, + "step": 42220 + }, + { + "epoch": 0.3493402820862804, + "grad_norm": 0.0, + "learning_rate": 8.329375448163703e-06, + "loss": 87.3547, + "step": 42230 + }, + { + "epoch": 0.3494230053356496, + "grad_norm": 1005.3858032226562, + "learning_rate": 8.328323092691985e-06, + "loss": 104.9806, + "step": 42240 + }, + { + "epoch": 0.3495057285850188, + 
"grad_norm": 790.18603515625, + "learning_rate": 8.32727047240087e-06, + "loss": 141.6189, + "step": 42250 + }, + { + "epoch": 0.3495884518343881, + "grad_norm": 885.5327758789062, + "learning_rate": 8.326217587374115e-06, + "loss": 95.2874, + "step": 42260 + }, + { + "epoch": 0.3496711750837573, + "grad_norm": 1333.3345947265625, + "learning_rate": 8.325164437695493e-06, + "loss": 110.1591, + "step": 42270 + }, + { + "epoch": 0.34975389833312653, + "grad_norm": 444.2149963378906, + "learning_rate": 8.324111023448795e-06, + "loss": 89.4089, + "step": 42280 + }, + { + "epoch": 0.3498366215824958, + "grad_norm": 712.4427490234375, + "learning_rate": 8.32305734471784e-06, + "loss": 104.8016, + "step": 42290 + }, + { + "epoch": 0.349919344831865, + "grad_norm": 825.5255737304688, + "learning_rate": 8.322003401586463e-06, + "loss": 109.8285, + "step": 42300 + }, + { + "epoch": 0.35000206808123424, + "grad_norm": 674.3945922851562, + "learning_rate": 8.32094919413852e-06, + "loss": 118.0758, + "step": 42310 + }, + { + "epoch": 0.35008479133060344, + "grad_norm": 1092.6353759765625, + "learning_rate": 8.319894722457892e-06, + "loss": 100.6579, + "step": 42320 + }, + { + "epoch": 0.3501675145799727, + "grad_norm": 791.6581420898438, + "learning_rate": 8.318839986628477e-06, + "loss": 96.1517, + "step": 42330 + }, + { + "epoch": 0.35025023782934195, + "grad_norm": 483.9101867675781, + "learning_rate": 8.317784986734194e-06, + "loss": 86.3806, + "step": 42340 + }, + { + "epoch": 0.35033296107871115, + "grad_norm": 982.6383056640625, + "learning_rate": 8.316729722858987e-06, + "loss": 142.3889, + "step": 42350 + }, + { + "epoch": 0.3504156843280804, + "grad_norm": 1031.0849609375, + "learning_rate": 8.31567419508682e-06, + "loss": 89.9149, + "step": 42360 + }, + { + "epoch": 0.35049840757744966, + "grad_norm": 1122.879150390625, + "learning_rate": 8.31461840350167e-06, + "loss": 103.2544, + "step": 42370 + }, + { + "epoch": 0.35058113082681885, + "grad_norm": 1027.361328125, + "learning_rate": 8.313562348187549e-06, + "loss": 90.0591, + "step": 42380 + }, + { + "epoch": 0.3506638540761881, + "grad_norm": 789.521240234375, + "learning_rate": 8.312506029228478e-06, + "loss": 90.2937, + "step": 42390 + }, + { + "epoch": 0.35074657732555736, + "grad_norm": 616.3732299804688, + "learning_rate": 8.311449446708506e-06, + "loss": 129.6206, + "step": 42400 + }, + { + "epoch": 0.35082930057492656, + "grad_norm": 1325.065185546875, + "learning_rate": 8.310392600711698e-06, + "loss": 114.9959, + "step": 42410 + }, + { + "epoch": 0.3509120238242958, + "grad_norm": 976.2049560546875, + "learning_rate": 8.309335491322143e-06, + "loss": 95.8001, + "step": 42420 + }, + { + "epoch": 0.35099474707366507, + "grad_norm": 1060.470947265625, + "learning_rate": 8.30827811862395e-06, + "loss": 101.8484, + "step": 42430 + }, + { + "epoch": 0.35107747032303427, + "grad_norm": 813.407470703125, + "learning_rate": 8.307220482701251e-06, + "loss": 119.0382, + "step": 42440 + }, + { + "epoch": 0.3511601935724035, + "grad_norm": 769.6190795898438, + "learning_rate": 8.306162583638197e-06, + "loss": 116.1655, + "step": 42450 + }, + { + "epoch": 0.3512429168217728, + "grad_norm": 1182.1982421875, + "learning_rate": 8.305104421518959e-06, + "loss": 116.6159, + "step": 42460 + }, + { + "epoch": 0.351325640071142, + "grad_norm": 1151.38671875, + "learning_rate": 8.30404599642773e-06, + "loss": 97.3802, + "step": 42470 + }, + { + "epoch": 0.35140836332051123, + "grad_norm": 0.0, + "learning_rate": 8.302987308448724e-06, + "loss": 
81.0704, + "step": 42480 + }, + { + "epoch": 0.3514910865698805, + "grad_norm": 728.1912231445312, + "learning_rate": 8.301928357666178e-06, + "loss": 92.2258, + "step": 42490 + }, + { + "epoch": 0.3515738098192497, + "grad_norm": 793.99267578125, + "learning_rate": 8.300869144164346e-06, + "loss": 110.2738, + "step": 42500 + }, + { + "epoch": 0.35165653306861894, + "grad_norm": 993.7640991210938, + "learning_rate": 8.299809668027505e-06, + "loss": 131.7156, + "step": 42510 + }, + { + "epoch": 0.3517392563179882, + "grad_norm": 606.397705078125, + "learning_rate": 8.298749929339953e-06, + "loss": 104.0031, + "step": 42520 + }, + { + "epoch": 0.3518219795673574, + "grad_norm": 827.4598388671875, + "learning_rate": 8.297689928186009e-06, + "loss": 110.1917, + "step": 42530 + }, + { + "epoch": 0.35190470281672664, + "grad_norm": 2409.9150390625, + "learning_rate": 8.29662966465001e-06, + "loss": 135.4566, + "step": 42540 + }, + { + "epoch": 0.3519874260660959, + "grad_norm": 613.3368530273438, + "learning_rate": 8.295569138816319e-06, + "loss": 80.187, + "step": 42550 + }, + { + "epoch": 0.3520701493154651, + "grad_norm": 843.9635009765625, + "learning_rate": 8.294508350769315e-06, + "loss": 109.0294, + "step": 42560 + }, + { + "epoch": 0.35215287256483435, + "grad_norm": 622.92578125, + "learning_rate": 8.293447300593402e-06, + "loss": 127.0855, + "step": 42570 + }, + { + "epoch": 0.3522355958142036, + "grad_norm": 657.6416625976562, + "learning_rate": 8.292385988373005e-06, + "loss": 108.607, + "step": 42580 + }, + { + "epoch": 0.3523183190635728, + "grad_norm": 1057.9915771484375, + "learning_rate": 8.29132441419256e-06, + "loss": 117.3586, + "step": 42590 + }, + { + "epoch": 0.35240104231294206, + "grad_norm": 704.32421875, + "learning_rate": 8.290262578136541e-06, + "loss": 92.2817, + "step": 42600 + }, + { + "epoch": 0.3524837655623113, + "grad_norm": 391.53265380859375, + "learning_rate": 8.289200480289426e-06, + "loss": 91.4089, + "step": 42610 + }, + { + "epoch": 0.3525664888116805, + "grad_norm": 1248.0340576171875, + "learning_rate": 8.288138120735726e-06, + "loss": 94.6713, + "step": 42620 + }, + { + "epoch": 0.35264921206104977, + "grad_norm": 1576.25, + "learning_rate": 8.287075499559965e-06, + "loss": 120.6687, + "step": 42630 + }, + { + "epoch": 0.352731935310419, + "grad_norm": 850.7510375976562, + "learning_rate": 8.286012616846693e-06, + "loss": 68.9104, + "step": 42640 + }, + { + "epoch": 0.3528146585597882, + "grad_norm": 1018.4036254882812, + "learning_rate": 8.284949472680477e-06, + "loss": 96.3005, + "step": 42650 + }, + { + "epoch": 0.3528973818091575, + "grad_norm": 769.276611328125, + "learning_rate": 8.283886067145908e-06, + "loss": 101.5941, + "step": 42660 + }, + { + "epoch": 0.35298010505852667, + "grad_norm": 1009.304931640625, + "learning_rate": 8.282822400327595e-06, + "loss": 100.8695, + "step": 42670 + }, + { + "epoch": 0.3530628283078959, + "grad_norm": 965.6119384765625, + "learning_rate": 8.28175847231017e-06, + "loss": 112.0248, + "step": 42680 + }, + { + "epoch": 0.3531455515572652, + "grad_norm": 862.6695556640625, + "learning_rate": 8.280694283178285e-06, + "loss": 97.2944, + "step": 42690 + }, + { + "epoch": 0.3532282748066344, + "grad_norm": 698.0172119140625, + "learning_rate": 8.27962983301661e-06, + "loss": 66.0908, + "step": 42700 + }, + { + "epoch": 0.35331099805600363, + "grad_norm": 680.3596801757812, + "learning_rate": 8.278565121909845e-06, + "loss": 56.0012, + "step": 42710 + }, + { + "epoch": 0.3533937213053729, + "grad_norm": 
979.0956420898438, + "learning_rate": 8.277500149942697e-06, + "loss": 111.6114, + "step": 42720 + }, + { + "epoch": 0.3534764445547421, + "grad_norm": 839.169921875, + "learning_rate": 8.276434917199904e-06, + "loss": 126.5481, + "step": 42730 + }, + { + "epoch": 0.35355916780411134, + "grad_norm": 1183.0323486328125, + "learning_rate": 8.275369423766222e-06, + "loss": 97.7488, + "step": 42740 + }, + { + "epoch": 0.3536418910534806, + "grad_norm": 1349.1572265625, + "learning_rate": 8.274303669726427e-06, + "loss": 110.0713, + "step": 42750 + }, + { + "epoch": 0.3537246143028498, + "grad_norm": 916.60107421875, + "learning_rate": 8.273237655165314e-06, + "loss": 106.8208, + "step": 42760 + }, + { + "epoch": 0.35380733755221905, + "grad_norm": 800.146484375, + "learning_rate": 8.272171380167705e-06, + "loss": 83.875, + "step": 42770 + }, + { + "epoch": 0.3538900608015883, + "grad_norm": 1539.4642333984375, + "learning_rate": 8.271104844818436e-06, + "loss": 130.0894, + "step": 42780 + }, + { + "epoch": 0.3539727840509575, + "grad_norm": 1235.24560546875, + "learning_rate": 8.270038049202366e-06, + "loss": 131.9467, + "step": 42790 + }, + { + "epoch": 0.35405550730032675, + "grad_norm": 962.0623168945312, + "learning_rate": 8.268970993404377e-06, + "loss": 100.8265, + "step": 42800 + }, + { + "epoch": 0.354138230549696, + "grad_norm": 836.7759399414062, + "learning_rate": 8.267903677509368e-06, + "loss": 119.7814, + "step": 42810 + }, + { + "epoch": 0.3542209537990652, + "grad_norm": 1205.632080078125, + "learning_rate": 8.266836101602263e-06, + "loss": 107.5889, + "step": 42820 + }, + { + "epoch": 0.35430367704843446, + "grad_norm": 1180.9609375, + "learning_rate": 8.265768265767999e-06, + "loss": 146.9007, + "step": 42830 + }, + { + "epoch": 0.3543864002978037, + "grad_norm": 784.8786010742188, + "learning_rate": 8.264700170091543e-06, + "loss": 93.1217, + "step": 42840 + }, + { + "epoch": 0.3544691235471729, + "grad_norm": 583.6641845703125, + "learning_rate": 8.263631814657879e-06, + "loss": 127.952, + "step": 42850 + }, + { + "epoch": 0.35455184679654217, + "grad_norm": 1015.6278076171875, + "learning_rate": 8.262563199552007e-06, + "loss": 109.5995, + "step": 42860 + }, + { + "epoch": 0.3546345700459114, + "grad_norm": 1008.2879638671875, + "learning_rate": 8.261494324858956e-06, + "loss": 83.4442, + "step": 42870 + }, + { + "epoch": 0.3547172932952806, + "grad_norm": 934.4283447265625, + "learning_rate": 8.26042519066377e-06, + "loss": 81.4526, + "step": 42880 + }, + { + "epoch": 0.3548000165446499, + "grad_norm": 602.0402221679688, + "learning_rate": 8.259355797051515e-06, + "loss": 87.8508, + "step": 42890 + }, + { + "epoch": 0.35488273979401913, + "grad_norm": 952.8324584960938, + "learning_rate": 8.258286144107277e-06, + "loss": 101.1536, + "step": 42900 + }, + { + "epoch": 0.35496546304338833, + "grad_norm": 924.0335693359375, + "learning_rate": 8.257216231916162e-06, + "loss": 113.0049, + "step": 42910 + }, + { + "epoch": 0.3550481862927576, + "grad_norm": 684.9584350585938, + "learning_rate": 8.256146060563304e-06, + "loss": 117.6641, + "step": 42920 + }, + { + "epoch": 0.35513090954212684, + "grad_norm": 1416.010009765625, + "learning_rate": 8.255075630133847e-06, + "loss": 93.2686, + "step": 42930 + }, + { + "epoch": 0.35521363279149604, + "grad_norm": 1231.5831298828125, + "learning_rate": 8.254004940712958e-06, + "loss": 111.2918, + "step": 42940 + }, + { + "epoch": 0.3552963560408653, + "grad_norm": 1368.33935546875, + "learning_rate": 8.252933992385833e-06, + "loss": 
101.9154, + "step": 42950 + }, + { + "epoch": 0.35537907929023455, + "grad_norm": 754.49609375, + "learning_rate": 8.251862785237676e-06, + "loss": 88.8121, + "step": 42960 + }, + { + "epoch": 0.35546180253960374, + "grad_norm": 1223.3607177734375, + "learning_rate": 8.250791319353723e-06, + "loss": 111.0358, + "step": 42970 + }, + { + "epoch": 0.355544525788973, + "grad_norm": 1084.2374267578125, + "learning_rate": 8.249719594819225e-06, + "loss": 107.8028, + "step": 42980 + }, + { + "epoch": 0.35562724903834225, + "grad_norm": 1002.8360595703125, + "learning_rate": 8.248647611719452e-06, + "loss": 87.2639, + "step": 42990 + }, + { + "epoch": 0.35570997228771145, + "grad_norm": 882.518310546875, + "learning_rate": 8.247575370139695e-06, + "loss": 120.7826, + "step": 43000 + }, + { + "epoch": 0.3557926955370807, + "grad_norm": 710.4674682617188, + "learning_rate": 8.246502870165273e-06, + "loss": 130.1348, + "step": 43010 + }, + { + "epoch": 0.3558754187864499, + "grad_norm": 705.7244262695312, + "learning_rate": 8.245430111881519e-06, + "loss": 82.2953, + "step": 43020 + }, + { + "epoch": 0.35595814203581916, + "grad_norm": 1012.916748046875, + "learning_rate": 8.244357095373783e-06, + "loss": 103.8642, + "step": 43030 + }, + { + "epoch": 0.3560408652851884, + "grad_norm": 823.5924682617188, + "learning_rate": 8.243283820727441e-06, + "loss": 120.169, + "step": 43040 + }, + { + "epoch": 0.3561235885345576, + "grad_norm": 506.7729187011719, + "learning_rate": 8.242210288027893e-06, + "loss": 106.6605, + "step": 43050 + }, + { + "epoch": 0.35620631178392687, + "grad_norm": 573.303955078125, + "learning_rate": 8.241136497360552e-06, + "loss": 96.0248, + "step": 43060 + }, + { + "epoch": 0.3562890350332961, + "grad_norm": 1349.73974609375, + "learning_rate": 8.240062448810853e-06, + "loss": 150.305, + "step": 43070 + }, + { + "epoch": 0.3563717582826653, + "grad_norm": 898.83349609375, + "learning_rate": 8.238988142464254e-06, + "loss": 99.9782, + "step": 43080 + }, + { + "epoch": 0.3564544815320346, + "grad_norm": 1892.2440185546875, + "learning_rate": 8.237913578406236e-06, + "loss": 127.4689, + "step": 43090 + }, + { + "epoch": 0.3565372047814038, + "grad_norm": 1101.9765625, + "learning_rate": 8.236838756722294e-06, + "loss": 99.9242, + "step": 43100 + }, + { + "epoch": 0.356619928030773, + "grad_norm": 785.5177001953125, + "learning_rate": 8.235763677497945e-06, + "loss": 102.3918, + "step": 43110 + }, + { + "epoch": 0.3567026512801423, + "grad_norm": 1374.7742919921875, + "learning_rate": 8.234688340818732e-06, + "loss": 113.4264, + "step": 43120 + }, + { + "epoch": 0.35678537452951153, + "grad_norm": 606.4083862304688, + "learning_rate": 8.233612746770214e-06, + "loss": 93.4949, + "step": 43130 + }, + { + "epoch": 0.35686809777888073, + "grad_norm": 922.9923095703125, + "learning_rate": 8.232536895437968e-06, + "loss": 85.8221, + "step": 43140 + }, + { + "epoch": 0.35695082102825, + "grad_norm": 790.5139770507812, + "learning_rate": 8.231460786907597e-06, + "loss": 117.3538, + "step": 43150 + }, + { + "epoch": 0.35703354427761924, + "grad_norm": 417.8067626953125, + "learning_rate": 8.230384421264722e-06, + "loss": 87.6937, + "step": 43160 + }, + { + "epoch": 0.35711626752698844, + "grad_norm": 1458.1962890625, + "learning_rate": 8.229307798594985e-06, + "loss": 133.923, + "step": 43170 + }, + { + "epoch": 0.3571989907763577, + "grad_norm": 987.9276123046875, + "learning_rate": 8.228230918984046e-06, + "loss": 111.6961, + "step": 43180 + }, + { + "epoch": 0.35728171402572695, + 
"grad_norm": 1103.388427734375, + "learning_rate": 8.22715378251759e-06, + "loss": 93.5427, + "step": 43190 + }, + { + "epoch": 0.35736443727509615, + "grad_norm": 697.0567626953125, + "learning_rate": 8.226076389281316e-06, + "loss": 117.8876, + "step": 43200 + }, + { + "epoch": 0.3574471605244654, + "grad_norm": 1062.5294189453125, + "learning_rate": 8.22499873936095e-06, + "loss": 85.2779, + "step": 43210 + }, + { + "epoch": 0.35752988377383466, + "grad_norm": 997.5350341796875, + "learning_rate": 8.223920832842236e-06, + "loss": 127.3359, + "step": 43220 + }, + { + "epoch": 0.35761260702320385, + "grad_norm": 839.9952392578125, + "learning_rate": 8.222842669810936e-06, + "loss": 112.6368, + "step": 43230 + }, + { + "epoch": 0.3576953302725731, + "grad_norm": 1012.621337890625, + "learning_rate": 8.221764250352835e-06, + "loss": 108.84, + "step": 43240 + }, + { + "epoch": 0.35777805352194236, + "grad_norm": 918.2972412109375, + "learning_rate": 8.220685574553739e-06, + "loss": 85.8282, + "step": 43250 + }, + { + "epoch": 0.35786077677131156, + "grad_norm": 776.9376220703125, + "learning_rate": 8.219606642499474e-06, + "loss": 96.5936, + "step": 43260 + }, + { + "epoch": 0.3579435000206808, + "grad_norm": 868.3795166015625, + "learning_rate": 8.218527454275884e-06, + "loss": 91.7565, + "step": 43270 + }, + { + "epoch": 0.35802622327005007, + "grad_norm": 958.1405029296875, + "learning_rate": 8.217448009968834e-06, + "loss": 110.3028, + "step": 43280 + }, + { + "epoch": 0.35810894651941927, + "grad_norm": 1635.0120849609375, + "learning_rate": 8.216368309664213e-06, + "loss": 115.6983, + "step": 43290 + }, + { + "epoch": 0.3581916697687885, + "grad_norm": 495.8813781738281, + "learning_rate": 8.215288353447927e-06, + "loss": 125.6738, + "step": 43300 + }, + { + "epoch": 0.3582743930181578, + "grad_norm": 1934.987548828125, + "learning_rate": 8.214208141405903e-06, + "loss": 96.0109, + "step": 43310 + }, + { + "epoch": 0.358357116267527, + "grad_norm": 1259.3150634765625, + "learning_rate": 8.213127673624088e-06, + "loss": 98.7009, + "step": 43320 + }, + { + "epoch": 0.35843983951689623, + "grad_norm": 985.3812255859375, + "learning_rate": 8.212046950188451e-06, + "loss": 119.01, + "step": 43330 + }, + { + "epoch": 0.3585225627662655, + "grad_norm": 657.3631591796875, + "learning_rate": 8.21096597118498e-06, + "loss": 102.065, + "step": 43340 + }, + { + "epoch": 0.3586052860156347, + "grad_norm": 808.63232421875, + "learning_rate": 8.209884736699681e-06, + "loss": 86.2247, + "step": 43350 + }, + { + "epoch": 0.35868800926500394, + "grad_norm": 963.5828857421875, + "learning_rate": 8.208803246818586e-06, + "loss": 99.5541, + "step": 43360 + }, + { + "epoch": 0.3587707325143732, + "grad_norm": 356.1247253417969, + "learning_rate": 8.207721501627743e-06, + "loss": 132.9291, + "step": 43370 + }, + { + "epoch": 0.3588534557637424, + "grad_norm": 1359.7357177734375, + "learning_rate": 8.20663950121322e-06, + "loss": 106.6871, + "step": 43380 + }, + { + "epoch": 0.35893617901311164, + "grad_norm": 741.9186401367188, + "learning_rate": 8.20555724566111e-06, + "loss": 110.5265, + "step": 43390 + }, + { + "epoch": 0.35901890226248084, + "grad_norm": 804.3803100585938, + "learning_rate": 8.204474735057522e-06, + "loss": 89.7678, + "step": 43400 + }, + { + "epoch": 0.3591016255118501, + "grad_norm": 986.8497924804688, + "learning_rate": 8.203391969488586e-06, + "loss": 76.5805, + "step": 43410 + }, + { + "epoch": 0.35918434876121935, + "grad_norm": 981.0845947265625, + "learning_rate": 
8.20230894904045e-06, + "loss": 108.0519, + "step": 43420 + }, + { + "epoch": 0.35926707201058855, + "grad_norm": 967.68310546875, + "learning_rate": 8.20122567379929e-06, + "loss": 144.2405, + "step": 43430 + }, + { + "epoch": 0.3593497952599578, + "grad_norm": 1253.8336181640625, + "learning_rate": 8.200142143851295e-06, + "loss": 85.0357, + "step": 43440 + }, + { + "epoch": 0.35943251850932706, + "grad_norm": 660.7843627929688, + "learning_rate": 8.199058359282675e-06, + "loss": 110.5242, + "step": 43450 + }, + { + "epoch": 0.35951524175869626, + "grad_norm": 876.0247802734375, + "learning_rate": 8.197974320179664e-06, + "loss": 143.5727, + "step": 43460 + }, + { + "epoch": 0.3595979650080655, + "grad_norm": 1184.567626953125, + "learning_rate": 8.19689002662851e-06, + "loss": 87.2803, + "step": 43470 + }, + { + "epoch": 0.35968068825743477, + "grad_norm": 899.4736938476562, + "learning_rate": 8.195805478715492e-06, + "loss": 94.5841, + "step": 43480 + }, + { + "epoch": 0.35976341150680397, + "grad_norm": 1176.9891357421875, + "learning_rate": 8.194720676526898e-06, + "loss": 105.5688, + "step": 43490 + }, + { + "epoch": 0.3598461347561732, + "grad_norm": 1430.493896484375, + "learning_rate": 8.193635620149041e-06, + "loss": 120.161, + "step": 43500 + }, + { + "epoch": 0.3599288580055425, + "grad_norm": 1398.2462158203125, + "learning_rate": 8.192550309668254e-06, + "loss": 153.4543, + "step": 43510 + }, + { + "epoch": 0.3600115812549117, + "grad_norm": 978.09033203125, + "learning_rate": 8.191464745170892e-06, + "loss": 97.1732, + "step": 43520 + }, + { + "epoch": 0.3600943045042809, + "grad_norm": 1013.3282470703125, + "learning_rate": 8.190378926743327e-06, + "loss": 101.2923, + "step": 43530 + }, + { + "epoch": 0.3601770277536502, + "grad_norm": 881.2088012695312, + "learning_rate": 8.189292854471953e-06, + "loss": 148.854, + "step": 43540 + }, + { + "epoch": 0.3602597510030194, + "grad_norm": 870.5662841796875, + "learning_rate": 8.188206528443182e-06, + "loss": 92.8082, + "step": 43550 + }, + { + "epoch": 0.36034247425238863, + "grad_norm": 776.38916015625, + "learning_rate": 8.18711994874345e-06, + "loss": 82.2745, + "step": 43560 + }, + { + "epoch": 0.3604251975017579, + "grad_norm": 904.0106811523438, + "learning_rate": 8.186033115459211e-06, + "loss": 97.3916, + "step": 43570 + }, + { + "epoch": 0.3605079207511271, + "grad_norm": 1215.83349609375, + "learning_rate": 8.184946028676937e-06, + "loss": 106.3127, + "step": 43580 + }, + { + "epoch": 0.36059064400049634, + "grad_norm": 794.0036010742188, + "learning_rate": 8.183858688483126e-06, + "loss": 91.0681, + "step": 43590 + }, + { + "epoch": 0.3606733672498656, + "grad_norm": 1268.9676513671875, + "learning_rate": 8.182771094964292e-06, + "loss": 123.4264, + "step": 43600 + }, + { + "epoch": 0.3607560904992348, + "grad_norm": 777.9105224609375, + "learning_rate": 8.181683248206968e-06, + "loss": 111.6841, + "step": 43610 + }, + { + "epoch": 0.36083881374860405, + "grad_norm": 854.5106201171875, + "learning_rate": 8.180595148297709e-06, + "loss": 113.4441, + "step": 43620 + }, + { + "epoch": 0.3609215369979733, + "grad_norm": 850.8244018554688, + "learning_rate": 8.179506795323092e-06, + "loss": 135.2171, + "step": 43630 + }, + { + "epoch": 0.3610042602473425, + "grad_norm": 1269.437255859375, + "learning_rate": 8.17841818936971e-06, + "loss": 155.0749, + "step": 43640 + }, + { + "epoch": 0.36108698349671176, + "grad_norm": 1103.447509765625, + "learning_rate": 8.177329330524182e-06, + "loss": 85.1156, + "step": 43650 + }, + { 
+ "epoch": 0.361169706746081, + "grad_norm": 800.2313232421875, + "learning_rate": 8.17624021887314e-06, + "loss": 103.1669, + "step": 43660 + }, + { + "epoch": 0.3612524299954502, + "grad_norm": 662.4274291992188, + "learning_rate": 8.17515085450324e-06, + "loss": 98.6518, + "step": 43670 + }, + { + "epoch": 0.36133515324481946, + "grad_norm": 975.4820556640625, + "learning_rate": 8.174061237501159e-06, + "loss": 120.7466, + "step": 43680 + }, + { + "epoch": 0.3614178764941887, + "grad_norm": 1076.44873046875, + "learning_rate": 8.172971367953593e-06, + "loss": 80.2128, + "step": 43690 + }, + { + "epoch": 0.3615005997435579, + "grad_norm": 1007.4608764648438, + "learning_rate": 8.171881245947257e-06, + "loss": 62.0215, + "step": 43700 + }, + { + "epoch": 0.36158332299292717, + "grad_norm": 1051.32177734375, + "learning_rate": 8.170790871568887e-06, + "loss": 157.7504, + "step": 43710 + }, + { + "epoch": 0.3616660462422964, + "grad_norm": 1688.2108154296875, + "learning_rate": 8.169700244905239e-06, + "loss": 123.6984, + "step": 43720 + }, + { + "epoch": 0.3617487694916656, + "grad_norm": 1116.3714599609375, + "learning_rate": 8.168609366043089e-06, + "loss": 92.2827, + "step": 43730 + }, + { + "epoch": 0.3618314927410349, + "grad_norm": 845.093505859375, + "learning_rate": 8.167518235069234e-06, + "loss": 77.9922, + "step": 43740 + }, + { + "epoch": 0.3619142159904041, + "grad_norm": 2042.611572265625, + "learning_rate": 8.16642685207049e-06, + "loss": 141.047, + "step": 43750 + }, + { + "epoch": 0.36199693923977333, + "grad_norm": 602.3466186523438, + "learning_rate": 8.165335217133695e-06, + "loss": 122.7751, + "step": 43760 + }, + { + "epoch": 0.3620796624891426, + "grad_norm": 998.31005859375, + "learning_rate": 8.164243330345702e-06, + "loss": 95.6849, + "step": 43770 + }, + { + "epoch": 0.3621623857385118, + "grad_norm": 939.9814453125, + "learning_rate": 8.16315119179339e-06, + "loss": 88.2418, + "step": 43780 + }, + { + "epoch": 0.36224510898788104, + "grad_norm": 2236.7392578125, + "learning_rate": 8.162058801563652e-06, + "loss": 116.7937, + "step": 43790 + }, + { + "epoch": 0.3623278322372503, + "grad_norm": 973.8187255859375, + "learning_rate": 8.160966159743411e-06, + "loss": 94.5988, + "step": 43800 + }, + { + "epoch": 0.3624105554866195, + "grad_norm": 640.3901977539062, + "learning_rate": 8.159873266419598e-06, + "loss": 103.685, + "step": 43810 + }, + { + "epoch": 0.36249327873598874, + "grad_norm": 1432.554931640625, + "learning_rate": 8.15878012167917e-06, + "loss": 106.0658, + "step": 43820 + }, + { + "epoch": 0.362576001985358, + "grad_norm": 859.1890258789062, + "learning_rate": 8.157686725609105e-06, + "loss": 87.8233, + "step": 43830 + }, + { + "epoch": 0.3626587252347272, + "grad_norm": 1776.3751220703125, + "learning_rate": 8.1565930782964e-06, + "loss": 114.2181, + "step": 43840 + }, + { + "epoch": 0.36274144848409645, + "grad_norm": 926.181884765625, + "learning_rate": 8.155499179828068e-06, + "loss": 114.7968, + "step": 43850 + }, + { + "epoch": 0.3628241717334657, + "grad_norm": 959.3102416992188, + "learning_rate": 8.15440503029115e-06, + "loss": 105.2323, + "step": 43860 + }, + { + "epoch": 0.3629068949828349, + "grad_norm": 1929.3040771484375, + "learning_rate": 8.153310629772702e-06, + "loss": 131.4064, + "step": 43870 + }, + { + "epoch": 0.36298961823220416, + "grad_norm": 1120.273193359375, + "learning_rate": 8.152215978359796e-06, + "loss": 92.3281, + "step": 43880 + }, + { + "epoch": 0.3630723414815734, + "grad_norm": 830.0736694335938, + 
"learning_rate": 8.151121076139534e-06, + "loss": 91.5073, + "step": 43890 + }, + { + "epoch": 0.3631550647309426, + "grad_norm": 1354.7823486328125, + "learning_rate": 8.150025923199027e-06, + "loss": 201.7689, + "step": 43900 + }, + { + "epoch": 0.36323778798031187, + "grad_norm": 1229.7574462890625, + "learning_rate": 8.148930519625417e-06, + "loss": 116.0604, + "step": 43910 + }, + { + "epoch": 0.3633205112296811, + "grad_norm": 1157.707763671875, + "learning_rate": 8.147834865505855e-06, + "loss": 118.8252, + "step": 43920 + }, + { + "epoch": 0.3634032344790503, + "grad_norm": 391.3431396484375, + "learning_rate": 8.14673896092752e-06, + "loss": 94.0042, + "step": 43930 + }, + { + "epoch": 0.3634859577284196, + "grad_norm": 963.4109497070312, + "learning_rate": 8.145642805977608e-06, + "loss": 94.306, + "step": 43940 + }, + { + "epoch": 0.3635686809777888, + "grad_norm": 680.21826171875, + "learning_rate": 8.144546400743334e-06, + "loss": 121.9921, + "step": 43950 + }, + { + "epoch": 0.363651404227158, + "grad_norm": 763.9702758789062, + "learning_rate": 8.143449745311934e-06, + "loss": 105.5048, + "step": 43960 + }, + { + "epoch": 0.3637341274765273, + "grad_norm": 706.5518188476562, + "learning_rate": 8.142352839770663e-06, + "loss": 112.0056, + "step": 43970 + }, + { + "epoch": 0.36381685072589653, + "grad_norm": 1533.0765380859375, + "learning_rate": 8.1412556842068e-06, + "loss": 109.1279, + "step": 43980 + }, + { + "epoch": 0.36389957397526573, + "grad_norm": 784.7587890625, + "learning_rate": 8.140158278707637e-06, + "loss": 121.243, + "step": 43990 + }, + { + "epoch": 0.363982297224635, + "grad_norm": 886.728759765625, + "learning_rate": 8.139060623360494e-06, + "loss": 131.5882, + "step": 44000 + }, + { + "epoch": 0.36406502047400424, + "grad_norm": 990.3309936523438, + "learning_rate": 8.1379627182527e-06, + "loss": 95.6152, + "step": 44010 + }, + { + "epoch": 0.36414774372337344, + "grad_norm": 1236.8133544921875, + "learning_rate": 8.136864563471617e-06, + "loss": 104.638, + "step": 44020 + }, + { + "epoch": 0.3642304669727427, + "grad_norm": 560.0664672851562, + "learning_rate": 8.135766159104615e-06, + "loss": 90.362, + "step": 44030 + }, + { + "epoch": 0.36431319022211195, + "grad_norm": 796.653076171875, + "learning_rate": 8.134667505239092e-06, + "loss": 99.4829, + "step": 44040 + }, + { + "epoch": 0.36439591347148115, + "grad_norm": 777.6304321289062, + "learning_rate": 8.133568601962462e-06, + "loss": 106.3731, + "step": 44050 + }, + { + "epoch": 0.3644786367208504, + "grad_norm": 746.3777465820312, + "learning_rate": 8.132469449362158e-06, + "loss": 101.6638, + "step": 44060 + }, + { + "epoch": 0.36456135997021966, + "grad_norm": 824.9530029296875, + "learning_rate": 8.131370047525637e-06, + "loss": 144.5076, + "step": 44070 + }, + { + "epoch": 0.36464408321958885, + "grad_norm": 1749.48095703125, + "learning_rate": 8.130270396540372e-06, + "loss": 107.8925, + "step": 44080 + }, + { + "epoch": 0.3647268064689581, + "grad_norm": 1590.762451171875, + "learning_rate": 8.129170496493857e-06, + "loss": 129.4328, + "step": 44090 + }, + { + "epoch": 0.36480952971832736, + "grad_norm": 854.1180419921875, + "learning_rate": 8.128070347473609e-06, + "loss": 97.3397, + "step": 44100 + }, + { + "epoch": 0.36489225296769656, + "grad_norm": 920.0262451171875, + "learning_rate": 8.126969949567157e-06, + "loss": 90.8626, + "step": 44110 + }, + { + "epoch": 0.3649749762170658, + "grad_norm": 802.9644775390625, + "learning_rate": 8.125869302862058e-06, + "loss": 101.2598, + "step": 
44120 + }, + { + "epoch": 0.365057699466435, + "grad_norm": 721.7289428710938, + "learning_rate": 8.124768407445883e-06, + "loss": 91.293, + "step": 44130 + }, + { + "epoch": 0.36514042271580427, + "grad_norm": 452.2812805175781, + "learning_rate": 8.123667263406228e-06, + "loss": 115.1237, + "step": 44140 + }, + { + "epoch": 0.3652231459651735, + "grad_norm": 755.5316162109375, + "learning_rate": 8.122565870830704e-06, + "loss": 95.5803, + "step": 44150 + }, + { + "epoch": 0.3653058692145427, + "grad_norm": 583.1863403320312, + "learning_rate": 8.121464229806944e-06, + "loss": 91.2347, + "step": 44160 + }, + { + "epoch": 0.365388592463912, + "grad_norm": 894.2572631835938, + "learning_rate": 8.120362340422601e-06, + "loss": 101.122, + "step": 44170 + }, + { + "epoch": 0.36547131571328123, + "grad_norm": 1000.3782958984375, + "learning_rate": 8.119260202765347e-06, + "loss": 104.094, + "step": 44180 + }, + { + "epoch": 0.36555403896265043, + "grad_norm": 1205.3262939453125, + "learning_rate": 8.118157816922874e-06, + "loss": 107.1193, + "step": 44190 + }, + { + "epoch": 0.3656367622120197, + "grad_norm": 1496.134033203125, + "learning_rate": 8.117055182982895e-06, + "loss": 84.7695, + "step": 44200 + }, + { + "epoch": 0.36571948546138894, + "grad_norm": 1437.298095703125, + "learning_rate": 8.115952301033141e-06, + "loss": 117.0865, + "step": 44210 + }, + { + "epoch": 0.36580220871075814, + "grad_norm": 1193.9093017578125, + "learning_rate": 8.11484917116136e-06, + "loss": 104.2912, + "step": 44220 + }, + { + "epoch": 0.3658849319601274, + "grad_norm": 912.3463745117188, + "learning_rate": 8.113745793455328e-06, + "loss": 105.9879, + "step": 44230 + }, + { + "epoch": 0.36596765520949665, + "grad_norm": 1175.63134765625, + "learning_rate": 8.112642168002831e-06, + "loss": 95.1146, + "step": 44240 + }, + { + "epoch": 0.36605037845886584, + "grad_norm": 957.2208862304688, + "learning_rate": 8.111538294891684e-06, + "loss": 133.3301, + "step": 44250 + }, + { + "epoch": 0.3661331017082351, + "grad_norm": 780.9307861328125, + "learning_rate": 8.110434174209714e-06, + "loss": 112.0869, + "step": 44260 + }, + { + "epoch": 0.36621582495760435, + "grad_norm": 1046.54052734375, + "learning_rate": 8.109329806044772e-06, + "loss": 122.4268, + "step": 44270 + }, + { + "epoch": 0.36629854820697355, + "grad_norm": 593.833984375, + "learning_rate": 8.108225190484728e-06, + "loss": 134.9322, + "step": 44280 + }, + { + "epoch": 0.3663812714563428, + "grad_norm": 1084.296630859375, + "learning_rate": 8.107120327617469e-06, + "loss": 108.1544, + "step": 44290 + }, + { + "epoch": 0.36646399470571206, + "grad_norm": 795.716552734375, + "learning_rate": 8.106015217530906e-06, + "loss": 104.3661, + "step": 44300 + }, + { + "epoch": 0.36654671795508126, + "grad_norm": 969.0634765625, + "learning_rate": 8.104909860312968e-06, + "loss": 118.4515, + "step": 44310 + }, + { + "epoch": 0.3666294412044505, + "grad_norm": 1363.8905029296875, + "learning_rate": 8.1038042560516e-06, + "loss": 109.1337, + "step": 44320 + }, + { + "epoch": 0.36671216445381977, + "grad_norm": 715.2791748046875, + "learning_rate": 8.102698404834773e-06, + "loss": 79.2838, + "step": 44330 + }, + { + "epoch": 0.36679488770318897, + "grad_norm": 1469.4854736328125, + "learning_rate": 8.101592306750472e-06, + "loss": 110.5569, + "step": 44340 + }, + { + "epoch": 0.3668776109525582, + "grad_norm": 992.7304077148438, + "learning_rate": 8.100485961886707e-06, + "loss": 97.823, + "step": 44350 + }, + { + "epoch": 0.3669603342019275, + "grad_norm": 
1063.874755859375, + "learning_rate": 8.099379370331502e-06, + "loss": 112.1215, + "step": 44360 + }, + { + "epoch": 0.3670430574512967, + "grad_norm": 726.4131469726562, + "learning_rate": 8.098272532172906e-06, + "loss": 135.1273, + "step": 44370 + }, + { + "epoch": 0.3671257807006659, + "grad_norm": 1017.98193359375, + "learning_rate": 8.097165447498985e-06, + "loss": 102.8711, + "step": 44380 + }, + { + "epoch": 0.3672085039500352, + "grad_norm": 1006.7951049804688, + "learning_rate": 8.09605811639782e-06, + "loss": 120.6296, + "step": 44390 + }, + { + "epoch": 0.3672912271994044, + "grad_norm": 1553.7777099609375, + "learning_rate": 8.094950538957523e-06, + "loss": 116.9153, + "step": 44400 + }, + { + "epoch": 0.36737395044877363, + "grad_norm": 578.3179931640625, + "learning_rate": 8.093842715266214e-06, + "loss": 86.2257, + "step": 44410 + }, + { + "epoch": 0.3674566736981429, + "grad_norm": 1568.3966064453125, + "learning_rate": 8.092734645412037e-06, + "loss": 103.4828, + "step": 44420 + }, + { + "epoch": 0.3675393969475121, + "grad_norm": 1186.5718994140625, + "learning_rate": 8.09162632948316e-06, + "loss": 117.7764, + "step": 44430 + }, + { + "epoch": 0.36762212019688134, + "grad_norm": 1135.4630126953125, + "learning_rate": 8.090517767567765e-06, + "loss": 95.9603, + "step": 44440 + }, + { + "epoch": 0.3677048434462506, + "grad_norm": 959.4806518554688, + "learning_rate": 8.089408959754055e-06, + "loss": 99.0822, + "step": 44450 + }, + { + "epoch": 0.3677875666956198, + "grad_norm": 758.1735229492188, + "learning_rate": 8.088299906130252e-06, + "loss": 149.9401, + "step": 44460 + }, + { + "epoch": 0.36787028994498905, + "grad_norm": 791.1016845703125, + "learning_rate": 8.087190606784598e-06, + "loss": 79.0925, + "step": 44470 + }, + { + "epoch": 0.36795301319435825, + "grad_norm": 640.9752807617188, + "learning_rate": 8.086081061805357e-06, + "loss": 95.8233, + "step": 44480 + }, + { + "epoch": 0.3680357364437275, + "grad_norm": 1189.6053466796875, + "learning_rate": 8.084971271280808e-06, + "loss": 122.035, + "step": 44490 + }, + { + "epoch": 0.36811845969309676, + "grad_norm": 1168.3837890625, + "learning_rate": 8.083861235299253e-06, + "loss": 100.2019, + "step": 44500 + }, + { + "epoch": 0.36820118294246595, + "grad_norm": 891.6553955078125, + "learning_rate": 8.082750953949015e-06, + "loss": 115.5503, + "step": 44510 + }, + { + "epoch": 0.3682839061918352, + "grad_norm": 884.3746337890625, + "learning_rate": 8.081640427318429e-06, + "loss": 94.7234, + "step": 44520 + }, + { + "epoch": 0.36836662944120446, + "grad_norm": 760.7252807617188, + "learning_rate": 8.080529655495856e-06, + "loss": 126.0468, + "step": 44530 + }, + { + "epoch": 0.36844935269057366, + "grad_norm": 831.2738647460938, + "learning_rate": 8.079418638569679e-06, + "loss": 92.954, + "step": 44540 + }, + { + "epoch": 0.3685320759399429, + "grad_norm": 460.5909118652344, + "learning_rate": 8.078307376628292e-06, + "loss": 83.0053, + "step": 44550 + }, + { + "epoch": 0.36861479918931217, + "grad_norm": 795.8884887695312, + "learning_rate": 8.077195869760114e-06, + "loss": 86.7317, + "step": 44560 + }, + { + "epoch": 0.36869752243868137, + "grad_norm": 683.8246459960938, + "learning_rate": 8.076084118053584e-06, + "loss": 90.0042, + "step": 44570 + }, + { + "epoch": 0.3687802456880506, + "grad_norm": 1078.8155517578125, + "learning_rate": 8.074972121597158e-06, + "loss": 140.497, + "step": 44580 + }, + { + "epoch": 0.3688629689374199, + "grad_norm": 447.01214599609375, + "learning_rate": 
8.073859880479314e-06, + "loss": 76.7604, + "step": 44590 + }, + { + "epoch": 0.3689456921867891, + "grad_norm": 948.76416015625, + "learning_rate": 8.072747394788545e-06, + "loss": 83.8023, + "step": 44600 + }, + { + "epoch": 0.36902841543615833, + "grad_norm": 1048.3646240234375, + "learning_rate": 8.071634664613367e-06, + "loss": 102.7217, + "step": 44610 + }, + { + "epoch": 0.3691111386855276, + "grad_norm": 859.9012451171875, + "learning_rate": 8.070521690042317e-06, + "loss": 103.1552, + "step": 44620 + }, + { + "epoch": 0.3691938619348968, + "grad_norm": 1066.72314453125, + "learning_rate": 8.069408471163947e-06, + "loss": 121.4919, + "step": 44630 + }, + { + "epoch": 0.36927658518426604, + "grad_norm": 969.5633544921875, + "learning_rate": 8.068295008066832e-06, + "loss": 128.1989, + "step": 44640 + }, + { + "epoch": 0.3693593084336353, + "grad_norm": 1198.6636962890625, + "learning_rate": 8.067181300839565e-06, + "loss": 161.2369, + "step": 44650 + }, + { + "epoch": 0.3694420316830045, + "grad_norm": 700.932861328125, + "learning_rate": 8.066067349570757e-06, + "loss": 86.4731, + "step": 44660 + }, + { + "epoch": 0.36952475493237374, + "grad_norm": 1984.68017578125, + "learning_rate": 8.064953154349042e-06, + "loss": 85.2434, + "step": 44670 + }, + { + "epoch": 0.369607478181743, + "grad_norm": 951.8640747070312, + "learning_rate": 8.063838715263072e-06, + "loss": 92.2627, + "step": 44680 + }, + { + "epoch": 0.3696902014311122, + "grad_norm": 991.0081176757812, + "learning_rate": 8.062724032401515e-06, + "loss": 82.2411, + "step": 44690 + }, + { + "epoch": 0.36977292468048145, + "grad_norm": 915.6588134765625, + "learning_rate": 8.061609105853062e-06, + "loss": 123.3313, + "step": 44700 + }, + { + "epoch": 0.3698556479298507, + "grad_norm": 648.7603759765625, + "learning_rate": 8.060493935706425e-06, + "loss": 86.3172, + "step": 44710 + }, + { + "epoch": 0.3699383711792199, + "grad_norm": 731.9179077148438, + "learning_rate": 8.059378522050332e-06, + "loss": 142.6297, + "step": 44720 + }, + { + "epoch": 0.37002109442858916, + "grad_norm": 738.2081909179688, + "learning_rate": 8.05826286497353e-06, + "loss": 100.2493, + "step": 44730 + }, + { + "epoch": 0.3701038176779584, + "grad_norm": 961.7689208984375, + "learning_rate": 8.057146964564786e-06, + "loss": 108.2104, + "step": 44740 + }, + { + "epoch": 0.3701865409273276, + "grad_norm": 1143.8248291015625, + "learning_rate": 8.05603082091289e-06, + "loss": 102.1352, + "step": 44750 + }, + { + "epoch": 0.37026926417669687, + "grad_norm": 968.6405639648438, + "learning_rate": 8.054914434106647e-06, + "loss": 89.4334, + "step": 44760 + }, + { + "epoch": 0.3703519874260661, + "grad_norm": 800.0516967773438, + "learning_rate": 8.053797804234882e-06, + "loss": 100.4505, + "step": 44770 + }, + { + "epoch": 0.3704347106754353, + "grad_norm": 928.8748168945312, + "learning_rate": 8.052680931386441e-06, + "loss": 88.3113, + "step": 44780 + }, + { + "epoch": 0.3705174339248046, + "grad_norm": 680.3980102539062, + "learning_rate": 8.051563815650187e-06, + "loss": 93.7541, + "step": 44790 + }, + { + "epoch": 0.3706001571741738, + "grad_norm": 988.8343505859375, + "learning_rate": 8.050446457115005e-06, + "loss": 138.652, + "step": 44800 + }, + { + "epoch": 0.370682880423543, + "grad_norm": 5196.66015625, + "learning_rate": 8.0493288558698e-06, + "loss": 161.6813, + "step": 44810 + }, + { + "epoch": 0.3707656036729123, + "grad_norm": 1195.2532958984375, + "learning_rate": 8.04821101200349e-06, + "loss": 104.2872, + "step": 44820 + }, + { + 
"epoch": 0.37084832692228153, + "grad_norm": 916.4719848632812, + "learning_rate": 8.047092925605022e-06, + "loss": 72.5952, + "step": 44830 + }, + { + "epoch": 0.37093105017165073, + "grad_norm": 1935.22119140625, + "learning_rate": 8.045974596763352e-06, + "loss": 124.3693, + "step": 44840 + }, + { + "epoch": 0.37101377342102, + "grad_norm": 660.0169677734375, + "learning_rate": 8.044856025567464e-06, + "loss": 118.5706, + "step": 44850 + }, + { + "epoch": 0.3710964966703892, + "grad_norm": 1358.344482421875, + "learning_rate": 8.043737212106356e-06, + "loss": 116.8551, + "step": 44860 + }, + { + "epoch": 0.37117921991975844, + "grad_norm": 963.0214233398438, + "learning_rate": 8.042618156469045e-06, + "loss": 88.5938, + "step": 44870 + }, + { + "epoch": 0.3712619431691277, + "grad_norm": 718.8231811523438, + "learning_rate": 8.041498858744572e-06, + "loss": 93.5438, + "step": 44880 + }, + { + "epoch": 0.3713446664184969, + "grad_norm": 884.6295776367188, + "learning_rate": 8.040379319021994e-06, + "loss": 98.0878, + "step": 44890 + }, + { + "epoch": 0.37142738966786615, + "grad_norm": 818.326171875, + "learning_rate": 8.039259537390388e-06, + "loss": 106.2791, + "step": 44900 + }, + { + "epoch": 0.3715101129172354, + "grad_norm": 722.222900390625, + "learning_rate": 8.038139513938847e-06, + "loss": 101.4262, + "step": 44910 + }, + { + "epoch": 0.3715928361666046, + "grad_norm": 706.7052001953125, + "learning_rate": 8.037019248756488e-06, + "loss": 103.0186, + "step": 44920 + }, + { + "epoch": 0.37167555941597386, + "grad_norm": 783.2863159179688, + "learning_rate": 8.035898741932447e-06, + "loss": 101.9469, + "step": 44930 + }, + { + "epoch": 0.3717582826653431, + "grad_norm": 1484.9461669921875, + "learning_rate": 8.034777993555875e-06, + "loss": 140.325, + "step": 44940 + }, + { + "epoch": 0.3718410059147123, + "grad_norm": 669.56640625, + "learning_rate": 8.033657003715945e-06, + "loss": 116.9738, + "step": 44950 + }, + { + "epoch": 0.37192372916408156, + "grad_norm": 705.1654663085938, + "learning_rate": 8.032535772501851e-06, + "loss": 102.4115, + "step": 44960 + }, + { + "epoch": 0.3720064524134508, + "grad_norm": 1263.1771240234375, + "learning_rate": 8.031414300002802e-06, + "loss": 77.7587, + "step": 44970 + }, + { + "epoch": 0.37208917566282, + "grad_norm": 1212.223876953125, + "learning_rate": 8.03029258630803e-06, + "loss": 112.9885, + "step": 44980 + }, + { + "epoch": 0.37217189891218927, + "grad_norm": 951.40185546875, + "learning_rate": 8.029170631506785e-06, + "loss": 95.1384, + "step": 44990 + }, + { + "epoch": 0.3722546221615585, + "grad_norm": 1134.679443359375, + "learning_rate": 8.028048435688333e-06, + "loss": 106.5822, + "step": 45000 + }, + { + "epoch": 0.3723373454109277, + "grad_norm": 932.267822265625, + "learning_rate": 8.026925998941965e-06, + "loss": 110.1278, + "step": 45010 + }, + { + "epoch": 0.372420068660297, + "grad_norm": 457.553955078125, + "learning_rate": 8.025803321356989e-06, + "loss": 95.6591, + "step": 45020 + }, + { + "epoch": 0.37250279190966623, + "grad_norm": 655.8544921875, + "learning_rate": 8.024680403022726e-06, + "loss": 89.4721, + "step": 45030 + }, + { + "epoch": 0.37258551515903543, + "grad_norm": 893.6047973632812, + "learning_rate": 8.023557244028526e-06, + "loss": 101.3761, + "step": 45040 + }, + { + "epoch": 0.3726682384084047, + "grad_norm": 765.0892333984375, + "learning_rate": 8.022433844463752e-06, + "loss": 85.2344, + "step": 45050 + }, + { + "epoch": 0.37275096165777394, + "grad_norm": 955.836181640625, + "learning_rate": 
8.02131020441779e-06, + "loss": 105.1853, + "step": 45060 + }, + { + "epoch": 0.37283368490714314, + "grad_norm": 1159.584228515625, + "learning_rate": 8.02018632398004e-06, + "loss": 82.6549, + "step": 45070 + }, + { + "epoch": 0.3729164081565124, + "grad_norm": 827.6931762695312, + "learning_rate": 8.019062203239923e-06, + "loss": 145.6601, + "step": 45080 + }, + { + "epoch": 0.37299913140588165, + "grad_norm": 1674.2239990234375, + "learning_rate": 8.017937842286882e-06, + "loss": 104.1544, + "step": 45090 + }, + { + "epoch": 0.37308185465525084, + "grad_norm": 1010.4392700195312, + "learning_rate": 8.01681324121038e-06, + "loss": 102.7155, + "step": 45100 + }, + { + "epoch": 0.3731645779046201, + "grad_norm": 812.0460205078125, + "learning_rate": 8.015688400099893e-06, + "loss": 80.9767, + "step": 45110 + }, + { + "epoch": 0.37324730115398935, + "grad_norm": 676.9262084960938, + "learning_rate": 8.014563319044919e-06, + "loss": 107.2131, + "step": 45120 + }, + { + "epoch": 0.37333002440335855, + "grad_norm": 1148.935302734375, + "learning_rate": 8.013437998134978e-06, + "loss": 96.8717, + "step": 45130 + }, + { + "epoch": 0.3734127476527278, + "grad_norm": 2210.831298828125, + "learning_rate": 8.012312437459604e-06, + "loss": 115.9179, + "step": 45140 + }, + { + "epoch": 0.37349547090209706, + "grad_norm": 740.3560180664062, + "learning_rate": 8.011186637108354e-06, + "loss": 95.7675, + "step": 45150 + }, + { + "epoch": 0.37357819415146626, + "grad_norm": 1315.1302490234375, + "learning_rate": 8.010060597170805e-06, + "loss": 108.1328, + "step": 45160 + }, + { + "epoch": 0.3736609174008355, + "grad_norm": 1144.85400390625, + "learning_rate": 8.008934317736546e-06, + "loss": 104.8319, + "step": 45170 + }, + { + "epoch": 0.37374364065020477, + "grad_norm": 642.925048828125, + "learning_rate": 8.007807798895195e-06, + "loss": 114.467, + "step": 45180 + }, + { + "epoch": 0.37382636389957397, + "grad_norm": 1433.939208984375, + "learning_rate": 8.00668104073638e-06, + "loss": 142.9769, + "step": 45190 + }, + { + "epoch": 0.3739090871489432, + "grad_norm": 615.9909057617188, + "learning_rate": 8.005554043349753e-06, + "loss": 77.9182, + "step": 45200 + }, + { + "epoch": 0.3739918103983124, + "grad_norm": 1355.69287109375, + "learning_rate": 8.004426806824985e-06, + "loss": 98.0649, + "step": 45210 + }, + { + "epoch": 0.3740745336476817, + "grad_norm": 716.5189208984375, + "learning_rate": 8.003299331251764e-06, + "loss": 113.2009, + "step": 45220 + }, + { + "epoch": 0.3741572568970509, + "grad_norm": 860.948974609375, + "learning_rate": 8.002171616719798e-06, + "loss": 89.8221, + "step": 45230 + }, + { + "epoch": 0.3742399801464201, + "grad_norm": 1081.593994140625, + "learning_rate": 8.001043663318815e-06, + "loss": 148.4487, + "step": 45240 + }, + { + "epoch": 0.3743227033957894, + "grad_norm": 714.3857421875, + "learning_rate": 7.999915471138562e-06, + "loss": 111.338, + "step": 45250 + }, + { + "epoch": 0.37440542664515863, + "grad_norm": 863.04052734375, + "learning_rate": 7.9987870402688e-06, + "loss": 123.2091, + "step": 45260 + }, + { + "epoch": 0.37448814989452783, + "grad_norm": 541.889404296875, + "learning_rate": 7.997658370799318e-06, + "loss": 122.7542, + "step": 45270 + }, + { + "epoch": 0.3745708731438971, + "grad_norm": 948.584716796875, + "learning_rate": 7.996529462819915e-06, + "loss": 119.426, + "step": 45280 + }, + { + "epoch": 0.37465359639326634, + "grad_norm": 736.9681396484375, + "learning_rate": 7.995400316420416e-06, + "loss": 64.0782, + "step": 45290 + }, + { + 
"epoch": 0.37473631964263554, + "grad_norm": 1551.1883544921875, + "learning_rate": 7.994270931690662e-06, + "loss": 136.7847, + "step": 45300 + }, + { + "epoch": 0.3748190428920048, + "grad_norm": 804.4282836914062, + "learning_rate": 7.993141308720511e-06, + "loss": 136.6521, + "step": 45310 + }, + { + "epoch": 0.37490176614137405, + "grad_norm": 778.236083984375, + "learning_rate": 7.99201144759984e-06, + "loss": 95.8208, + "step": 45320 + }, + { + "epoch": 0.37498448939074325, + "grad_norm": 2299.00048828125, + "learning_rate": 7.990881348418554e-06, + "loss": 124.4509, + "step": 45330 + }, + { + "epoch": 0.3750672126401125, + "grad_norm": 973.8239135742188, + "learning_rate": 7.989751011266565e-06, + "loss": 112.5193, + "step": 45340 + }, + { + "epoch": 0.37514993588948176, + "grad_norm": 745.3870239257812, + "learning_rate": 7.988620436233806e-06, + "loss": 112.1299, + "step": 45350 + }, + { + "epoch": 0.37523265913885095, + "grad_norm": 2419.01953125, + "learning_rate": 7.987489623410236e-06, + "loss": 130.317, + "step": 45360 + }, + { + "epoch": 0.3753153823882202, + "grad_norm": 2003.9739990234375, + "learning_rate": 7.986358572885828e-06, + "loss": 118.5811, + "step": 45370 + }, + { + "epoch": 0.37539810563758946, + "grad_norm": 1251.0047607421875, + "learning_rate": 7.985227284750574e-06, + "loss": 106.9141, + "step": 45380 + }, + { + "epoch": 0.37548082888695866, + "grad_norm": 879.7947998046875, + "learning_rate": 7.984095759094485e-06, + "loss": 93.4843, + "step": 45390 + }, + { + "epoch": 0.3755635521363279, + "grad_norm": 852.9107055664062, + "learning_rate": 7.982963996007591e-06, + "loss": 141.075, + "step": 45400 + }, + { + "epoch": 0.37564627538569717, + "grad_norm": 952.68798828125, + "learning_rate": 7.981831995579943e-06, + "loss": 91.4658, + "step": 45410 + }, + { + "epoch": 0.37572899863506637, + "grad_norm": 1191.1885986328125, + "learning_rate": 7.980699757901607e-06, + "loss": 80.4354, + "step": 45420 + }, + { + "epoch": 0.3758117218844356, + "grad_norm": 813.9268188476562, + "learning_rate": 7.97956728306267e-06, + "loss": 90.7331, + "step": 45430 + }, + { + "epoch": 0.3758944451338049, + "grad_norm": 697.1983032226562, + "learning_rate": 7.97843457115324e-06, + "loss": 98.3303, + "step": 45440 + }, + { + "epoch": 0.3759771683831741, + "grad_norm": 628.31982421875, + "learning_rate": 7.97730162226344e-06, + "loss": 103.7881, + "step": 45450 + }, + { + "epoch": 0.37605989163254333, + "grad_norm": 2688.026123046875, + "learning_rate": 7.976168436483415e-06, + "loss": 97.9229, + "step": 45460 + }, + { + "epoch": 0.3761426148819126, + "grad_norm": 813.2411499023438, + "learning_rate": 7.975035013903326e-06, + "loss": 94.9314, + "step": 45470 + }, + { + "epoch": 0.3762253381312818, + "grad_norm": 712.8557739257812, + "learning_rate": 7.973901354613353e-06, + "loss": 85.9302, + "step": 45480 + }, + { + "epoch": 0.37630806138065104, + "grad_norm": 1530.84033203125, + "learning_rate": 7.972767458703697e-06, + "loss": 128.0701, + "step": 45490 + }, + { + "epoch": 0.3763907846300203, + "grad_norm": 1192.7733154296875, + "learning_rate": 7.971633326264581e-06, + "loss": 99.9536, + "step": 45500 + }, + { + "epoch": 0.3764735078793895, + "grad_norm": 854.33203125, + "learning_rate": 7.970498957386237e-06, + "loss": 121.3529, + "step": 45510 + }, + { + "epoch": 0.37655623112875874, + "grad_norm": 673.5110473632812, + "learning_rate": 7.969364352158922e-06, + "loss": 102.1387, + "step": 45520 + }, + { + "epoch": 0.376638954378128, + "grad_norm": 868.6480712890625, + 
"learning_rate": 7.968229510672915e-06, + "loss": 97.5747, + "step": 45530 + }, + { + "epoch": 0.3767216776274972, + "grad_norm": 1399.73779296875, + "learning_rate": 7.967094433018508e-06, + "loss": 125.0937, + "step": 45540 + }, + { + "epoch": 0.37680440087686645, + "grad_norm": 1239.8349609375, + "learning_rate": 7.965959119286013e-06, + "loss": 113.5951, + "step": 45550 + }, + { + "epoch": 0.37688712412623565, + "grad_norm": 1359.953125, + "learning_rate": 7.964823569565765e-06, + "loss": 103.3041, + "step": 45560 + }, + { + "epoch": 0.3769698473756049, + "grad_norm": 720.0075073242188, + "learning_rate": 7.963687783948111e-06, + "loss": 81.487, + "step": 45570 + }, + { + "epoch": 0.37705257062497416, + "grad_norm": 1454.417724609375, + "learning_rate": 7.96255176252342e-06, + "loss": 83.4532, + "step": 45580 + }, + { + "epoch": 0.37713529387434336, + "grad_norm": 1103.7496337890625, + "learning_rate": 7.961415505382083e-06, + "loss": 125.8114, + "step": 45590 + }, + { + "epoch": 0.3772180171237126, + "grad_norm": 948.4056396484375, + "learning_rate": 7.960279012614508e-06, + "loss": 90.2628, + "step": 45600 + }, + { + "epoch": 0.37730074037308187, + "grad_norm": 1128.2615966796875, + "learning_rate": 7.959142284311115e-06, + "loss": 113.3847, + "step": 45610 + }, + { + "epoch": 0.37738346362245107, + "grad_norm": 921.497802734375, + "learning_rate": 7.958005320562349e-06, + "loss": 125.8096, + "step": 45620 + }, + { + "epoch": 0.3774661868718203, + "grad_norm": 1018.8650512695312, + "learning_rate": 7.95686812145868e-06, + "loss": 122.7865, + "step": 45630 + }, + { + "epoch": 0.3775489101211896, + "grad_norm": 979.5180053710938, + "learning_rate": 7.955730687090582e-06, + "loss": 125.0732, + "step": 45640 + }, + { + "epoch": 0.3776316333705588, + "grad_norm": 978.5775756835938, + "learning_rate": 7.954593017548557e-06, + "loss": 87.2099, + "step": 45650 + }, + { + "epoch": 0.377714356619928, + "grad_norm": 978.8362426757812, + "learning_rate": 7.953455112923127e-06, + "loss": 134.838, + "step": 45660 + }, + { + "epoch": 0.3777970798692973, + "grad_norm": 961.3007202148438, + "learning_rate": 7.952316973304828e-06, + "loss": 109.7581, + "step": 45670 + }, + { + "epoch": 0.3778798031186665, + "grad_norm": 872.6028442382812, + "learning_rate": 7.951178598784217e-06, + "loss": 94.058, + "step": 45680 + }, + { + "epoch": 0.37796252636803573, + "grad_norm": 603.7721557617188, + "learning_rate": 7.950039989451868e-06, + "loss": 142.0334, + "step": 45690 + }, + { + "epoch": 0.378045249617405, + "grad_norm": 769.1307373046875, + "learning_rate": 7.948901145398376e-06, + "loss": 101.5725, + "step": 45700 + }, + { + "epoch": 0.3781279728667742, + "grad_norm": 523.3740844726562, + "learning_rate": 7.947762066714353e-06, + "loss": 99.7676, + "step": 45710 + }, + { + "epoch": 0.37821069611614344, + "grad_norm": 569.120361328125, + "learning_rate": 7.946622753490433e-06, + "loss": 73.9602, + "step": 45720 + }, + { + "epoch": 0.3782934193655127, + "grad_norm": 850.3909912109375, + "learning_rate": 7.945483205817262e-06, + "loss": 99.4327, + "step": 45730 + }, + { + "epoch": 0.3783761426148819, + "grad_norm": 1302.0482177734375, + "learning_rate": 7.94434342378551e-06, + "loss": 132.1461, + "step": 45740 + }, + { + "epoch": 0.37845886586425115, + "grad_norm": 698.3201293945312, + "learning_rate": 7.943203407485864e-06, + "loss": 69.9039, + "step": 45750 + }, + { + "epoch": 0.3785415891136204, + "grad_norm": 911.8252563476562, + "learning_rate": 7.942063157009033e-06, + "loss": 102.4791, + "step": 
45760 + }, + { + "epoch": 0.3786243123629896, + "grad_norm": 991.9946899414062, + "learning_rate": 7.940922672445737e-06, + "loss": 113.1581, + "step": 45770 + }, + { + "epoch": 0.37870703561235886, + "grad_norm": 1038.773681640625, + "learning_rate": 7.939781953886722e-06, + "loss": 128.2211, + "step": 45780 + }, + { + "epoch": 0.3787897588617281, + "grad_norm": 1063.1624755859375, + "learning_rate": 7.938641001422747e-06, + "loss": 106.1225, + "step": 45790 + }, + { + "epoch": 0.3788724821110973, + "grad_norm": 897.7556762695312, + "learning_rate": 7.937499815144597e-06, + "loss": 102.7701, + "step": 45800 + }, + { + "epoch": 0.37895520536046656, + "grad_norm": 454.84552001953125, + "learning_rate": 7.936358395143065e-06, + "loss": 133.0522, + "step": 45810 + }, + { + "epoch": 0.3790379286098358, + "grad_norm": 491.5146484375, + "learning_rate": 7.935216741508971e-06, + "loss": 131.0702, + "step": 45820 + }, + { + "epoch": 0.379120651859205, + "grad_norm": 816.953125, + "learning_rate": 7.934074854333153e-06, + "loss": 102.6913, + "step": 45830 + }, + { + "epoch": 0.37920337510857427, + "grad_norm": 991.1273193359375, + "learning_rate": 7.932932733706467e-06, + "loss": 96.2913, + "step": 45840 + }, + { + "epoch": 0.3792860983579435, + "grad_norm": 2715.989501953125, + "learning_rate": 7.931790379719781e-06, + "loss": 145.7241, + "step": 45850 + }, + { + "epoch": 0.3793688216073127, + "grad_norm": 733.826416015625, + "learning_rate": 7.93064779246399e-06, + "loss": 79.5076, + "step": 45860 + }, + { + "epoch": 0.379451544856682, + "grad_norm": 1925.5067138671875, + "learning_rate": 7.929504972030003e-06, + "loss": 129.3025, + "step": 45870 + }, + { + "epoch": 0.37953426810605123, + "grad_norm": 723.109130859375, + "learning_rate": 7.928361918508752e-06, + "loss": 124.5502, + "step": 45880 + }, + { + "epoch": 0.37961699135542043, + "grad_norm": 942.7910766601562, + "learning_rate": 7.927218631991182e-06, + "loss": 87.0695, + "step": 45890 + }, + { + "epoch": 0.3796997146047897, + "grad_norm": 1327.8336181640625, + "learning_rate": 7.92607511256826e-06, + "loss": 90.3665, + "step": 45900 + }, + { + "epoch": 0.37978243785415894, + "grad_norm": 1204.0654296875, + "learning_rate": 7.924931360330968e-06, + "loss": 120.6505, + "step": 45910 + }, + { + "epoch": 0.37986516110352814, + "grad_norm": 643.5101928710938, + "learning_rate": 7.92378737537031e-06, + "loss": 84.0929, + "step": 45920 + }, + { + "epoch": 0.3799478843528974, + "grad_norm": 1710.7091064453125, + "learning_rate": 7.922643157777314e-06, + "loss": 94.1093, + "step": 45930 + }, + { + "epoch": 0.3800306076022666, + "grad_norm": 1306.8468017578125, + "learning_rate": 7.921498707643011e-06, + "loss": 110.1609, + "step": 45940 + }, + { + "epoch": 0.38011333085163584, + "grad_norm": 1266.775146484375, + "learning_rate": 7.920354025058467e-06, + "loss": 94.9035, + "step": 45950 + }, + { + "epoch": 0.3801960541010051, + "grad_norm": 1168.3138427734375, + "learning_rate": 7.919209110114752e-06, + "loss": 165.2224, + "step": 45960 + }, + { + "epoch": 0.3802787773503743, + "grad_norm": 760.196044921875, + "learning_rate": 7.918063962902968e-06, + "loss": 118.3697, + "step": 45970 + }, + { + "epoch": 0.38036150059974355, + "grad_norm": 1145.4422607421875, + "learning_rate": 7.916918583514227e-06, + "loss": 102.1873, + "step": 45980 + }, + { + "epoch": 0.3804442238491128, + "grad_norm": 523.3143310546875, + "learning_rate": 7.91577297203966e-06, + "loss": 94.5064, + "step": 45990 + }, + { + "epoch": 0.380526947098482, + "grad_norm": 
777.9324951171875, + "learning_rate": 7.91462712857042e-06, + "loss": 99.4327, + "step": 46000 + }, + { + "epoch": 0.38060967034785126, + "grad_norm": 1038.9837646484375, + "learning_rate": 7.913481053197673e-06, + "loss": 120.6933, + "step": 46010 + }, + { + "epoch": 0.3806923935972205, + "grad_norm": 702.442138671875, + "learning_rate": 7.912334746012613e-06, + "loss": 141.1689, + "step": 46020 + }, + { + "epoch": 0.3807751168465897, + "grad_norm": 910.6521606445312, + "learning_rate": 7.911188207106442e-06, + "loss": 84.091, + "step": 46030 + }, + { + "epoch": 0.38085784009595897, + "grad_norm": 836.0477905273438, + "learning_rate": 7.910041436570386e-06, + "loss": 92.7803, + "step": 46040 + }, + { + "epoch": 0.3809405633453282, + "grad_norm": 796.7591552734375, + "learning_rate": 7.90889443449569e-06, + "loss": 93.9601, + "step": 46050 + }, + { + "epoch": 0.3810232865946974, + "grad_norm": 1023.9012451171875, + "learning_rate": 7.90774720097361e-06, + "loss": 125.3338, + "step": 46060 + }, + { + "epoch": 0.3811060098440667, + "grad_norm": 1169.42822265625, + "learning_rate": 7.906599736095433e-06, + "loss": 89.7407, + "step": 46070 + }, + { + "epoch": 0.3811887330934359, + "grad_norm": 924.0548706054688, + "learning_rate": 7.905452039952453e-06, + "loss": 86.3304, + "step": 46080 + }, + { + "epoch": 0.3812714563428051, + "grad_norm": 595.2965087890625, + "learning_rate": 7.904304112635987e-06, + "loss": 106.9174, + "step": 46090 + }, + { + "epoch": 0.3813541795921744, + "grad_norm": 472.991943359375, + "learning_rate": 7.903155954237375e-06, + "loss": 120.8275, + "step": 46100 + }, + { + "epoch": 0.38143690284154363, + "grad_norm": 1009.1423950195312, + "learning_rate": 7.902007564847967e-06, + "loss": 105.188, + "step": 46110 + }, + { + "epoch": 0.38151962609091283, + "grad_norm": 851.7573852539062, + "learning_rate": 7.900858944559133e-06, + "loss": 111.3603, + "step": 46120 + }, + { + "epoch": 0.3816023493402821, + "grad_norm": 981.0646362304688, + "learning_rate": 7.899710093462267e-06, + "loss": 86.6864, + "step": 46130 + }, + { + "epoch": 0.38168507258965134, + "grad_norm": 864.4039306640625, + "learning_rate": 7.898561011648777e-06, + "loss": 155.4255, + "step": 46140 + }, + { + "epoch": 0.38176779583902054, + "grad_norm": 1161.7957763671875, + "learning_rate": 7.89741169921009e-06, + "loss": 92.5417, + "step": 46150 + }, + { + "epoch": 0.3818505190883898, + "grad_norm": 1559.1328125, + "learning_rate": 7.896262156237652e-06, + "loss": 109.0317, + "step": 46160 + }, + { + "epoch": 0.38193324233775905, + "grad_norm": 839.4773559570312, + "learning_rate": 7.895112382822925e-06, + "loss": 124.5884, + "step": 46170 + }, + { + "epoch": 0.38201596558712825, + "grad_norm": 1135.74658203125, + "learning_rate": 7.893962379057393e-06, + "loss": 115.562, + "step": 46180 + }, + { + "epoch": 0.3820986888364975, + "grad_norm": 979.9491577148438, + "learning_rate": 7.892812145032557e-06, + "loss": 118.3164, + "step": 46190 + }, + { + "epoch": 0.38218141208586676, + "grad_norm": 4294.3427734375, + "learning_rate": 7.891661680839932e-06, + "loss": 125.0205, + "step": 46200 + }, + { + "epoch": 0.38226413533523596, + "grad_norm": 933.6452026367188, + "learning_rate": 7.89051098657106e-06, + "loss": 121.8078, + "step": 46210 + }, + { + "epoch": 0.3823468585846052, + "grad_norm": 527.7791137695312, + "learning_rate": 7.889360062317495e-06, + "loss": 96.7531, + "step": 46220 + }, + { + "epoch": 0.38242958183397446, + "grad_norm": 777.3706665039062, + "learning_rate": 7.888208908170812e-06, + "loss": 
134.1928, + "step": 46230 + }, + { + "epoch": 0.38251230508334366, + "grad_norm": 805.9109497070312, + "learning_rate": 7.887057524222596e-06, + "loss": 101.3832, + "step": 46240 + }, + { + "epoch": 0.3825950283327129, + "grad_norm": 1473.904296875, + "learning_rate": 7.885905910564466e-06, + "loss": 87.6336, + "step": 46250 + }, + { + "epoch": 0.38267775158208217, + "grad_norm": 784.258056640625, + "learning_rate": 7.884754067288047e-06, + "loss": 94.394, + "step": 46260 + }, + { + "epoch": 0.38276047483145137, + "grad_norm": 1091.794677734375, + "learning_rate": 7.883601994484986e-06, + "loss": 106.1401, + "step": 46270 + }, + { + "epoch": 0.3828431980808206, + "grad_norm": 1147.5867919921875, + "learning_rate": 7.882449692246948e-06, + "loss": 104.8446, + "step": 46280 + }, + { + "epoch": 0.3829259213301898, + "grad_norm": 1361.256591796875, + "learning_rate": 7.881297160665616e-06, + "loss": 97.2336, + "step": 46290 + }, + { + "epoch": 0.3830086445795591, + "grad_norm": 1118.20556640625, + "learning_rate": 7.880144399832693e-06, + "loss": 109.2898, + "step": 46300 + }, + { + "epoch": 0.38309136782892833, + "grad_norm": 1180.1666259765625, + "learning_rate": 7.878991409839897e-06, + "loss": 107.2897, + "step": 46310 + }, + { + "epoch": 0.38317409107829753, + "grad_norm": 976.9520874023438, + "learning_rate": 7.87783819077897e-06, + "loss": 127.2354, + "step": 46320 + }, + { + "epoch": 0.3832568143276668, + "grad_norm": 569.0108032226562, + "learning_rate": 7.876684742741665e-06, + "loss": 89.219, + "step": 46330 + }, + { + "epoch": 0.38333953757703604, + "grad_norm": 1490.4720458984375, + "learning_rate": 7.875531065819755e-06, + "loss": 88.2369, + "step": 46340 + }, + { + "epoch": 0.38342226082640524, + "grad_norm": 1101.564697265625, + "learning_rate": 7.874377160105037e-06, + "loss": 104.7651, + "step": 46350 + }, + { + "epoch": 0.3835049840757745, + "grad_norm": 1799.0706787109375, + "learning_rate": 7.873223025689319e-06, + "loss": 138.4782, + "step": 46360 + }, + { + "epoch": 0.38358770732514375, + "grad_norm": 1268.860595703125, + "learning_rate": 7.872068662664432e-06, + "loss": 91.4783, + "step": 46370 + }, + { + "epoch": 0.38367043057451294, + "grad_norm": 1431.305419921875, + "learning_rate": 7.870914071122222e-06, + "loss": 100.8851, + "step": 46380 + }, + { + "epoch": 0.3837531538238822, + "grad_norm": 1034.6007080078125, + "learning_rate": 7.869759251154554e-06, + "loss": 94.5458, + "step": 46390 + }, + { + "epoch": 0.38383587707325145, + "grad_norm": 1329.562255859375, + "learning_rate": 7.868604202853314e-06, + "loss": 95.9945, + "step": 46400 + }, + { + "epoch": 0.38391860032262065, + "grad_norm": 755.7239379882812, + "learning_rate": 7.867448926310403e-06, + "loss": 106.9036, + "step": 46410 + }, + { + "epoch": 0.3840013235719899, + "grad_norm": 1034.0926513671875, + "learning_rate": 7.866293421617741e-06, + "loss": 94.1212, + "step": 46420 + }, + { + "epoch": 0.38408404682135916, + "grad_norm": 1169.0289306640625, + "learning_rate": 7.865137688867264e-06, + "loss": 76.2746, + "step": 46430 + }, + { + "epoch": 0.38416677007072836, + "grad_norm": 1374.6610107421875, + "learning_rate": 7.86398172815093e-06, + "loss": 141.711, + "step": 46440 + }, + { + "epoch": 0.3842494933200976, + "grad_norm": 715.2680053710938, + "learning_rate": 7.862825539560716e-06, + "loss": 127.8841, + "step": 46450 + }, + { + "epoch": 0.38433221656946687, + "grad_norm": 1387.7430419921875, + "learning_rate": 7.861669123188613e-06, + "loss": 124.0174, + "step": 46460 + }, + { + "epoch": 
0.38441493981883607, + "grad_norm": 826.4485473632812, + "learning_rate": 7.86051247912663e-06, + "loss": 110.8221, + "step": 46470 + }, + { + "epoch": 0.3844976630682053, + "grad_norm": 807.095703125, + "learning_rate": 7.859355607466797e-06, + "loss": 87.1465, + "step": 46480 + }, + { + "epoch": 0.3845803863175746, + "grad_norm": 1193.6893310546875, + "learning_rate": 7.858198508301161e-06, + "loss": 133.6182, + "step": 46490 + }, + { + "epoch": 0.3846631095669438, + "grad_norm": 527.4124755859375, + "learning_rate": 7.857041181721788e-06, + "loss": 97.8456, + "step": 46500 + }, + { + "epoch": 0.384745832816313, + "grad_norm": 862.8912963867188, + "learning_rate": 7.855883627820757e-06, + "loss": 134.22, + "step": 46510 + }, + { + "epoch": 0.3848285560656823, + "grad_norm": 548.6507568359375, + "learning_rate": 7.854725846690175e-06, + "loss": 83.1777, + "step": 46520 + }, + { + "epoch": 0.3849112793150515, + "grad_norm": 902.584228515625, + "learning_rate": 7.85356783842216e-06, + "loss": 106.9863, + "step": 46530 + }, + { + "epoch": 0.38499400256442073, + "grad_norm": 535.5631103515625, + "learning_rate": 7.852409603108845e-06, + "loss": 113.9048, + "step": 46540 + }, + { + "epoch": 0.38507672581379, + "grad_norm": 938.954345703125, + "learning_rate": 7.85125114084239e-06, + "loss": 105.1793, + "step": 46550 + }, + { + "epoch": 0.3851594490631592, + "grad_norm": 1107.5294189453125, + "learning_rate": 7.850092451714967e-06, + "loss": 115.7462, + "step": 46560 + }, + { + "epoch": 0.38524217231252844, + "grad_norm": 1338.5953369140625, + "learning_rate": 7.84893353581877e-06, + "loss": 93.1394, + "step": 46570 + }, + { + "epoch": 0.3853248955618977, + "grad_norm": 1115.7308349609375, + "learning_rate": 7.847774393246005e-06, + "loss": 117.0743, + "step": 46580 + }, + { + "epoch": 0.3854076188112669, + "grad_norm": 712.0968017578125, + "learning_rate": 7.8466150240889e-06, + "loss": 92.4161, + "step": 46590 + }, + { + "epoch": 0.38549034206063615, + "grad_norm": 1130.7666015625, + "learning_rate": 7.845455428439703e-06, + "loss": 90.2969, + "step": 46600 + }, + { + "epoch": 0.3855730653100054, + "grad_norm": 694.2098388671875, + "learning_rate": 7.844295606390675e-06, + "loss": 106.4844, + "step": 46610 + }, + { + "epoch": 0.3856557885593746, + "grad_norm": 758.242431640625, + "learning_rate": 7.843135558034101e-06, + "loss": 85.5782, + "step": 46620 + }, + { + "epoch": 0.38573851180874386, + "grad_norm": 847.0222778320312, + "learning_rate": 7.841975283462278e-06, + "loss": 83.5226, + "step": 46630 + }, + { + "epoch": 0.3858212350581131, + "grad_norm": 828.2359619140625, + "learning_rate": 7.840814782767525e-06, + "loss": 74.5017, + "step": 46640 + }, + { + "epoch": 0.3859039583074823, + "grad_norm": 575.1474609375, + "learning_rate": 7.839654056042176e-06, + "loss": 84.7638, + "step": 46650 + }, + { + "epoch": 0.38598668155685156, + "grad_norm": 961.8069458007812, + "learning_rate": 7.838493103378588e-06, + "loss": 105.9289, + "step": 46660 + }, + { + "epoch": 0.38606940480622076, + "grad_norm": 323.90765380859375, + "learning_rate": 7.83733192486913e-06, + "loss": 134.9893, + "step": 46670 + }, + { + "epoch": 0.38615212805559, + "grad_norm": 906.62060546875, + "learning_rate": 7.836170520606191e-06, + "loss": 132.9467, + "step": 46680 + }, + { + "epoch": 0.38623485130495927, + "grad_norm": 1547.6925048828125, + "learning_rate": 7.83500889068218e-06, + "loss": 145.7491, + "step": 46690 + }, + { + "epoch": 0.38631757455432847, + "grad_norm": 1164.046142578125, + "learning_rate": 
7.833847035189524e-06, + "loss": 110.9907, + "step": 46700 + }, + { + "epoch": 0.3864002978036977, + "grad_norm": 1105.657470703125, + "learning_rate": 7.832684954220664e-06, + "loss": 124.7499, + "step": 46710 + }, + { + "epoch": 0.386483021053067, + "grad_norm": 508.67803955078125, + "learning_rate": 7.831522647868064e-06, + "loss": 95.8649, + "step": 46720 + }, + { + "epoch": 0.3865657443024362, + "grad_norm": 674.171875, + "learning_rate": 7.8303601162242e-06, + "loss": 99.7405, + "step": 46730 + }, + { + "epoch": 0.38664846755180543, + "grad_norm": 1009.1093139648438, + "learning_rate": 7.829197359381571e-06, + "loss": 111.1325, + "step": 46740 + }, + { + "epoch": 0.3867311908011747, + "grad_norm": 1165.2833251953125, + "learning_rate": 7.828034377432694e-06, + "loss": 111.5451, + "step": 46750 + }, + { + "epoch": 0.3868139140505439, + "grad_norm": 542.6980590820312, + "learning_rate": 7.826871170470099e-06, + "loss": 93.1879, + "step": 46760 + }, + { + "epoch": 0.38689663729991314, + "grad_norm": 1133.044921875, + "learning_rate": 7.82570773858634e-06, + "loss": 90.8988, + "step": 46770 + }, + { + "epoch": 0.3869793605492824, + "grad_norm": 684.3965454101562, + "learning_rate": 7.824544081873984e-06, + "loss": 79.4068, + "step": 46780 + }, + { + "epoch": 0.3870620837986516, + "grad_norm": 1281.01513671875, + "learning_rate": 7.823380200425618e-06, + "loss": 132.8948, + "step": 46790 + }, + { + "epoch": 0.38714480704802084, + "grad_norm": 1106.5760498046875, + "learning_rate": 7.822216094333847e-06, + "loss": 109.8843, + "step": 46800 + }, + { + "epoch": 0.3872275302973901, + "grad_norm": 736.9039306640625, + "learning_rate": 7.821051763691293e-06, + "loss": 95.4937, + "step": 46810 + }, + { + "epoch": 0.3873102535467593, + "grad_norm": 983.0117797851562, + "learning_rate": 7.819887208590597e-06, + "loss": 106.8212, + "step": 46820 + }, + { + "epoch": 0.38739297679612855, + "grad_norm": 900.6527099609375, + "learning_rate": 7.818722429124418e-06, + "loss": 97.8815, + "step": 46830 + }, + { + "epoch": 0.3874757000454978, + "grad_norm": 850.8548583984375, + "learning_rate": 7.817557425385433e-06, + "loss": 81.9495, + "step": 46840 + }, + { + "epoch": 0.387558423294867, + "grad_norm": 855.3139038085938, + "learning_rate": 7.816392197466333e-06, + "loss": 89.8393, + "step": 46850 + }, + { + "epoch": 0.38764114654423626, + "grad_norm": 587.1494750976562, + "learning_rate": 7.815226745459831e-06, + "loss": 106.3622, + "step": 46860 + }, + { + "epoch": 0.3877238697936055, + "grad_norm": 3245.526123046875, + "learning_rate": 7.814061069458657e-06, + "loss": 111.3302, + "step": 46870 + }, + { + "epoch": 0.3878065930429747, + "grad_norm": 547.8377685546875, + "learning_rate": 7.81289516955556e-06, + "loss": 78.6426, + "step": 46880 + }, + { + "epoch": 0.38788931629234397, + "grad_norm": 1064.7379150390625, + "learning_rate": 7.811729045843303e-06, + "loss": 90.7304, + "step": 46890 + }, + { + "epoch": 0.3879720395417132, + "grad_norm": 616.6322631835938, + "learning_rate": 7.81056269841467e-06, + "loss": 107.5491, + "step": 46900 + }, + { + "epoch": 0.3880547627910824, + "grad_norm": 1086.90185546875, + "learning_rate": 7.80939612736246e-06, + "loss": 112.952, + "step": 46910 + }, + { + "epoch": 0.3881374860404517, + "grad_norm": 762.38916015625, + "learning_rate": 7.808229332779496e-06, + "loss": 93.649, + "step": 46920 + }, + { + "epoch": 0.38822020928982093, + "grad_norm": 1226.2991943359375, + "learning_rate": 7.807062314758612e-06, + "loss": 122.8837, + "step": 46930 + }, + { + "epoch": 
0.3883029325391901, + "grad_norm": 980.0850219726562, + "learning_rate": 7.80589507339266e-06, + "loss": 118.3508, + "step": 46940 + }, + { + "epoch": 0.3883856557885594, + "grad_norm": 899.0253295898438, + "learning_rate": 7.804727608774516e-06, + "loss": 95.3277, + "step": 46950 + }, + { + "epoch": 0.38846837903792864, + "grad_norm": 992.0533447265625, + "learning_rate": 7.803559920997067e-06, + "loss": 100.9585, + "step": 46960 + }, + { + "epoch": 0.38855110228729783, + "grad_norm": 1150.515869140625, + "learning_rate": 7.802392010153223e-06, + "loss": 106.7416, + "step": 46970 + }, + { + "epoch": 0.3886338255366671, + "grad_norm": 392.8724670410156, + "learning_rate": 7.801223876335907e-06, + "loss": 81.8418, + "step": 46980 + }, + { + "epoch": 0.38871654878603634, + "grad_norm": 958.8024291992188, + "learning_rate": 7.800055519638064e-06, + "loss": 104.2003, + "step": 46990 + }, + { + "epoch": 0.38879927203540554, + "grad_norm": 1033.255859375, + "learning_rate": 7.798886940152654e-06, + "loss": 111.1013, + "step": 47000 + }, + { + "epoch": 0.3888819952847748, + "grad_norm": 670.1970825195312, + "learning_rate": 7.797718137972654e-06, + "loss": 108.9194, + "step": 47010 + }, + { + "epoch": 0.388964718534144, + "grad_norm": 927.21630859375, + "learning_rate": 7.79654911319106e-06, + "loss": 107.1055, + "step": 47020 + }, + { + "epoch": 0.38904744178351325, + "grad_norm": 1264.6552734375, + "learning_rate": 7.795379865900892e-06, + "loss": 131.0568, + "step": 47030 + }, + { + "epoch": 0.3891301650328825, + "grad_norm": 1989.842529296875, + "learning_rate": 7.794210396195175e-06, + "loss": 94.6299, + "step": 47040 + }, + { + "epoch": 0.3892128882822517, + "grad_norm": 530.0291748046875, + "learning_rate": 7.79304070416696e-06, + "loss": 121.8401, + "step": 47050 + }, + { + "epoch": 0.38929561153162096, + "grad_norm": 936.9892578125, + "learning_rate": 7.791870789909315e-06, + "loss": 89.362, + "step": 47060 + }, + { + "epoch": 0.3893783347809902, + "grad_norm": 2093.591796875, + "learning_rate": 7.790700653515324e-06, + "loss": 90.1807, + "step": 47070 + }, + { + "epoch": 0.3894610580303594, + "grad_norm": 549.9674682617188, + "learning_rate": 7.789530295078089e-06, + "loss": 100.1859, + "step": 47080 + }, + { + "epoch": 0.38954378127972866, + "grad_norm": 882.615966796875, + "learning_rate": 7.788359714690732e-06, + "loss": 117.6557, + "step": 47090 + }, + { + "epoch": 0.3896265045290979, + "grad_norm": 798.0199584960938, + "learning_rate": 7.787188912446389e-06, + "loss": 127.0244, + "step": 47100 + }, + { + "epoch": 0.3897092277784671, + "grad_norm": 776.5193481445312, + "learning_rate": 7.786017888438214e-06, + "loss": 126.0215, + "step": 47110 + }, + { + "epoch": 0.38979195102783637, + "grad_norm": 654.182373046875, + "learning_rate": 7.784846642759383e-06, + "loss": 82.0503, + "step": 47120 + }, + { + "epoch": 0.3898746742772056, + "grad_norm": 822.113525390625, + "learning_rate": 7.783675175503087e-06, + "loss": 89.8243, + "step": 47130 + }, + { + "epoch": 0.3899573975265748, + "grad_norm": 865.35546875, + "learning_rate": 7.78250348676253e-06, + "loss": 98.9063, + "step": 47140 + }, + { + "epoch": 0.3900401207759441, + "grad_norm": 1154.8814697265625, + "learning_rate": 7.781331576630941e-06, + "loss": 146.3992, + "step": 47150 + }, + { + "epoch": 0.39012284402531333, + "grad_norm": 1378.4947509765625, + "learning_rate": 7.780159445201562e-06, + "loss": 130.5315, + "step": 47160 + }, + { + "epoch": 0.39020556727468253, + "grad_norm": 1242.33251953125, + "learning_rate": 
7.778987092567658e-06, + "loss": 119.4341, + "step": 47170 + }, + { + "epoch": 0.3902882905240518, + "grad_norm": 797.1754760742188, + "learning_rate": 7.777814518822504e-06, + "loss": 67.0835, + "step": 47180 + }, + { + "epoch": 0.39037101377342104, + "grad_norm": 607.6759033203125, + "learning_rate": 7.776641724059398e-06, + "loss": 93.4001, + "step": 47190 + }, + { + "epoch": 0.39045373702279024, + "grad_norm": 819.6945190429688, + "learning_rate": 7.77546870837165e-06, + "loss": 74.3229, + "step": 47200 + }, + { + "epoch": 0.3905364602721595, + "grad_norm": 903.440185546875, + "learning_rate": 7.774295471852596e-06, + "loss": 108.8114, + "step": 47210 + }, + { + "epoch": 0.39061918352152875, + "grad_norm": 785.1321411132812, + "learning_rate": 7.773122014595584e-06, + "loss": 169.5685, + "step": 47220 + }, + { + "epoch": 0.39070190677089794, + "grad_norm": 1930.775146484375, + "learning_rate": 7.771948336693983e-06, + "loss": 108.4483, + "step": 47230 + }, + { + "epoch": 0.3907846300202672, + "grad_norm": 515.0844116210938, + "learning_rate": 7.770774438241168e-06, + "loss": 67.2212, + "step": 47240 + }, + { + "epoch": 0.39086735326963645, + "grad_norm": 1046.875, + "learning_rate": 7.769600319330553e-06, + "loss": 122.4751, + "step": 47250 + }, + { + "epoch": 0.39095007651900565, + "grad_norm": 516.0284423828125, + "learning_rate": 7.768425980055548e-06, + "loss": 87.5364, + "step": 47260 + }, + { + "epoch": 0.3910327997683749, + "grad_norm": 1055.4755859375, + "learning_rate": 7.767251420509593e-06, + "loss": 127.459, + "step": 47270 + }, + { + "epoch": 0.39111552301774416, + "grad_norm": 1103.45361328125, + "learning_rate": 7.766076640786145e-06, + "loss": 91.529, + "step": 47280 + }, + { + "epoch": 0.39119824626711336, + "grad_norm": 751.03759765625, + "learning_rate": 7.764901640978671e-06, + "loss": 97.0965, + "step": 47290 + }, + { + "epoch": 0.3912809695164826, + "grad_norm": 849.8720703125, + "learning_rate": 7.763726421180664e-06, + "loss": 100.8384, + "step": 47300 + }, + { + "epoch": 0.39136369276585187, + "grad_norm": 1227.1131591796875, + "learning_rate": 7.762550981485629e-06, + "loss": 85.1875, + "step": 47310 + }, + { + "epoch": 0.39144641601522107, + "grad_norm": 1593.5885009765625, + "learning_rate": 7.76137532198709e-06, + "loss": 117.0101, + "step": 47320 + }, + { + "epoch": 0.3915291392645903, + "grad_norm": 901.8778686523438, + "learning_rate": 7.76019944277859e-06, + "loss": 132.7024, + "step": 47330 + }, + { + "epoch": 0.3916118625139596, + "grad_norm": 851.2544555664062, + "learning_rate": 7.759023343953689e-06, + "loss": 89.3048, + "step": 47340 + }, + { + "epoch": 0.3916945857633288, + "grad_norm": 605.6761474609375, + "learning_rate": 7.757847025605963e-06, + "loss": 103.3425, + "step": 47350 + }, + { + "epoch": 0.391777309012698, + "grad_norm": 1666.5958251953125, + "learning_rate": 7.756670487829005e-06, + "loss": 112.8522, + "step": 47360 + }, + { + "epoch": 0.3918600322620673, + "grad_norm": 928.7994384765625, + "learning_rate": 7.755493730716428e-06, + "loss": 139.6545, + "step": 47370 + }, + { + "epoch": 0.3919427555114365, + "grad_norm": 1118.8592529296875, + "learning_rate": 7.75431675436186e-06, + "loss": 101.6983, + "step": 47380 + }, + { + "epoch": 0.39202547876080573, + "grad_norm": 753.40185546875, + "learning_rate": 7.753139558858949e-06, + "loss": 111.5847, + "step": 47390 + }, + { + "epoch": 0.39210820201017493, + "grad_norm": 1253.97509765625, + "learning_rate": 7.751962144301359e-06, + "loss": 101.4553, + "step": 47400 + }, + { + "epoch": 
0.3921909252595442, + "grad_norm": 1033.0867919921875, + "learning_rate": 7.75078451078277e-06, + "loss": 92.0257, + "step": 47410 + }, + { + "epoch": 0.39227364850891344, + "grad_norm": 747.4989624023438, + "learning_rate": 7.749606658396883e-06, + "loss": 100.6043, + "step": 47420 + }, + { + "epoch": 0.39235637175828264, + "grad_norm": 822.0686645507812, + "learning_rate": 7.748428587237412e-06, + "loss": 80.0977, + "step": 47430 + }, + { + "epoch": 0.3924390950076519, + "grad_norm": 1061.1436767578125, + "learning_rate": 7.747250297398092e-06, + "loss": 120.7229, + "step": 47440 + }, + { + "epoch": 0.39252181825702115, + "grad_norm": 497.0961608886719, + "learning_rate": 7.746071788972675e-06, + "loss": 92.1028, + "step": 47450 + }, + { + "epoch": 0.39260454150639035, + "grad_norm": 747.8739624023438, + "learning_rate": 7.744893062054928e-06, + "loss": 84.091, + "step": 47460 + }, + { + "epoch": 0.3926872647557596, + "grad_norm": 1407.6231689453125, + "learning_rate": 7.743714116738636e-06, + "loss": 96.2375, + "step": 47470 + }, + { + "epoch": 0.39276998800512886, + "grad_norm": 1389.636962890625, + "learning_rate": 7.742534953117607e-06, + "loss": 83.5301, + "step": 47480 + }, + { + "epoch": 0.39285271125449805, + "grad_norm": 827.8368530273438, + "learning_rate": 7.741355571285656e-06, + "loss": 98.6002, + "step": 47490 + }, + { + "epoch": 0.3929354345038673, + "grad_norm": 1032.63330078125, + "learning_rate": 7.740175971336624e-06, + "loss": 111.3695, + "step": 47500 + }, + { + "epoch": 0.39301815775323656, + "grad_norm": 938.416015625, + "learning_rate": 7.738996153364364e-06, + "loss": 128.9613, + "step": 47510 + }, + { + "epoch": 0.39310088100260576, + "grad_norm": 960.8037719726562, + "learning_rate": 7.737816117462752e-06, + "loss": 96.7483, + "step": 47520 + }, + { + "epoch": 0.393183604251975, + "grad_norm": 880.8233642578125, + "learning_rate": 7.736635863725677e-06, + "loss": 112.1702, + "step": 47530 + }, + { + "epoch": 0.39326632750134427, + "grad_norm": 643.0890502929688, + "learning_rate": 7.735455392247044e-06, + "loss": 172.7514, + "step": 47540 + }, + { + "epoch": 0.39334905075071347, + "grad_norm": 481.9482421875, + "learning_rate": 7.73427470312078e-06, + "loss": 98.0854, + "step": 47550 + }, + { + "epoch": 0.3934317740000827, + "grad_norm": 7659.08837890625, + "learning_rate": 7.733093796440828e-06, + "loss": 94.3791, + "step": 47560 + }, + { + "epoch": 0.393514497249452, + "grad_norm": 611.7446899414062, + "learning_rate": 7.731912672301145e-06, + "loss": 121.1342, + "step": 47570 + }, + { + "epoch": 0.3935972204988212, + "grad_norm": 1074.301513671875, + "learning_rate": 7.730731330795707e-06, + "loss": 107.5944, + "step": 47580 + }, + { + "epoch": 0.39367994374819043, + "grad_norm": 1130.7880859375, + "learning_rate": 7.72954977201851e-06, + "loss": 88.7905, + "step": 47590 + }, + { + "epoch": 0.3937626669975597, + "grad_norm": 427.6048278808594, + "learning_rate": 7.728367996063566e-06, + "loss": 95.4467, + "step": 47600 + }, + { + "epoch": 0.3938453902469289, + "grad_norm": 1087.6966552734375, + "learning_rate": 7.727186003024902e-06, + "loss": 116.7486, + "step": 47610 + }, + { + "epoch": 0.39392811349629814, + "grad_norm": 677.6015014648438, + "learning_rate": 7.726003792996562e-06, + "loss": 112.2149, + "step": 47620 + }, + { + "epoch": 0.3940108367456674, + "grad_norm": 1281.115478515625, + "learning_rate": 7.724821366072612e-06, + "loss": 112.6385, + "step": 47630 + }, + { + "epoch": 0.3940935599950366, + "grad_norm": 1624.0828857421875, + 
"learning_rate": 7.723638722347132e-06, + "loss": 114.2262, + "step": 47640 + }, + { + "epoch": 0.39417628324440585, + "grad_norm": 835.2601318359375, + "learning_rate": 7.722455861914218e-06, + "loss": 87.17, + "step": 47650 + }, + { + "epoch": 0.3942590064937751, + "grad_norm": 721.5057983398438, + "learning_rate": 7.721272784867983e-06, + "loss": 112.5632, + "step": 47660 + }, + { + "epoch": 0.3943417297431443, + "grad_norm": 909.7614135742188, + "learning_rate": 7.720089491302565e-06, + "loss": 105.2768, + "step": 47670 + }, + { + "epoch": 0.39442445299251355, + "grad_norm": 641.9271240234375, + "learning_rate": 7.718905981312108e-06, + "loss": 93.7019, + "step": 47680 + }, + { + "epoch": 0.3945071762418828, + "grad_norm": 585.551025390625, + "learning_rate": 7.71772225499078e-06, + "loss": 80.3911, + "step": 47690 + }, + { + "epoch": 0.394589899491252, + "grad_norm": 783.8123168945312, + "learning_rate": 7.716538312432767e-06, + "loss": 115.101, + "step": 47700 + }, + { + "epoch": 0.39467262274062126, + "grad_norm": 644.3832397460938, + "learning_rate": 7.715354153732265e-06, + "loss": 99.6165, + "step": 47710 + }, + { + "epoch": 0.3947553459899905, + "grad_norm": 1999.86572265625, + "learning_rate": 7.714169778983496e-06, + "loss": 117.3061, + "step": 47720 + }, + { + "epoch": 0.3948380692393597, + "grad_norm": 1040.9405517578125, + "learning_rate": 7.712985188280694e-06, + "loss": 101.7906, + "step": 47730 + }, + { + "epoch": 0.39492079248872897, + "grad_norm": 1040.4888916015625, + "learning_rate": 7.711800381718111e-06, + "loss": 110.293, + "step": 47740 + }, + { + "epoch": 0.39500351573809817, + "grad_norm": 1044.0440673828125, + "learning_rate": 7.710615359390018e-06, + "loss": 94.9559, + "step": 47750 + }, + { + "epoch": 0.3950862389874674, + "grad_norm": 749.12939453125, + "learning_rate": 7.7094301213907e-06, + "loss": 92.2077, + "step": 47760 + }, + { + "epoch": 0.3951689622368367, + "grad_norm": 1238.37158203125, + "learning_rate": 7.708244667814463e-06, + "loss": 100.6183, + "step": 47770 + }, + { + "epoch": 0.3952516854862059, + "grad_norm": 821.638916015625, + "learning_rate": 7.707058998755626e-06, + "loss": 109.0208, + "step": 47780 + }, + { + "epoch": 0.3953344087355751, + "grad_norm": 767.8424072265625, + "learning_rate": 7.705873114308529e-06, + "loss": 85.0486, + "step": 47790 + }, + { + "epoch": 0.3954171319849444, + "grad_norm": 756.989501953125, + "learning_rate": 7.704687014567524e-06, + "loss": 89.8211, + "step": 47800 + }, + { + "epoch": 0.3954998552343136, + "grad_norm": 1021.226806640625, + "learning_rate": 7.703500699626988e-06, + "loss": 90.8781, + "step": 47810 + }, + { + "epoch": 0.39558257848368283, + "grad_norm": 749.4614868164062, + "learning_rate": 7.702314169581311e-06, + "loss": 103.451, + "step": 47820 + }, + { + "epoch": 0.3956653017330521, + "grad_norm": 954.7346801757812, + "learning_rate": 7.701127424524894e-06, + "loss": 97.5412, + "step": 47830 + }, + { + "epoch": 0.3957480249824213, + "grad_norm": 704.9465942382812, + "learning_rate": 7.699940464552166e-06, + "loss": 128.5169, + "step": 47840 + }, + { + "epoch": 0.39583074823179054, + "grad_norm": 926.2518310546875, + "learning_rate": 7.698753289757565e-06, + "loss": 103.6893, + "step": 47850 + }, + { + "epoch": 0.3959134714811598, + "grad_norm": 903.693115234375, + "learning_rate": 7.69756590023555e-06, + "loss": 117.7347, + "step": 47860 + }, + { + "epoch": 0.395996194730529, + "grad_norm": 576.5755004882812, + "learning_rate": 7.696378296080598e-06, + "loss": 77.2486, + "step": 47870 + 
}, + { + "epoch": 0.39607891797989825, + "grad_norm": 1016.8932495117188, + "learning_rate": 7.6951904773872e-06, + "loss": 102.5683, + "step": 47880 + }, + { + "epoch": 0.3961616412292675, + "grad_norm": 649.89013671875, + "learning_rate": 7.694002444249863e-06, + "loss": 101.7743, + "step": 47890 + }, + { + "epoch": 0.3962443644786367, + "grad_norm": 2596.306396484375, + "learning_rate": 7.692814196763118e-06, + "loss": 125.352, + "step": 47900 + }, + { + "epoch": 0.39632708772800596, + "grad_norm": 1052.6181640625, + "learning_rate": 7.691625735021505e-06, + "loss": 109.4487, + "step": 47910 + }, + { + "epoch": 0.3964098109773752, + "grad_norm": 909.6773071289062, + "learning_rate": 7.690437059119584e-06, + "loss": 132.6711, + "step": 47920 + }, + { + "epoch": 0.3964925342267444, + "grad_norm": 468.37310791015625, + "learning_rate": 7.689248169151935e-06, + "loss": 89.8137, + "step": 47930 + }, + { + "epoch": 0.39657525747611366, + "grad_norm": 1061.165771484375, + "learning_rate": 7.68805906521315e-06, + "loss": 117.4328, + "step": 47940 + }, + { + "epoch": 0.3966579807254829, + "grad_norm": 757.1315307617188, + "learning_rate": 7.686869747397843e-06, + "loss": 101.1938, + "step": 47950 + }, + { + "epoch": 0.3967407039748521, + "grad_norm": 811.54052734375, + "learning_rate": 7.685680215800639e-06, + "loss": 98.3037, + "step": 47960 + }, + { + "epoch": 0.39682342722422137, + "grad_norm": 880.4942626953125, + "learning_rate": 7.684490470516185e-06, + "loss": 105.742, + "step": 47970 + }, + { + "epoch": 0.3969061504735906, + "grad_norm": 3485.52685546875, + "learning_rate": 7.683300511639149e-06, + "loss": 121.3876, + "step": 47980 + }, + { + "epoch": 0.3969888737229598, + "grad_norm": 794.6721801757812, + "learning_rate": 7.682110339264203e-06, + "loss": 99.216, + "step": 47990 + }, + { + "epoch": 0.3970715969723291, + "grad_norm": 1388.62255859375, + "learning_rate": 7.680919953486047e-06, + "loss": 114.4334, + "step": 48000 + }, + { + "epoch": 0.39715432022169833, + "grad_norm": 617.267578125, + "learning_rate": 7.679729354399395e-06, + "loss": 96.2605, + "step": 48010 + }, + { + "epoch": 0.39723704347106753, + "grad_norm": 577.44189453125, + "learning_rate": 7.678538542098974e-06, + "loss": 106.764, + "step": 48020 + }, + { + "epoch": 0.3973197667204368, + "grad_norm": 922.736083984375, + "learning_rate": 7.677347516679536e-06, + "loss": 94.4455, + "step": 48030 + }, + { + "epoch": 0.39740248996980604, + "grad_norm": 1115.3370361328125, + "learning_rate": 7.676156278235845e-06, + "loss": 124.8899, + "step": 48040 + }, + { + "epoch": 0.39748521321917524, + "grad_norm": 956.1436157226562, + "learning_rate": 7.674964826862679e-06, + "loss": 114.437, + "step": 48050 + }, + { + "epoch": 0.3975679364685445, + "grad_norm": 1833.231201171875, + "learning_rate": 7.673773162654836e-06, + "loss": 127.1646, + "step": 48060 + }, + { + "epoch": 0.39765065971791375, + "grad_norm": 424.2684631347656, + "learning_rate": 7.672581285707135e-06, + "loss": 88.6944, + "step": 48070 + }, + { + "epoch": 0.39773338296728294, + "grad_norm": 1856.982421875, + "learning_rate": 7.67138919611441e-06, + "loss": 154.6424, + "step": 48080 + }, + { + "epoch": 0.3978161062166522, + "grad_norm": 907.585205078125, + "learning_rate": 7.670196893971502e-06, + "loss": 121.6254, + "step": 48090 + }, + { + "epoch": 0.3978988294660214, + "grad_norm": 1184.2100830078125, + "learning_rate": 7.669004379373284e-06, + "loss": 86.6673, + "step": 48100 + }, + { + "epoch": 0.39798155271539065, + "grad_norm": 1260.71875, + 
"learning_rate": 7.667811652414637e-06, + "loss": 87.2874, + "step": 48110 + }, + { + "epoch": 0.3980642759647599, + "grad_norm": 986.4711303710938, + "learning_rate": 7.666618713190459e-06, + "loss": 98.885, + "step": 48120 + }, + { + "epoch": 0.3981469992141291, + "grad_norm": 1163.977783203125, + "learning_rate": 7.665425561795669e-06, + "loss": 86.0785, + "step": 48130 + }, + { + "epoch": 0.39822972246349836, + "grad_norm": 851.3588256835938, + "learning_rate": 7.664232198325198e-06, + "loss": 88.372, + "step": 48140 + }, + { + "epoch": 0.3983124457128676, + "grad_norm": 786.80419921875, + "learning_rate": 7.663038622873999e-06, + "loss": 83.9941, + "step": 48150 + }, + { + "epoch": 0.3983951689622368, + "grad_norm": 1034.7119140625, + "learning_rate": 7.66184483553704e-06, + "loss": 91.0622, + "step": 48160 + }, + { + "epoch": 0.39847789221160607, + "grad_norm": 1714.0181884765625, + "learning_rate": 7.660650836409302e-06, + "loss": 88.6052, + "step": 48170 + }, + { + "epoch": 0.3985606154609753, + "grad_norm": 1282.2288818359375, + "learning_rate": 7.65945662558579e-06, + "loss": 102.4522, + "step": 48180 + }, + { + "epoch": 0.3986433387103445, + "grad_norm": 1070.42626953125, + "learning_rate": 7.658262203161517e-06, + "loss": 120.5579, + "step": 48190 + }, + { + "epoch": 0.3987260619597138, + "grad_norm": 1412.6171875, + "learning_rate": 7.65706756923152e-06, + "loss": 88.6738, + "step": 48200 + }, + { + "epoch": 0.398808785209083, + "grad_norm": 745.8485107421875, + "learning_rate": 7.655872723890854e-06, + "loss": 116.865, + "step": 48210 + }, + { + "epoch": 0.3988915084584522, + "grad_norm": 938.5888671875, + "learning_rate": 7.654677667234582e-06, + "loss": 112.7861, + "step": 48220 + }, + { + "epoch": 0.3989742317078215, + "grad_norm": 692.1904296875, + "learning_rate": 7.65348239935779e-06, + "loss": 93.3395, + "step": 48230 + }, + { + "epoch": 0.39905695495719073, + "grad_norm": 892.9244384765625, + "learning_rate": 7.652286920355583e-06, + "loss": 123.322, + "step": 48240 + }, + { + "epoch": 0.39913967820655993, + "grad_norm": 353.6568603515625, + "learning_rate": 7.651091230323079e-06, + "loss": 75.8525, + "step": 48250 + }, + { + "epoch": 0.3992224014559292, + "grad_norm": 1054.718017578125, + "learning_rate": 7.649895329355411e-06, + "loss": 144.2353, + "step": 48260 + }, + { + "epoch": 0.39930512470529844, + "grad_norm": 854.1619873046875, + "learning_rate": 7.648699217547733e-06, + "loss": 89.6152, + "step": 48270 + }, + { + "epoch": 0.39938784795466764, + "grad_norm": 1015.1759033203125, + "learning_rate": 7.647502894995215e-06, + "loss": 97.5599, + "step": 48280 + }, + { + "epoch": 0.3994705712040369, + "grad_norm": 691.9119873046875, + "learning_rate": 7.646306361793042e-06, + "loss": 95.9693, + "step": 48290 + }, + { + "epoch": 0.39955329445340615, + "grad_norm": 545.0184936523438, + "learning_rate": 7.645109618036416e-06, + "loss": 89.2513, + "step": 48300 + }, + { + "epoch": 0.39963601770277535, + "grad_norm": 1180.732666015625, + "learning_rate": 7.643912663820559e-06, + "loss": 107.0819, + "step": 48310 + }, + { + "epoch": 0.3997187409521446, + "grad_norm": 634.8629760742188, + "learning_rate": 7.642715499240702e-06, + "loss": 93.8869, + "step": 48320 + }, + { + "epoch": 0.39980146420151386, + "grad_norm": 878.6117553710938, + "learning_rate": 7.641518124392105e-06, + "loss": 117.2257, + "step": 48330 + }, + { + "epoch": 0.39988418745088306, + "grad_norm": 741.9995727539062, + "learning_rate": 7.640320539370032e-06, + "loss": 92.1223, + "step": 48340 + }, + { + 
"epoch": 0.3999669107002523, + "grad_norm": 693.643310546875, + "learning_rate": 7.63912274426977e-06, + "loss": 102.7763, + "step": 48350 + }, + { + "epoch": 0.40004963394962156, + "grad_norm": 747.3237915039062, + "learning_rate": 7.637924739186624e-06, + "loss": 83.4088, + "step": 48360 + }, + { + "epoch": 0.40013235719899076, + "grad_norm": 1104.4271240234375, + "learning_rate": 7.636726524215913e-06, + "loss": 96.2126, + "step": 48370 + }, + { + "epoch": 0.40021508044836, + "grad_norm": 1066.0506591796875, + "learning_rate": 7.635528099452974e-06, + "loss": 137.2111, + "step": 48380 + }, + { + "epoch": 0.40029780369772927, + "grad_norm": 764.3858032226562, + "learning_rate": 7.634329464993158e-06, + "loss": 99.6301, + "step": 48390 + }, + { + "epoch": 0.40038052694709847, + "grad_norm": 619.43017578125, + "learning_rate": 7.633130620931837e-06, + "loss": 95.3814, + "step": 48400 + }, + { + "epoch": 0.4004632501964677, + "grad_norm": 576.7614135742188, + "learning_rate": 7.631931567364398e-06, + "loss": 115.1573, + "step": 48410 + }, + { + "epoch": 0.400545973445837, + "grad_norm": 1004.3753662109375, + "learning_rate": 7.630732304386244e-06, + "loss": 90.6397, + "step": 48420 + }, + { + "epoch": 0.4006286966952062, + "grad_norm": 1928.7547607421875, + "learning_rate": 7.629532832092792e-06, + "loss": 98.6528, + "step": 48430 + }, + { + "epoch": 0.40071141994457543, + "grad_norm": 1009.5800170898438, + "learning_rate": 7.62833315057948e-06, + "loss": 105.3359, + "step": 48440 + }, + { + "epoch": 0.4007941431939447, + "grad_norm": 573.4387817382812, + "learning_rate": 7.627133259941762e-06, + "loss": 95.6264, + "step": 48450 + }, + { + "epoch": 0.4008768664433139, + "grad_norm": 1055.6937255859375, + "learning_rate": 7.625933160275109e-06, + "loss": 98.1826, + "step": 48460 + }, + { + "epoch": 0.40095958969268314, + "grad_norm": 636.2737426757812, + "learning_rate": 7.6247328516750055e-06, + "loss": 106.1883, + "step": 48470 + }, + { + "epoch": 0.40104231294205234, + "grad_norm": 624.5562744140625, + "learning_rate": 7.623532334236954e-06, + "loss": 115.2045, + "step": 48480 + }, + { + "epoch": 0.4011250361914216, + "grad_norm": 1036.740234375, + "learning_rate": 7.622331608056474e-06, + "loss": 100.7731, + "step": 48490 + }, + { + "epoch": 0.40120775944079085, + "grad_norm": 1890.8753662109375, + "learning_rate": 7.621130673229105e-06, + "loss": 98.8333, + "step": 48500 + }, + { + "epoch": 0.40129048269016004, + "grad_norm": 967.46044921875, + "learning_rate": 7.619929529850397e-06, + "loss": 62.4726, + "step": 48510 + }, + { + "epoch": 0.4013732059395293, + "grad_norm": 861.5704956054688, + "learning_rate": 7.618728178015919e-06, + "loss": 94.6595, + "step": 48520 + }, + { + "epoch": 0.40145592918889855, + "grad_norm": 1017.3840942382812, + "learning_rate": 7.617526617821259e-06, + "loss": 107.7875, + "step": 48530 + }, + { + "epoch": 0.40153865243826775, + "grad_norm": 2595.431640625, + "learning_rate": 7.616324849362019e-06, + "loss": 104.2326, + "step": 48540 + }, + { + "epoch": 0.401621375687637, + "grad_norm": 767.1851196289062, + "learning_rate": 7.615122872733819e-06, + "loss": 126.0866, + "step": 48550 + }, + { + "epoch": 0.40170409893700626, + "grad_norm": 901.1397705078125, + "learning_rate": 7.613920688032293e-06, + "loss": 120.4315, + "step": 48560 + }, + { + "epoch": 0.40178682218637546, + "grad_norm": 1408.295166015625, + "learning_rate": 7.612718295353094e-06, + "loss": 104.612, + "step": 48570 + }, + { + "epoch": 0.4018695454357447, + "grad_norm": 746.8675537109375, + 
"learning_rate": 7.61151569479189e-06, + "loss": 84.4078, + "step": 48580 + }, + { + "epoch": 0.40195226868511397, + "grad_norm": 925.1431884765625, + "learning_rate": 7.610312886444369e-06, + "loss": 89.8368, + "step": 48590 + }, + { + "epoch": 0.40203499193448317, + "grad_norm": 757.10693359375, + "learning_rate": 7.60910987040623e-06, + "loss": 92.3049, + "step": 48600 + }, + { + "epoch": 0.4021177151838524, + "grad_norm": 777.216552734375, + "learning_rate": 7.607906646773195e-06, + "loss": 105.3716, + "step": 48610 + }, + { + "epoch": 0.4022004384332217, + "grad_norm": 751.5682983398438, + "learning_rate": 7.606703215640995e-06, + "loss": 92.2909, + "step": 48620 + }, + { + "epoch": 0.4022831616825909, + "grad_norm": 1239.3206787109375, + "learning_rate": 7.605499577105382e-06, + "loss": 106.9025, + "step": 48630 + }, + { + "epoch": 0.4023658849319601, + "grad_norm": 750.9842529296875, + "learning_rate": 7.604295731262128e-06, + "loss": 115.5005, + "step": 48640 + }, + { + "epoch": 0.4024486081813294, + "grad_norm": 1378.44677734375, + "learning_rate": 7.603091678207013e-06, + "loss": 95.4831, + "step": 48650 + }, + { + "epoch": 0.4025313314306986, + "grad_norm": 883.4937133789062, + "learning_rate": 7.60188741803584e-06, + "loss": 94.2378, + "step": 48660 + }, + { + "epoch": 0.40261405468006783, + "grad_norm": 927.4861450195312, + "learning_rate": 7.600682950844428e-06, + "loss": 97.9098, + "step": 48670 + }, + { + "epoch": 0.4026967779294371, + "grad_norm": 999.6773071289062, + "learning_rate": 7.599478276728607e-06, + "loss": 88.1278, + "step": 48680 + }, + { + "epoch": 0.4027795011788063, + "grad_norm": 981.5151977539062, + "learning_rate": 7.5982733957842304e-06, + "loss": 96.1045, + "step": 48690 + }, + { + "epoch": 0.40286222442817554, + "grad_norm": 779.521728515625, + "learning_rate": 7.597068308107165e-06, + "loss": 96.5194, + "step": 48700 + }, + { + "epoch": 0.4029449476775448, + "grad_norm": 723.9231567382812, + "learning_rate": 7.595863013793292e-06, + "loss": 105.2723, + "step": 48710 + }, + { + "epoch": 0.403027670926914, + "grad_norm": 688.5134887695312, + "learning_rate": 7.594657512938513e-06, + "loss": 73.8923, + "step": 48720 + }, + { + "epoch": 0.40311039417628325, + "grad_norm": 1107.6732177734375, + "learning_rate": 7.593451805638743e-06, + "loss": 84.0132, + "step": 48730 + }, + { + "epoch": 0.4031931174256525, + "grad_norm": 971.2855834960938, + "learning_rate": 7.592245891989914e-06, + "loss": 126.4593, + "step": 48740 + }, + { + "epoch": 0.4032758406750217, + "grad_norm": 906.73388671875, + "learning_rate": 7.5910397720879785e-06, + "loss": 112.5872, + "step": 48750 + }, + { + "epoch": 0.40335856392439096, + "grad_norm": 976.5882568359375, + "learning_rate": 7.589833446028898e-06, + "loss": 113.1635, + "step": 48760 + }, + { + "epoch": 0.4034412871737602, + "grad_norm": 517.5700073242188, + "learning_rate": 7.5886269139086565e-06, + "loss": 95.9183, + "step": 48770 + }, + { + "epoch": 0.4035240104231294, + "grad_norm": 1049.484375, + "learning_rate": 7.587420175823252e-06, + "loss": 114.5341, + "step": 48780 + }, + { + "epoch": 0.40360673367249866, + "grad_norm": 1442.658447265625, + "learning_rate": 7.586213231868699e-06, + "loss": 89.2601, + "step": 48790 + }, + { + "epoch": 0.4036894569218679, + "grad_norm": 923.6640625, + "learning_rate": 7.585006082141028e-06, + "loss": 100.5833, + "step": 48800 + }, + { + "epoch": 0.4037721801712371, + "grad_norm": 882.6138916015625, + "learning_rate": 7.583798726736286e-06, + "loss": 111.9895, + "step": 48810 + }, + 
{ + "epoch": 0.40385490342060637, + "grad_norm": 841.87548828125, + "learning_rate": 7.5825911657505365e-06, + "loss": 110.3644, + "step": 48820 + }, + { + "epoch": 0.40393762666997557, + "grad_norm": 686.5569458007812, + "learning_rate": 7.581383399279863e-06, + "loss": 113.7908, + "step": 48830 + }, + { + "epoch": 0.4040203499193448, + "grad_norm": 1229.239501953125, + "learning_rate": 7.580175427420358e-06, + "loss": 94.2136, + "step": 48840 + }, + { + "epoch": 0.4041030731687141, + "grad_norm": 1013.9456176757812, + "learning_rate": 7.578967250268137e-06, + "loss": 112.4359, + "step": 48850 + }, + { + "epoch": 0.4041857964180833, + "grad_norm": 843.6847534179688, + "learning_rate": 7.577758867919325e-06, + "loss": 119.3482, + "step": 48860 + }, + { + "epoch": 0.40426851966745253, + "grad_norm": 660.8492431640625, + "learning_rate": 7.576550280470072e-06, + "loss": 105.5132, + "step": 48870 + }, + { + "epoch": 0.4043512429168218, + "grad_norm": 1110.703125, + "learning_rate": 7.5753414880165365e-06, + "loss": 117.6248, + "step": 48880 + }, + { + "epoch": 0.404433966166191, + "grad_norm": 971.7891235351562, + "learning_rate": 7.5741324906548996e-06, + "loss": 84.8425, + "step": 48890 + }, + { + "epoch": 0.40451668941556024, + "grad_norm": 674.7775268554688, + "learning_rate": 7.572923288481355e-06, + "loss": 93.9152, + "step": 48900 + }, + { + "epoch": 0.4045994126649295, + "grad_norm": 1152.0150146484375, + "learning_rate": 7.571713881592109e-06, + "loss": 97.321, + "step": 48910 + }, + { + "epoch": 0.4046821359142987, + "grad_norm": 813.1100463867188, + "learning_rate": 7.570504270083394e-06, + "loss": 107.887, + "step": 48920 + }, + { + "epoch": 0.40476485916366794, + "grad_norm": 860.7267456054688, + "learning_rate": 7.569294454051452e-06, + "loss": 93.7367, + "step": 48930 + }, + { + "epoch": 0.4048475824130372, + "grad_norm": 641.8156127929688, + "learning_rate": 7.568084433592542e-06, + "loss": 103.0248, + "step": 48940 + }, + { + "epoch": 0.4049303056624064, + "grad_norm": 1134.04736328125, + "learning_rate": 7.566874208802939e-06, + "loss": 101.0284, + "step": 48950 + }, + { + "epoch": 0.40501302891177565, + "grad_norm": 596.593017578125, + "learning_rate": 7.5656637797789335e-06, + "loss": 80.7004, + "step": 48960 + }, + { + "epoch": 0.4050957521611449, + "grad_norm": 791.4570922851562, + "learning_rate": 7.564453146616837e-06, + "loss": 109.8588, + "step": 48970 + }, + { + "epoch": 0.4051784754105141, + "grad_norm": 681.3819580078125, + "learning_rate": 7.563242309412975e-06, + "loss": 105.6946, + "step": 48980 + }, + { + "epoch": 0.40526119865988336, + "grad_norm": 881.9998168945312, + "learning_rate": 7.562031268263686e-06, + "loss": 104.3682, + "step": 48990 + }, + { + "epoch": 0.4053439219092526, + "grad_norm": 985.5377807617188, + "learning_rate": 7.5608200232653254e-06, + "loss": 146.3788, + "step": 49000 + }, + { + "epoch": 0.4054266451586218, + "grad_norm": 547.89697265625, + "learning_rate": 7.5596085745142654e-06, + "loss": 126.0642, + "step": 49010 + }, + { + "epoch": 0.40550936840799107, + "grad_norm": 1244.3304443359375, + "learning_rate": 7.558396922106903e-06, + "loss": 105.9497, + "step": 49020 + }, + { + "epoch": 0.4055920916573603, + "grad_norm": 1159.8284912109375, + "learning_rate": 7.557185066139638e-06, + "loss": 123.0697, + "step": 49030 + }, + { + "epoch": 0.4056748149067295, + "grad_norm": 1442.5506591796875, + "learning_rate": 7.555973006708892e-06, + "loss": 124.4241, + "step": 49040 + }, + { + "epoch": 0.4057575381560988, + "grad_norm": 
1344.17236328125, + "learning_rate": 7.554760743911104e-06, + "loss": 98.3027, + "step": 49050 + }, + { + "epoch": 0.40584026140546803, + "grad_norm": 733.8927001953125, + "learning_rate": 7.553548277842729e-06, + "loss": 90.8391, + "step": 49060 + }, + { + "epoch": 0.4059229846548372, + "grad_norm": 1737.365966796875, + "learning_rate": 7.5523356086002364e-06, + "loss": 128.2919, + "step": 49070 + }, + { + "epoch": 0.4060057079042065, + "grad_norm": 1121.77783203125, + "learning_rate": 7.551122736280113e-06, + "loss": 114.6872, + "step": 49080 + }, + { + "epoch": 0.40608843115357574, + "grad_norm": 830.7010498046875, + "learning_rate": 7.549909660978863e-06, + "loss": 103.0037, + "step": 49090 + }, + { + "epoch": 0.40617115440294493, + "grad_norm": 950.9368896484375, + "learning_rate": 7.548696382793002e-06, + "loss": 76.3594, + "step": 49100 + }, + { + "epoch": 0.4062538776523142, + "grad_norm": 721.6538696289062, + "learning_rate": 7.547482901819066e-06, + "loss": 101.0682, + "step": 49110 + }, + { + "epoch": 0.40633660090168344, + "grad_norm": 1179.335693359375, + "learning_rate": 7.5462692181536094e-06, + "loss": 113.5078, + "step": 49120 + }, + { + "epoch": 0.40641932415105264, + "grad_norm": 1521.753173828125, + "learning_rate": 7.545055331893195e-06, + "loss": 127.0566, + "step": 49130 + }, + { + "epoch": 0.4065020474004219, + "grad_norm": 718.9688720703125, + "learning_rate": 7.543841243134409e-06, + "loss": 77.3431, + "step": 49140 + }, + { + "epoch": 0.40658477064979115, + "grad_norm": 1070.0787353515625, + "learning_rate": 7.5426269519738495e-06, + "loss": 108.8352, + "step": 49150 + }, + { + "epoch": 0.40666749389916035, + "grad_norm": 514.2880859375, + "learning_rate": 7.541412458508133e-06, + "loss": 104.5422, + "step": 49160 + }, + { + "epoch": 0.4067502171485296, + "grad_norm": 774.6307983398438, + "learning_rate": 7.54019776283389e-06, + "loss": 100.5812, + "step": 49170 + }, + { + "epoch": 0.40683294039789886, + "grad_norm": 600.9510498046875, + "learning_rate": 7.53898286504777e-06, + "loss": 75.3506, + "step": 49180 + }, + { + "epoch": 0.40691566364726806, + "grad_norm": 843.1857299804688, + "learning_rate": 7.537767765246436e-06, + "loss": 110.9489, + "step": 49190 + }, + { + "epoch": 0.4069983868966373, + "grad_norm": 957.2750854492188, + "learning_rate": 7.536552463526565e-06, + "loss": 86.3647, + "step": 49200 + }, + { + "epoch": 0.4070811101460065, + "grad_norm": 1061.4150390625, + "learning_rate": 7.535336959984858e-06, + "loss": 127.3531, + "step": 49210 + }, + { + "epoch": 0.40716383339537576, + "grad_norm": 2062.2001953125, + "learning_rate": 7.5341212547180246e-06, + "loss": 111.2139, + "step": 49220 + }, + { + "epoch": 0.407246556644745, + "grad_norm": 768.780517578125, + "learning_rate": 7.532905347822792e-06, + "loss": 116.6353, + "step": 49230 + }, + { + "epoch": 0.4073292798941142, + "grad_norm": 780.7527465820312, + "learning_rate": 7.5316892393959064e-06, + "loss": 91.1807, + "step": 49240 + }, + { + "epoch": 0.40741200314348347, + "grad_norm": 487.50665283203125, + "learning_rate": 7.530472929534126e-06, + "loss": 110.9754, + "step": 49250 + }, + { + "epoch": 0.4074947263928527, + "grad_norm": 578.7011108398438, + "learning_rate": 7.529256418334228e-06, + "loss": 131.2194, + "step": 49260 + }, + { + "epoch": 0.4075774496422219, + "grad_norm": 818.4442749023438, + "learning_rate": 7.528039705893006e-06, + "loss": 98.4813, + "step": 49270 + }, + { + "epoch": 0.4076601728915912, + "grad_norm": 555.372314453125, + "learning_rate": 7.5268227923072665e-06, 
+ "loss": 100.5567, + "step": 49280 + }, + { + "epoch": 0.40774289614096043, + "grad_norm": 1478.378173828125, + "learning_rate": 7.525605677673831e-06, + "loss": 94.0006, + "step": 49290 + }, + { + "epoch": 0.40782561939032963, + "grad_norm": 1054.407958984375, + "learning_rate": 7.524388362089545e-06, + "loss": 110.3638, + "step": 49300 + }, + { + "epoch": 0.4079083426396989, + "grad_norm": 936.5081176757812, + "learning_rate": 7.523170845651263e-06, + "loss": 108.1984, + "step": 49310 + }, + { + "epoch": 0.40799106588906814, + "grad_norm": 714.961181640625, + "learning_rate": 7.521953128455856e-06, + "loss": 109.1958, + "step": 49320 + }, + { + "epoch": 0.40807378913843734, + "grad_norm": 850.507080078125, + "learning_rate": 7.520735210600213e-06, + "loss": 117.9377, + "step": 49330 + }, + { + "epoch": 0.4081565123878066, + "grad_norm": 815.2796020507812, + "learning_rate": 7.519517092181237e-06, + "loss": 128.2985, + "step": 49340 + }, + { + "epoch": 0.40823923563717585, + "grad_norm": 1209.8961181640625, + "learning_rate": 7.518298773295849e-06, + "loss": 108.5808, + "step": 49350 + }, + { + "epoch": 0.40832195888654504, + "grad_norm": 1075.5911865234375, + "learning_rate": 7.517080254040985e-06, + "loss": 87.4483, + "step": 49360 + }, + { + "epoch": 0.4084046821359143, + "grad_norm": 952.4523315429688, + "learning_rate": 7.5158615345136e-06, + "loss": 85.5586, + "step": 49370 + }, + { + "epoch": 0.40848740538528355, + "grad_norm": 688.7626342773438, + "learning_rate": 7.514642614810655e-06, + "loss": 113.6158, + "step": 49380 + }, + { + "epoch": 0.40857012863465275, + "grad_norm": 754.0592651367188, + "learning_rate": 7.51342349502914e-06, + "loss": 95.794, + "step": 49390 + }, + { + "epoch": 0.408652851884022, + "grad_norm": 1221.88818359375, + "learning_rate": 7.512204175266052e-06, + "loss": 99.691, + "step": 49400 + }, + { + "epoch": 0.40873557513339126, + "grad_norm": 781.3279418945312, + "learning_rate": 7.510984655618407e-06, + "loss": 105.6204, + "step": 49410 + }, + { + "epoch": 0.40881829838276046, + "grad_norm": 593.7680053710938, + "learning_rate": 7.509764936183237e-06, + "loss": 82.4356, + "step": 49420 + }, + { + "epoch": 0.4089010216321297, + "grad_norm": 2055.203369140625, + "learning_rate": 7.5085450170575876e-06, + "loss": 120.1463, + "step": 49430 + }, + { + "epoch": 0.40898374488149897, + "grad_norm": 1296.7412109375, + "learning_rate": 7.5073248983385265e-06, + "loss": 130.0576, + "step": 49440 + }, + { + "epoch": 0.40906646813086817, + "grad_norm": 754.3132934570312, + "learning_rate": 7.50610458012313e-06, + "loss": 88.8123, + "step": 49450 + }, + { + "epoch": 0.4091491913802374, + "grad_norm": 827.0701904296875, + "learning_rate": 7.504884062508493e-06, + "loss": 85.6176, + "step": 49460 + }, + { + "epoch": 0.4092319146296067, + "grad_norm": 533.4575805664062, + "learning_rate": 7.503663345591726e-06, + "loss": 99.3742, + "step": 49470 + }, + { + "epoch": 0.4093146378789759, + "grad_norm": 914.8536376953125, + "learning_rate": 7.502442429469956e-06, + "loss": 123.0487, + "step": 49480 + }, + { + "epoch": 0.4093973611283451, + "grad_norm": 1090.47998046875, + "learning_rate": 7.501221314240329e-06, + "loss": 110.9421, + "step": 49490 + }, + { + "epoch": 0.4094800843777144, + "grad_norm": 969.5048828125, + "learning_rate": 7.500000000000001e-06, + "loss": 132.2819, + "step": 49500 + }, + { + "epoch": 0.4095628076270836, + "grad_norm": 1003.5017700195312, + "learning_rate": 7.4987784868461455e-06, + "loss": 125.4059, + "step": 49510 + }, + { + "epoch": 
0.40964553087645283, + "grad_norm": 1233.950927734375, + "learning_rate": 7.497556774875953e-06, + "loss": 100.2119, + "step": 49520 + }, + { + "epoch": 0.4097282541258221, + "grad_norm": 638.7094116210938, + "learning_rate": 7.496334864186632e-06, + "loss": 133.3458, + "step": 49530 + }, + { + "epoch": 0.4098109773751913, + "grad_norm": 448.5832214355469, + "learning_rate": 7.4951127548754025e-06, + "loss": 95.0754, + "step": 49540 + }, + { + "epoch": 0.40989370062456054, + "grad_norm": 1257.3818359375, + "learning_rate": 7.4938904470395e-06, + "loss": 91.5809, + "step": 49550 + }, + { + "epoch": 0.40997642387392974, + "grad_norm": 1006.02294921875, + "learning_rate": 7.492667940776182e-06, + "loss": 102.9834, + "step": 49560 + }, + { + "epoch": 0.410059147123299, + "grad_norm": 1712.72607421875, + "learning_rate": 7.491445236182715e-06, + "loss": 103.0404, + "step": 49570 + }, + { + "epoch": 0.41014187037266825, + "grad_norm": 799.1417236328125, + "learning_rate": 7.490222333356384e-06, + "loss": 106.2483, + "step": 49580 + }, + { + "epoch": 0.41022459362203745, + "grad_norm": 1142.0809326171875, + "learning_rate": 7.488999232394492e-06, + "loss": 151.4166, + "step": 49590 + }, + { + "epoch": 0.4103073168714067, + "grad_norm": 1388.60693359375, + "learning_rate": 7.487775933394353e-06, + "loss": 103.9434, + "step": 49600 + }, + { + "epoch": 0.41039004012077596, + "grad_norm": 835.6099853515625, + "learning_rate": 7.4865524364533e-06, + "loss": 104.3207, + "step": 49610 + }, + { + "epoch": 0.41047276337014516, + "grad_norm": 653.7705688476562, + "learning_rate": 7.485328741668683e-06, + "loss": 83.743, + "step": 49620 + }, + { + "epoch": 0.4105554866195144, + "grad_norm": 870.46484375, + "learning_rate": 7.484104849137862e-06, + "loss": 108.1679, + "step": 49630 + }, + { + "epoch": 0.41063820986888366, + "grad_norm": 839.9007568359375, + "learning_rate": 7.482880758958219e-06, + "loss": 83.2425, + "step": 49640 + }, + { + "epoch": 0.41072093311825286, + "grad_norm": 931.4784545898438, + "learning_rate": 7.48165647122715e-06, + "loss": 114.039, + "step": 49650 + }, + { + "epoch": 0.4108036563676221, + "grad_norm": 776.007080078125, + "learning_rate": 7.480431986042065e-06, + "loss": 138.1686, + "step": 49660 + }, + { + "epoch": 0.41088637961699137, + "grad_norm": 890.8333129882812, + "learning_rate": 7.47920730350039e-06, + "loss": 129.4647, + "step": 49670 + }, + { + "epoch": 0.41096910286636057, + "grad_norm": 1296.5220947265625, + "learning_rate": 7.477982423699568e-06, + "loss": 120.0857, + "step": 49680 + }, + { + "epoch": 0.4110518261157298, + "grad_norm": 1131.2828369140625, + "learning_rate": 7.476757346737057e-06, + "loss": 112.3677, + "step": 49690 + }, + { + "epoch": 0.4111345493650991, + "grad_norm": 1283.157470703125, + "learning_rate": 7.47553207271033e-06, + "loss": 114.4162, + "step": 49700 + }, + { + "epoch": 0.4112172726144683, + "grad_norm": 309.3968200683594, + "learning_rate": 7.474306601716877e-06, + "loss": 89.349, + "step": 49710 + }, + { + "epoch": 0.41129999586383753, + "grad_norm": 961.7044677734375, + "learning_rate": 7.473080933854205e-06, + "loss": 81.6981, + "step": 49720 + }, + { + "epoch": 0.4113827191132068, + "grad_norm": 427.75469970703125, + "learning_rate": 7.471855069219831e-06, + "loss": 111.079, + "step": 49730 + }, + { + "epoch": 0.411465442362576, + "grad_norm": 973.0006103515625, + "learning_rate": 7.470629007911294e-06, + "loss": 99.5809, + "step": 49740 + }, + { + "epoch": 0.41154816561194524, + "grad_norm": 1471.537109375, + "learning_rate": 
7.469402750026147e-06, + "loss": 130.2198, + "step": 49750 + }, + { + "epoch": 0.4116308888613145, + "grad_norm": 765.6078491210938, + "learning_rate": 7.468176295661955e-06, + "loss": 124.7598, + "step": 49760 + }, + { + "epoch": 0.4117136121106837, + "grad_norm": 918.6041259765625, + "learning_rate": 7.466949644916301e-06, + "loss": 103.558, + "step": 49770 + }, + { + "epoch": 0.41179633536005295, + "grad_norm": 933.6361694335938, + "learning_rate": 7.465722797886788e-06, + "loss": 94.5863, + "step": 49780 + }, + { + "epoch": 0.4118790586094222, + "grad_norm": 623.9163818359375, + "learning_rate": 7.464495754671027e-06, + "loss": 86.9486, + "step": 49790 + }, + { + "epoch": 0.4119617818587914, + "grad_norm": 592.259033203125, + "learning_rate": 7.4632685153666505e-06, + "loss": 111.5722, + "step": 49800 + }, + { + "epoch": 0.41204450510816065, + "grad_norm": 430.714599609375, + "learning_rate": 7.462041080071301e-06, + "loss": 81.418, + "step": 49810 + }, + { + "epoch": 0.4121272283575299, + "grad_norm": 1098.07373046875, + "learning_rate": 7.460813448882643e-06, + "loss": 145.5894, + "step": 49820 + }, + { + "epoch": 0.4122099516068991, + "grad_norm": 630.3162841796875, + "learning_rate": 7.459585621898353e-06, + "loss": 88.393, + "step": 49830 + }, + { + "epoch": 0.41229267485626836, + "grad_norm": 783.9848022460938, + "learning_rate": 7.4583575992161235e-06, + "loss": 73.975, + "step": 49840 + }, + { + "epoch": 0.4123753981056376, + "grad_norm": 645.27392578125, + "learning_rate": 7.457129380933662e-06, + "loss": 108.0057, + "step": 49850 + }, + { + "epoch": 0.4124581213550068, + "grad_norm": 522.0549926757812, + "learning_rate": 7.4559009671486906e-06, + "loss": 65.7118, + "step": 49860 + }, + { + "epoch": 0.41254084460437607, + "grad_norm": 706.9224853515625, + "learning_rate": 7.454672357958951e-06, + "loss": 127.6793, + "step": 49870 + }, + { + "epoch": 0.4126235678537453, + "grad_norm": 1543.4708251953125, + "learning_rate": 7.453443553462198e-06, + "loss": 138.2186, + "step": 49880 + }, + { + "epoch": 0.4127062911031145, + "grad_norm": 1025.1986083984375, + "learning_rate": 7.4522145537562015e-06, + "loss": 117.5677, + "step": 49890 + }, + { + "epoch": 0.4127890143524838, + "grad_norm": 611.4989013671875, + "learning_rate": 7.450985358938747e-06, + "loss": 106.0235, + "step": 49900 + }, + { + "epoch": 0.41287173760185303, + "grad_norm": 1154.43701171875, + "learning_rate": 7.449755969107635e-06, + "loss": 113.3347, + "step": 49910 + }, + { + "epoch": 0.4129544608512222, + "grad_norm": 1076.0106201171875, + "learning_rate": 7.4485263843606835e-06, + "loss": 104.9749, + "step": 49920 + }, + { + "epoch": 0.4130371841005915, + "grad_norm": 1215.836669921875, + "learning_rate": 7.447296604795726e-06, + "loss": 119.1512, + "step": 49930 + }, + { + "epoch": 0.4131199073499607, + "grad_norm": 1207.3167724609375, + "learning_rate": 7.4460666305106084e-06, + "loss": 101.0483, + "step": 49940 + }, + { + "epoch": 0.41320263059932993, + "grad_norm": 1147.7183837890625, + "learning_rate": 7.444836461603195e-06, + "loss": 101.2153, + "step": 49950 + }, + { + "epoch": 0.4132853538486992, + "grad_norm": 784.3466796875, + "learning_rate": 7.443606098171363e-06, + "loss": 86.7002, + "step": 49960 + }, + { + "epoch": 0.4133680770980684, + "grad_norm": 452.4320068359375, + "learning_rate": 7.442375540313012e-06, + "loss": 94.4948, + "step": 49970 + }, + { + "epoch": 0.41345080034743764, + "grad_norm": 911.47021484375, + "learning_rate": 7.441144788126045e-06, + "loss": 110.6758, + "step": 49980 + }, 
+ { + "epoch": 0.4135335235968069, + "grad_norm": 1147.6248779296875, + "learning_rate": 7.4399138417083925e-06, + "loss": 95.634, + "step": 49990 + }, + { + "epoch": 0.4136162468461761, + "grad_norm": 1253.6624755859375, + "learning_rate": 7.438682701157993e-06, + "loss": 141.2699, + "step": 50000 + }, + { + "epoch": 0.41369897009554535, + "grad_norm": 1440.07080078125, + "learning_rate": 7.437451366572803e-06, + "loss": 109.6948, + "step": 50010 + }, + { + "epoch": 0.4137816933449146, + "grad_norm": 833.8212280273438, + "learning_rate": 7.436219838050793e-06, + "loss": 106.7741, + "step": 50020 + }, + { + "epoch": 0.4138644165942838, + "grad_norm": 999.9931640625, + "learning_rate": 7.4349881156899525e-06, + "loss": 110.1778, + "step": 50030 + }, + { + "epoch": 0.41394713984365306, + "grad_norm": 1517.33251953125, + "learning_rate": 7.433756199588282e-06, + "loss": 106.7463, + "step": 50040 + }, + { + "epoch": 0.4140298630930223, + "grad_norm": 749.0157470703125, + "learning_rate": 7.4325240898438e-06, + "loss": 86.2277, + "step": 50050 + }, + { + "epoch": 0.4141125863423915, + "grad_norm": 1004.859619140625, + "learning_rate": 7.4312917865545406e-06, + "loss": 101.3041, + "step": 50060 + }, + { + "epoch": 0.41419530959176076, + "grad_norm": 2136.435546875, + "learning_rate": 7.430059289818552e-06, + "loss": 109.2253, + "step": 50070 + }, + { + "epoch": 0.41427803284113, + "grad_norm": 1305.9910888671875, + "learning_rate": 7.4288265997338985e-06, + "loss": 116.3073, + "step": 50080 + }, + { + "epoch": 0.4143607560904992, + "grad_norm": 1031.19921875, + "learning_rate": 7.427593716398658e-06, + "loss": 136.2479, + "step": 50090 + }, + { + "epoch": 0.41444347933986847, + "grad_norm": 539.9341430664062, + "learning_rate": 7.426360639910927e-06, + "loss": 86.8462, + "step": 50100 + }, + { + "epoch": 0.4145262025892377, + "grad_norm": 527.5219116210938, + "learning_rate": 7.425127370368815e-06, + "loss": 104.5311, + "step": 50110 + }, + { + "epoch": 0.4146089258386069, + "grad_norm": 836.485107421875, + "learning_rate": 7.423893907870449e-06, + "loss": 125.8744, + "step": 50120 + }, + { + "epoch": 0.4146916490879762, + "grad_norm": 514.3257446289062, + "learning_rate": 7.422660252513969e-06, + "loss": 100.2241, + "step": 50130 + }, + { + "epoch": 0.41477437233734543, + "grad_norm": 773.184814453125, + "learning_rate": 7.421426404397531e-06, + "loss": 82.5671, + "step": 50140 + }, + { + "epoch": 0.41485709558671463, + "grad_norm": 1118.6546630859375, + "learning_rate": 7.420192363619305e-06, + "loss": 116.5798, + "step": 50150 + }, + { + "epoch": 0.4149398188360839, + "grad_norm": 1059.586669921875, + "learning_rate": 7.418958130277483e-06, + "loss": 94.7955, + "step": 50160 + }, + { + "epoch": 0.41502254208545314, + "grad_norm": 468.1589660644531, + "learning_rate": 7.417723704470261e-06, + "loss": 92.9759, + "step": 50170 + }, + { + "epoch": 0.41510526533482234, + "grad_norm": 2084.325927734375, + "learning_rate": 7.4164890862958615e-06, + "loss": 128.8667, + "step": 50180 + }, + { + "epoch": 0.4151879885841916, + "grad_norm": 834.3111572265625, + "learning_rate": 7.415254275852515e-06, + "loss": 103.7669, + "step": 50190 + }, + { + "epoch": 0.41527071183356085, + "grad_norm": 869.2041625976562, + "learning_rate": 7.414019273238471e-06, + "loss": 88.4897, + "step": 50200 + }, + { + "epoch": 0.41535343508293004, + "grad_norm": 1044.08056640625, + "learning_rate": 7.4127840785519915e-06, + "loss": 98.155, + "step": 50210 + }, + { + "epoch": 0.4154361583322993, + "grad_norm": 967.443603515625, 
+ "learning_rate": 7.411548691891357e-06, + "loss": 115.068, + "step": 50220 + }, + { + "epoch": 0.41551888158166855, + "grad_norm": 534.2567138671875, + "learning_rate": 7.41031311335486e-06, + "loss": 106.5501, + "step": 50230 + }, + { + "epoch": 0.41560160483103775, + "grad_norm": 1074.7396240234375, + "learning_rate": 7.409077343040809e-06, + "loss": 153.6957, + "step": 50240 + }, + { + "epoch": 0.415684328080407, + "grad_norm": 657.9391479492188, + "learning_rate": 7.407841381047533e-06, + "loss": 99.6328, + "step": 50250 + }, + { + "epoch": 0.41576705132977626, + "grad_norm": 750.8782958984375, + "learning_rate": 7.406605227473367e-06, + "loss": 88.5056, + "step": 50260 + }, + { + "epoch": 0.41584977457914546, + "grad_norm": 912.8604125976562, + "learning_rate": 7.405368882416668e-06, + "loss": 127.2815, + "step": 50270 + }, + { + "epoch": 0.4159324978285147, + "grad_norm": 722.0275268554688, + "learning_rate": 7.404132345975806e-06, + "loss": 89.6634, + "step": 50280 + }, + { + "epoch": 0.4160152210778839, + "grad_norm": 503.6108093261719, + "learning_rate": 7.4028956182491665e-06, + "loss": 65.2768, + "step": 50290 + }, + { + "epoch": 0.41609794432725317, + "grad_norm": 1528.93212890625, + "learning_rate": 7.401658699335151e-06, + "loss": 110.4871, + "step": 50300 + }, + { + "epoch": 0.4161806675766224, + "grad_norm": 742.7732543945312, + "learning_rate": 7.400421589332175e-06, + "loss": 85.2059, + "step": 50310 + }, + { + "epoch": 0.4162633908259916, + "grad_norm": 1395.8526611328125, + "learning_rate": 7.39918428833867e-06, + "loss": 94.5206, + "step": 50320 + }, + { + "epoch": 0.4163461140753609, + "grad_norm": 978.8078002929688, + "learning_rate": 7.397946796453081e-06, + "loss": 122.651, + "step": 50330 + }, + { + "epoch": 0.41642883732473013, + "grad_norm": 738.7698974609375, + "learning_rate": 7.39670911377387e-06, + "loss": 128.4523, + "step": 50340 + }, + { + "epoch": 0.4165115605740993, + "grad_norm": 924.0703735351562, + "learning_rate": 7.395471240399515e-06, + "loss": 100.5796, + "step": 50350 + }, + { + "epoch": 0.4165942838234686, + "grad_norm": 1406.5579833984375, + "learning_rate": 7.394233176428508e-06, + "loss": 84.9948, + "step": 50360 + }, + { + "epoch": 0.41667700707283784, + "grad_norm": 692.1464233398438, + "learning_rate": 7.3929949219593545e-06, + "loss": 108.0571, + "step": 50370 + }, + { + "epoch": 0.41675973032220703, + "grad_norm": 935.2982177734375, + "learning_rate": 7.391756477090577e-06, + "loss": 106.4006, + "step": 50380 + }, + { + "epoch": 0.4168424535715763, + "grad_norm": 803.556396484375, + "learning_rate": 7.3905178419207126e-06, + "loss": 91.8345, + "step": 50390 + }, + { + "epoch": 0.41692517682094554, + "grad_norm": 170.64480590820312, + "learning_rate": 7.3892790165483164e-06, + "loss": 86.5613, + "step": 50400 + }, + { + "epoch": 0.41700790007031474, + "grad_norm": 846.9085693359375, + "learning_rate": 7.388040001071953e-06, + "loss": 116.5208, + "step": 50410 + }, + { + "epoch": 0.417090623319684, + "grad_norm": 488.96343994140625, + "learning_rate": 7.386800795590208e-06, + "loss": 84.3048, + "step": 50420 + }, + { + "epoch": 0.41717334656905325, + "grad_norm": 2128.975830078125, + "learning_rate": 7.385561400201675e-06, + "loss": 101.9137, + "step": 50430 + }, + { + "epoch": 0.41725606981842245, + "grad_norm": 886.4940795898438, + "learning_rate": 7.384321815004971e-06, + "loss": 122.7343, + "step": 50440 + }, + { + "epoch": 0.4173387930677917, + "grad_norm": 1239.8963623046875, + "learning_rate": 7.383082040098723e-06, + "loss": 
118.5375, + "step": 50450 + }, + { + "epoch": 0.41742151631716096, + "grad_norm": 598.2474365234375, + "learning_rate": 7.381842075581573e-06, + "loss": 82.4002, + "step": 50460 + }, + { + "epoch": 0.41750423956653016, + "grad_norm": 927.5972900390625, + "learning_rate": 7.380601921552181e-06, + "loss": 96.2848, + "step": 50470 + }, + { + "epoch": 0.4175869628158994, + "grad_norm": 1360.900146484375, + "learning_rate": 7.379361578109218e-06, + "loss": 99.3107, + "step": 50480 + }, + { + "epoch": 0.41766968606526866, + "grad_norm": 637.6759643554688, + "learning_rate": 7.378121045351378e-06, + "loss": 115.3234, + "step": 50490 + }, + { + "epoch": 0.41775240931463786, + "grad_norm": 1148.364990234375, + "learning_rate": 7.376880323377357e-06, + "loss": 106.6024, + "step": 50500 + }, + { + "epoch": 0.4178351325640071, + "grad_norm": 567.2054443359375, + "learning_rate": 7.375639412285877e-06, + "loss": 94.4765, + "step": 50510 + }, + { + "epoch": 0.41791785581337637, + "grad_norm": 742.42236328125, + "learning_rate": 7.374398312175674e-06, + "loss": 103.2163, + "step": 50520 + }, + { + "epoch": 0.41800057906274557, + "grad_norm": 698.6530151367188, + "learning_rate": 7.373157023145493e-06, + "loss": 86.0616, + "step": 50530 + }, + { + "epoch": 0.4180833023121148, + "grad_norm": 1061.374755859375, + "learning_rate": 7.371915545294098e-06, + "loss": 74.492, + "step": 50540 + }, + { + "epoch": 0.4181660255614841, + "grad_norm": 748.8624877929688, + "learning_rate": 7.37067387872027e-06, + "loss": 75.0185, + "step": 50550 + }, + { + "epoch": 0.4182487488108533, + "grad_norm": 315.02374267578125, + "learning_rate": 7.369432023522801e-06, + "loss": 77.1597, + "step": 50560 + }, + { + "epoch": 0.41833147206022253, + "grad_norm": 981.3681030273438, + "learning_rate": 7.3681899798005006e-06, + "loss": 118.4615, + "step": 50570 + }, + { + "epoch": 0.4184141953095918, + "grad_norm": 1849.541259765625, + "learning_rate": 7.366947747652191e-06, + "loss": 104.9723, + "step": 50580 + }, + { + "epoch": 0.418496918558961, + "grad_norm": 1334.4669189453125, + "learning_rate": 7.365705327176713e-06, + "loss": 100.1431, + "step": 50590 + }, + { + "epoch": 0.41857964180833024, + "grad_norm": 1951.8896484375, + "learning_rate": 7.364462718472919e-06, + "loss": 142.2957, + "step": 50600 + }, + { + "epoch": 0.4186623650576995, + "grad_norm": 659.5794067382812, + "learning_rate": 7.363219921639677e-06, + "loss": 96.1194, + "step": 50610 + }, + { + "epoch": 0.4187450883070687, + "grad_norm": 960.1915893554688, + "learning_rate": 7.361976936775872e-06, + "loss": 117.1287, + "step": 50620 + }, + { + "epoch": 0.41882781155643795, + "grad_norm": 940.3717651367188, + "learning_rate": 7.360733763980404e-06, + "loss": 110.9005, + "step": 50630 + }, + { + "epoch": 0.4189105348058072, + "grad_norm": 741.2780151367188, + "learning_rate": 7.3594904033521815e-06, + "loss": 86.0149, + "step": 50640 + }, + { + "epoch": 0.4189932580551764, + "grad_norm": 1676.6136474609375, + "learning_rate": 7.358246854990138e-06, + "loss": 119.8522, + "step": 50650 + }, + { + "epoch": 0.41907598130454565, + "grad_norm": 1362.4383544921875, + "learning_rate": 7.357003118993215e-06, + "loss": 125.9308, + "step": 50660 + }, + { + "epoch": 0.41915870455391485, + "grad_norm": 1395.822998046875, + "learning_rate": 7.355759195460371e-06, + "loss": 125.4457, + "step": 50670 + }, + { + "epoch": 0.4192414278032841, + "grad_norm": 754.3294067382812, + "learning_rate": 7.354515084490579e-06, + "loss": 105.6307, + "step": 50680 + }, + { + "epoch": 
0.41932415105265336, + "grad_norm": 1079.152587890625, + "learning_rate": 7.353270786182828e-06, + "loss": 95.9782, + "step": 50690 + }, + { + "epoch": 0.41940687430202256, + "grad_norm": 713.3341674804688, + "learning_rate": 7.352026300636121e-06, + "loss": 104.3826, + "step": 50700 + }, + { + "epoch": 0.4194895975513918, + "grad_norm": 879.9957275390625, + "learning_rate": 7.350781627949475e-06, + "loss": 86.2512, + "step": 50710 + }, + { + "epoch": 0.41957232080076107, + "grad_norm": 1013.7462158203125, + "learning_rate": 7.3495367682219236e-06, + "loss": 110.0644, + "step": 50720 + }, + { + "epoch": 0.41965504405013027, + "grad_norm": 948.6749877929688, + "learning_rate": 7.348291721552514e-06, + "loss": 145.048, + "step": 50730 + }, + { + "epoch": 0.4197377672994995, + "grad_norm": 2777.333251953125, + "learning_rate": 7.3470464880403105e-06, + "loss": 116.8931, + "step": 50740 + }, + { + "epoch": 0.4198204905488688, + "grad_norm": 1682.1556396484375, + "learning_rate": 7.345801067784388e-06, + "loss": 94.5308, + "step": 50750 + }, + { + "epoch": 0.419903213798238, + "grad_norm": 988.3783569335938, + "learning_rate": 7.34455546088384e-06, + "loss": 100.0454, + "step": 50760 + }, + { + "epoch": 0.4199859370476072, + "grad_norm": 721.0875244140625, + "learning_rate": 7.343309667437775e-06, + "loss": 116.8833, + "step": 50770 + }, + { + "epoch": 0.4200686602969765, + "grad_norm": 832.2625122070312, + "learning_rate": 7.3420636875453135e-06, + "loss": 77.9705, + "step": 50780 + }, + { + "epoch": 0.4201513835463457, + "grad_norm": 995.8257446289062, + "learning_rate": 7.340817521305595e-06, + "loss": 101.0271, + "step": 50790 + }, + { + "epoch": 0.42023410679571493, + "grad_norm": 935.89404296875, + "learning_rate": 7.3395711688177676e-06, + "loss": 123.7885, + "step": 50800 + }, + { + "epoch": 0.4203168300450842, + "grad_norm": 957.3890380859375, + "learning_rate": 7.3383246301809985e-06, + "loss": 79.4234, + "step": 50810 + }, + { + "epoch": 0.4203995532944534, + "grad_norm": 1143.3251953125, + "learning_rate": 7.337077905494472e-06, + "loss": 79.35, + "step": 50820 + }, + { + "epoch": 0.42048227654382264, + "grad_norm": 617.7476806640625, + "learning_rate": 7.335830994857382e-06, + "loss": 83.5682, + "step": 50830 + }, + { + "epoch": 0.4205649997931919, + "grad_norm": 923.8682861328125, + "learning_rate": 7.334583898368939e-06, + "loss": 83.5606, + "step": 50840 + }, + { + "epoch": 0.4206477230425611, + "grad_norm": 456.18902587890625, + "learning_rate": 7.333336616128369e-06, + "loss": 110.9043, + "step": 50850 + }, + { + "epoch": 0.42073044629193035, + "grad_norm": 629.4192504882812, + "learning_rate": 7.332089148234913e-06, + "loss": 84.0271, + "step": 50860 + }, + { + "epoch": 0.4208131695412996, + "grad_norm": 777.4642333984375, + "learning_rate": 7.330841494787828e-06, + "loss": 94.1915, + "step": 50870 + }, + { + "epoch": 0.4208958927906688, + "grad_norm": 1021.780029296875, + "learning_rate": 7.329593655886382e-06, + "loss": 86.0737, + "step": 50880 + }, + { + "epoch": 0.42097861604003806, + "grad_norm": 1032.098876953125, + "learning_rate": 7.3283456316298595e-06, + "loss": 83.7801, + "step": 50890 + }, + { + "epoch": 0.4210613392894073, + "grad_norm": 987.5896606445312, + "learning_rate": 7.32709742211756e-06, + "loss": 86.3313, + "step": 50900 + }, + { + "epoch": 0.4211440625387765, + "grad_norm": 799.8026733398438, + "learning_rate": 7.325849027448799e-06, + "loss": 71.7253, + "step": 50910 + }, + { + "epoch": 0.42122678578814576, + "grad_norm": 675.6878051757812, + 
"learning_rate": 7.324600447722907e-06, + "loss": 79.754, + "step": 50920 + }, + { + "epoch": 0.421309509037515, + "grad_norm": 578.5625, + "learning_rate": 7.323351683039224e-06, + "loss": 103.9349, + "step": 50930 + }, + { + "epoch": 0.4213922322868842, + "grad_norm": 1150.8740234375, + "learning_rate": 7.32210273349711e-06, + "loss": 101.4098, + "step": 50940 + }, + { + "epoch": 0.42147495553625347, + "grad_norm": 1209.810546875, + "learning_rate": 7.32085359919594e-06, + "loss": 112.2176, + "step": 50950 + }, + { + "epoch": 0.4215576787856227, + "grad_norm": 1046.1680908203125, + "learning_rate": 7.3196042802350995e-06, + "loss": 81.5384, + "step": 50960 + }, + { + "epoch": 0.4216404020349919, + "grad_norm": 1647.843017578125, + "learning_rate": 7.3183547767139916e-06, + "loss": 106.6991, + "step": 50970 + }, + { + "epoch": 0.4217231252843612, + "grad_norm": 1031.4130859375, + "learning_rate": 7.317105088732035e-06, + "loss": 102.2982, + "step": 50980 + }, + { + "epoch": 0.42180584853373043, + "grad_norm": 739.594482421875, + "learning_rate": 7.31585521638866e-06, + "loss": 83.5706, + "step": 50990 + }, + { + "epoch": 0.42188857178309963, + "grad_norm": 1355.9227294921875, + "learning_rate": 7.314605159783313e-06, + "loss": 155.9577, + "step": 51000 + }, + { + "epoch": 0.4219712950324689, + "grad_norm": 752.1953735351562, + "learning_rate": 7.313354919015457e-06, + "loss": 87.4393, + "step": 51010 + }, + { + "epoch": 0.4220540182818381, + "grad_norm": 1794.4256591796875, + "learning_rate": 7.312104494184566e-06, + "loss": 140.2643, + "step": 51020 + }, + { + "epoch": 0.42213674153120734, + "grad_norm": 1220.9268798828125, + "learning_rate": 7.310853885390133e-06, + "loss": 98.7091, + "step": 51030 + }, + { + "epoch": 0.4222194647805766, + "grad_norm": 2305.75, + "learning_rate": 7.309603092731661e-06, + "loss": 112.4157, + "step": 51040 + }, + { + "epoch": 0.4223021880299458, + "grad_norm": 1095.7064208984375, + "learning_rate": 7.30835211630867e-06, + "loss": 102.6219, + "step": 51050 + }, + { + "epoch": 0.42238491127931505, + "grad_norm": 678.8933715820312, + "learning_rate": 7.3071009562206965e-06, + "loss": 88.1793, + "step": 51060 + }, + { + "epoch": 0.4224676345286843, + "grad_norm": 809.6482543945312, + "learning_rate": 7.305849612567287e-06, + "loss": 99.8888, + "step": 51070 + }, + { + "epoch": 0.4225503577780535, + "grad_norm": 1535.9327392578125, + "learning_rate": 7.304598085448007e-06, + "loss": 119.08, + "step": 51080 + }, + { + "epoch": 0.42263308102742275, + "grad_norm": 953.1244506835938, + "learning_rate": 7.303346374962433e-06, + "loss": 86.5312, + "step": 51090 + }, + { + "epoch": 0.422715804276792, + "grad_norm": 1713.26318359375, + "learning_rate": 7.302094481210159e-06, + "loss": 103.2178, + "step": 51100 + }, + { + "epoch": 0.4227985275261612, + "grad_norm": 826.4808959960938, + "learning_rate": 7.300842404290792e-06, + "loss": 96.0054, + "step": 51110 + }, + { + "epoch": 0.42288125077553046, + "grad_norm": 1028.5643310546875, + "learning_rate": 7.2995901443039554e-06, + "loss": 128.5591, + "step": 51120 + }, + { + "epoch": 0.4229639740248997, + "grad_norm": 840.199462890625, + "learning_rate": 7.298337701349285e-06, + "loss": 116.7384, + "step": 51130 + }, + { + "epoch": 0.4230466972742689, + "grad_norm": 672.873779296875, + "learning_rate": 7.29708507552643e-06, + "loss": 68.3276, + "step": 51140 + }, + { + "epoch": 0.42312942052363817, + "grad_norm": 383.77313232421875, + "learning_rate": 7.295832266935059e-06, + "loss": 84.0062, + "step": 51150 + }, + { + 
"epoch": 0.4232121437730074, + "grad_norm": 1001.8519287109375, + "learning_rate": 7.2945792756748505e-06, + "loss": 140.7101, + "step": 51160 + }, + { + "epoch": 0.4232948670223766, + "grad_norm": 709.9981689453125, + "learning_rate": 7.2933261018455005e-06, + "loss": 75.1831, + "step": 51170 + }, + { + "epoch": 0.4233775902717459, + "grad_norm": 675.74658203125, + "learning_rate": 7.292072745546716e-06, + "loss": 73.5509, + "step": 51180 + }, + { + "epoch": 0.42346031352111513, + "grad_norm": 886.9816284179688, + "learning_rate": 7.290819206878223e-06, + "loss": 92.8137, + "step": 51190 + }, + { + "epoch": 0.4235430367704843, + "grad_norm": 688.6138305664062, + "learning_rate": 7.289565485939759e-06, + "loss": 109.6997, + "step": 51200 + }, + { + "epoch": 0.4236257600198536, + "grad_norm": 1372.497802734375, + "learning_rate": 7.288311582831078e-06, + "loss": 86.5049, + "step": 51210 + }, + { + "epoch": 0.42370848326922284, + "grad_norm": 632.5243530273438, + "learning_rate": 7.2870574976519455e-06, + "loss": 73.3826, + "step": 51220 + }, + { + "epoch": 0.42379120651859203, + "grad_norm": 564.6524658203125, + "learning_rate": 7.2858032305021455e-06, + "loss": 123.0075, + "step": 51230 + }, + { + "epoch": 0.4238739297679613, + "grad_norm": 758.579833984375, + "learning_rate": 7.28454878148147e-06, + "loss": 97.95, + "step": 51240 + }, + { + "epoch": 0.42395665301733054, + "grad_norm": 1073.8216552734375, + "learning_rate": 7.283294150689735e-06, + "loss": 88.5506, + "step": 51250 + }, + { + "epoch": 0.42403937626669974, + "grad_norm": 621.6570434570312, + "learning_rate": 7.282039338226763e-06, + "loss": 106.7351, + "step": 51260 + }, + { + "epoch": 0.424122099516069, + "grad_norm": 1591.994384765625, + "learning_rate": 7.280784344192393e-06, + "loss": 117.9606, + "step": 51270 + }, + { + "epoch": 0.42420482276543825, + "grad_norm": 811.947265625, + "learning_rate": 7.279529168686481e-06, + "loss": 83.5623, + "step": 51280 + }, + { + "epoch": 0.42428754601480745, + "grad_norm": 939.7516479492188, + "learning_rate": 7.278273811808894e-06, + "loss": 91.11, + "step": 51290 + }, + { + "epoch": 0.4243702692641767, + "grad_norm": 1210.847900390625, + "learning_rate": 7.2770182736595164e-06, + "loss": 130.9754, + "step": 51300 + }, + { + "epoch": 0.42445299251354596, + "grad_norm": 1220.7769775390625, + "learning_rate": 7.275762554338244e-06, + "loss": 76.7486, + "step": 51310 + }, + { + "epoch": 0.42453571576291516, + "grad_norm": 1265.53271484375, + "learning_rate": 7.2745066539449905e-06, + "loss": 95.9141, + "step": 51320 + }, + { + "epoch": 0.4246184390122844, + "grad_norm": 875.6475219726562, + "learning_rate": 7.27325057257968e-06, + "loss": 97.6407, + "step": 51330 + }, + { + "epoch": 0.42470116226165366, + "grad_norm": 693.2019653320312, + "learning_rate": 7.271994310342254e-06, + "loss": 99.6023, + "step": 51340 + }, + { + "epoch": 0.42478388551102286, + "grad_norm": 897.9375, + "learning_rate": 7.270737867332669e-06, + "loss": 104.1033, + "step": 51350 + }, + { + "epoch": 0.4248666087603921, + "grad_norm": 1381.2633056640625, + "learning_rate": 7.2694812436508934e-06, + "loss": 98.0639, + "step": 51360 + }, + { + "epoch": 0.4249493320097613, + "grad_norm": 825.0390625, + "learning_rate": 7.268224439396909e-06, + "loss": 109.9973, + "step": 51370 + }, + { + "epoch": 0.42503205525913057, + "grad_norm": 997.9774780273438, + "learning_rate": 7.266967454670717e-06, + "loss": 115.4315, + "step": 51380 + }, + { + "epoch": 0.4251147785084998, + "grad_norm": 1014.7069702148438, + 
"learning_rate": 7.265710289572328e-06, + "loss": 122.6427, + "step": 51390 + }, + { + "epoch": 0.425197501757869, + "grad_norm": 1186.5941162109375, + "learning_rate": 7.264452944201771e-06, + "loss": 79.164, + "step": 51400 + }, + { + "epoch": 0.4252802250072383, + "grad_norm": 896.6864013671875, + "learning_rate": 7.263195418659083e-06, + "loss": 94.9668, + "step": 51410 + }, + { + "epoch": 0.42536294825660753, + "grad_norm": 1269.0845947265625, + "learning_rate": 7.261937713044325e-06, + "loss": 111.4878, + "step": 51420 + }, + { + "epoch": 0.42544567150597673, + "grad_norm": 737.5647583007812, + "learning_rate": 7.260679827457562e-06, + "loss": 144.4097, + "step": 51430 + }, + { + "epoch": 0.425528394755346, + "grad_norm": 1106.5899658203125, + "learning_rate": 7.259421761998881e-06, + "loss": 97.2455, + "step": 51440 + }, + { + "epoch": 0.42561111800471524, + "grad_norm": 867.489013671875, + "learning_rate": 7.2581635167683805e-06, + "loss": 105.3224, + "step": 51450 + }, + { + "epoch": 0.42569384125408444, + "grad_norm": 693.6383666992188, + "learning_rate": 7.256905091866171e-06, + "loss": 101.9842, + "step": 51460 + }, + { + "epoch": 0.4257765645034537, + "grad_norm": 611.8115844726562, + "learning_rate": 7.255646487392382e-06, + "loss": 115.6671, + "step": 51470 + }, + { + "epoch": 0.42585928775282295, + "grad_norm": 1504.69775390625, + "learning_rate": 7.254387703447154e-06, + "loss": 95.3933, + "step": 51480 + }, + { + "epoch": 0.42594201100219214, + "grad_norm": 600.2556762695312, + "learning_rate": 7.2531287401306435e-06, + "loss": 92.5239, + "step": 51490 + }, + { + "epoch": 0.4260247342515614, + "grad_norm": 407.8628845214844, + "learning_rate": 7.251869597543019e-06, + "loss": 82.7194, + "step": 51500 + }, + { + "epoch": 0.42610745750093065, + "grad_norm": 614.1116943359375, + "learning_rate": 7.250610275784464e-06, + "loss": 93.6232, + "step": 51510 + }, + { + "epoch": 0.42619018075029985, + "grad_norm": 898.9727172851562, + "learning_rate": 7.2493507749551795e-06, + "loss": 85.4422, + "step": 51520 + }, + { + "epoch": 0.4262729039996691, + "grad_norm": 1155.338623046875, + "learning_rate": 7.248091095155378e-06, + "loss": 118.6169, + "step": 51530 + }, + { + "epoch": 0.42635562724903836, + "grad_norm": 1159.13916015625, + "learning_rate": 7.246831236485283e-06, + "loss": 135.8041, + "step": 51540 + }, + { + "epoch": 0.42643835049840756, + "grad_norm": 928.2463989257812, + "learning_rate": 7.245571199045139e-06, + "loss": 108.4106, + "step": 51550 + }, + { + "epoch": 0.4265210737477768, + "grad_norm": 767.41259765625, + "learning_rate": 7.244310982935202e-06, + "loss": 74.6865, + "step": 51560 + }, + { + "epoch": 0.42660379699714607, + "grad_norm": 709.4650268554688, + "learning_rate": 7.243050588255738e-06, + "loss": 76.4944, + "step": 51570 + }, + { + "epoch": 0.42668652024651527, + "grad_norm": 1164.441162109375, + "learning_rate": 7.241790015107034e-06, + "loss": 88.2155, + "step": 51580 + }, + { + "epoch": 0.4267692434958845, + "grad_norm": 1503.4273681640625, + "learning_rate": 7.240529263589386e-06, + "loss": 126.4441, + "step": 51590 + }, + { + "epoch": 0.4268519667452538, + "grad_norm": 760.6532592773438, + "learning_rate": 7.239268333803109e-06, + "loss": 105.8668, + "step": 51600 + }, + { + "epoch": 0.426934689994623, + "grad_norm": 1101.9364013671875, + "learning_rate": 7.2380072258485265e-06, + "loss": 92.2242, + "step": 51610 + }, + { + "epoch": 0.4270174132439922, + "grad_norm": 554.7161254882812, + "learning_rate": 7.2367459398259795e-06, + "loss": 
83.6779, + "step": 51620 + }, + { + "epoch": 0.4271001364933615, + "grad_norm": 972.8421020507812, + "learning_rate": 7.2354844758358234e-06, + "loss": 123.7181, + "step": 51630 + }, + { + "epoch": 0.4271828597427307, + "grad_norm": 1161.73828125, + "learning_rate": 7.234222833978427e-06, + "loss": 106.6433, + "step": 51640 + }, + { + "epoch": 0.42726558299209993, + "grad_norm": 1067.09765625, + "learning_rate": 7.232961014354175e-06, + "loss": 104.9934, + "step": 51650 + }, + { + "epoch": 0.4273483062414692, + "grad_norm": 1101.327392578125, + "learning_rate": 7.23169901706346e-06, + "loss": 106.5562, + "step": 51660 + }, + { + "epoch": 0.4274310294908384, + "grad_norm": 1341.46044921875, + "learning_rate": 7.2304368422067e-06, + "loss": 134.7537, + "step": 51670 + }, + { + "epoch": 0.42751375274020764, + "grad_norm": 1436.9295654296875, + "learning_rate": 7.2291744898843145e-06, + "loss": 98.4549, + "step": 51680 + }, + { + "epoch": 0.4275964759895769, + "grad_norm": 540.67236328125, + "learning_rate": 7.227911960196746e-06, + "loss": 86.389, + "step": 51690 + }, + { + "epoch": 0.4276791992389461, + "grad_norm": 1108.2845458984375, + "learning_rate": 7.226649253244448e-06, + "loss": 74.8376, + "step": 51700 + }, + { + "epoch": 0.42776192248831535, + "grad_norm": 964.5218505859375, + "learning_rate": 7.225386369127886e-06, + "loss": 112.0215, + "step": 51710 + }, + { + "epoch": 0.4278446457376846, + "grad_norm": 768.393310546875, + "learning_rate": 7.224123307947545e-06, + "loss": 94.5367, + "step": 51720 + }, + { + "epoch": 0.4279273689870538, + "grad_norm": 708.0656127929688, + "learning_rate": 7.2228600698039205e-06, + "loss": 108.4423, + "step": 51730 + }, + { + "epoch": 0.42801009223642306, + "grad_norm": 1345.224853515625, + "learning_rate": 7.221596654797522e-06, + "loss": 91.7173, + "step": 51740 + }, + { + "epoch": 0.42809281548579226, + "grad_norm": 1059.3037109375, + "learning_rate": 7.2203330630288714e-06, + "loss": 109.0566, + "step": 51750 + }, + { + "epoch": 0.4281755387351615, + "grad_norm": 975.2772216796875, + "learning_rate": 7.21906929459851e-06, + "loss": 140.6817, + "step": 51760 + }, + { + "epoch": 0.42825826198453076, + "grad_norm": 1177.9395751953125, + "learning_rate": 7.217805349606988e-06, + "loss": 91.187, + "step": 51770 + }, + { + "epoch": 0.42834098523389996, + "grad_norm": 685.8370971679688, + "learning_rate": 7.216541228154875e-06, + "loss": 93.8095, + "step": 51780 + }, + { + "epoch": 0.4284237084832692, + "grad_norm": 1138.31689453125, + "learning_rate": 7.215276930342747e-06, + "loss": 104.4566, + "step": 51790 + }, + { + "epoch": 0.42850643173263847, + "grad_norm": 601.1682739257812, + "learning_rate": 7.214012456271202e-06, + "loss": 130.8603, + "step": 51800 + }, + { + "epoch": 0.42858915498200767, + "grad_norm": 1000.6312866210938, + "learning_rate": 7.212747806040845e-06, + "loss": 123.3161, + "step": 51810 + }, + { + "epoch": 0.4286718782313769, + "grad_norm": 1142.5823974609375, + "learning_rate": 7.211482979752302e-06, + "loss": 85.4368, + "step": 51820 + }, + { + "epoch": 0.4287546014807462, + "grad_norm": 816.385986328125, + "learning_rate": 7.210217977506207e-06, + "loss": 109.9975, + "step": 51830 + }, + { + "epoch": 0.4288373247301154, + "grad_norm": 944.0142822265625, + "learning_rate": 7.208952799403211e-06, + "loss": 108.7334, + "step": 51840 + }, + { + "epoch": 0.42892004797948463, + "grad_norm": 824.8801879882812, + "learning_rate": 7.207687445543977e-06, + "loss": 82.7929, + "step": 51850 + }, + { + "epoch": 0.4290027712288539, + 
"grad_norm": 964.5783081054688, + "learning_rate": 7.206421916029187e-06, + "loss": 121.5998, + "step": 51860 + }, + { + "epoch": 0.4290854944782231, + "grad_norm": 1063.656982421875, + "learning_rate": 7.205156210959529e-06, + "loss": 90.583, + "step": 51870 + }, + { + "epoch": 0.42916821772759234, + "grad_norm": 3292.513671875, + "learning_rate": 7.203890330435715e-06, + "loss": 105.8095, + "step": 51880 + }, + { + "epoch": 0.4292509409769616, + "grad_norm": 703.033447265625, + "learning_rate": 7.202624274558458e-06, + "loss": 106.7044, + "step": 51890 + }, + { + "epoch": 0.4293336642263308, + "grad_norm": 669.6672973632812, + "learning_rate": 7.201358043428499e-06, + "loss": 89.5573, + "step": 51900 + }, + { + "epoch": 0.42941638747570005, + "grad_norm": 647.8473510742188, + "learning_rate": 7.200091637146582e-06, + "loss": 99.5425, + "step": 51910 + }, + { + "epoch": 0.4294991107250693, + "grad_norm": 1656.5313720703125, + "learning_rate": 7.198825055813471e-06, + "loss": 120.4202, + "step": 51920 + }, + { + "epoch": 0.4295818339744385, + "grad_norm": 755.7421875, + "learning_rate": 7.197558299529941e-06, + "loss": 74.5723, + "step": 51930 + }, + { + "epoch": 0.42966455722380775, + "grad_norm": 1194.8021240234375, + "learning_rate": 7.196291368396784e-06, + "loss": 104.8849, + "step": 51940 + }, + { + "epoch": 0.429747280473177, + "grad_norm": 810.8287353515625, + "learning_rate": 7.1950242625148e-06, + "loss": 109.7433, + "step": 51950 + }, + { + "epoch": 0.4298300037225462, + "grad_norm": 5541.013671875, + "learning_rate": 7.1937569819848115e-06, + "loss": 117.9274, + "step": 51960 + }, + { + "epoch": 0.42991272697191546, + "grad_norm": 1193.3897705078125, + "learning_rate": 7.192489526907646e-06, + "loss": 112.9372, + "step": 51970 + }, + { + "epoch": 0.4299954502212847, + "grad_norm": 1110.0302734375, + "learning_rate": 7.191221897384153e-06, + "loss": 120.9369, + "step": 51980 + }, + { + "epoch": 0.4300781734706539, + "grad_norm": 1447.90771484375, + "learning_rate": 7.189954093515189e-06, + "loss": 142.4958, + "step": 51990 + }, + { + "epoch": 0.43016089672002317, + "grad_norm": 1462.6444091796875, + "learning_rate": 7.188686115401628e-06, + "loss": 127.5024, + "step": 52000 + }, + { + "epoch": 0.4302436199693924, + "grad_norm": 1592.32666015625, + "learning_rate": 7.187417963144358e-06, + "loss": 102.1783, + "step": 52010 + }, + { + "epoch": 0.4303263432187616, + "grad_norm": 356.8098449707031, + "learning_rate": 7.18614963684428e-06, + "loss": 94.6831, + "step": 52020 + }, + { + "epoch": 0.4304090664681309, + "grad_norm": 762.9508666992188, + "learning_rate": 7.184881136602309e-06, + "loss": 117.5537, + "step": 52030 + }, + { + "epoch": 0.43049178971750013, + "grad_norm": 614.644775390625, + "learning_rate": 7.183612462519371e-06, + "loss": 120.3886, + "step": 52040 + }, + { + "epoch": 0.4305745129668693, + "grad_norm": 884.6917724609375, + "learning_rate": 7.182343614696412e-06, + "loss": 87.1297, + "step": 52050 + }, + { + "epoch": 0.4306572362162386, + "grad_norm": 1503.93603515625, + "learning_rate": 7.181074593234387e-06, + "loss": 102.104, + "step": 52060 + }, + { + "epoch": 0.43073995946560784, + "grad_norm": 431.5899963378906, + "learning_rate": 7.179805398234266e-06, + "loss": 93.1396, + "step": 52070 + }, + { + "epoch": 0.43082268271497703, + "grad_norm": 1156.7777099609375, + "learning_rate": 7.178536029797035e-06, + "loss": 113.4172, + "step": 52080 + }, + { + "epoch": 0.4309054059643463, + "grad_norm": 680.9636840820312, + "learning_rate": 7.177266488023688e-06, + 
"loss": 92.4326, + "step": 52090 + }, + { + "epoch": 0.4309881292137155, + "grad_norm": 1016.119873046875, + "learning_rate": 7.17599677301524e-06, + "loss": 83.3582, + "step": 52100 + }, + { + "epoch": 0.43107085246308474, + "grad_norm": 625.1591796875, + "learning_rate": 7.174726884872716e-06, + "loss": 85.9998, + "step": 52110 + }, + { + "epoch": 0.431153575712454, + "grad_norm": 1880.2071533203125, + "learning_rate": 7.173456823697154e-06, + "loss": 148.4656, + "step": 52120 + }, + { + "epoch": 0.4312362989618232, + "grad_norm": 559.7216796875, + "learning_rate": 7.172186589589607e-06, + "loss": 117.4733, + "step": 52130 + }, + { + "epoch": 0.43131902221119245, + "grad_norm": 878.204833984375, + "learning_rate": 7.170916182651141e-06, + "loss": 72.1657, + "step": 52140 + }, + { + "epoch": 0.4314017454605617, + "grad_norm": 785.7388916015625, + "learning_rate": 7.1696456029828386e-06, + "loss": 78.2875, + "step": 52150 + }, + { + "epoch": 0.4314844687099309, + "grad_norm": 960.450439453125, + "learning_rate": 7.168374850685794e-06, + "loss": 99.7961, + "step": 52160 + }, + { + "epoch": 0.43156719195930016, + "grad_norm": 1000.7466430664062, + "learning_rate": 7.167103925861113e-06, + "loss": 113.2332, + "step": 52170 + }, + { + "epoch": 0.4316499152086694, + "grad_norm": 923.0742797851562, + "learning_rate": 7.165832828609918e-06, + "loss": 108.3951, + "step": 52180 + }, + { + "epoch": 0.4317326384580386, + "grad_norm": 1476.0849609375, + "learning_rate": 7.164561559033344e-06, + "loss": 104.3691, + "step": 52190 + }, + { + "epoch": 0.43181536170740786, + "grad_norm": 656.1659545898438, + "learning_rate": 7.163290117232542e-06, + "loss": 111.1306, + "step": 52200 + }, + { + "epoch": 0.4318980849567771, + "grad_norm": 1209.8184814453125, + "learning_rate": 7.162018503308674e-06, + "loss": 105.6424, + "step": 52210 + }, + { + "epoch": 0.4319808082061463, + "grad_norm": 780.0021362304688, + "learning_rate": 7.1607467173629145e-06, + "loss": 90.2464, + "step": 52220 + }, + { + "epoch": 0.43206353145551557, + "grad_norm": 785.8810424804688, + "learning_rate": 7.1594747594964564e-06, + "loss": 123.5292, + "step": 52230 + }, + { + "epoch": 0.4321462547048848, + "grad_norm": 1079.1829833984375, + "learning_rate": 7.1582026298105e-06, + "loss": 79.3976, + "step": 52240 + }, + { + "epoch": 0.432228977954254, + "grad_norm": 1131.198974609375, + "learning_rate": 7.156930328406268e-06, + "loss": 137.1325, + "step": 52250 + }, + { + "epoch": 0.4323117012036233, + "grad_norm": 1154.282470703125, + "learning_rate": 7.1556578553849875e-06, + "loss": 78.7498, + "step": 52260 + }, + { + "epoch": 0.43239442445299253, + "grad_norm": 783.6915283203125, + "learning_rate": 7.154385210847905e-06, + "loss": 96.9286, + "step": 52270 + }, + { + "epoch": 0.43247714770236173, + "grad_norm": 1002.9415893554688, + "learning_rate": 7.153112394896279e-06, + "loss": 101.9183, + "step": 52280 + }, + { + "epoch": 0.432559870951731, + "grad_norm": 805.485595703125, + "learning_rate": 7.15183940763138e-06, + "loss": 84.4508, + "step": 52290 + }, + { + "epoch": 0.43264259420110024, + "grad_norm": 972.188720703125, + "learning_rate": 7.150566249154496e-06, + "loss": 112.0686, + "step": 52300 + }, + { + "epoch": 0.43272531745046944, + "grad_norm": 694.895263671875, + "learning_rate": 7.149292919566924e-06, + "loss": 86.5633, + "step": 52310 + }, + { + "epoch": 0.4328080406998387, + "grad_norm": 507.7707214355469, + "learning_rate": 7.148019418969979e-06, + "loss": 108.7999, + "step": 52320 + }, + { + "epoch": 
0.43289076394920795, + "grad_norm": 928.7743530273438, + "learning_rate": 7.146745747464987e-06, + "loss": 133.0144, + "step": 52330 + }, + { + "epoch": 0.43297348719857714, + "grad_norm": 888.1306762695312, + "learning_rate": 7.145471905153288e-06, + "loss": 74.0938, + "step": 52340 + }, + { + "epoch": 0.4330562104479464, + "grad_norm": 602.6085815429688, + "learning_rate": 7.1441978921362365e-06, + "loss": 106.0087, + "step": 52350 + }, + { + "epoch": 0.43313893369731565, + "grad_norm": 739.8253784179688, + "learning_rate": 7.142923708515199e-06, + "loss": 130.5773, + "step": 52360 + }, + { + "epoch": 0.43322165694668485, + "grad_norm": 561.8637084960938, + "learning_rate": 7.141649354391556e-06, + "loss": 116.5679, + "step": 52370 + }, + { + "epoch": 0.4333043801960541, + "grad_norm": 635.5332641601562, + "learning_rate": 7.140374829866703e-06, + "loss": 102.7501, + "step": 52380 + }, + { + "epoch": 0.43338710344542336, + "grad_norm": 849.3636474609375, + "learning_rate": 7.1391001350420486e-06, + "loss": 112.318, + "step": 52390 + }, + { + "epoch": 0.43346982669479256, + "grad_norm": 968.818603515625, + "learning_rate": 7.137825270019012e-06, + "loss": 116.6222, + "step": 52400 + }, + { + "epoch": 0.4335525499441618, + "grad_norm": 879.7605590820312, + "learning_rate": 7.1365502348990315e-06, + "loss": 72.7146, + "step": 52410 + }, + { + "epoch": 0.43363527319353107, + "grad_norm": 974.20947265625, + "learning_rate": 7.135275029783554e-06, + "loss": 87.4071, + "step": 52420 + }, + { + "epoch": 0.43371799644290027, + "grad_norm": 898.2373046875, + "learning_rate": 7.133999654774041e-06, + "loss": 96.0373, + "step": 52430 + }, + { + "epoch": 0.4338007196922695, + "grad_norm": 1289.7435302734375, + "learning_rate": 7.13272410997197e-06, + "loss": 121.3297, + "step": 52440 + }, + { + "epoch": 0.4338834429416388, + "grad_norm": 406.2857666015625, + "learning_rate": 7.13144839547883e-06, + "loss": 72.3889, + "step": 52450 + }, + { + "epoch": 0.433966166191008, + "grad_norm": 1770.5550537109375, + "learning_rate": 7.130172511396123e-06, + "loss": 157.1078, + "step": 52460 + }, + { + "epoch": 0.43404888944037723, + "grad_norm": 924.5420532226562, + "learning_rate": 7.128896457825364e-06, + "loss": 94.4494, + "step": 52470 + }, + { + "epoch": 0.4341316126897464, + "grad_norm": 1168.6419677734375, + "learning_rate": 7.127620234868085e-06, + "loss": 151.9046, + "step": 52480 + }, + { + "epoch": 0.4342143359391157, + "grad_norm": 923.4803466796875, + "learning_rate": 7.126343842625828e-06, + "loss": 103.1586, + "step": 52490 + }, + { + "epoch": 0.43429705918848494, + "grad_norm": 849.2039794921875, + "learning_rate": 7.1250672812001505e-06, + "loss": 98.9931, + "step": 52500 + }, + { + "epoch": 0.43437978243785413, + "grad_norm": 1026.3223876953125, + "learning_rate": 7.123790550692624e-06, + "loss": 81.8112, + "step": 52510 + }, + { + "epoch": 0.4344625056872234, + "grad_norm": 1015.2179565429688, + "learning_rate": 7.1225136512048275e-06, + "loss": 98.0542, + "step": 52520 + }, + { + "epoch": 0.43454522893659264, + "grad_norm": 756.0176391601562, + "learning_rate": 7.1212365828383615e-06, + "loss": 143.049, + "step": 52530 + }, + { + "epoch": 0.43462795218596184, + "grad_norm": 919.13427734375, + "learning_rate": 7.119959345694835e-06, + "loss": 89.8264, + "step": 52540 + }, + { + "epoch": 0.4347106754353311, + "grad_norm": 693.4651489257812, + "learning_rate": 7.118681939875875e-06, + "loss": 129.897, + "step": 52550 + }, + { + "epoch": 0.43479339868470035, + "grad_norm": 1032.639404296875, + 
"learning_rate": 7.117404365483116e-06, + "loss": 109.7115, + "step": 52560 + }, + { + "epoch": 0.43487612193406955, + "grad_norm": 644.7308959960938, + "learning_rate": 7.116126622618207e-06, + "loss": 121.4155, + "step": 52570 + }, + { + "epoch": 0.4349588451834388, + "grad_norm": 1076.7471923828125, + "learning_rate": 7.114848711382816e-06, + "loss": 105.8533, + "step": 52580 + }, + { + "epoch": 0.43504156843280806, + "grad_norm": 682.4072265625, + "learning_rate": 7.1135706318786195e-06, + "loss": 126.4976, + "step": 52590 + }, + { + "epoch": 0.43512429168217726, + "grad_norm": 334.1803894042969, + "learning_rate": 7.112292384207306e-06, + "loss": 72.7947, + "step": 52600 + }, + { + "epoch": 0.4352070149315465, + "grad_norm": 501.4839172363281, + "learning_rate": 7.111013968470581e-06, + "loss": 88.0988, + "step": 52610 + }, + { + "epoch": 0.43528973818091576, + "grad_norm": 1030.7449951171875, + "learning_rate": 7.109735384770166e-06, + "loss": 92.1345, + "step": 52620 + }, + { + "epoch": 0.43537246143028496, + "grad_norm": 1418.4031982421875, + "learning_rate": 7.108456633207787e-06, + "loss": 137.2343, + "step": 52630 + }, + { + "epoch": 0.4354551846796542, + "grad_norm": 1056.733154296875, + "learning_rate": 7.10717771388519e-06, + "loss": 122.0539, + "step": 52640 + }, + { + "epoch": 0.43553790792902347, + "grad_norm": 567.1400756835938, + "learning_rate": 7.105898626904134e-06, + "loss": 97.4046, + "step": 52650 + }, + { + "epoch": 0.43562063117839267, + "grad_norm": 557.343017578125, + "learning_rate": 7.104619372366387e-06, + "loss": 97.5606, + "step": 52660 + }, + { + "epoch": 0.4357033544277619, + "grad_norm": 663.3614501953125, + "learning_rate": 7.103339950373737e-06, + "loss": 78.0228, + "step": 52670 + }, + { + "epoch": 0.4357860776771312, + "grad_norm": 726.0701293945312, + "learning_rate": 7.102060361027981e-06, + "loss": 122.9625, + "step": 52680 + }, + { + "epoch": 0.4358688009265004, + "grad_norm": 833.1370239257812, + "learning_rate": 7.100780604430928e-06, + "loss": 92.9005, + "step": 52690 + }, + { + "epoch": 0.43595152417586963, + "grad_norm": 1035.490478515625, + "learning_rate": 7.099500680684404e-06, + "loss": 82.178, + "step": 52700 + }, + { + "epoch": 0.4360342474252389, + "grad_norm": 721.7255859375, + "learning_rate": 7.0982205898902444e-06, + "loss": 134.4474, + "step": 52710 + }, + { + "epoch": 0.4361169706746081, + "grad_norm": 872.9884033203125, + "learning_rate": 7.096940332150305e-06, + "loss": 112.2354, + "step": 52720 + }, + { + "epoch": 0.43619969392397734, + "grad_norm": 504.2191162109375, + "learning_rate": 7.095659907566446e-06, + "loss": 69.3615, + "step": 52730 + }, + { + "epoch": 0.4362824171733466, + "grad_norm": 1016.4193725585938, + "learning_rate": 7.094379316240545e-06, + "loss": 88.9207, + "step": 52740 + }, + { + "epoch": 0.4363651404227158, + "grad_norm": 972.1260986328125, + "learning_rate": 7.093098558274494e-06, + "loss": 104.1136, + "step": 52750 + }, + { + "epoch": 0.43644786367208505, + "grad_norm": 1002.4033203125, + "learning_rate": 7.091817633770197e-06, + "loss": 94.7899, + "step": 52760 + }, + { + "epoch": 0.4365305869214543, + "grad_norm": 880.1514892578125, + "learning_rate": 7.090536542829571e-06, + "loss": 87.8467, + "step": 52770 + }, + { + "epoch": 0.4366133101708235, + "grad_norm": 620.6061401367188, + "learning_rate": 7.089255285554546e-06, + "loss": 136.4645, + "step": 52780 + }, + { + "epoch": 0.43669603342019275, + "grad_norm": 1144.162353515625, + "learning_rate": 7.087973862047067e-06, + "loss": 100.7308, + 
"step": 52790 + }, + { + "epoch": 0.436778756669562, + "grad_norm": 848.2889404296875, + "learning_rate": 7.08669227240909e-06, + "loss": 87.7345, + "step": 52800 + }, + { + "epoch": 0.4368614799189312, + "grad_norm": 812.3153686523438, + "learning_rate": 7.085410516742586e-06, + "loss": 101.8244, + "step": 52810 + }, + { + "epoch": 0.43694420316830046, + "grad_norm": 872.6069946289062, + "learning_rate": 7.084128595149538e-06, + "loss": 104.2199, + "step": 52820 + }, + { + "epoch": 0.43702692641766966, + "grad_norm": 897.1786499023438, + "learning_rate": 7.082846507731942e-06, + "loss": 96.152, + "step": 52830 + }, + { + "epoch": 0.4371096496670389, + "grad_norm": 631.2103271484375, + "learning_rate": 7.081564254591809e-06, + "loss": 101.3164, + "step": 52840 + }, + { + "epoch": 0.43719237291640817, + "grad_norm": 781.96044921875, + "learning_rate": 7.08028183583116e-06, + "loss": 114.7007, + "step": 52850 + }, + { + "epoch": 0.43727509616577737, + "grad_norm": 995.4803466796875, + "learning_rate": 7.078999251552034e-06, + "loss": 98.927, + "step": 52860 + }, + { + "epoch": 0.4373578194151466, + "grad_norm": 841.7239990234375, + "learning_rate": 7.077716501856478e-06, + "loss": 106.8644, + "step": 52870 + }, + { + "epoch": 0.4374405426645159, + "grad_norm": 455.7330322265625, + "learning_rate": 7.076433586846555e-06, + "loss": 119.5307, + "step": 52880 + }, + { + "epoch": 0.4375232659138851, + "grad_norm": 1019.272216796875, + "learning_rate": 7.075150506624342e-06, + "loss": 120.9018, + "step": 52890 + }, + { + "epoch": 0.4376059891632543, + "grad_norm": 783.7548828125, + "learning_rate": 7.073867261291926e-06, + "loss": 107.6927, + "step": 52900 + }, + { + "epoch": 0.4376887124126236, + "grad_norm": 483.0557556152344, + "learning_rate": 7.0725838509514115e-06, + "loss": 57.1351, + "step": 52910 + }, + { + "epoch": 0.4377714356619928, + "grad_norm": 1054.7470703125, + "learning_rate": 7.07130027570491e-06, + "loss": 99.6627, + "step": 52920 + }, + { + "epoch": 0.43785415891136203, + "grad_norm": 716.9881591796875, + "learning_rate": 7.070016535654551e-06, + "loss": 85.3958, + "step": 52930 + }, + { + "epoch": 0.4379368821607313, + "grad_norm": 1354.7027587890625, + "learning_rate": 7.068732630902479e-06, + "loss": 107.1556, + "step": 52940 + }, + { + "epoch": 0.4380196054101005, + "grad_norm": 1029.6153564453125, + "learning_rate": 7.067448561550844e-06, + "loss": 92.6526, + "step": 52950 + }, + { + "epoch": 0.43810232865946974, + "grad_norm": 1133.869873046875, + "learning_rate": 7.066164327701815e-06, + "loss": 149.094, + "step": 52960 + }, + { + "epoch": 0.438185051908839, + "grad_norm": 1119.0980224609375, + "learning_rate": 7.064879929457573e-06, + "loss": 143.9678, + "step": 52970 + }, + { + "epoch": 0.4382677751582082, + "grad_norm": 643.8549194335938, + "learning_rate": 7.063595366920314e-06, + "loss": 76.5045, + "step": 52980 + }, + { + "epoch": 0.43835049840757745, + "grad_norm": 1303.5244140625, + "learning_rate": 7.062310640192239e-06, + "loss": 111.4981, + "step": 52990 + }, + { + "epoch": 0.4384332216569467, + "grad_norm": 862.070556640625, + "learning_rate": 7.061025749375572e-06, + "loss": 100.7834, + "step": 53000 + }, + { + "epoch": 0.4385159449063159, + "grad_norm": 776.7830810546875, + "learning_rate": 7.059740694572545e-06, + "loss": 103.979, + "step": 53010 + }, + { + "epoch": 0.43859866815568516, + "grad_norm": 449.9796447753906, + "learning_rate": 7.058455475885405e-06, + "loss": 85.2324, + "step": 53020 + }, + { + "epoch": 0.4386813914050544, + "grad_norm": 
1027.198486328125, + "learning_rate": 7.05717009341641e-06, + "loss": 93.9646, + "step": 53030 + }, + { + "epoch": 0.4387641146544236, + "grad_norm": 1561.4610595703125, + "learning_rate": 7.05588454726783e-06, + "loss": 98.5875, + "step": 53040 + }, + { + "epoch": 0.43884683790379286, + "grad_norm": 886.0679931640625, + "learning_rate": 7.054598837541951e-06, + "loss": 91.799, + "step": 53050 + }, + { + "epoch": 0.4389295611531621, + "grad_norm": 822.8799438476562, + "learning_rate": 7.053312964341075e-06, + "loss": 96.1325, + "step": 53060 + }, + { + "epoch": 0.4390122844025313, + "grad_norm": 1466.96337890625, + "learning_rate": 7.052026927767508e-06, + "loss": 125.8915, + "step": 53070 + }, + { + "epoch": 0.43909500765190057, + "grad_norm": 1202.5977783203125, + "learning_rate": 7.050740727923576e-06, + "loss": 100.9376, + "step": 53080 + }, + { + "epoch": 0.4391777309012698, + "grad_norm": 834.5575561523438, + "learning_rate": 7.049454364911615e-06, + "loss": 100.5369, + "step": 53090 + }, + { + "epoch": 0.439260454150639, + "grad_norm": 589.8638916015625, + "learning_rate": 7.048167838833977e-06, + "loss": 94.5479, + "step": 53100 + }, + { + "epoch": 0.4393431774000083, + "grad_norm": 722.2205200195312, + "learning_rate": 7.046881149793026e-06, + "loss": 94.6359, + "step": 53110 + }, + { + "epoch": 0.43942590064937753, + "grad_norm": 706.8112182617188, + "learning_rate": 7.045594297891133e-06, + "loss": 80.4411, + "step": 53120 + }, + { + "epoch": 0.43950862389874673, + "grad_norm": 1541.61279296875, + "learning_rate": 7.04430728323069e-06, + "loss": 126.8126, + "step": 53130 + }, + { + "epoch": 0.439591347148116, + "grad_norm": 1694.7259521484375, + "learning_rate": 7.043020105914098e-06, + "loss": 112.2094, + "step": 53140 + }, + { + "epoch": 0.43967407039748524, + "grad_norm": 1049.023193359375, + "learning_rate": 7.041732766043775e-06, + "loss": 96.9257, + "step": 53150 + }, + { + "epoch": 0.43975679364685444, + "grad_norm": 928.5380859375, + "learning_rate": 7.040445263722145e-06, + "loss": 90.1928, + "step": 53160 + }, + { + "epoch": 0.4398395168962237, + "grad_norm": 517.4678344726562, + "learning_rate": 7.039157599051648e-06, + "loss": 100.6094, + "step": 53170 + }, + { + "epoch": 0.43992224014559295, + "grad_norm": 1040.879638671875, + "learning_rate": 7.037869772134741e-06, + "loss": 93.181, + "step": 53180 + }, + { + "epoch": 0.44000496339496215, + "grad_norm": 1226.857421875, + "learning_rate": 7.036581783073888e-06, + "loss": 139.1528, + "step": 53190 + }, + { + "epoch": 0.4400876866443314, + "grad_norm": 2283.062744140625, + "learning_rate": 7.035293631971569e-06, + "loss": 121.1719, + "step": 53200 + }, + { + "epoch": 0.4401704098937006, + "grad_norm": 1155.0572509765625, + "learning_rate": 7.034005318930277e-06, + "loss": 86.4772, + "step": 53210 + }, + { + "epoch": 0.44025313314306985, + "grad_norm": 1117.125244140625, + "learning_rate": 7.032716844052517e-06, + "loss": 111.3512, + "step": 53220 + }, + { + "epoch": 0.4403358563924391, + "grad_norm": 1401.2935791015625, + "learning_rate": 7.031428207440807e-06, + "loss": 122.4468, + "step": 53230 + }, + { + "epoch": 0.4404185796418083, + "grad_norm": 719.4153442382812, + "learning_rate": 7.030139409197676e-06, + "loss": 86.415, + "step": 53240 + }, + { + "epoch": 0.44050130289117756, + "grad_norm": 802.9329833984375, + "learning_rate": 7.02885044942567e-06, + "loss": 161.6186, + "step": 53250 + }, + { + "epoch": 0.4405840261405468, + "grad_norm": 720.5238037109375, + "learning_rate": 7.027561328227345e-06, + "loss": 
109.4637, + "step": 53260 + }, + { + "epoch": 0.440666749389916, + "grad_norm": 786.7742919921875, + "learning_rate": 7.02627204570527e-06, + "loss": 113.8812, + "step": 53270 + }, + { + "epoch": 0.44074947263928527, + "grad_norm": 449.480712890625, + "learning_rate": 7.024982601962027e-06, + "loss": 94.8391, + "step": 53280 + }, + { + "epoch": 0.4408321958886545, + "grad_norm": 934.8798217773438, + "learning_rate": 7.023692997100213e-06, + "loss": 79.2688, + "step": 53290 + }, + { + "epoch": 0.4409149191380237, + "grad_norm": 1120.0533447265625, + "learning_rate": 7.0224032312224345e-06, + "loss": 91.2629, + "step": 53300 + }, + { + "epoch": 0.440997642387393, + "grad_norm": 1041.491943359375, + "learning_rate": 7.021113304431313e-06, + "loss": 65.5652, + "step": 53310 + }, + { + "epoch": 0.44108036563676223, + "grad_norm": 1426.9508056640625, + "learning_rate": 7.01982321682948e-06, + "loss": 106.7264, + "step": 53320 + }, + { + "epoch": 0.4411630888861314, + "grad_norm": 1245.83154296875, + "learning_rate": 7.018532968519584e-06, + "loss": 97.5596, + "step": 53330 + }, + { + "epoch": 0.4412458121355007, + "grad_norm": 667.5186157226562, + "learning_rate": 7.0172425596042846e-06, + "loss": 96.0253, + "step": 53340 + }, + { + "epoch": 0.44132853538486994, + "grad_norm": 558.8970947265625, + "learning_rate": 7.0159519901862515e-06, + "loss": 108.9551, + "step": 53350 + }, + { + "epoch": 0.44141125863423913, + "grad_norm": 636.3999633789062, + "learning_rate": 7.014661260368171e-06, + "loss": 84.9733, + "step": 53360 + }, + { + "epoch": 0.4414939818836084, + "grad_norm": 927.197265625, + "learning_rate": 7.01337037025274e-06, + "loss": 111.2972, + "step": 53370 + }, + { + "epoch": 0.44157670513297764, + "grad_norm": 786.5078125, + "learning_rate": 7.012079319942668e-06, + "loss": 95.5135, + "step": 53380 + }, + { + "epoch": 0.44165942838234684, + "grad_norm": 807.628173828125, + "learning_rate": 7.01078810954068e-06, + "loss": 138.3115, + "step": 53390 + }, + { + "epoch": 0.4417421516317161, + "grad_norm": 657.76123046875, + "learning_rate": 7.0094967391495095e-06, + "loss": 96.7088, + "step": 53400 + }, + { + "epoch": 0.44182487488108535, + "grad_norm": 414.78240966796875, + "learning_rate": 7.008205208871906e-06, + "loss": 77.1027, + "step": 53410 + }, + { + "epoch": 0.44190759813045455, + "grad_norm": 719.4336547851562, + "learning_rate": 7.00691351881063e-06, + "loss": 95.9553, + "step": 53420 + }, + { + "epoch": 0.4419903213798238, + "grad_norm": 1148.5152587890625, + "learning_rate": 7.005621669068456e-06, + "loss": 109.5746, + "step": 53430 + }, + { + "epoch": 0.44207304462919306, + "grad_norm": 508.8988342285156, + "learning_rate": 7.004329659748172e-06, + "loss": 123.2771, + "step": 53440 + }, + { + "epoch": 0.44215576787856226, + "grad_norm": 777.6555786132812, + "learning_rate": 7.003037490952574e-06, + "loss": 101.2551, + "step": 53450 + }, + { + "epoch": 0.4422384911279315, + "grad_norm": 1049.9522705078125, + "learning_rate": 7.0017451627844765e-06, + "loss": 137.9618, + "step": 53460 + }, + { + "epoch": 0.44232121437730076, + "grad_norm": 666.4544677734375, + "learning_rate": 7.0004526753467004e-06, + "loss": 109.7146, + "step": 53470 + }, + { + "epoch": 0.44240393762666996, + "grad_norm": 1020.0592651367188, + "learning_rate": 6.999160028742089e-06, + "loss": 113.0266, + "step": 53480 + }, + { + "epoch": 0.4424866608760392, + "grad_norm": 646.3621215820312, + "learning_rate": 6.997867223073487e-06, + "loss": 148.3913, + "step": 53490 + }, + { + "epoch": 0.44256938412540847, 
+ "grad_norm": 1154.8201904296875, + "learning_rate": 6.996574258443761e-06, + "loss": 111.0904, + "step": 53500 + }, + { + "epoch": 0.44265210737477767, + "grad_norm": 1100.033935546875, + "learning_rate": 6.995281134955784e-06, + "loss": 90.6527, + "step": 53510 + }, + { + "epoch": 0.4427348306241469, + "grad_norm": 1160.05712890625, + "learning_rate": 6.993987852712442e-06, + "loss": 113.9817, + "step": 53520 + }, + { + "epoch": 0.4428175538735162, + "grad_norm": 1008.1426391601562, + "learning_rate": 6.992694411816638e-06, + "loss": 91.588, + "step": 53530 + }, + { + "epoch": 0.4429002771228854, + "grad_norm": 892.0647583007812, + "learning_rate": 6.991400812371287e-06, + "loss": 127.6992, + "step": 53540 + }, + { + "epoch": 0.44298300037225463, + "grad_norm": 700.9825439453125, + "learning_rate": 6.990107054479313e-06, + "loss": 85.8635, + "step": 53550 + }, + { + "epoch": 0.44306572362162383, + "grad_norm": 452.05950927734375, + "learning_rate": 6.988813138243652e-06, + "loss": 109.3417, + "step": 53560 + }, + { + "epoch": 0.4431484468709931, + "grad_norm": 1181.6788330078125, + "learning_rate": 6.987519063767257e-06, + "loss": 116.6035, + "step": 53570 + }, + { + "epoch": 0.44323117012036234, + "grad_norm": 991.6622924804688, + "learning_rate": 6.986224831153092e-06, + "loss": 78.8246, + "step": 53580 + }, + { + "epoch": 0.44331389336973154, + "grad_norm": 1134.1353759765625, + "learning_rate": 6.984930440504134e-06, + "loss": 113.1138, + "step": 53590 + }, + { + "epoch": 0.4433966166191008, + "grad_norm": 708.4700317382812, + "learning_rate": 6.9836358919233695e-06, + "loss": 79.0538, + "step": 53600 + }, + { + "epoch": 0.44347933986847005, + "grad_norm": 917.70166015625, + "learning_rate": 6.982341185513799e-06, + "loss": 88.8924, + "step": 53610 + }, + { + "epoch": 0.44356206311783924, + "grad_norm": 966.7335815429688, + "learning_rate": 6.981046321378441e-06, + "loss": 142.0511, + "step": 53620 + }, + { + "epoch": 0.4436447863672085, + "grad_norm": 804.2879028320312, + "learning_rate": 6.979751299620318e-06, + "loss": 75.3757, + "step": 53630 + }, + { + "epoch": 0.44372750961657775, + "grad_norm": 1585.9019775390625, + "learning_rate": 6.978456120342469e-06, + "loss": 95.9753, + "step": 53640 + }, + { + "epoch": 0.44381023286594695, + "grad_norm": 1567.5927734375, + "learning_rate": 6.977160783647947e-06, + "loss": 126.4141, + "step": 53650 + }, + { + "epoch": 0.4438929561153162, + "grad_norm": 1687.5382080078125, + "learning_rate": 6.975865289639815e-06, + "loss": 90.8707, + "step": 53660 + }, + { + "epoch": 0.44397567936468546, + "grad_norm": 973.2371215820312, + "learning_rate": 6.974569638421151e-06, + "loss": 82.7147, + "step": 53670 + }, + { + "epoch": 0.44405840261405466, + "grad_norm": 658.38818359375, + "learning_rate": 6.973273830095042e-06, + "loss": 85.3202, + "step": 53680 + }, + { + "epoch": 0.4441411258634239, + "grad_norm": 1075.40673828125, + "learning_rate": 6.971977864764591e-06, + "loss": 99.2303, + "step": 53690 + }, + { + "epoch": 0.44422384911279317, + "grad_norm": 910.6012573242188, + "learning_rate": 6.970681742532911e-06, + "loss": 129.7319, + "step": 53700 + }, + { + "epoch": 0.44430657236216237, + "grad_norm": 1031.2911376953125, + "learning_rate": 6.969385463503129e-06, + "loss": 96.3028, + "step": 53710 + }, + { + "epoch": 0.4443892956115316, + "grad_norm": 679.598876953125, + "learning_rate": 6.968089027778384e-06, + "loss": 91.5366, + "step": 53720 + }, + { + "epoch": 0.4444720188609009, + "grad_norm": 842.2387084960938, + "learning_rate": 
6.9667924354618275e-06, + "loss": 107.6039, + "step": 53730 + }, + { + "epoch": 0.4445547421102701, + "grad_norm": 645.2871704101562, + "learning_rate": 6.965495686656623e-06, + "loss": 125.044, + "step": 53740 + }, + { + "epoch": 0.44463746535963933, + "grad_norm": 614.8228149414062, + "learning_rate": 6.964198781465948e-06, + "loss": 101.3111, + "step": 53750 + }, + { + "epoch": 0.4447201886090086, + "grad_norm": 942.0760498046875, + "learning_rate": 6.962901719992989e-06, + "loss": 89.1372, + "step": 53760 + }, + { + "epoch": 0.4448029118583778, + "grad_norm": 577.6919555664062, + "learning_rate": 6.961604502340949e-06, + "loss": 59.7649, + "step": 53770 + }, + { + "epoch": 0.44488563510774704, + "grad_norm": 1402.629638671875, + "learning_rate": 6.960307128613042e-06, + "loss": 133.4121, + "step": 53780 + }, + { + "epoch": 0.4449683583571163, + "grad_norm": 1055.0478515625, + "learning_rate": 6.959009598912493e-06, + "loss": 127.3038, + "step": 53790 + }, + { + "epoch": 0.4450510816064855, + "grad_norm": 1320.6951904296875, + "learning_rate": 6.957711913342541e-06, + "loss": 86.9509, + "step": 53800 + }, + { + "epoch": 0.44513380485585474, + "grad_norm": 1073.6241455078125, + "learning_rate": 6.956414072006437e-06, + "loss": 122.6924, + "step": 53810 + }, + { + "epoch": 0.445216528105224, + "grad_norm": 870.7139282226562, + "learning_rate": 6.955116075007443e-06, + "loss": 124.2368, + "step": 53820 + }, + { + "epoch": 0.4452992513545932, + "grad_norm": 1268.8851318359375, + "learning_rate": 6.953817922448837e-06, + "loss": 89.0271, + "step": 53830 + }, + { + "epoch": 0.44538197460396245, + "grad_norm": 1143.03955078125, + "learning_rate": 6.9525196144339055e-06, + "loss": 145.819, + "step": 53840 + }, + { + "epoch": 0.4454646978533317, + "grad_norm": 1068.4166259765625, + "learning_rate": 6.951221151065948e-06, + "loss": 131.2076, + "step": 53850 + }, + { + "epoch": 0.4455474211027009, + "grad_norm": 797.7089233398438, + "learning_rate": 6.949922532448279e-06, + "loss": 98.0425, + "step": 53860 + }, + { + "epoch": 0.44563014435207016, + "grad_norm": 760.931640625, + "learning_rate": 6.948623758684223e-06, + "loss": 96.0778, + "step": 53870 + }, + { + "epoch": 0.4457128676014394, + "grad_norm": 624.017822265625, + "learning_rate": 6.9473248298771176e-06, + "loss": 89.5199, + "step": 53880 + }, + { + "epoch": 0.4457955908508086, + "grad_norm": 689.559814453125, + "learning_rate": 6.946025746130312e-06, + "loss": 123.3743, + "step": 53890 + }, + { + "epoch": 0.44587831410017786, + "grad_norm": 1045.9923095703125, + "learning_rate": 6.944726507547169e-06, + "loss": 100.6308, + "step": 53900 + }, + { + "epoch": 0.44596103734954706, + "grad_norm": 1568.38330078125, + "learning_rate": 6.943427114231064e-06, + "loss": 136.4211, + "step": 53910 + }, + { + "epoch": 0.4460437605989163, + "grad_norm": 1263.7076416015625, + "learning_rate": 6.942127566285382e-06, + "loss": 89.5075, + "step": 53920 + }, + { + "epoch": 0.44612648384828557, + "grad_norm": 1032.7841796875, + "learning_rate": 6.940827863813523e-06, + "loss": 124.588, + "step": 53930 + }, + { + "epoch": 0.44620920709765477, + "grad_norm": 1023.0800170898438, + "learning_rate": 6.9395280069188964e-06, + "loss": 127.864, + "step": 53940 + }, + { + "epoch": 0.446291930347024, + "grad_norm": 684.29931640625, + "learning_rate": 6.9382279957049295e-06, + "loss": 106.943, + "step": 53950 + }, + { + "epoch": 0.4463746535963933, + "grad_norm": 590.7765502929688, + "learning_rate": 6.936927830275055e-06, + "loss": 80.368, + "step": 53960 + }, + { 
+ "epoch": 0.4464573768457625, + "grad_norm": 953.7996826171875, + "learning_rate": 6.935627510732724e-06, + "loss": 114.7125, + "step": 53970 + }, + { + "epoch": 0.44654010009513173, + "grad_norm": 665.14697265625, + "learning_rate": 6.934327037181394e-06, + "loss": 107.5566, + "step": 53980 + }, + { + "epoch": 0.446622823344501, + "grad_norm": 933.9505615234375, + "learning_rate": 6.933026409724538e-06, + "loss": 119.2751, + "step": 53990 + }, + { + "epoch": 0.4467055465938702, + "grad_norm": 1047.66796875, + "learning_rate": 6.931725628465643e-06, + "loss": 84.2427, + "step": 54000 + }, + { + "epoch": 0.44678826984323944, + "grad_norm": 1026.7486572265625, + "learning_rate": 6.9304246935082065e-06, + "loss": 80.6261, + "step": 54010 + }, + { + "epoch": 0.4468709930926087, + "grad_norm": 948.4655151367188, + "learning_rate": 6.929123604955735e-06, + "loss": 128.3851, + "step": 54020 + }, + { + "epoch": 0.4469537163419779, + "grad_norm": 1473.8193359375, + "learning_rate": 6.927822362911753e-06, + "loss": 83.6743, + "step": 54030 + }, + { + "epoch": 0.44703643959134715, + "grad_norm": 677.3907470703125, + "learning_rate": 6.926520967479791e-06, + "loss": 96.5376, + "step": 54040 + }, + { + "epoch": 0.4471191628407164, + "grad_norm": 759.7684326171875, + "learning_rate": 6.9252194187634e-06, + "loss": 85.2003, + "step": 54050 + }, + { + "epoch": 0.4472018860900856, + "grad_norm": 1008.9971923828125, + "learning_rate": 6.923917716866133e-06, + "loss": 108.9541, + "step": 54060 + }, + { + "epoch": 0.44728460933945485, + "grad_norm": 1046.4508056640625, + "learning_rate": 6.922615861891564e-06, + "loss": 73.9177, + "step": 54070 + }, + { + "epoch": 0.4473673325888241, + "grad_norm": 1061.5517578125, + "learning_rate": 6.921313853943275e-06, + "loss": 116.9172, + "step": 54080 + }, + { + "epoch": 0.4474500558381933, + "grad_norm": 1054.0621337890625, + "learning_rate": 6.9200116931248575e-06, + "loss": 94.5179, + "step": 54090 + }, + { + "epoch": 0.44753277908756256, + "grad_norm": 1365.836181640625, + "learning_rate": 6.918709379539924e-06, + "loss": 91.8605, + "step": 54100 + }, + { + "epoch": 0.4476155023369318, + "grad_norm": 914.6397094726562, + "learning_rate": 6.917406913292089e-06, + "loss": 95.1237, + "step": 54110 + }, + { + "epoch": 0.447698225586301, + "grad_norm": 1938.41064453125, + "learning_rate": 6.916104294484988e-06, + "loss": 133.195, + "step": 54120 + }, + { + "epoch": 0.44778094883567027, + "grad_norm": 728.0489501953125, + "learning_rate": 6.91480152322226e-06, + "loss": 105.5754, + "step": 54130 + }, + { + "epoch": 0.4478636720850395, + "grad_norm": 1188.586181640625, + "learning_rate": 6.913498599607563e-06, + "loss": 110.4302, + "step": 54140 + }, + { + "epoch": 0.4479463953344087, + "grad_norm": 1016.9649047851562, + "learning_rate": 6.9121955237445644e-06, + "loss": 75.1243, + "step": 54150 + }, + { + "epoch": 0.448029118583778, + "grad_norm": 1644.49755859375, + "learning_rate": 6.910892295736944e-06, + "loss": 87.0271, + "step": 54160 + }, + { + "epoch": 0.44811184183314723, + "grad_norm": 885.784423828125, + "learning_rate": 6.9095889156883934e-06, + "loss": 104.9015, + "step": 54170 + }, + { + "epoch": 0.4481945650825164, + "grad_norm": 575.415283203125, + "learning_rate": 6.908285383702617e-06, + "loss": 90.7336, + "step": 54180 + }, + { + "epoch": 0.4482772883318857, + "grad_norm": 876.1041870117188, + "learning_rate": 6.906981699883329e-06, + "loss": 93.0574, + "step": 54190 + }, + { + "epoch": 0.44836001158125494, + "grad_norm": 665.2525024414062, + 
"learning_rate": 6.90567786433426e-06, + "loss": 98.4839, + "step": 54200 + }, + { + "epoch": 0.44844273483062413, + "grad_norm": 601.7535400390625, + "learning_rate": 6.904373877159149e-06, + "loss": 88.9101, + "step": 54210 + }, + { + "epoch": 0.4485254580799934, + "grad_norm": 761.7291870117188, + "learning_rate": 6.903069738461749e-06, + "loss": 90.6817, + "step": 54220 + }, + { + "epoch": 0.44860818132936264, + "grad_norm": 510.3106994628906, + "learning_rate": 6.901765448345823e-06, + "loss": 172.6727, + "step": 54230 + }, + { + "epoch": 0.44869090457873184, + "grad_norm": 836.3589477539062, + "learning_rate": 6.900461006915149e-06, + "loss": 107.1047, + "step": 54240 + }, + { + "epoch": 0.4487736278281011, + "grad_norm": 981.2500610351562, + "learning_rate": 6.899156414273514e-06, + "loss": 102.4325, + "step": 54250 + }, + { + "epoch": 0.44885635107747035, + "grad_norm": 996.1396484375, + "learning_rate": 6.89785167052472e-06, + "loss": 76.1564, + "step": 54260 + }, + { + "epoch": 0.44893907432683955, + "grad_norm": 1051.859619140625, + "learning_rate": 6.896546775772577e-06, + "loss": 89.3364, + "step": 54270 + }, + { + "epoch": 0.4490217975762088, + "grad_norm": 743.7044677734375, + "learning_rate": 6.8952417301209114e-06, + "loss": 92.615, + "step": 54280 + }, + { + "epoch": 0.449104520825578, + "grad_norm": 1194.3951416015625, + "learning_rate": 6.893936533673561e-06, + "loss": 87.0885, + "step": 54290 + }, + { + "epoch": 0.44918724407494726, + "grad_norm": 855.311767578125, + "learning_rate": 6.892631186534371e-06, + "loss": 94.2941, + "step": 54300 + }, + { + "epoch": 0.4492699673243165, + "grad_norm": 804.5349731445312, + "learning_rate": 6.891325688807204e-06, + "loss": 119.2308, + "step": 54310 + }, + { + "epoch": 0.4493526905736857, + "grad_norm": 1499.4466552734375, + "learning_rate": 6.890020040595932e-06, + "loss": 117.2243, + "step": 54320 + }, + { + "epoch": 0.44943541382305496, + "grad_norm": 907.1102905273438, + "learning_rate": 6.88871424200444e-06, + "loss": 120.5858, + "step": 54330 + }, + { + "epoch": 0.4495181370724242, + "grad_norm": 1519.6060791015625, + "learning_rate": 6.887408293136621e-06, + "loss": 98.4492, + "step": 54340 + }, + { + "epoch": 0.4496008603217934, + "grad_norm": 802.8118896484375, + "learning_rate": 6.886102194096389e-06, + "loss": 67.4142, + "step": 54350 + }, + { + "epoch": 0.44968358357116267, + "grad_norm": 638.83935546875, + "learning_rate": 6.884795944987661e-06, + "loss": 89.7945, + "step": 54360 + }, + { + "epoch": 0.4497663068205319, + "grad_norm": 857.33984375, + "learning_rate": 6.8834895459143694e-06, + "loss": 103.017, + "step": 54370 + }, + { + "epoch": 0.4498490300699011, + "grad_norm": 1263.925537109375, + "learning_rate": 6.882182996980457e-06, + "loss": 80.3623, + "step": 54380 + }, + { + "epoch": 0.4499317533192704, + "grad_norm": 617.5503540039062, + "learning_rate": 6.880876298289885e-06, + "loss": 90.8478, + "step": 54390 + }, + { + "epoch": 0.45001447656863963, + "grad_norm": 978.7340087890625, + "learning_rate": 6.879569449946617e-06, + "loss": 85.7712, + "step": 54400 + }, + { + "epoch": 0.45009719981800883, + "grad_norm": 1923.4696044921875, + "learning_rate": 6.878262452054632e-06, + "loss": 119.6836, + "step": 54410 + }, + { + "epoch": 0.4501799230673781, + "grad_norm": 713.7830200195312, + "learning_rate": 6.876955304717925e-06, + "loss": 99.3105, + "step": 54420 + }, + { + "epoch": 0.45026264631674734, + "grad_norm": 714.7610473632812, + "learning_rate": 6.875648008040499e-06, + "loss": 67.4421, + "step": 
54430 + }, + { + "epoch": 0.45034536956611654, + "grad_norm": 1194.353271484375, + "learning_rate": 6.874340562126368e-06, + "loss": 97.9703, + "step": 54440 + }, + { + "epoch": 0.4504280928154858, + "grad_norm": 747.249755859375, + "learning_rate": 6.873032967079562e-06, + "loss": 113.6194, + "step": 54450 + }, + { + "epoch": 0.45051081606485505, + "grad_norm": 1645.705322265625, + "learning_rate": 6.871725223004118e-06, + "loss": 134.7527, + "step": 54460 + }, + { + "epoch": 0.45059353931422425, + "grad_norm": 704.1566162109375, + "learning_rate": 6.870417330004086e-06, + "loss": 97.6264, + "step": 54470 + }, + { + "epoch": 0.4506762625635935, + "grad_norm": 945.1201782226562, + "learning_rate": 6.869109288183534e-06, + "loss": 82.3648, + "step": 54480 + }, + { + "epoch": 0.45075898581296275, + "grad_norm": 752.2423706054688, + "learning_rate": 6.867801097646534e-06, + "loss": 93.4124, + "step": 54490 + }, + { + "epoch": 0.45084170906233195, + "grad_norm": 1171.9190673828125, + "learning_rate": 6.866492758497171e-06, + "loss": 71.8976, + "step": 54500 + }, + { + "epoch": 0.4509244323117012, + "grad_norm": 1028.865478515625, + "learning_rate": 6.865184270839546e-06, + "loss": 79.6011, + "step": 54510 + }, + { + "epoch": 0.45100715556107046, + "grad_norm": 1303.591552734375, + "learning_rate": 6.863875634777767e-06, + "loss": 111.4603, + "step": 54520 + }, + { + "epoch": 0.45108987881043966, + "grad_norm": 702.6375122070312, + "learning_rate": 6.86256685041596e-06, + "loss": 76.6232, + "step": 54530 + }, + { + "epoch": 0.4511726020598089, + "grad_norm": 1701.81005859375, + "learning_rate": 6.861257917858257e-06, + "loss": 110.8894, + "step": 54540 + }, + { + "epoch": 0.45125532530917817, + "grad_norm": 640.2454833984375, + "learning_rate": 6.859948837208802e-06, + "loss": 83.109, + "step": 54550 + }, + { + "epoch": 0.45133804855854737, + "grad_norm": 681.0009155273438, + "learning_rate": 6.8586396085717536e-06, + "loss": 105.6306, + "step": 54560 + }, + { + "epoch": 0.4514207718079166, + "grad_norm": 2675.404052734375, + "learning_rate": 6.8573302320512836e-06, + "loss": 132.9688, + "step": 54570 + }, + { + "epoch": 0.4515034950572859, + "grad_norm": 891.1157836914062, + "learning_rate": 6.85602070775157e-06, + "loss": 103.1662, + "step": 54580 + }, + { + "epoch": 0.4515862183066551, + "grad_norm": 1364.07666015625, + "learning_rate": 6.854711035776806e-06, + "loss": 99.1324, + "step": 54590 + }, + { + "epoch": 0.45166894155602433, + "grad_norm": 524.9562377929688, + "learning_rate": 6.853401216231198e-06, + "loss": 106.766, + "step": 54600 + }, + { + "epoch": 0.4517516648053936, + "grad_norm": 818.6365966796875, + "learning_rate": 6.8520912492189605e-06, + "loss": 80.276, + "step": 54610 + }, + { + "epoch": 0.4518343880547628, + "grad_norm": 1519.1331787109375, + "learning_rate": 6.850781134844323e-06, + "loss": 69.9319, + "step": 54620 + }, + { + "epoch": 0.45191711130413204, + "grad_norm": 697.9396362304688, + "learning_rate": 6.8494708732115235e-06, + "loss": 123.4269, + "step": 54630 + }, + { + "epoch": 0.45199983455350123, + "grad_norm": 990.307373046875, + "learning_rate": 6.8481604644248155e-06, + "loss": 89.6535, + "step": 54640 + }, + { + "epoch": 0.4520825578028705, + "grad_norm": 651.0850830078125, + "learning_rate": 6.846849908588461e-06, + "loss": 80.7496, + "step": 54650 + }, + { + "epoch": 0.45216528105223974, + "grad_norm": 1296.8258056640625, + "learning_rate": 6.845539205806735e-06, + "loss": 129.8521, + "step": 54660 + }, + { + "epoch": 0.45224800430160894, + 
"grad_norm": 867.2517700195312, + "learning_rate": 6.844228356183924e-06, + "loss": 76.4351, + "step": 54670 + }, + { + "epoch": 0.4523307275509782, + "grad_norm": 878.8154907226562, + "learning_rate": 6.842917359824326e-06, + "loss": 103.5479, + "step": 54680 + }, + { + "epoch": 0.45241345080034745, + "grad_norm": 589.3108520507812, + "learning_rate": 6.841606216832253e-06, + "loss": 158.1967, + "step": 54690 + }, + { + "epoch": 0.45249617404971665, + "grad_norm": 440.4094543457031, + "learning_rate": 6.840294927312024e-06, + "loss": 84.1425, + "step": 54700 + }, + { + "epoch": 0.4525788972990859, + "grad_norm": 773.7758178710938, + "learning_rate": 6.838983491367974e-06, + "loss": 81.9263, + "step": 54710 + }, + { + "epoch": 0.45266162054845516, + "grad_norm": 1917.0274658203125, + "learning_rate": 6.837671909104447e-06, + "loss": 108.1925, + "step": 54720 + }, + { + "epoch": 0.45274434379782436, + "grad_norm": 528.8961791992188, + "learning_rate": 6.836360180625801e-06, + "loss": 123.8608, + "step": 54730 + }, + { + "epoch": 0.4528270670471936, + "grad_norm": 719.6814575195312, + "learning_rate": 6.835048306036404e-06, + "loss": 108.1355, + "step": 54740 + }, + { + "epoch": 0.45290979029656286, + "grad_norm": 1164.0782470703125, + "learning_rate": 6.833736285440632e-06, + "loss": 83.8529, + "step": 54750 + }, + { + "epoch": 0.45299251354593206, + "grad_norm": 540.9407958984375, + "learning_rate": 6.832424118942881e-06, + "loss": 125.107, + "step": 54760 + }, + { + "epoch": 0.4530752367953013, + "grad_norm": 736.422119140625, + "learning_rate": 6.831111806647552e-06, + "loss": 106.4315, + "step": 54770 + }, + { + "epoch": 0.45315796004467057, + "grad_norm": 697.813232421875, + "learning_rate": 6.829799348659061e-06, + "loss": 105.6159, + "step": 54780 + }, + { + "epoch": 0.45324068329403977, + "grad_norm": 925.9052734375, + "learning_rate": 6.828486745081835e-06, + "loss": 116.2571, + "step": 54790 + }, + { + "epoch": 0.453323406543409, + "grad_norm": 2547.846923828125, + "learning_rate": 6.8271739960203065e-06, + "loss": 142.8061, + "step": 54800 + }, + { + "epoch": 0.4534061297927783, + "grad_norm": 1475.7213134765625, + "learning_rate": 6.825861101578931e-06, + "loss": 81.7697, + "step": 54810 + }, + { + "epoch": 0.4534888530421475, + "grad_norm": 1138.5965576171875, + "learning_rate": 6.824548061862166e-06, + "loss": 92.7645, + "step": 54820 + }, + { + "epoch": 0.45357157629151673, + "grad_norm": 851.4307250976562, + "learning_rate": 6.823234876974489e-06, + "loss": 121.6354, + "step": 54830 + }, + { + "epoch": 0.453654299540886, + "grad_norm": 1226.28076171875, + "learning_rate": 6.8219215470203756e-06, + "loss": 102.6578, + "step": 54840 + }, + { + "epoch": 0.4537370227902552, + "grad_norm": 578.7922973632812, + "learning_rate": 6.820608072104329e-06, + "loss": 102.5517, + "step": 54850 + }, + { + "epoch": 0.45381974603962444, + "grad_norm": 673.4541625976562, + "learning_rate": 6.819294452330853e-06, + "loss": 69.6824, + "step": 54860 + }, + { + "epoch": 0.4539024692889937, + "grad_norm": 2711.473876953125, + "learning_rate": 6.817980687804467e-06, + "loss": 101.1284, + "step": 54870 + }, + { + "epoch": 0.4539851925383629, + "grad_norm": 736.4070434570312, + "learning_rate": 6.8166667786297e-06, + "loss": 96.8542, + "step": 54880 + }, + { + "epoch": 0.45406791578773215, + "grad_norm": 760.29150390625, + "learning_rate": 6.815352724911095e-06, + "loss": 105.462, + "step": 54890 + }, + { + "epoch": 0.4541506390371014, + "grad_norm": 692.9434204101562, + "learning_rate": 
6.814038526753205e-06, + "loss": 91.7443, + "step": 54900 + }, + { + "epoch": 0.4542333622864706, + "grad_norm": 549.2918090820312, + "learning_rate": 6.812724184260596e-06, + "loss": 73.3805, + "step": 54910 + }, + { + "epoch": 0.45431608553583985, + "grad_norm": 702.6921997070312, + "learning_rate": 6.811409697537843e-06, + "loss": 84.0114, + "step": 54920 + }, + { + "epoch": 0.4543988087852091, + "grad_norm": 787.5999755859375, + "learning_rate": 6.810095066689533e-06, + "loss": 101.185, + "step": 54930 + }, + { + "epoch": 0.4544815320345783, + "grad_norm": 1066.020263671875, + "learning_rate": 6.808780291820264e-06, + "loss": 99.9101, + "step": 54940 + }, + { + "epoch": 0.45456425528394756, + "grad_norm": 1216.9044189453125, + "learning_rate": 6.80746537303465e-06, + "loss": 120.2617, + "step": 54950 + }, + { + "epoch": 0.4546469785333168, + "grad_norm": 1686.6910400390625, + "learning_rate": 6.806150310437312e-06, + "loss": 121.6102, + "step": 54960 + }, + { + "epoch": 0.454729701782686, + "grad_norm": 1484.89111328125, + "learning_rate": 6.804835104132883e-06, + "loss": 104.0064, + "step": 54970 + }, + { + "epoch": 0.45481242503205527, + "grad_norm": 979.0828857421875, + "learning_rate": 6.803519754226007e-06, + "loss": 94.6825, + "step": 54980 + }, + { + "epoch": 0.4548951482814245, + "grad_norm": 864.7905883789062, + "learning_rate": 6.80220426082134e-06, + "loss": 91.2416, + "step": 54990 + }, + { + "epoch": 0.4549778715307937, + "grad_norm": 888.6685791015625, + "learning_rate": 6.800888624023552e-06, + "loss": 105.9705, + "step": 55000 + }, + { + "epoch": 0.455060594780163, + "grad_norm": 497.9424133300781, + "learning_rate": 6.799572843937322e-06, + "loss": 108.1165, + "step": 55010 + }, + { + "epoch": 0.4551433180295322, + "grad_norm": 518.8756713867188, + "learning_rate": 6.79825692066734e-06, + "loss": 106.614, + "step": 55020 + }, + { + "epoch": 0.4552260412789014, + "grad_norm": 1119.955078125, + "learning_rate": 6.796940854318306e-06, + "loss": 140.4494, + "step": 55030 + }, + { + "epoch": 0.4553087645282707, + "grad_norm": 594.2408447265625, + "learning_rate": 6.795624644994936e-06, + "loss": 77.2674, + "step": 55040 + }, + { + "epoch": 0.4553914877776399, + "grad_norm": 797.1567993164062, + "learning_rate": 6.794308292801954e-06, + "loss": 111.2638, + "step": 55050 + }, + { + "epoch": 0.45547421102700913, + "grad_norm": 821.6486206054688, + "learning_rate": 6.792991797844095e-06, + "loss": 136.5393, + "step": 55060 + }, + { + "epoch": 0.4555569342763784, + "grad_norm": 597.4654541015625, + "learning_rate": 6.791675160226109e-06, + "loss": 96.4142, + "step": 55070 + }, + { + "epoch": 0.4556396575257476, + "grad_norm": 592.2135009765625, + "learning_rate": 6.790358380052752e-06, + "loss": 93.9998, + "step": 55080 + }, + { + "epoch": 0.45572238077511684, + "grad_norm": 1021.7890625, + "learning_rate": 6.789041457428796e-06, + "loss": 124.1362, + "step": 55090 + }, + { + "epoch": 0.4558051040244861, + "grad_norm": 486.76080322265625, + "learning_rate": 6.7877243924590205e-06, + "loss": 77.4204, + "step": 55100 + }, + { + "epoch": 0.4558878272738553, + "grad_norm": 875.5304565429688, + "learning_rate": 6.7864071852482205e-06, + "loss": 104.1027, + "step": 55110 + }, + { + "epoch": 0.45597055052322455, + "grad_norm": 738.2188110351562, + "learning_rate": 6.7850898359012e-06, + "loss": 106.4531, + "step": 55120 + }, + { + "epoch": 0.4560532737725938, + "grad_norm": 1422.643798828125, + "learning_rate": 6.7837723445227724e-06, + "loss": 101.3412, + "step": 55130 + }, + { + 
"epoch": 0.456135997021963, + "grad_norm": 1019.88818359375, + "learning_rate": 6.782454711217767e-06, + "loss": 104.1804, + "step": 55140 + }, + { + "epoch": 0.45621872027133226, + "grad_norm": 1043.4281005859375, + "learning_rate": 6.78113693609102e-06, + "loss": 159.6309, + "step": 55150 + }, + { + "epoch": 0.4563014435207015, + "grad_norm": 733.2887573242188, + "learning_rate": 6.77981901924738e-06, + "loss": 76.8028, + "step": 55160 + }, + { + "epoch": 0.4563841667700707, + "grad_norm": 650.5491943359375, + "learning_rate": 6.7785009607917095e-06, + "loss": 132.4901, + "step": 55170 + }, + { + "epoch": 0.45646689001943996, + "grad_norm": 993.3848876953125, + "learning_rate": 6.777182760828881e-06, + "loss": 127.0856, + "step": 55180 + }, + { + "epoch": 0.4565496132688092, + "grad_norm": 941.2340087890625, + "learning_rate": 6.7758644194637755e-06, + "loss": 68.8753, + "step": 55190 + }, + { + "epoch": 0.4566323365181784, + "grad_norm": 1339.5758056640625, + "learning_rate": 6.774545936801289e-06, + "loss": 93.002, + "step": 55200 + }, + { + "epoch": 0.45671505976754767, + "grad_norm": 605.7567749023438, + "learning_rate": 6.773227312946327e-06, + "loss": 123.4982, + "step": 55210 + }, + { + "epoch": 0.4567977830169169, + "grad_norm": 984.2488403320312, + "learning_rate": 6.771908548003803e-06, + "loss": 77.7853, + "step": 55220 + }, + { + "epoch": 0.4568805062662861, + "grad_norm": 829.1386108398438, + "learning_rate": 6.77058964207865e-06, + "loss": 213.839, + "step": 55230 + }, + { + "epoch": 0.4569632295156554, + "grad_norm": 764.8698120117188, + "learning_rate": 6.769270595275804e-06, + "loss": 102.3559, + "step": 55240 + }, + { + "epoch": 0.45704595276502463, + "grad_norm": 798.2879028320312, + "learning_rate": 6.767951407700217e-06, + "loss": 90.5174, + "step": 55250 + }, + { + "epoch": 0.45712867601439383, + "grad_norm": 856.211181640625, + "learning_rate": 6.766632079456852e-06, + "loss": 85.0527, + "step": 55260 + }, + { + "epoch": 0.4572113992637631, + "grad_norm": 831.584716796875, + "learning_rate": 6.765312610650677e-06, + "loss": 95.3017, + "step": 55270 + }, + { + "epoch": 0.45729412251313234, + "grad_norm": 883.2146606445312, + "learning_rate": 6.763993001386681e-06, + "loss": 113.933, + "step": 55280 + }, + { + "epoch": 0.45737684576250154, + "grad_norm": 954.8955688476562, + "learning_rate": 6.762673251769858e-06, + "loss": 121.8417, + "step": 55290 + }, + { + "epoch": 0.4574595690118708, + "grad_norm": 1161.1273193359375, + "learning_rate": 6.761353361905214e-06, + "loss": 90.7742, + "step": 55300 + }, + { + "epoch": 0.45754229226124005, + "grad_norm": 683.6365966796875, + "learning_rate": 6.7600333318977655e-06, + "loss": 91.0024, + "step": 55310 + }, + { + "epoch": 0.45762501551060925, + "grad_norm": 1601.54638671875, + "learning_rate": 6.758713161852541e-06, + "loss": 110.4416, + "step": 55320 + }, + { + "epoch": 0.4577077387599785, + "grad_norm": 934.130126953125, + "learning_rate": 6.757392851874584e-06, + "loss": 113.9463, + "step": 55330 + }, + { + "epoch": 0.45779046200934775, + "grad_norm": 1528.1434326171875, + "learning_rate": 6.756072402068943e-06, + "loss": 127.9479, + "step": 55340 + }, + { + "epoch": 0.45787318525871695, + "grad_norm": 771.1652221679688, + "learning_rate": 6.75475181254068e-06, + "loss": 89.5568, + "step": 55350 + }, + { + "epoch": 0.4579559085080862, + "grad_norm": 879.8575439453125, + "learning_rate": 6.753431083394868e-06, + "loss": 99.1166, + "step": 55360 + }, + { + "epoch": 0.4580386317574554, + "grad_norm": 684.24609375, + 
"learning_rate": 6.75211021473659e-06, + "loss": 86.4435, + "step": 55370 + }, + { + "epoch": 0.45812135500682466, + "grad_norm": 1416.9102783203125, + "learning_rate": 6.750789206670945e-06, + "loss": 95.3575, + "step": 55380 + }, + { + "epoch": 0.4582040782561939, + "grad_norm": 889.2215576171875, + "learning_rate": 6.749468059303039e-06, + "loss": 92.2547, + "step": 55390 + }, + { + "epoch": 0.4582868015055631, + "grad_norm": 1061.4666748046875, + "learning_rate": 6.748146772737988e-06, + "loss": 101.2263, + "step": 55400 + }, + { + "epoch": 0.45836952475493237, + "grad_norm": 1019.367919921875, + "learning_rate": 6.7468253470809205e-06, + "loss": 136.364, + "step": 55410 + }, + { + "epoch": 0.4584522480043016, + "grad_norm": 1758.954833984375, + "learning_rate": 6.745503782436976e-06, + "loss": 124.5927, + "step": 55420 + }, + { + "epoch": 0.4585349712536708, + "grad_norm": 763.4608154296875, + "learning_rate": 6.7441820789113085e-06, + "loss": 105.7726, + "step": 55430 + }, + { + "epoch": 0.4586176945030401, + "grad_norm": 946.8983764648438, + "learning_rate": 6.7428602366090764e-06, + "loss": 112.0909, + "step": 55440 + }, + { + "epoch": 0.45870041775240933, + "grad_norm": 932.01318359375, + "learning_rate": 6.741538255635454e-06, + "loss": 88.3237, + "step": 55450 + }, + { + "epoch": 0.4587831410017785, + "grad_norm": 829.2271118164062, + "learning_rate": 6.740216136095626e-06, + "loss": 77.0077, + "step": 55460 + }, + { + "epoch": 0.4588658642511478, + "grad_norm": 386.30364990234375, + "learning_rate": 6.738893878094786e-06, + "loss": 87.8386, + "step": 55470 + }, + { + "epoch": 0.45894858750051704, + "grad_norm": 1005.0819702148438, + "learning_rate": 6.737571481738141e-06, + "loss": 128.3534, + "step": 55480 + }, + { + "epoch": 0.45903131074988623, + "grad_norm": 921.326904296875, + "learning_rate": 6.736248947130907e-06, + "loss": 107.7665, + "step": 55490 + }, + { + "epoch": 0.4591140339992555, + "grad_norm": 1480.9969482421875, + "learning_rate": 6.734926274378313e-06, + "loss": 163.0793, + "step": 55500 + }, + { + "epoch": 0.45919675724862474, + "grad_norm": 955.8799438476562, + "learning_rate": 6.733603463585598e-06, + "loss": 93.535, + "step": 55510 + }, + { + "epoch": 0.45927948049799394, + "grad_norm": 1354.3538818359375, + "learning_rate": 6.73228051485801e-06, + "loss": 85.5063, + "step": 55520 + }, + { + "epoch": 0.4593622037473632, + "grad_norm": 536.1598510742188, + "learning_rate": 6.7309574283008125e-06, + "loss": 84.9367, + "step": 55530 + }, + { + "epoch": 0.45944492699673245, + "grad_norm": 663.9678344726562, + "learning_rate": 6.729634204019277e-06, + "loss": 104.2453, + "step": 55540 + }, + { + "epoch": 0.45952765024610165, + "grad_norm": 690.8705444335938, + "learning_rate": 6.7283108421186835e-06, + "loss": 108.3504, + "step": 55550 + }, + { + "epoch": 0.4596103734954709, + "grad_norm": 1198.9808349609375, + "learning_rate": 6.726987342704331e-06, + "loss": 83.5574, + "step": 55560 + }, + { + "epoch": 0.45969309674484016, + "grad_norm": 780.8455810546875, + "learning_rate": 6.72566370588152e-06, + "loss": 92.0601, + "step": 55570 + }, + { + "epoch": 0.45977581999420936, + "grad_norm": 1550.3758544921875, + "learning_rate": 6.724339931755568e-06, + "loss": 114.7621, + "step": 55580 + }, + { + "epoch": 0.4598585432435786, + "grad_norm": 515.3203735351562, + "learning_rate": 6.7230160204318e-06, + "loss": 85.6729, + "step": 55590 + }, + { + "epoch": 0.45994126649294786, + "grad_norm": 622.2483520507812, + "learning_rate": 6.721691972015557e-06, + "loss": 
91.3313, + "step": 55600 + }, + { + "epoch": 0.46002398974231706, + "grad_norm": 1014.8973999023438, + "learning_rate": 6.720367786612185e-06, + "loss": 71.0175, + "step": 55610 + }, + { + "epoch": 0.4601067129916863, + "grad_norm": 2489.36962890625, + "learning_rate": 6.719043464327043e-06, + "loss": 82.0414, + "step": 55620 + }, + { + "epoch": 0.46018943624105557, + "grad_norm": 736.4323120117188, + "learning_rate": 6.717719005265502e-06, + "loss": 68.9147, + "step": 55630 + }, + { + "epoch": 0.46027215949042477, + "grad_norm": 749.0441284179688, + "learning_rate": 6.716394409532944e-06, + "loss": 117.8003, + "step": 55640 + }, + { + "epoch": 0.460354882739794, + "grad_norm": 869.51123046875, + "learning_rate": 6.715069677234758e-06, + "loss": 116.4776, + "step": 55650 + }, + { + "epoch": 0.4604376059891633, + "grad_norm": 686.819091796875, + "learning_rate": 6.713744808476349e-06, + "loss": 101.6366, + "step": 55660 + }, + { + "epoch": 0.4605203292385325, + "grad_norm": 1027.5953369140625, + "learning_rate": 6.712419803363132e-06, + "loss": 92.2464, + "step": 55670 + }, + { + "epoch": 0.46060305248790173, + "grad_norm": 743.2421875, + "learning_rate": 6.711094662000529e-06, + "loss": 104.2116, + "step": 55680 + }, + { + "epoch": 0.460685775737271, + "grad_norm": 992.1659545898438, + "learning_rate": 6.709769384493978e-06, + "loss": 102.0216, + "step": 55690 + }, + { + "epoch": 0.4607684989866402, + "grad_norm": 763.0567626953125, + "learning_rate": 6.708443970948923e-06, + "loss": 105.3009, + "step": 55700 + }, + { + "epoch": 0.46085122223600944, + "grad_norm": 1082.239501953125, + "learning_rate": 6.707118421470822e-06, + "loss": 82.143, + "step": 55710 + }, + { + "epoch": 0.4609339454853787, + "grad_norm": 943.666259765625, + "learning_rate": 6.705792736165142e-06, + "loss": 117.292, + "step": 55720 + }, + { + "epoch": 0.4610166687347479, + "grad_norm": 761.20166015625, + "learning_rate": 6.7044669151373645e-06, + "loss": 79.007, + "step": 55730 + }, + { + "epoch": 0.46109939198411715, + "grad_norm": 676.6871337890625, + "learning_rate": 6.7031409584929765e-06, + "loss": 96.3533, + "step": 55740 + }, + { + "epoch": 0.46118211523348634, + "grad_norm": 851.9503173828125, + "learning_rate": 6.701814866337477e-06, + "loss": 131.2042, + "step": 55750 + }, + { + "epoch": 0.4612648384828556, + "grad_norm": 1339.9852294921875, + "learning_rate": 6.700488638776379e-06, + "loss": 113.9575, + "step": 55760 + }, + { + "epoch": 0.46134756173222485, + "grad_norm": 945.3772583007812, + "learning_rate": 6.699162275915208e-06, + "loss": 88.1573, + "step": 55770 + }, + { + "epoch": 0.46143028498159405, + "grad_norm": 1223.3470458984375, + "learning_rate": 6.6978357778594896e-06, + "loss": 81.1195, + "step": 55780 + }, + { + "epoch": 0.4615130082309633, + "grad_norm": 922.9274291992188, + "learning_rate": 6.69650914471477e-06, + "loss": 97.8219, + "step": 55790 + }, + { + "epoch": 0.46159573148033256, + "grad_norm": 1972.91162109375, + "learning_rate": 6.695182376586603e-06, + "loss": 99.1887, + "step": 55800 + }, + { + "epoch": 0.46167845472970176, + "grad_norm": 1156.863525390625, + "learning_rate": 6.6938554735805565e-06, + "loss": 99.5834, + "step": 55810 + }, + { + "epoch": 0.461761177979071, + "grad_norm": 1650.710205078125, + "learning_rate": 6.6925284358022035e-06, + "loss": 155.1294, + "step": 55820 + }, + { + "epoch": 0.46184390122844027, + "grad_norm": 520.858642578125, + "learning_rate": 6.69120126335713e-06, + "loss": 122.3727, + "step": 55830 + }, + { + "epoch": 0.46192662447780947, + 
"grad_norm": 916.6767578125, + "learning_rate": 6.689873956350932e-06, + "loss": 75.5874, + "step": 55840 + }, + { + "epoch": 0.4620093477271787, + "grad_norm": 917.3632202148438, + "learning_rate": 6.688546514889221e-06, + "loss": 82.2032, + "step": 55850 + }, + { + "epoch": 0.462092070976548, + "grad_norm": 1109.2620849609375, + "learning_rate": 6.687218939077613e-06, + "loss": 93.1306, + "step": 55860 + }, + { + "epoch": 0.4621747942259172, + "grad_norm": 933.2094116210938, + "learning_rate": 6.685891229021736e-06, + "loss": 82.2505, + "step": 55870 + }, + { + "epoch": 0.46225751747528643, + "grad_norm": 930.94873046875, + "learning_rate": 6.6845633848272315e-06, + "loss": 113.1939, + "step": 55880 + }, + { + "epoch": 0.4623402407246557, + "grad_norm": 808.5699462890625, + "learning_rate": 6.68323540659975e-06, + "loss": 100.777, + "step": 55890 + }, + { + "epoch": 0.4624229639740249, + "grad_norm": 663.3551025390625, + "learning_rate": 6.681907294444952e-06, + "loss": 67.9267, + "step": 55900 + }, + { + "epoch": 0.46250568722339414, + "grad_norm": 1566.524169921875, + "learning_rate": 6.6805790484685094e-06, + "loss": 93.2308, + "step": 55910 + }, + { + "epoch": 0.4625884104727634, + "grad_norm": 1127.6248779296875, + "learning_rate": 6.679250668776105e-06, + "loss": 140.5565, + "step": 55920 + }, + { + "epoch": 0.4626711337221326, + "grad_norm": 1181.0595703125, + "learning_rate": 6.677922155473432e-06, + "loss": 99.0083, + "step": 55930 + }, + { + "epoch": 0.46275385697150184, + "grad_norm": 1022.6056518554688, + "learning_rate": 6.676593508666192e-06, + "loss": 135.9459, + "step": 55940 + }, + { + "epoch": 0.4628365802208711, + "grad_norm": 1002.8577880859375, + "learning_rate": 6.675264728460103e-06, + "loss": 85.4971, + "step": 55950 + }, + { + "epoch": 0.4629193034702403, + "grad_norm": 944.582763671875, + "learning_rate": 6.673935814960887e-06, + "loss": 107.2265, + "step": 55960 + }, + { + "epoch": 0.46300202671960955, + "grad_norm": 1288.594970703125, + "learning_rate": 6.672606768274281e-06, + "loss": 90.4464, + "step": 55970 + }, + { + "epoch": 0.4630847499689788, + "grad_norm": 14770.8916015625, + "learning_rate": 6.67127758850603e-06, + "loss": 241.7437, + "step": 55980 + }, + { + "epoch": 0.463167473218348, + "grad_norm": 957.4649047851562, + "learning_rate": 6.669948275761893e-06, + "loss": 94.3999, + "step": 55990 + }, + { + "epoch": 0.46325019646771726, + "grad_norm": 1285.9119873046875, + "learning_rate": 6.668618830147634e-06, + "loss": 125.4894, + "step": 56000 + }, + { + "epoch": 0.4633329197170865, + "grad_norm": 722.8943481445312, + "learning_rate": 6.667289251769033e-06, + "loss": 85.9025, + "step": 56010 + }, + { + "epoch": 0.4634156429664557, + "grad_norm": 1016.5885009765625, + "learning_rate": 6.6659595407318775e-06, + "loss": 107.344, + "step": 56020 + }, + { + "epoch": 0.46349836621582496, + "grad_norm": 900.4605712890625, + "learning_rate": 6.664629697141969e-06, + "loss": 111.3321, + "step": 56030 + }, + { + "epoch": 0.4635810894651942, + "grad_norm": 971.4387817382812, + "learning_rate": 6.663299721105113e-06, + "loss": 106.712, + "step": 56040 + }, + { + "epoch": 0.4636638127145634, + "grad_norm": 1102.5128173828125, + "learning_rate": 6.661969612727133e-06, + "loss": 94.0693, + "step": 56050 + }, + { + "epoch": 0.46374653596393267, + "grad_norm": 1239.50341796875, + "learning_rate": 6.660639372113858e-06, + "loss": 109.2637, + "step": 56060 + }, + { + "epoch": 0.4638292592133019, + "grad_norm": 986.812255859375, + "learning_rate": 
6.65930899937113e-06, + "loss": 104.8453, + "step": 56070 + }, + { + "epoch": 0.4639119824626711, + "grad_norm": 1008.2682495117188, + "learning_rate": 6.657978494604799e-06, + "loss": 109.9477, + "step": 56080 + }, + { + "epoch": 0.4639947057120404, + "grad_norm": 683.7396850585938, + "learning_rate": 6.656647857920728e-06, + "loss": 109.3742, + "step": 56090 + }, + { + "epoch": 0.4640774289614096, + "grad_norm": 1494.6700439453125, + "learning_rate": 6.655317089424791e-06, + "loss": 106.0912, + "step": 56100 + }, + { + "epoch": 0.46416015221077883, + "grad_norm": 1352.005615234375, + "learning_rate": 6.6539861892228695e-06, + "loss": 96.1662, + "step": 56110 + }, + { + "epoch": 0.4642428754601481, + "grad_norm": 787.8734741210938, + "learning_rate": 6.652655157420859e-06, + "loss": 81.0995, + "step": 56120 + }, + { + "epoch": 0.4643255987095173, + "grad_norm": 837.31787109375, + "learning_rate": 6.651323994124661e-06, + "loss": 93.8052, + "step": 56130 + }, + { + "epoch": 0.46440832195888654, + "grad_norm": 1119.1298828125, + "learning_rate": 6.649992699440191e-06, + "loss": 86.7144, + "step": 56140 + }, + { + "epoch": 0.4644910452082558, + "grad_norm": 1033.329345703125, + "learning_rate": 6.648661273473375e-06, + "loss": 73.4606, + "step": 56150 + }, + { + "epoch": 0.464573768457625, + "grad_norm": 1087.544921875, + "learning_rate": 6.6473297163301485e-06, + "loss": 93.5026, + "step": 56160 + }, + { + "epoch": 0.46465649170699425, + "grad_norm": 637.343505859375, + "learning_rate": 6.645998028116455e-06, + "loss": 106.3616, + "step": 56170 + }, + { + "epoch": 0.4647392149563635, + "grad_norm": 1312.9207763671875, + "learning_rate": 6.6446662089382545e-06, + "loss": 126.186, + "step": 56180 + }, + { + "epoch": 0.4648219382057327, + "grad_norm": 756.4004516601562, + "learning_rate": 6.643334258901511e-06, + "loss": 110.0223, + "step": 56190 + }, + { + "epoch": 0.46490466145510195, + "grad_norm": 577.8167724609375, + "learning_rate": 6.642002178112202e-06, + "loss": 114.2335, + "step": 56200 + }, + { + "epoch": 0.4649873847044712, + "grad_norm": 419.95404052734375, + "learning_rate": 6.640669966676316e-06, + "loss": 88.2521, + "step": 56210 + }, + { + "epoch": 0.4650701079538404, + "grad_norm": 503.7681579589844, + "learning_rate": 6.6393376246998485e-06, + "loss": 105.7174, + "step": 56220 + }, + { + "epoch": 0.46515283120320966, + "grad_norm": 721.5469360351562, + "learning_rate": 6.638005152288811e-06, + "loss": 100.881, + "step": 56230 + }, + { + "epoch": 0.4652355544525789, + "grad_norm": 1773.4716796875, + "learning_rate": 6.636672549549221e-06, + "loss": 115.8908, + "step": 56240 + }, + { + "epoch": 0.4653182777019481, + "grad_norm": 960.2298583984375, + "learning_rate": 6.635339816587109e-06, + "loss": 109.554, + "step": 56250 + }, + { + "epoch": 0.46540100095131737, + "grad_norm": 1094.2969970703125, + "learning_rate": 6.634006953508512e-06, + "loss": 104.5612, + "step": 56260 + }, + { + "epoch": 0.4654837242006866, + "grad_norm": 706.4091186523438, + "learning_rate": 6.63267396041948e-06, + "loss": 113.3086, + "step": 56270 + }, + { + "epoch": 0.4655664474500558, + "grad_norm": 1204.162841796875, + "learning_rate": 6.631340837426075e-06, + "loss": 105.2585, + "step": 56280 + }, + { + "epoch": 0.4656491706994251, + "grad_norm": 807.52734375, + "learning_rate": 6.630007584634366e-06, + "loss": 78.1581, + "step": 56290 + }, + { + "epoch": 0.46573189394879433, + "grad_norm": 912.2439575195312, + "learning_rate": 6.628674202150434e-06, + "loss": 95.7974, + "step": 56300 + }, + { + 
"epoch": 0.4658146171981635, + "grad_norm": 793.3104858398438, + "learning_rate": 6.627340690080371e-06, + "loss": 94.2195, + "step": 56310 + }, + { + "epoch": 0.4658973404475328, + "grad_norm": 1065.8125, + "learning_rate": 6.626007048530276e-06, + "loss": 72.4793, + "step": 56320 + }, + { + "epoch": 0.46598006369690204, + "grad_norm": 565.6997680664062, + "learning_rate": 6.624673277606264e-06, + "loss": 90.7239, + "step": 56330 + }, + { + "epoch": 0.46606278694627123, + "grad_norm": 1088.072998046875, + "learning_rate": 6.623339377414456e-06, + "loss": 114.9387, + "step": 56340 + }, + { + "epoch": 0.4661455101956405, + "grad_norm": 1036.712158203125, + "learning_rate": 6.622005348060983e-06, + "loss": 98.3773, + "step": 56350 + }, + { + "epoch": 0.46622823344500974, + "grad_norm": 657.3320922851562, + "learning_rate": 6.620671189651988e-06, + "loss": 78.3256, + "step": 56360 + }, + { + "epoch": 0.46631095669437894, + "grad_norm": 396.6316223144531, + "learning_rate": 6.6193369022936245e-06, + "loss": 111.291, + "step": 56370 + }, + { + "epoch": 0.4663936799437482, + "grad_norm": 747.0289306640625, + "learning_rate": 6.618002486092056e-06, + "loss": 131.3509, + "step": 56380 + }, + { + "epoch": 0.46647640319311745, + "grad_norm": 645.677978515625, + "learning_rate": 6.616667941153456e-06, + "loss": 112.265, + "step": 56390 + }, + { + "epoch": 0.46655912644248665, + "grad_norm": 2745.71533203125, + "learning_rate": 6.615333267584007e-06, + "loss": 94.6054, + "step": 56400 + }, + { + "epoch": 0.4666418496918559, + "grad_norm": 617.6024780273438, + "learning_rate": 6.613998465489902e-06, + "loss": 86.2714, + "step": 56410 + }, + { + "epoch": 0.46672457294122516, + "grad_norm": 2982.6630859375, + "learning_rate": 6.612663534977347e-06, + "loss": 158.7063, + "step": 56420 + }, + { + "epoch": 0.46680729619059436, + "grad_norm": 372.95379638671875, + "learning_rate": 6.611328476152557e-06, + "loss": 127.7486, + "step": 56430 + }, + { + "epoch": 0.4668900194399636, + "grad_norm": 665.3734130859375, + "learning_rate": 6.609993289121753e-06, + "loss": 108.4631, + "step": 56440 + }, + { + "epoch": 0.4669727426893328, + "grad_norm": 637.3652954101562, + "learning_rate": 6.608657973991172e-06, + "loss": 84.1843, + "step": 56450 + }, + { + "epoch": 0.46705546593870206, + "grad_norm": 962.6121215820312, + "learning_rate": 6.607322530867061e-06, + "loss": 88.8814, + "step": 56460 + }, + { + "epoch": 0.4671381891880713, + "grad_norm": 588.2682495117188, + "learning_rate": 6.605986959855672e-06, + "loss": 76.9025, + "step": 56470 + }, + { + "epoch": 0.4672209124374405, + "grad_norm": 1287.7611083984375, + "learning_rate": 6.60465126106327e-06, + "loss": 120.8118, + "step": 56480 + }, + { + "epoch": 0.46730363568680977, + "grad_norm": 932.3352661132812, + "learning_rate": 6.6033154345961314e-06, + "loss": 89.3703, + "step": 56490 + }, + { + "epoch": 0.467386358936179, + "grad_norm": 577.4403076171875, + "learning_rate": 6.601979480560543e-06, + "loss": 94.8228, + "step": 56500 + }, + { + "epoch": 0.4674690821855482, + "grad_norm": 937.298583984375, + "learning_rate": 6.6006433990627985e-06, + "loss": 122.7913, + "step": 56510 + }, + { + "epoch": 0.4675518054349175, + "grad_norm": 588.9931030273438, + "learning_rate": 6.599307190209206e-06, + "loss": 112.8304, + "step": 56520 + }, + { + "epoch": 0.46763452868428673, + "grad_norm": 1339.0936279296875, + "learning_rate": 6.5979708541060796e-06, + "loss": 144.9437, + "step": 56530 + }, + { + "epoch": 0.46771725193365593, + "grad_norm": 1465.9853515625, + 
"learning_rate": 6.596634390859745e-06, + "loss": 91.1747, + "step": 56540 + }, + { + "epoch": 0.4677999751830252, + "grad_norm": 890.4679565429688, + "learning_rate": 6.59529780057654e-06, + "loss": 82.902, + "step": 56550 + }, + { + "epoch": 0.46788269843239444, + "grad_norm": 839.3494873046875, + "learning_rate": 6.593961083362811e-06, + "loss": 85.888, + "step": 56560 + }, + { + "epoch": 0.46796542168176364, + "grad_norm": 992.9126586914062, + "learning_rate": 6.592624239324914e-06, + "loss": 109.3493, + "step": 56570 + }, + { + "epoch": 0.4680481449311329, + "grad_norm": 1971.344482421875, + "learning_rate": 6.591287268569215e-06, + "loss": 99.3406, + "step": 56580 + }, + { + "epoch": 0.46813086818050215, + "grad_norm": 1292.9296875, + "learning_rate": 6.589950171202092e-06, + "loss": 67.5184, + "step": 56590 + }, + { + "epoch": 0.46821359142987135, + "grad_norm": 1166.9710693359375, + "learning_rate": 6.588612947329929e-06, + "loss": 121.0264, + "step": 56600 + }, + { + "epoch": 0.4682963146792406, + "grad_norm": 1211.2210693359375, + "learning_rate": 6.587275597059125e-06, + "loss": 90.2953, + "step": 56610 + }, + { + "epoch": 0.46837903792860985, + "grad_norm": 860.5660400390625, + "learning_rate": 6.585938120496087e-06, + "loss": 110.3769, + "step": 56620 + }, + { + "epoch": 0.46846176117797905, + "grad_norm": 1788.8858642578125, + "learning_rate": 6.584600517747232e-06, + "loss": 111.8886, + "step": 56630 + }, + { + "epoch": 0.4685444844273483, + "grad_norm": 826.596923828125, + "learning_rate": 6.583262788918985e-06, + "loss": 84.6638, + "step": 56640 + }, + { + "epoch": 0.46862720767671756, + "grad_norm": 794.5626220703125, + "learning_rate": 6.581924934117783e-06, + "loss": 108.4789, + "step": 56650 + }, + { + "epoch": 0.46870993092608676, + "grad_norm": 1118.61962890625, + "learning_rate": 6.580586953450076e-06, + "loss": 74.7545, + "step": 56660 + }, + { + "epoch": 0.468792654175456, + "grad_norm": 763.598388671875, + "learning_rate": 6.579248847022317e-06, + "loss": 78.2781, + "step": 56670 + }, + { + "epoch": 0.46887537742482527, + "grad_norm": 1182.76806640625, + "learning_rate": 6.577910614940978e-06, + "loss": 97.6059, + "step": 56680 + }, + { + "epoch": 0.46895810067419447, + "grad_norm": 1287.8709716796875, + "learning_rate": 6.576572257312531e-06, + "loss": 94.5327, + "step": 56690 + }, + { + "epoch": 0.4690408239235637, + "grad_norm": 625.0133666992188, + "learning_rate": 6.5752337742434644e-06, + "loss": 90.6583, + "step": 56700 + }, + { + "epoch": 0.469123547172933, + "grad_norm": 352.9053649902344, + "learning_rate": 6.573895165840276e-06, + "loss": 99.1602, + "step": 56710 + }, + { + "epoch": 0.4692062704223022, + "grad_norm": 1227.5992431640625, + "learning_rate": 6.5725564322094745e-06, + "loss": 106.9007, + "step": 56720 + }, + { + "epoch": 0.46928899367167143, + "grad_norm": 1981.240234375, + "learning_rate": 6.571217573457573e-06, + "loss": 110.456, + "step": 56730 + }, + { + "epoch": 0.4693717169210407, + "grad_norm": 915.96728515625, + "learning_rate": 6.569878589691101e-06, + "loss": 64.4055, + "step": 56740 + }, + { + "epoch": 0.4694544401704099, + "grad_norm": 1044.962890625, + "learning_rate": 6.568539481016593e-06, + "loss": 108.6498, + "step": 56750 + }, + { + "epoch": 0.46953716341977914, + "grad_norm": 1632.702880859375, + "learning_rate": 6.567200247540599e-06, + "loss": 123.9141, + "step": 56760 + }, + { + "epoch": 0.4696198866691484, + "grad_norm": 1159.7938232421875, + "learning_rate": 6.5658608893696714e-06, + "loss": 105.7761, + "step": 
56770 + }, + { + "epoch": 0.4697026099185176, + "grad_norm": 1023.9876098632812, + "learning_rate": 6.564521406610382e-06, + "loss": 130.591, + "step": 56780 + }, + { + "epoch": 0.46978533316788684, + "grad_norm": 1106.73779296875, + "learning_rate": 6.563181799369301e-06, + "loss": 89.8389, + "step": 56790 + }, + { + "epoch": 0.4698680564172561, + "grad_norm": 636.095458984375, + "learning_rate": 6.561842067753021e-06, + "loss": 95.9526, + "step": 56800 + }, + { + "epoch": 0.4699507796666253, + "grad_norm": 725.4862670898438, + "learning_rate": 6.560502211868135e-06, + "loss": 71.1143, + "step": 56810 + }, + { + "epoch": 0.47003350291599455, + "grad_norm": 946.0494384765625, + "learning_rate": 6.55916223182125e-06, + "loss": 106.9812, + "step": 56820 + }, + { + "epoch": 0.47011622616536375, + "grad_norm": 1510.7655029296875, + "learning_rate": 6.55782212771898e-06, + "loss": 147.8765, + "step": 56830 + }, + { + "epoch": 0.470198949414733, + "grad_norm": 1354.79296875, + "learning_rate": 6.5564818996679536e-06, + "loss": 121.1624, + "step": 56840 + }, + { + "epoch": 0.47028167266410226, + "grad_norm": 1657.0260009765625, + "learning_rate": 6.555141547774807e-06, + "loss": 153.1369, + "step": 56850 + }, + { + "epoch": 0.47036439591347146, + "grad_norm": 432.9583435058594, + "learning_rate": 6.553801072146184e-06, + "loss": 112.8747, + "step": 56860 + }, + { + "epoch": 0.4704471191628407, + "grad_norm": 1424.265380859375, + "learning_rate": 6.55246047288874e-06, + "loss": 102.5585, + "step": 56870 + }, + { + "epoch": 0.47052984241220996, + "grad_norm": 842.8648071289062, + "learning_rate": 6.551119750109142e-06, + "loss": 95.888, + "step": 56880 + }, + { + "epoch": 0.47061256566157916, + "grad_norm": 1254.891845703125, + "learning_rate": 6.5497789039140635e-06, + "loss": 88.7369, + "step": 56890 + }, + { + "epoch": 0.4706952889109484, + "grad_norm": 593.7796020507812, + "learning_rate": 6.54843793441019e-06, + "loss": 117.424, + "step": 56900 + }, + { + "epoch": 0.47077801216031767, + "grad_norm": 5807.5322265625, + "learning_rate": 6.547096841704217e-06, + "loss": 123.5693, + "step": 56910 + }, + { + "epoch": 0.47086073540968687, + "grad_norm": 698.7401733398438, + "learning_rate": 6.545755625902848e-06, + "loss": 108.5493, + "step": 56920 + }, + { + "epoch": 0.4709434586590561, + "grad_norm": 770.432373046875, + "learning_rate": 6.544414287112798e-06, + "loss": 60.7358, + "step": 56930 + }, + { + "epoch": 0.4710261819084254, + "grad_norm": 772.1907348632812, + "learning_rate": 6.54307282544079e-06, + "loss": 90.2566, + "step": 56940 + }, + { + "epoch": 0.4711089051577946, + "grad_norm": 543.7560424804688, + "learning_rate": 6.5417312409935606e-06, + "loss": 74.9508, + "step": 56950 + }, + { + "epoch": 0.47119162840716383, + "grad_norm": 605.939697265625, + "learning_rate": 6.540389533877852e-06, + "loss": 117.9458, + "step": 56960 + }, + { + "epoch": 0.4712743516565331, + "grad_norm": 546.9862670898438, + "learning_rate": 6.539047704200417e-06, + "loss": 83.1111, + "step": 56970 + }, + { + "epoch": 0.4713570749059023, + "grad_norm": 860.575439453125, + "learning_rate": 6.53770575206802e-06, + "loss": 102.208, + "step": 56980 + }, + { + "epoch": 0.47143979815527154, + "grad_norm": 616.0123291015625, + "learning_rate": 6.536363677587433e-06, + "loss": 101.8752, + "step": 56990 + }, + { + "epoch": 0.4715225214046408, + "grad_norm": 912.4677124023438, + "learning_rate": 6.535021480865439e-06, + "loss": 94.7414, + "step": 57000 + }, + { + "epoch": 0.47160524465401, + "grad_norm": 
1033.161376953125, + "learning_rate": 6.5336791620088306e-06, + "loss": 98.8203, + "step": 57010 + }, + { + "epoch": 0.47168796790337925, + "grad_norm": 813.3025512695312, + "learning_rate": 6.53233672112441e-06, + "loss": 111.74, + "step": 57020 + }, + { + "epoch": 0.4717706911527485, + "grad_norm": 1582.189208984375, + "learning_rate": 6.530994158318988e-06, + "loss": 113.1147, + "step": 57030 + }, + { + "epoch": 0.4718534144021177, + "grad_norm": 862.9869384765625, + "learning_rate": 6.529651473699389e-06, + "loss": 85.5126, + "step": 57040 + }, + { + "epoch": 0.47193613765148695, + "grad_norm": 1320.765869140625, + "learning_rate": 6.528308667372441e-06, + "loss": 101.8769, + "step": 57050 + }, + { + "epoch": 0.4720188609008562, + "grad_norm": 717.2901611328125, + "learning_rate": 6.526965739444988e-06, + "loss": 119.4057, + "step": 57060 + }, + { + "epoch": 0.4721015841502254, + "grad_norm": 699.390625, + "learning_rate": 6.525622690023878e-06, + "loss": 105.9801, + "step": 57070 + }, + { + "epoch": 0.47218430739959466, + "grad_norm": 1108.2218017578125, + "learning_rate": 6.524279519215972e-06, + "loss": 105.0386, + "step": 57080 + }, + { + "epoch": 0.4722670306489639, + "grad_norm": 1312.4166259765625, + "learning_rate": 6.522936227128139e-06, + "loss": 116.3358, + "step": 57090 + }, + { + "epoch": 0.4723497538983331, + "grad_norm": 881.8489379882812, + "learning_rate": 6.521592813867261e-06, + "loss": 115.962, + "step": 57100 + }, + { + "epoch": 0.47243247714770237, + "grad_norm": 1025.9259033203125, + "learning_rate": 6.520249279540227e-06, + "loss": 112.2708, + "step": 57110 + }, + { + "epoch": 0.4725152003970716, + "grad_norm": 968.8225708007812, + "learning_rate": 6.5189056242539325e-06, + "loss": 81.8784, + "step": 57120 + }, + { + "epoch": 0.4725979236464408, + "grad_norm": 776.3606567382812, + "learning_rate": 6.51756184811529e-06, + "loss": 97.869, + "step": 57130 + }, + { + "epoch": 0.4726806468958101, + "grad_norm": 1188.362060546875, + "learning_rate": 6.516217951231215e-06, + "loss": 78.5015, + "step": 57140 + }, + { + "epoch": 0.47276337014517933, + "grad_norm": 1396.0478515625, + "learning_rate": 6.514873933708637e-06, + "loss": 115.7227, + "step": 57150 + }, + { + "epoch": 0.47284609339454853, + "grad_norm": 701.3724365234375, + "learning_rate": 6.513529795654493e-06, + "loss": 91.0152, + "step": 57160 + }, + { + "epoch": 0.4729288166439178, + "grad_norm": 773.8468627929688, + "learning_rate": 6.512185537175727e-06, + "loss": 140.7189, + "step": 57170 + }, + { + "epoch": 0.473011539893287, + "grad_norm": 948.971923828125, + "learning_rate": 6.5108411583793e-06, + "loss": 163.1197, + "step": 57180 + }, + { + "epoch": 0.47309426314265624, + "grad_norm": 662.3345336914062, + "learning_rate": 6.509496659372175e-06, + "loss": 92.557, + "step": 57190 + }, + { + "epoch": 0.4731769863920255, + "grad_norm": 902.59521484375, + "learning_rate": 6.508152040261329e-06, + "loss": 98.1511, + "step": 57200 + }, + { + "epoch": 0.4732597096413947, + "grad_norm": 912.7888793945312, + "learning_rate": 6.506807301153746e-06, + "loss": 82.9847, + "step": 57210 + }, + { + "epoch": 0.47334243289076394, + "grad_norm": 711.9552001953125, + "learning_rate": 6.5054624421564204e-06, + "loss": 85.3893, + "step": 57220 + }, + { + "epoch": 0.4734251561401332, + "grad_norm": 996.9633178710938, + "learning_rate": 6.504117463376358e-06, + "loss": 103.8014, + "step": 57230 + }, + { + "epoch": 0.4735078793895024, + "grad_norm": 889.486083984375, + "learning_rate": 6.502772364920573e-06, + "loss": 
119.4068, + "step": 57240 + }, + { + "epoch": 0.47359060263887165, + "grad_norm": 1359.7982177734375, + "learning_rate": 6.501427146896087e-06, + "loss": 131.6854, + "step": 57250 + }, + { + "epoch": 0.4736733258882409, + "grad_norm": 863.1353759765625, + "learning_rate": 6.5000818094099345e-06, + "loss": 125.5572, + "step": 57260 + }, + { + "epoch": 0.4737560491376101, + "grad_norm": 583.1968383789062, + "learning_rate": 6.498736352569155e-06, + "loss": 97.2687, + "step": 57270 + }, + { + "epoch": 0.47383877238697936, + "grad_norm": 613.0692138671875, + "learning_rate": 6.497390776480804e-06, + "loss": 83.3367, + "step": 57280 + }, + { + "epoch": 0.4739214956363486, + "grad_norm": 894.8236083984375, + "learning_rate": 6.49604508125194e-06, + "loss": 123.6242, + "step": 57290 + }, + { + "epoch": 0.4740042188857178, + "grad_norm": 1123.2113037109375, + "learning_rate": 6.4946992669896355e-06, + "loss": 90.5414, + "step": 57300 + }, + { + "epoch": 0.47408694213508706, + "grad_norm": 634.5689086914062, + "learning_rate": 6.493353333800969e-06, + "loss": 84.9406, + "step": 57310 + }, + { + "epoch": 0.4741696653844563, + "grad_norm": 618.6438598632812, + "learning_rate": 6.492007281793032e-06, + "loss": 101.8569, + "step": 57320 + }, + { + "epoch": 0.4742523886338255, + "grad_norm": 760.6466064453125, + "learning_rate": 6.490661111072923e-06, + "loss": 98.2763, + "step": 57330 + }, + { + "epoch": 0.47433511188319477, + "grad_norm": 642.0409545898438, + "learning_rate": 6.489314821747751e-06, + "loss": 82.7239, + "step": 57340 + }, + { + "epoch": 0.474417835132564, + "grad_norm": 491.7732849121094, + "learning_rate": 6.487968413924634e-06, + "loss": 82.5276, + "step": 57350 + }, + { + "epoch": 0.4745005583819332, + "grad_norm": 847.6077270507812, + "learning_rate": 6.486621887710698e-06, + "loss": 85.0193, + "step": 57360 + }, + { + "epoch": 0.4745832816313025, + "grad_norm": 1501.081787109375, + "learning_rate": 6.485275243213081e-06, + "loss": 88.0962, + "step": 57370 + }, + { + "epoch": 0.47466600488067173, + "grad_norm": 961.3021850585938, + "learning_rate": 6.4839284805389305e-06, + "loss": 131.3477, + "step": 57380 + }, + { + "epoch": 0.47474872813004093, + "grad_norm": 695.2109985351562, + "learning_rate": 6.4825815997954e-06, + "loss": 99.5357, + "step": 57390 + }, + { + "epoch": 0.4748314513794102, + "grad_norm": 478.3403625488281, + "learning_rate": 6.481234601089655e-06, + "loss": 96.97, + "step": 57400 + }, + { + "epoch": 0.47491417462877944, + "grad_norm": 967.1907348632812, + "learning_rate": 6.4798874845288725e-06, + "loss": 84.3332, + "step": 57410 + }, + { + "epoch": 0.47499689787814864, + "grad_norm": 789.9042358398438, + "learning_rate": 6.4785402502202345e-06, + "loss": 98.0186, + "step": 57420 + }, + { + "epoch": 0.4750796211275179, + "grad_norm": 515.9044799804688, + "learning_rate": 6.477192898270934e-06, + "loss": 98.3447, + "step": 57430 + }, + { + "epoch": 0.47516234437688715, + "grad_norm": 777.9580078125, + "learning_rate": 6.475845428788173e-06, + "loss": 92.8213, + "step": 57440 + }, + { + "epoch": 0.47524506762625635, + "grad_norm": 732.5836791992188, + "learning_rate": 6.474497841879166e-06, + "loss": 100.1301, + "step": 57450 + }, + { + "epoch": 0.4753277908756256, + "grad_norm": 905.1466064453125, + "learning_rate": 6.473150137651132e-06, + "loss": 87.0629, + "step": 57460 + }, + { + "epoch": 0.47541051412499485, + "grad_norm": 1096.08935546875, + "learning_rate": 6.471802316211302e-06, + "loss": 94.4893, + "step": 57470 + }, + { + "epoch": 0.47549323737436405, 
+ "grad_norm": 781.8897094726562, + "learning_rate": 6.4704543776669174e-06, + "loss": 85.6178, + "step": 57480 + }, + { + "epoch": 0.4755759606237333, + "grad_norm": 1561.7882080078125, + "learning_rate": 6.469106322125227e-06, + "loss": 110.5366, + "step": 57490 + }, + { + "epoch": 0.47565868387310256, + "grad_norm": 773.6251220703125, + "learning_rate": 6.467758149693486e-06, + "loss": 94.7606, + "step": 57500 + }, + { + "epoch": 0.47574140712247176, + "grad_norm": 696.1630249023438, + "learning_rate": 6.466409860478967e-06, + "loss": 84.5489, + "step": 57510 + }, + { + "epoch": 0.475824130371841, + "grad_norm": 1580.1494140625, + "learning_rate": 6.465061454588946e-06, + "loss": 114.6306, + "step": 57520 + }, + { + "epoch": 0.47590685362121027, + "grad_norm": 584.140380859375, + "learning_rate": 6.463712932130708e-06, + "loss": 91.1199, + "step": 57530 + }, + { + "epoch": 0.47598957687057947, + "grad_norm": 678.8070068359375, + "learning_rate": 6.462364293211549e-06, + "loss": 80.3412, + "step": 57540 + }, + { + "epoch": 0.4760723001199487, + "grad_norm": 671.9398803710938, + "learning_rate": 6.4610155379387755e-06, + "loss": 116.5642, + "step": 57550 + }, + { + "epoch": 0.4761550233693179, + "grad_norm": 1506.737060546875, + "learning_rate": 6.459666666419699e-06, + "loss": 79.6158, + "step": 57560 + }, + { + "epoch": 0.4762377466186872, + "grad_norm": 695.2562255859375, + "learning_rate": 6.4583176787616466e-06, + "loss": 61.0726, + "step": 57570 + }, + { + "epoch": 0.47632046986805643, + "grad_norm": 1047.5999755859375, + "learning_rate": 6.456968575071951e-06, + "loss": 115.039, + "step": 57580 + }, + { + "epoch": 0.4764031931174256, + "grad_norm": 865.7882690429688, + "learning_rate": 6.45561935545795e-06, + "loss": 111.2492, + "step": 57590 + }, + { + "epoch": 0.4764859163667949, + "grad_norm": 991.0267333984375, + "learning_rate": 6.454270020026996e-06, + "loss": 88.2671, + "step": 57600 + }, + { + "epoch": 0.47656863961616414, + "grad_norm": 985.0994873046875, + "learning_rate": 6.452920568886452e-06, + "loss": 101.4334, + "step": 57610 + }, + { + "epoch": 0.47665136286553333, + "grad_norm": 621.8745727539062, + "learning_rate": 6.451571002143687e-06, + "loss": 80.6865, + "step": 57620 + }, + { + "epoch": 0.4767340861149026, + "grad_norm": 851.007080078125, + "learning_rate": 6.450221319906079e-06, + "loss": 93.8453, + "step": 57630 + }, + { + "epoch": 0.47681680936427184, + "grad_norm": 1132.76708984375, + "learning_rate": 6.448871522281016e-06, + "loss": 85.5419, + "step": 57640 + }, + { + "epoch": 0.47689953261364104, + "grad_norm": 739.5919189453125, + "learning_rate": 6.447521609375894e-06, + "loss": 67.1973, + "step": 57650 + }, + { + "epoch": 0.4769822558630103, + "grad_norm": 829.6648559570312, + "learning_rate": 6.446171581298123e-06, + "loss": 106.6791, + "step": 57660 + }, + { + "epoch": 0.47706497911237955, + "grad_norm": 1077.7239990234375, + "learning_rate": 6.444821438155115e-06, + "loss": 80.1578, + "step": 57670 + }, + { + "epoch": 0.47714770236174875, + "grad_norm": 654.5147094726562, + "learning_rate": 6.443471180054297e-06, + "loss": 69.1088, + "step": 57680 + }, + { + "epoch": 0.477230425611118, + "grad_norm": 1064.0953369140625, + "learning_rate": 6.442120807103102e-06, + "loss": 96.6103, + "step": 57690 + }, + { + "epoch": 0.47731314886048726, + "grad_norm": 846.7904663085938, + "learning_rate": 6.440770319408971e-06, + "loss": 113.4604, + "step": 57700 + }, + { + "epoch": 0.47739587210985646, + "grad_norm": 1160.3079833984375, + "learning_rate": 
6.43941971707936e-06, + "loss": 110.0301, + "step": 57710 + }, + { + "epoch": 0.4774785953592257, + "grad_norm": 524.7237548828125, + "learning_rate": 6.438069000221727e-06, + "loss": 92.4454, + "step": 57720 + }, + { + "epoch": 0.47756131860859496, + "grad_norm": 363.4929504394531, + "learning_rate": 6.4367181689435434e-06, + "loss": 107.8367, + "step": 57730 + }, + { + "epoch": 0.47764404185796416, + "grad_norm": 953.7645263671875, + "learning_rate": 6.435367223352289e-06, + "loss": 121.483, + "step": 57740 + }, + { + "epoch": 0.4777267651073334, + "grad_norm": 1026.001708984375, + "learning_rate": 6.434016163555452e-06, + "loss": 91.6397, + "step": 57750 + }, + { + "epoch": 0.47780948835670267, + "grad_norm": 811.3062133789062, + "learning_rate": 6.432664989660531e-06, + "loss": 91.4573, + "step": 57760 + }, + { + "epoch": 0.47789221160607187, + "grad_norm": 1303.14111328125, + "learning_rate": 6.43131370177503e-06, + "loss": 112.111, + "step": 57770 + }, + { + "epoch": 0.4779749348554411, + "grad_norm": 754.6259765625, + "learning_rate": 6.429962300006468e-06, + "loss": 100.0983, + "step": 57780 + }, + { + "epoch": 0.4780576581048104, + "grad_norm": 738.0693359375, + "learning_rate": 6.428610784462368e-06, + "loss": 79.652, + "step": 57790 + }, + { + "epoch": 0.4781403813541796, + "grad_norm": 1683.4595947265625, + "learning_rate": 6.427259155250265e-06, + "loss": 95.6643, + "step": 57800 + }, + { + "epoch": 0.47822310460354883, + "grad_norm": 840.048828125, + "learning_rate": 6.4259074124777e-06, + "loss": 93.0407, + "step": 57810 + }, + { + "epoch": 0.4783058278529181, + "grad_norm": 849.6332397460938, + "learning_rate": 6.4245555562522265e-06, + "loss": 98.129, + "step": 57820 + }, + { + "epoch": 0.4783885511022873, + "grad_norm": 495.7574768066406, + "learning_rate": 6.423203586681406e-06, + "loss": 87.5308, + "step": 57830 + }, + { + "epoch": 0.47847127435165654, + "grad_norm": 998.4563598632812, + "learning_rate": 6.421851503872807e-06, + "loss": 124.0158, + "step": 57840 + }, + { + "epoch": 0.4785539976010258, + "grad_norm": 636.4727783203125, + "learning_rate": 6.42049930793401e-06, + "loss": 74.8035, + "step": 57850 + }, + { + "epoch": 0.478636720850395, + "grad_norm": 872.4989013671875, + "learning_rate": 6.419146998972602e-06, + "loss": 78.3126, + "step": 57860 + }, + { + "epoch": 0.47871944409976425, + "grad_norm": 1358.409912109375, + "learning_rate": 6.417794577096179e-06, + "loss": 98.5134, + "step": 57870 + }, + { + "epoch": 0.4788021673491335, + "grad_norm": 1421.047119140625, + "learning_rate": 6.41644204241235e-06, + "loss": 119.0474, + "step": 57880 + }, + { + "epoch": 0.4788848905985027, + "grad_norm": 750.8233642578125, + "learning_rate": 6.4150893950287275e-06, + "loss": 108.5803, + "step": 57890 + }, + { + "epoch": 0.47896761384787195, + "grad_norm": 2111.186767578125, + "learning_rate": 6.413736635052936e-06, + "loss": 141.4099, + "step": 57900 + }, + { + "epoch": 0.47905033709724115, + "grad_norm": 681.4733276367188, + "learning_rate": 6.41238376259261e-06, + "loss": 63.5274, + "step": 57910 + }, + { + "epoch": 0.4791330603466104, + "grad_norm": 805.0604248046875, + "learning_rate": 6.411030777755389e-06, + "loss": 98.8302, + "step": 57920 + }, + { + "epoch": 0.47921578359597966, + "grad_norm": 810.17041015625, + "learning_rate": 6.409677680648925e-06, + "loss": 113.7874, + "step": 57930 + }, + { + "epoch": 0.47929850684534886, + "grad_norm": 827.2803344726562, + "learning_rate": 6.4083244713808765e-06, + "loss": 119.2214, + "step": 57940 + }, + { + "epoch": 
0.4793812300947181, + "grad_norm": 1362.2303466796875, + "learning_rate": 6.406971150058914e-06, + "loss": 92.4825, + "step": 57950 + }, + { + "epoch": 0.47946395334408737, + "grad_norm": 855.9775390625, + "learning_rate": 6.405617716790714e-06, + "loss": 79.9808, + "step": 57960 + }, + { + "epoch": 0.47954667659345657, + "grad_norm": 632.9134521484375, + "learning_rate": 6.404264171683965e-06, + "loss": 87.7965, + "step": 57970 + }, + { + "epoch": 0.4796293998428258, + "grad_norm": 731.3795166015625, + "learning_rate": 6.402910514846358e-06, + "loss": 82.6081, + "step": 57980 + }, + { + "epoch": 0.4797121230921951, + "grad_norm": 498.25, + "learning_rate": 6.4015567463856e-06, + "loss": 82.2082, + "step": 57990 + }, + { + "epoch": 0.4797948463415643, + "grad_norm": 1057.318359375, + "learning_rate": 6.400202866409405e-06, + "loss": 111.3383, + "step": 58000 + }, + { + "epoch": 0.47987756959093353, + "grad_norm": 775.5044555664062, + "learning_rate": 6.398848875025494e-06, + "loss": 88.3121, + "step": 58010 + }, + { + "epoch": 0.4799602928403028, + "grad_norm": 1417.432373046875, + "learning_rate": 6.3974947723415985e-06, + "loss": 95.6965, + "step": 58020 + }, + { + "epoch": 0.480043016089672, + "grad_norm": 644.7517700195312, + "learning_rate": 6.396140558465456e-06, + "loss": 104.0072, + "step": 58030 + }, + { + "epoch": 0.48012573933904124, + "grad_norm": 849.2105712890625, + "learning_rate": 6.394786233504816e-06, + "loss": 86.352, + "step": 58040 + }, + { + "epoch": 0.4802084625884105, + "grad_norm": 824.1756591796875, + "learning_rate": 6.39343179756744e-06, + "loss": 114.2667, + "step": 58050 + }, + { + "epoch": 0.4802911858377797, + "grad_norm": 567.4685668945312, + "learning_rate": 6.392077250761088e-06, + "loss": 88.4801, + "step": 58060 + }, + { + "epoch": 0.48037390908714894, + "grad_norm": 910.6168823242188, + "learning_rate": 6.390722593193538e-06, + "loss": 85.6822, + "step": 58070 + }, + { + "epoch": 0.4804566323365182, + "grad_norm": 924.6448364257812, + "learning_rate": 6.389367824972575e-06, + "loss": 96.7753, + "step": 58080 + }, + { + "epoch": 0.4805393555858874, + "grad_norm": 814.3735961914062, + "learning_rate": 6.388012946205991e-06, + "loss": 90.9101, + "step": 58090 + }, + { + "epoch": 0.48062207883525665, + "grad_norm": 1433.7420654296875, + "learning_rate": 6.386657957001585e-06, + "loss": 105.3125, + "step": 58100 + }, + { + "epoch": 0.4807048020846259, + "grad_norm": 841.0825805664062, + "learning_rate": 6.38530285746717e-06, + "loss": 128.7726, + "step": 58110 + }, + { + "epoch": 0.4807875253339951, + "grad_norm": 1228.2952880859375, + "learning_rate": 6.383947647710565e-06, + "loss": 114.0045, + "step": 58120 + }, + { + "epoch": 0.48087024858336436, + "grad_norm": 716.3721313476562, + "learning_rate": 6.382592327839596e-06, + "loss": 96.0517, + "step": 58130 + }, + { + "epoch": 0.4809529718327336, + "grad_norm": 837.0059204101562, + "learning_rate": 6.381236897962102e-06, + "loss": 112.9432, + "step": 58140 + }, + { + "epoch": 0.4810356950821028, + "grad_norm": 1200.560302734375, + "learning_rate": 6.379881358185926e-06, + "loss": 126.8512, + "step": 58150 + }, + { + "epoch": 0.48111841833147206, + "grad_norm": 772.40478515625, + "learning_rate": 6.378525708618924e-06, + "loss": 75.8569, + "step": 58160 + }, + { + "epoch": 0.4812011415808413, + "grad_norm": 980.9589233398438, + "learning_rate": 6.377169949368956e-06, + "loss": 83.9632, + "step": 58170 + }, + { + "epoch": 0.4812838648302105, + "grad_norm": 647.2731323242188, + "learning_rate": 
6.375814080543899e-06, + "loss": 104.6065, + "step": 58180 + }, + { + "epoch": 0.48136658807957977, + "grad_norm": 1349.8760986328125, + "learning_rate": 6.3744581022516285e-06, + "loss": 82.4153, + "step": 58190 + }, + { + "epoch": 0.481449311328949, + "grad_norm": 1214.870361328125, + "learning_rate": 6.373102014600033e-06, + "loss": 106.5302, + "step": 58200 + }, + { + "epoch": 0.4815320345783182, + "grad_norm": 1136.8599853515625, + "learning_rate": 6.371745817697012e-06, + "loss": 110.1129, + "step": 58210 + }, + { + "epoch": 0.4816147578276875, + "grad_norm": 2490.23876953125, + "learning_rate": 6.370389511650474e-06, + "loss": 138.5235, + "step": 58220 + }, + { + "epoch": 0.48169748107705673, + "grad_norm": 851.6533203125, + "learning_rate": 6.3690330965683304e-06, + "loss": 105.1765, + "step": 58230 + }, + { + "epoch": 0.48178020432642593, + "grad_norm": 635.8304443359375, + "learning_rate": 6.367676572558506e-06, + "loss": 138.1114, + "step": 58240 + }, + { + "epoch": 0.4818629275757952, + "grad_norm": 747.5317993164062, + "learning_rate": 6.366319939728934e-06, + "loss": 86.2269, + "step": 58250 + }, + { + "epoch": 0.48194565082516444, + "grad_norm": 1157.3438720703125, + "learning_rate": 6.364963198187555e-06, + "loss": 81.2648, + "step": 58260 + }, + { + "epoch": 0.48202837407453364, + "grad_norm": 961.3770141601562, + "learning_rate": 6.363606348042318e-06, + "loss": 69.4496, + "step": 58270 + }, + { + "epoch": 0.4821110973239029, + "grad_norm": 823.779541015625, + "learning_rate": 6.362249389401183e-06, + "loss": 96.8622, + "step": 58280 + }, + { + "epoch": 0.4821938205732721, + "grad_norm": 510.96942138671875, + "learning_rate": 6.360892322372115e-06, + "loss": 77.7201, + "step": 58290 + }, + { + "epoch": 0.48227654382264135, + "grad_norm": 745.046875, + "learning_rate": 6.359535147063092e-06, + "loss": 81.9999, + "step": 58300 + }, + { + "epoch": 0.4823592670720106, + "grad_norm": 611.5297241210938, + "learning_rate": 6.358177863582095e-06, + "loss": 87.0968, + "step": 58310 + }, + { + "epoch": 0.4824419903213798, + "grad_norm": 860.2018432617188, + "learning_rate": 6.35682047203712e-06, + "loss": 81.5001, + "step": 58320 + }, + { + "epoch": 0.48252471357074905, + "grad_norm": 439.9252014160156, + "learning_rate": 6.355462972536166e-06, + "loss": 88.4102, + "step": 58330 + }, + { + "epoch": 0.4826074368201183, + "grad_norm": 922.5792846679688, + "learning_rate": 6.354105365187244e-06, + "loss": 91.7119, + "step": 58340 + }, + { + "epoch": 0.4826901600694875, + "grad_norm": 1041.380615234375, + "learning_rate": 6.352747650098373e-06, + "loss": 104.9556, + "step": 58350 + }, + { + "epoch": 0.48277288331885676, + "grad_norm": 902.4854125976562, + "learning_rate": 6.35138982737758e-06, + "loss": 104.9955, + "step": 58360 + }, + { + "epoch": 0.482855606568226, + "grad_norm": 856.4240112304688, + "learning_rate": 6.3500318971329e-06, + "loss": 68.9174, + "step": 58370 + }, + { + "epoch": 0.4829383298175952, + "grad_norm": 613.3790893554688, + "learning_rate": 6.348673859472378e-06, + "loss": 104.4474, + "step": 58380 + }, + { + "epoch": 0.48302105306696447, + "grad_norm": 625.4129028320312, + "learning_rate": 6.347315714504066e-06, + "loss": 104.9294, + "step": 58390 + }, + { + "epoch": 0.4831037763163337, + "grad_norm": 597.6600341796875, + "learning_rate": 6.345957462336026e-06, + "loss": 81.4773, + "step": 58400 + }, + { + "epoch": 0.4831864995657029, + "grad_norm": 531.10986328125, + "learning_rate": 6.344599103076329e-06, + "loss": 100.6717, + "step": 58410 + }, + { + 
"epoch": 0.4832692228150722, + "grad_norm": 812.7492065429688, + "learning_rate": 6.343240636833051e-06, + "loss": 108.6641, + "step": 58420 + }, + { + "epoch": 0.48335194606444143, + "grad_norm": 1158.002685546875, + "learning_rate": 6.341882063714282e-06, + "loss": 125.5026, + "step": 58430 + }, + { + "epoch": 0.4834346693138106, + "grad_norm": 846.1316528320312, + "learning_rate": 6.340523383828115e-06, + "loss": 107.7689, + "step": 58440 + }, + { + "epoch": 0.4835173925631799, + "grad_norm": 1098.09130859375, + "learning_rate": 6.339164597282652e-06, + "loss": 106.0673, + "step": 58450 + }, + { + "epoch": 0.48360011581254914, + "grad_norm": 925.63818359375, + "learning_rate": 6.337805704186011e-06, + "loss": 118.7983, + "step": 58460 + }, + { + "epoch": 0.48368283906191833, + "grad_norm": 558.7349853515625, + "learning_rate": 6.336446704646307e-06, + "loss": 105.5596, + "step": 58470 + }, + { + "epoch": 0.4837655623112876, + "grad_norm": 918.0574340820312, + "learning_rate": 6.335087598771676e-06, + "loss": 102.35, + "step": 58480 + }, + { + "epoch": 0.48384828556065684, + "grad_norm": 2873.980712890625, + "learning_rate": 6.333728386670249e-06, + "loss": 102.2267, + "step": 58490 + }, + { + "epoch": 0.48393100881002604, + "grad_norm": 740.7039794921875, + "learning_rate": 6.332369068450175e-06, + "loss": 92.0805, + "step": 58500 + }, + { + "epoch": 0.4840137320593953, + "grad_norm": 1080.8912353515625, + "learning_rate": 6.33100964421961e-06, + "loss": 99.742, + "step": 58510 + }, + { + "epoch": 0.48409645530876455, + "grad_norm": 1029.510009765625, + "learning_rate": 6.329650114086717e-06, + "loss": 77.3601, + "step": 58520 + }, + { + "epoch": 0.48417917855813375, + "grad_norm": 739.2198486328125, + "learning_rate": 6.328290478159666e-06, + "loss": 108.9343, + "step": 58530 + }, + { + "epoch": 0.484261901807503, + "grad_norm": 584.7896118164062, + "learning_rate": 6.326930736546637e-06, + "loss": 87.4123, + "step": 58540 + }, + { + "epoch": 0.48434462505687226, + "grad_norm": 1001.3221435546875, + "learning_rate": 6.325570889355819e-06, + "loss": 115.3958, + "step": 58550 + }, + { + "epoch": 0.48442734830624146, + "grad_norm": 0.0, + "learning_rate": 6.32421093669541e-06, + "loss": 100.6869, + "step": 58560 + }, + { + "epoch": 0.4845100715556107, + "grad_norm": 1452.001953125, + "learning_rate": 6.322850878673614e-06, + "loss": 127.1429, + "step": 58570 + }, + { + "epoch": 0.48459279480497996, + "grad_norm": 841.3821411132812, + "learning_rate": 6.321490715398644e-06, + "loss": 94.9818, + "step": 58580 + }, + { + "epoch": 0.48467551805434916, + "grad_norm": 813.81298828125, + "learning_rate": 6.320130446978722e-06, + "loss": 76.4308, + "step": 58590 + }, + { + "epoch": 0.4847582413037184, + "grad_norm": 1246.0950927734375, + "learning_rate": 6.31877007352208e-06, + "loss": 111.9752, + "step": 58600 + }, + { + "epoch": 0.48484096455308767, + "grad_norm": 822.40283203125, + "learning_rate": 6.317409595136956e-06, + "loss": 90.058, + "step": 58610 + }, + { + "epoch": 0.48492368780245687, + "grad_norm": 1240.24609375, + "learning_rate": 6.316049011931595e-06, + "loss": 118.0982, + "step": 58620 + }, + { + "epoch": 0.4850064110518261, + "grad_norm": 710.9925537109375, + "learning_rate": 6.314688324014255e-06, + "loss": 99.1123, + "step": 58630 + }, + { + "epoch": 0.4850891343011953, + "grad_norm": 1035.0576171875, + "learning_rate": 6.3133275314931995e-06, + "loss": 123.4674, + "step": 58640 + }, + { + "epoch": 0.4851718575505646, + "grad_norm": 811.2945556640625, + "learning_rate": 
6.311966634476698e-06, + "loss": 93.8845, + "step": 58650 + }, + { + "epoch": 0.48525458079993383, + "grad_norm": 568.3858642578125, + "learning_rate": 6.3106056330730335e-06, + "loss": 96.4744, + "step": 58660 + }, + { + "epoch": 0.48533730404930303, + "grad_norm": 682.4791259765625, + "learning_rate": 6.309244527390493e-06, + "loss": 96.0774, + "step": 58670 + }, + { + "epoch": 0.4854200272986723, + "grad_norm": 1391.8082275390625, + "learning_rate": 6.307883317537375e-06, + "loss": 113.0187, + "step": 58680 + }, + { + "epoch": 0.48550275054804154, + "grad_norm": 672.600830078125, + "learning_rate": 6.306522003621983e-06, + "loss": 79.8365, + "step": 58690 + }, + { + "epoch": 0.48558547379741074, + "grad_norm": 1114.2835693359375, + "learning_rate": 6.305160585752632e-06, + "loss": 116.9864, + "step": 58700 + }, + { + "epoch": 0.48566819704678, + "grad_norm": 904.7263793945312, + "learning_rate": 6.303799064037643e-06, + "loss": 95.5957, + "step": 58710 + }, + { + "epoch": 0.48575092029614925, + "grad_norm": 1291.3994140625, + "learning_rate": 6.302437438585345e-06, + "loss": 107.933, + "step": 58720 + }, + { + "epoch": 0.48583364354551845, + "grad_norm": 1033.6456298828125, + "learning_rate": 6.301075709504077e-06, + "loss": 132.5331, + "step": 58730 + }, + { + "epoch": 0.4859163667948877, + "grad_norm": 939.7048950195312, + "learning_rate": 6.299713876902188e-06, + "loss": 93.3385, + "step": 58740 + }, + { + "epoch": 0.48599909004425695, + "grad_norm": 1379.9310302734375, + "learning_rate": 6.29835194088803e-06, + "loss": 93.6808, + "step": 58750 + }, + { + "epoch": 0.48608181329362615, + "grad_norm": 850.0859375, + "learning_rate": 6.296989901569966e-06, + "loss": 106.2699, + "step": 58760 + }, + { + "epoch": 0.4861645365429954, + "grad_norm": 845.896728515625, + "learning_rate": 6.295627759056368e-06, + "loss": 97.0875, + "step": 58770 + }, + { + "epoch": 0.48624725979236466, + "grad_norm": 910.60302734375, + "learning_rate": 6.294265513455616e-06, + "loss": 107.4305, + "step": 58780 + }, + { + "epoch": 0.48632998304173386, + "grad_norm": 895.4052734375, + "learning_rate": 6.292903164876097e-06, + "loss": 99.333, + "step": 58790 + }, + { + "epoch": 0.4864127062911031, + "grad_norm": 443.9016418457031, + "learning_rate": 6.291540713426206e-06, + "loss": 76.0029, + "step": 58800 + }, + { + "epoch": 0.48649542954047237, + "grad_norm": 621.6075439453125, + "learning_rate": 6.290178159214349e-06, + "loss": 88.676, + "step": 58810 + }, + { + "epoch": 0.48657815278984157, + "grad_norm": 459.60284423828125, + "learning_rate": 6.288815502348935e-06, + "loss": 94.1174, + "step": 58820 + }, + { + "epoch": 0.4866608760392108, + "grad_norm": 1254.715576171875, + "learning_rate": 6.287452742938388e-06, + "loss": 91.2785, + "step": 58830 + }, + { + "epoch": 0.4867435992885801, + "grad_norm": 1114.0794677734375, + "learning_rate": 6.286089881091134e-06, + "loss": 139.8125, + "step": 58840 + }, + { + "epoch": 0.4868263225379493, + "grad_norm": 853.3452758789062, + "learning_rate": 6.284726916915611e-06, + "loss": 101.1512, + "step": 58850 + }, + { + "epoch": 0.48690904578731853, + "grad_norm": 741.5625610351562, + "learning_rate": 6.2833638505202635e-06, + "loss": 115.4677, + "step": 58860 + }, + { + "epoch": 0.4869917690366878, + "grad_norm": 1342.6466064453125, + "learning_rate": 6.282000682013545e-06, + "loss": 93.9246, + "step": 58870 + }, + { + "epoch": 0.487074492286057, + "grad_norm": 594.8483276367188, + "learning_rate": 6.280637411503913e-06, + "loss": 108.2251, + "step": 58880 + }, + { + 
"epoch": 0.48715721553542624, + "grad_norm": 899.0263061523438, + "learning_rate": 6.279274039099842e-06, + "loss": 93.0218, + "step": 58890 + }, + { + "epoch": 0.4872399387847955, + "grad_norm": 858.6878662109375, + "learning_rate": 6.277910564909806e-06, + "loss": 91.2543, + "step": 58900 + }, + { + "epoch": 0.4873226620341647, + "grad_norm": 974.9313354492188, + "learning_rate": 6.276546989042292e-06, + "loss": 91.2285, + "step": 58910 + }, + { + "epoch": 0.48740538528353394, + "grad_norm": 653.6936645507812, + "learning_rate": 6.275183311605793e-06, + "loss": 101.074, + "step": 58920 + }, + { + "epoch": 0.4874881085329032, + "grad_norm": 816.3530883789062, + "learning_rate": 6.273819532708807e-06, + "loss": 94.144, + "step": 58930 + }, + { + "epoch": 0.4875708317822724, + "grad_norm": 1098.2479248046875, + "learning_rate": 6.27245565245985e-06, + "loss": 104.6022, + "step": 58940 + }, + { + "epoch": 0.48765355503164165, + "grad_norm": 1253.642333984375, + "learning_rate": 6.271091670967437e-06, + "loss": 101.674, + "step": 58950 + }, + { + "epoch": 0.4877362782810109, + "grad_norm": 577.8956909179688, + "learning_rate": 6.269727588340091e-06, + "loss": 82.5646, + "step": 58960 + }, + { + "epoch": 0.4878190015303801, + "grad_norm": 667.8779296875, + "learning_rate": 6.268363404686348e-06, + "loss": 116.7945, + "step": 58970 + }, + { + "epoch": 0.48790172477974936, + "grad_norm": 716.1468505859375, + "learning_rate": 6.26699912011475e-06, + "loss": 118.8406, + "step": 58980 + }, + { + "epoch": 0.4879844480291186, + "grad_norm": 606.7252197265625, + "learning_rate": 6.265634734733848e-06, + "loss": 74.8637, + "step": 58990 + }, + { + "epoch": 0.4880671712784878, + "grad_norm": 1299.558349609375, + "learning_rate": 6.264270248652199e-06, + "loss": 110.4696, + "step": 59000 + }, + { + "epoch": 0.48814989452785706, + "grad_norm": 913.1611938476562, + "learning_rate": 6.262905661978367e-06, + "loss": 79.307, + "step": 59010 + }, + { + "epoch": 0.48823261777722626, + "grad_norm": 590.417724609375, + "learning_rate": 6.261540974820928e-06, + "loss": 99.6395, + "step": 59020 + }, + { + "epoch": 0.4883153410265955, + "grad_norm": 696.5634155273438, + "learning_rate": 6.260176187288463e-06, + "loss": 100.7072, + "step": 59030 + }, + { + "epoch": 0.48839806427596477, + "grad_norm": 1761.085693359375, + "learning_rate": 6.2588112994895636e-06, + "loss": 151.4153, + "step": 59040 + }, + { + "epoch": 0.48848078752533397, + "grad_norm": 1071.5557861328125, + "learning_rate": 6.257446311532824e-06, + "loss": 69.3153, + "step": 59050 + }, + { + "epoch": 0.4885635107747032, + "grad_norm": 1153.7186279296875, + "learning_rate": 6.256081223526854e-06, + "loss": 93.4576, + "step": 59060 + }, + { + "epoch": 0.4886462340240725, + "grad_norm": 1251.8411865234375, + "learning_rate": 6.254716035580264e-06, + "loss": 71.2112, + "step": 59070 + }, + { + "epoch": 0.4887289572734417, + "grad_norm": 653.9671630859375, + "learning_rate": 6.25335074780168e-06, + "loss": 101.7776, + "step": 59080 + }, + { + "epoch": 0.48881168052281093, + "grad_norm": 917.6962280273438, + "learning_rate": 6.251985360299728e-06, + "loss": 109.7955, + "step": 59090 + }, + { + "epoch": 0.4888944037721802, + "grad_norm": 1049.5726318359375, + "learning_rate": 6.250619873183046e-06, + "loss": 97.9137, + "step": 59100 + }, + { + "epoch": 0.4889771270215494, + "grad_norm": 960.5833129882812, + "learning_rate": 6.249254286560281e-06, + "loss": 115.5338, + "step": 59110 + }, + { + "epoch": 0.48905985027091864, + "grad_norm": 1100.854248046875, + 
"learning_rate": 6.247888600540084e-06, + "loss": 102.4374, + "step": 59120 + }, + { + "epoch": 0.4891425735202879, + "grad_norm": 858.7109985351562, + "learning_rate": 6.246522815231121e-06, + "loss": 86.8003, + "step": 59130 + }, + { + "epoch": 0.4892252967696571, + "grad_norm": 758.7923583984375, + "learning_rate": 6.245156930742057e-06, + "loss": 99.9746, + "step": 59140 + }, + { + "epoch": 0.48930802001902635, + "grad_norm": 957.5679931640625, + "learning_rate": 6.24379094718157e-06, + "loss": 105.2421, + "step": 59150 + }, + { + "epoch": 0.4893907432683956, + "grad_norm": 932.5010986328125, + "learning_rate": 6.2424248646583455e-06, + "loss": 109.755, + "step": 59160 + }, + { + "epoch": 0.4894734665177648, + "grad_norm": 1039.6802978515625, + "learning_rate": 6.241058683281077e-06, + "loss": 157.5874, + "step": 59170 + }, + { + "epoch": 0.48955618976713405, + "grad_norm": 1249.9261474609375, + "learning_rate": 6.239692403158465e-06, + "loss": 80.0913, + "step": 59180 + }, + { + "epoch": 0.4896389130165033, + "grad_norm": 769.3261108398438, + "learning_rate": 6.238326024399217e-06, + "loss": 116.0565, + "step": 59190 + }, + { + "epoch": 0.4897216362658725, + "grad_norm": 3054.625, + "learning_rate": 6.236959547112051e-06, + "loss": 135.2796, + "step": 59200 + }, + { + "epoch": 0.48980435951524176, + "grad_norm": 1218.1517333984375, + "learning_rate": 6.235592971405691e-06, + "loss": 96.0189, + "step": 59210 + }, + { + "epoch": 0.489887082764611, + "grad_norm": 964.074462890625, + "learning_rate": 6.234226297388869e-06, + "loss": 135.8784, + "step": 59220 + }, + { + "epoch": 0.4899698060139802, + "grad_norm": 822.4147338867188, + "learning_rate": 6.232859525170324e-06, + "loss": 74.8447, + "step": 59230 + }, + { + "epoch": 0.49005252926334947, + "grad_norm": 1401.8765869140625, + "learning_rate": 6.231492654858805e-06, + "loss": 124.0554, + "step": 59240 + }, + { + "epoch": 0.4901352525127187, + "grad_norm": 665.4353637695312, + "learning_rate": 6.230125686563068e-06, + "loss": 116.1569, + "step": 59250 + }, + { + "epoch": 0.4902179757620879, + "grad_norm": 1295.380126953125, + "learning_rate": 6.2287586203918745e-06, + "loss": 114.117, + "step": 59260 + }, + { + "epoch": 0.4903006990114572, + "grad_norm": 1130.935791015625, + "learning_rate": 6.227391456453997e-06, + "loss": 73.4887, + "step": 59270 + }, + { + "epoch": 0.49038342226082643, + "grad_norm": 615.1046142578125, + "learning_rate": 6.226024194858214e-06, + "loss": 83.7547, + "step": 59280 + }, + { + "epoch": 0.49046614551019563, + "grad_norm": 909.8365478515625, + "learning_rate": 6.224656835713313e-06, + "loss": 99.4262, + "step": 59290 + }, + { + "epoch": 0.4905488687595649, + "grad_norm": 768.0503540039062, + "learning_rate": 6.223289379128088e-06, + "loss": 88.5417, + "step": 59300 + }, + { + "epoch": 0.49063159200893414, + "grad_norm": 1546.40771484375, + "learning_rate": 6.221921825211342e-06, + "loss": 88.7849, + "step": 59310 + }, + { + "epoch": 0.49071431525830334, + "grad_norm": 1119.7420654296875, + "learning_rate": 6.220554174071884e-06, + "loss": 216.5368, + "step": 59320 + }, + { + "epoch": 0.4907970385076726, + "grad_norm": 738.0167846679688, + "learning_rate": 6.219186425818531e-06, + "loss": 111.826, + "step": 59330 + }, + { + "epoch": 0.49087976175704184, + "grad_norm": 1079.2379150390625, + "learning_rate": 6.217818580560111e-06, + "loss": 138.3639, + "step": 59340 + }, + { + "epoch": 0.49096248500641104, + "grad_norm": 455.43560791015625, + "learning_rate": 6.216450638405454e-06, + "loss": 106.5501, + 
"step": 59350 + }, + { + "epoch": 0.4910452082557803, + "grad_norm": 1222.3670654296875, + "learning_rate": 6.2150825994634025e-06, + "loss": 76.1361, + "step": 59360 + }, + { + "epoch": 0.4911279315051495, + "grad_norm": 2011.9967041015625, + "learning_rate": 6.2137144638428045e-06, + "loss": 113.7866, + "step": 59370 + }, + { + "epoch": 0.49121065475451875, + "grad_norm": 1221.827880859375, + "learning_rate": 6.21234623165252e-06, + "loss": 79.5845, + "step": 59380 + }, + { + "epoch": 0.491293378003888, + "grad_norm": 780.6744384765625, + "learning_rate": 6.210977903001406e-06, + "loss": 88.4106, + "step": 59390 + }, + { + "epoch": 0.4913761012532572, + "grad_norm": 755.7965087890625, + "learning_rate": 6.209609477998339e-06, + "loss": 97.9078, + "step": 59400 + }, + { + "epoch": 0.49145882450262646, + "grad_norm": 883.141357421875, + "learning_rate": 6.2082409567521975e-06, + "loss": 97.257, + "step": 59410 + }, + { + "epoch": 0.4915415477519957, + "grad_norm": 1329.85107421875, + "learning_rate": 6.206872339371867e-06, + "loss": 101.6907, + "step": 59420 + }, + { + "epoch": 0.4916242710013649, + "grad_norm": 2107.642578125, + "learning_rate": 6.205503625966247e-06, + "loss": 109.3981, + "step": 59430 + }, + { + "epoch": 0.49170699425073416, + "grad_norm": 663.9820556640625, + "learning_rate": 6.204134816644233e-06, + "loss": 77.1192, + "step": 59440 + }, + { + "epoch": 0.4917897175001034, + "grad_norm": 1082.100341796875, + "learning_rate": 6.2027659115147375e-06, + "loss": 109.6853, + "step": 59450 + }, + { + "epoch": 0.4918724407494726, + "grad_norm": 734.315185546875, + "learning_rate": 6.201396910686679e-06, + "loss": 92.7889, + "step": 59460 + }, + { + "epoch": 0.49195516399884187, + "grad_norm": 825.7645874023438, + "learning_rate": 6.200027814268984e-06, + "loss": 82.5341, + "step": 59470 + }, + { + "epoch": 0.4920378872482111, + "grad_norm": 990.0794677734375, + "learning_rate": 6.198658622370582e-06, + "loss": 91.5214, + "step": 59480 + }, + { + "epoch": 0.4921206104975803, + "grad_norm": 672.1451416015625, + "learning_rate": 6.197289335100412e-06, + "loss": 99.9061, + "step": 59490 + }, + { + "epoch": 0.4922033337469496, + "grad_norm": 663.6240234375, + "learning_rate": 6.195919952567426e-06, + "loss": 84.1417, + "step": 59500 + }, + { + "epoch": 0.49228605699631883, + "grad_norm": 1181.8443603515625, + "learning_rate": 6.194550474880579e-06, + "loss": 101.2219, + "step": 59510 + }, + { + "epoch": 0.49236878024568803, + "grad_norm": 1052.6051025390625, + "learning_rate": 6.193180902148833e-06, + "loss": 101.8608, + "step": 59520 + }, + { + "epoch": 0.4924515034950573, + "grad_norm": 467.2412109375, + "learning_rate": 6.1918112344811575e-06, + "loss": 64.4284, + "step": 59530 + }, + { + "epoch": 0.49253422674442654, + "grad_norm": 756.0142822265625, + "learning_rate": 6.190441471986533e-06, + "loss": 93.9991, + "step": 59540 + }, + { + "epoch": 0.49261694999379574, + "grad_norm": 650.9651489257812, + "learning_rate": 6.18907161477394e-06, + "loss": 134.6619, + "step": 59550 + }, + { + "epoch": 0.492699673243165, + "grad_norm": 845.457275390625, + "learning_rate": 6.187701662952381e-06, + "loss": 71.9417, + "step": 59560 + }, + { + "epoch": 0.49278239649253425, + "grad_norm": 1039.0081787109375, + "learning_rate": 6.18633161663085e-06, + "loss": 96.217, + "step": 59570 + }, + { + "epoch": 0.49286511974190345, + "grad_norm": 1419.552734375, + "learning_rate": 6.184961475918355e-06, + "loss": 117.6555, + "step": 59580 + }, + { + "epoch": 0.4929478429912727, + "grad_norm": 
1182.2574462890625, + "learning_rate": 6.183591240923914e-06, + "loss": 116.7017, + "step": 59590 + }, + { + "epoch": 0.49303056624064195, + "grad_norm": 1182.273681640625, + "learning_rate": 6.182220911756551e-06, + "loss": 96.5914, + "step": 59600 + }, + { + "epoch": 0.49311328949001115, + "grad_norm": 1710.057861328125, + "learning_rate": 6.1808504885252955e-06, + "loss": 116.0724, + "step": 59610 + }, + { + "epoch": 0.4931960127393804, + "grad_norm": 738.5501708984375, + "learning_rate": 6.179479971339186e-06, + "loss": 102.2499, + "step": 59620 + }, + { + "epoch": 0.49327873598874966, + "grad_norm": 1123.935791015625, + "learning_rate": 6.178109360307267e-06, + "loss": 98.9781, + "step": 59630 + }, + { + "epoch": 0.49336145923811886, + "grad_norm": 1302.947998046875, + "learning_rate": 6.176738655538594e-06, + "loss": 86.7837, + "step": 59640 + }, + { + "epoch": 0.4934441824874881, + "grad_norm": 1149.8046875, + "learning_rate": 6.175367857142227e-06, + "loss": 101.772, + "step": 59650 + }, + { + "epoch": 0.49352690573685737, + "grad_norm": 313.6778564453125, + "learning_rate": 6.173996965227234e-06, + "loss": 67.0711, + "step": 59660 + }, + { + "epoch": 0.49360962898622657, + "grad_norm": 992.2843627929688, + "learning_rate": 6.17262597990269e-06, + "loss": 122.4615, + "step": 59670 + }, + { + "epoch": 0.4936923522355958, + "grad_norm": 544.5889282226562, + "learning_rate": 6.171254901277678e-06, + "loss": 118.6765, + "step": 59680 + }, + { + "epoch": 0.4937750754849651, + "grad_norm": 760.4612426757812, + "learning_rate": 6.169883729461289e-06, + "loss": 73.9667, + "step": 59690 + }, + { + "epoch": 0.4938577987343343, + "grad_norm": 879.9442749023438, + "learning_rate": 6.16851246456262e-06, + "loss": 121.3691, + "step": 59700 + }, + { + "epoch": 0.49394052198370353, + "grad_norm": 838.228271484375, + "learning_rate": 6.167141106690778e-06, + "loss": 81.8349, + "step": 59710 + }, + { + "epoch": 0.4940232452330727, + "grad_norm": 600.8211059570312, + "learning_rate": 6.1657696559548755e-06, + "loss": 82.6974, + "step": 59720 + }, + { + "epoch": 0.494105968482442, + "grad_norm": 1331.113525390625, + "learning_rate": 6.16439811246403e-06, + "loss": 91.3955, + "step": 59730 + }, + { + "epoch": 0.49418869173181124, + "grad_norm": 1298.560791015625, + "learning_rate": 6.163026476327371e-06, + "loss": 127.3068, + "step": 59740 + }, + { + "epoch": 0.49427141498118043, + "grad_norm": 1139.0467529296875, + "learning_rate": 6.161654747654033e-06, + "loss": 103.0033, + "step": 59750 + }, + { + "epoch": 0.4943541382305497, + "grad_norm": 740.8123779296875, + "learning_rate": 6.1602829265531585e-06, + "loss": 111.8351, + "step": 59760 + }, + { + "epoch": 0.49443686147991894, + "grad_norm": 811.9702758789062, + "learning_rate": 6.158911013133896e-06, + "loss": 106.3484, + "step": 59770 + }, + { + "epoch": 0.49451958472928814, + "grad_norm": 751.2868041992188, + "learning_rate": 6.157539007505402e-06, + "loss": 81.8818, + "step": 59780 + }, + { + "epoch": 0.4946023079786574, + "grad_norm": 596.0902099609375, + "learning_rate": 6.156166909776842e-06, + "loss": 141.5883, + "step": 59790 + }, + { + "epoch": 0.49468503122802665, + "grad_norm": 669.7060546875, + "learning_rate": 6.154794720057388e-06, + "loss": 92.8498, + "step": 59800 + }, + { + "epoch": 0.49476775447739585, + "grad_norm": 769.1928100585938, + "learning_rate": 6.153422438456218e-06, + "loss": 111.0396, + "step": 59810 + }, + { + "epoch": 0.4948504777267651, + "grad_norm": 1065.379638671875, + "learning_rate": 6.1520500650825175e-06, + 
"loss": 109.4096, + "step": 59820 + }, + { + "epoch": 0.49493320097613436, + "grad_norm": 1148.633056640625, + "learning_rate": 6.150677600045479e-06, + "loss": 107.9428, + "step": 59830 + }, + { + "epoch": 0.49501592422550356, + "grad_norm": 530.499267578125, + "learning_rate": 6.1493050434543065e-06, + "loss": 80.3538, + "step": 59840 + }, + { + "epoch": 0.4950986474748728, + "grad_norm": 888.7738647460938, + "learning_rate": 6.1479323954182055e-06, + "loss": 82.7131, + "step": 59850 + }, + { + "epoch": 0.49518137072424206, + "grad_norm": 1272.56884765625, + "learning_rate": 6.146559656046394e-06, + "loss": 115.8495, + "step": 59860 + }, + { + "epoch": 0.49526409397361126, + "grad_norm": 1251.605712890625, + "learning_rate": 6.1451868254480914e-06, + "loss": 103.9669, + "step": 59870 + }, + { + "epoch": 0.4953468172229805, + "grad_norm": 1469.060546875, + "learning_rate": 6.143813903732527e-06, + "loss": 98.5221, + "step": 59880 + }, + { + "epoch": 0.49542954047234977, + "grad_norm": 613.4310913085938, + "learning_rate": 6.142440891008941e-06, + "loss": 95.7254, + "step": 59890 + }, + { + "epoch": 0.49551226372171897, + "grad_norm": 1487.4815673828125, + "learning_rate": 6.141067787386579e-06, + "loss": 134.6069, + "step": 59900 + }, + { + "epoch": 0.4955949869710882, + "grad_norm": 1083.0224609375, + "learning_rate": 6.139694592974687e-06, + "loss": 103.1761, + "step": 59910 + }, + { + "epoch": 0.4956777102204575, + "grad_norm": 1133.168701171875, + "learning_rate": 6.1383213078825275e-06, + "loss": 111.6922, + "step": 59920 + }, + { + "epoch": 0.4957604334698267, + "grad_norm": 1490.791015625, + "learning_rate": 6.136947932219365e-06, + "loss": 102.0797, + "step": 59930 + }, + { + "epoch": 0.49584315671919593, + "grad_norm": 717.7562866210938, + "learning_rate": 6.135574466094475e-06, + "loss": 103.1788, + "step": 59940 + }, + { + "epoch": 0.4959258799685652, + "grad_norm": 707.7864379882812, + "learning_rate": 6.134200909617135e-06, + "loss": 84.5769, + "step": 59950 + }, + { + "epoch": 0.4960086032179344, + "grad_norm": 607.8535766601562, + "learning_rate": 6.132827262896634e-06, + "loss": 109.7207, + "step": 59960 + }, + { + "epoch": 0.49609132646730364, + "grad_norm": 575.4254150390625, + "learning_rate": 6.131453526042267e-06, + "loss": 94.0905, + "step": 59970 + }, + { + "epoch": 0.4961740497166729, + "grad_norm": 1168.943603515625, + "learning_rate": 6.130079699163335e-06, + "loss": 123.4409, + "step": 59980 + }, + { + "epoch": 0.4962567729660421, + "grad_norm": 806.4592895507812, + "learning_rate": 6.128705782369149e-06, + "loss": 126.4985, + "step": 59990 + }, + { + "epoch": 0.49633949621541135, + "grad_norm": 994.6439208984375, + "learning_rate": 6.127331775769023e-06, + "loss": 81.3202, + "step": 60000 + }, + { + "epoch": 0.4964222194647806, + "grad_norm": 764.6784057617188, + "learning_rate": 6.125957679472282e-06, + "loss": 98.2079, + "step": 60010 + }, + { + "epoch": 0.4965049427141498, + "grad_norm": 963.4498901367188, + "learning_rate": 6.124583493588254e-06, + "loss": 107.3761, + "step": 60020 + }, + { + "epoch": 0.49658766596351905, + "grad_norm": 763.531494140625, + "learning_rate": 6.123209218226282e-06, + "loss": 86.249, + "step": 60030 + }, + { + "epoch": 0.4966703892128883, + "grad_norm": 1130.901611328125, + "learning_rate": 6.121834853495704e-06, + "loss": 80.7327, + "step": 60040 + }, + { + "epoch": 0.4967531124622575, + "grad_norm": 830.3397827148438, + "learning_rate": 6.120460399505876e-06, + "loss": 79.5612, + "step": 60050 + }, + { + "epoch": 
0.49683583571162676, + "grad_norm": 1038.9749755859375, + "learning_rate": 6.119085856366158e-06, + "loss": 104.2222, + "step": 60060 + }, + { + "epoch": 0.496918558960996, + "grad_norm": 1272.2034912109375, + "learning_rate": 6.117711224185913e-06, + "loss": 95.4032, + "step": 60070 + }, + { + "epoch": 0.4970012822103652, + "grad_norm": 678.36083984375, + "learning_rate": 6.116336503074516e-06, + "loss": 111.6325, + "step": 60080 + }, + { + "epoch": 0.49708400545973447, + "grad_norm": 902.4628295898438, + "learning_rate": 6.114961693141346e-06, + "loss": 109.0453, + "step": 60090 + }, + { + "epoch": 0.49716672870910367, + "grad_norm": 1161.1533203125, + "learning_rate": 6.113586794495792e-06, + "loss": 102.0886, + "step": 60100 + }, + { + "epoch": 0.4972494519584729, + "grad_norm": 883.01025390625, + "learning_rate": 6.112211807247246e-06, + "loss": 79.8961, + "step": 60110 + }, + { + "epoch": 0.4973321752078422, + "grad_norm": 560.7022094726562, + "learning_rate": 6.110836731505112e-06, + "loss": 104.4858, + "step": 60120 + }, + { + "epoch": 0.4974148984572114, + "grad_norm": 563.2030639648438, + "learning_rate": 6.109461567378796e-06, + "loss": 76.5894, + "step": 60130 + }, + { + "epoch": 0.49749762170658063, + "grad_norm": 567.1194458007812, + "learning_rate": 6.108086314977717e-06, + "loss": 74.5852, + "step": 60140 + }, + { + "epoch": 0.4975803449559499, + "grad_norm": 1001.0436401367188, + "learning_rate": 6.106710974411294e-06, + "loss": 89.3131, + "step": 60150 + }, + { + "epoch": 0.4976630682053191, + "grad_norm": 1134.8504638671875, + "learning_rate": 6.105335545788957e-06, + "loss": 92.9293, + "step": 60160 + }, + { + "epoch": 0.49774579145468834, + "grad_norm": 1836.4036865234375, + "learning_rate": 6.103960029220145e-06, + "loss": 96.6835, + "step": 60170 + }, + { + "epoch": 0.4978285147040576, + "grad_norm": 884.6735229492188, + "learning_rate": 6.102584424814299e-06, + "loss": 96.8814, + "step": 60180 + }, + { + "epoch": 0.4979112379534268, + "grad_norm": 1286.9527587890625, + "learning_rate": 6.101208732680872e-06, + "loss": 84.8719, + "step": 60190 + }, + { + "epoch": 0.49799396120279604, + "grad_norm": 1096.192138671875, + "learning_rate": 6.09983295292932e-06, + "loss": 133.576, + "step": 60200 + }, + { + "epoch": 0.4980766844521653, + "grad_norm": 396.5540771484375, + "learning_rate": 6.0984570856691046e-06, + "loss": 71.0101, + "step": 60210 + }, + { + "epoch": 0.4981594077015345, + "grad_norm": 1324.0350341796875, + "learning_rate": 6.097081131009703e-06, + "loss": 84.5404, + "step": 60220 + }, + { + "epoch": 0.49824213095090375, + "grad_norm": 1569.8370361328125, + "learning_rate": 6.095705089060589e-06, + "loss": 131.2618, + "step": 60230 + }, + { + "epoch": 0.498324854200273, + "grad_norm": 452.867919921875, + "learning_rate": 6.094328959931252e-06, + "loss": 98.2553, + "step": 60240 + }, + { + "epoch": 0.4984075774496422, + "grad_norm": 754.5675659179688, + "learning_rate": 6.092952743731179e-06, + "loss": 109.5702, + "step": 60250 + }, + { + "epoch": 0.49849030069901146, + "grad_norm": 1038.2977294921875, + "learning_rate": 6.091576440569873e-06, + "loss": 102.3048, + "step": 60260 + }, + { + "epoch": 0.4985730239483807, + "grad_norm": 1505.541748046875, + "learning_rate": 6.09020005055684e-06, + "loss": 93.0384, + "step": 60270 + }, + { + "epoch": 0.4986557471977499, + "grad_norm": 714.1956787109375, + "learning_rate": 6.088823573801592e-06, + "loss": 89.4964, + "step": 60280 + }, + { + "epoch": 0.49873847044711916, + "grad_norm": 1760.59033203125, + 
"learning_rate": 6.087447010413651e-06, + "loss": 123.4378, + "step": 60290 + }, + { + "epoch": 0.4988211936964884, + "grad_norm": 647.3226318359375, + "learning_rate": 6.08607036050254e-06, + "loss": 97.7962, + "step": 60300 + }, + { + "epoch": 0.4989039169458576, + "grad_norm": 670.925048828125, + "learning_rate": 6.084693624177794e-06, + "loss": 90.9299, + "step": 60310 + }, + { + "epoch": 0.49898664019522687, + "grad_norm": 861.1676635742188, + "learning_rate": 6.083316801548956e-06, + "loss": 96.199, + "step": 60320 + }, + { + "epoch": 0.4990693634445961, + "grad_norm": 844.1892700195312, + "learning_rate": 6.081939892725572e-06, + "loss": 89.0784, + "step": 60330 + }, + { + "epoch": 0.4991520866939653, + "grad_norm": 1057.48876953125, + "learning_rate": 6.080562897817196e-06, + "loss": 80.8512, + "step": 60340 + }, + { + "epoch": 0.4992348099433346, + "grad_norm": 547.9786987304688, + "learning_rate": 6.079185816933388e-06, + "loss": 123.7075, + "step": 60350 + }, + { + "epoch": 0.49931753319270383, + "grad_norm": 813.2322998046875, + "learning_rate": 6.077808650183718e-06, + "loss": 75.0818, + "step": 60360 + }, + { + "epoch": 0.49940025644207303, + "grad_norm": 1088.018310546875, + "learning_rate": 6.076431397677762e-06, + "loss": 99.3224, + "step": 60370 + }, + { + "epoch": 0.4994829796914423, + "grad_norm": 443.6618347167969, + "learning_rate": 6.0750540595250986e-06, + "loss": 92.7872, + "step": 60380 + }, + { + "epoch": 0.49956570294081154, + "grad_norm": 1097.77099609375, + "learning_rate": 6.073676635835317e-06, + "loss": 101.5357, + "step": 60390 + }, + { + "epoch": 0.49964842619018074, + "grad_norm": 809.2193603515625, + "learning_rate": 6.072299126718012e-06, + "loss": 102.8223, + "step": 60400 + }, + { + "epoch": 0.49973114943955, + "grad_norm": 1209.04296875, + "learning_rate": 6.070921532282788e-06, + "loss": 90.7184, + "step": 60410 + }, + { + "epoch": 0.49981387268891925, + "grad_norm": 483.89910888671875, + "learning_rate": 6.0695438526392536e-06, + "loss": 90.6916, + "step": 60420 + }, + { + "epoch": 0.49989659593828845, + "grad_norm": 939.6087036132812, + "learning_rate": 6.068166087897022e-06, + "loss": 95.3435, + "step": 60430 + }, + { + "epoch": 0.4999793191876577, + "grad_norm": 1028.2034912109375, + "learning_rate": 6.066788238165717e-06, + "loss": 122.2318, + "step": 60440 + }, + { + "epoch": 0.5000620424370269, + "grad_norm": 871.5949096679688, + "learning_rate": 6.0654103035549686e-06, + "loss": 107.04, + "step": 60450 + }, + { + "epoch": 0.5001447656863962, + "grad_norm": 631.7724609375, + "learning_rate": 6.064032284174411e-06, + "loss": 107.0457, + "step": 60460 + }, + { + "epoch": 0.5002274889357654, + "grad_norm": 1057.8795166015625, + "learning_rate": 6.062654180133689e-06, + "loss": 95.4936, + "step": 60470 + }, + { + "epoch": 0.5003102121851346, + "grad_norm": 625.803466796875, + "learning_rate": 6.06127599154245e-06, + "loss": 112.727, + "step": 60480 + }, + { + "epoch": 0.5003929354345039, + "grad_norm": 957.1911010742188, + "learning_rate": 6.059897718510351e-06, + "loss": 141.4194, + "step": 60490 + }, + { + "epoch": 0.5004756586838731, + "grad_norm": 1082.177734375, + "learning_rate": 6.058519361147055e-06, + "loss": 106.0662, + "step": 60500 + }, + { + "epoch": 0.5005583819332423, + "grad_norm": 961.278076171875, + "learning_rate": 6.057140919562231e-06, + "loss": 92.1514, + "step": 60510 + }, + { + "epoch": 0.5006411051826116, + "grad_norm": 1194.2098388671875, + "learning_rate": 6.055762393865555e-06, + "loss": 114.0582, + "step": 60520 + }, 
+ { + "epoch": 0.5007238284319808, + "grad_norm": 2295.308349609375, + "learning_rate": 6.054383784166712e-06, + "loss": 122.28, + "step": 60530 + }, + { + "epoch": 0.50080655168135, + "grad_norm": 950.94482421875, + "learning_rate": 6.05300509057539e-06, + "loss": 65.6176, + "step": 60540 + }, + { + "epoch": 0.5008892749307193, + "grad_norm": 575.2659301757812, + "learning_rate": 6.051626313201285e-06, + "loss": 75.2322, + "step": 60550 + }, + { + "epoch": 0.5009719981800885, + "grad_norm": 955.7214965820312, + "learning_rate": 6.0502474521541014e-06, + "loss": 78.4364, + "step": 60560 + }, + { + "epoch": 0.5010547214294577, + "grad_norm": 1058.46240234375, + "learning_rate": 6.048868507543547e-06, + "loss": 76.487, + "step": 60570 + }, + { + "epoch": 0.501137444678827, + "grad_norm": 1243.02880859375, + "learning_rate": 6.047489479479339e-06, + "loss": 94.1711, + "step": 60580 + }, + { + "epoch": 0.5012201679281962, + "grad_norm": 539.5478515625, + "learning_rate": 6.046110368071201e-06, + "loss": 80.0069, + "step": 60590 + }, + { + "epoch": 0.5013028911775654, + "grad_norm": 1104.5489501953125, + "learning_rate": 6.044731173428862e-06, + "loss": 109.4119, + "step": 60600 + }, + { + "epoch": 0.5013856144269347, + "grad_norm": 547.853515625, + "learning_rate": 6.043351895662059e-06, + "loss": 74.3687, + "step": 60610 + }, + { + "epoch": 0.5014683376763039, + "grad_norm": 815.2512817382812, + "learning_rate": 6.041972534880533e-06, + "loss": 98.8586, + "step": 60620 + }, + { + "epoch": 0.5015510609256731, + "grad_norm": 934.5771484375, + "learning_rate": 6.040593091194035e-06, + "loss": 113.7061, + "step": 60630 + }, + { + "epoch": 0.5016337841750423, + "grad_norm": 980.511962890625, + "learning_rate": 6.039213564712319e-06, + "loss": 77.5835, + "step": 60640 + }, + { + "epoch": 0.5017165074244117, + "grad_norm": 875.10693359375, + "learning_rate": 6.03783395554515e-06, + "loss": 90.5567, + "step": 60650 + }, + { + "epoch": 0.5017992306737808, + "grad_norm": 564.4214477539062, + "learning_rate": 6.036454263802297e-06, + "loss": 99.0541, + "step": 60660 + }, + { + "epoch": 0.50188195392315, + "grad_norm": 859.4292602539062, + "learning_rate": 6.035074489593536e-06, + "loss": 81.3836, + "step": 60670 + }, + { + "epoch": 0.5019646771725194, + "grad_norm": 1366.642333984375, + "learning_rate": 6.033694633028644e-06, + "loss": 89.8926, + "step": 60680 + }, + { + "epoch": 0.5020474004218886, + "grad_norm": 707.115478515625, + "learning_rate": 6.032314694217416e-06, + "loss": 119.7774, + "step": 60690 + }, + { + "epoch": 0.5021301236712578, + "grad_norm": 509.8358154296875, + "learning_rate": 6.030934673269646e-06, + "loss": 76.9633, + "step": 60700 + }, + { + "epoch": 0.5022128469206271, + "grad_norm": 1127.6051025390625, + "learning_rate": 6.029554570295135e-06, + "loss": 98.3396, + "step": 60710 + }, + { + "epoch": 0.5022955701699963, + "grad_norm": 489.0888366699219, + "learning_rate": 6.028174385403693e-06, + "loss": 74.8344, + "step": 60720 + }, + { + "epoch": 0.5023782934193655, + "grad_norm": 563.7764282226562, + "learning_rate": 6.026794118705133e-06, + "loss": 123.1201, + "step": 60730 + }, + { + "epoch": 0.5024610166687348, + "grad_norm": 922.4900512695312, + "learning_rate": 6.025413770309278e-06, + "loss": 105.9961, + "step": 60740 + }, + { + "epoch": 0.502543739918104, + "grad_norm": 711.1076049804688, + "learning_rate": 6.024033340325954e-06, + "loss": 138.0339, + "step": 60750 + }, + { + "epoch": 0.5026264631674732, + "grad_norm": 1032.06787109375, + "learning_rate": 
6.022652828864999e-06, + "loss": 129.3473, + "step": 60760 + }, + { + "epoch": 0.5027091864168425, + "grad_norm": 987.8187255859375, + "learning_rate": 6.0212722360362496e-06, + "loss": 98.7341, + "step": 60770 + }, + { + "epoch": 0.5027919096662117, + "grad_norm": 922.1596069335938, + "learning_rate": 6.019891561949554e-06, + "loss": 113.0199, + "step": 60780 + }, + { + "epoch": 0.5028746329155809, + "grad_norm": 966.7632446289062, + "learning_rate": 6.01851080671477e-06, + "loss": 95.6403, + "step": 60790 + }, + { + "epoch": 0.5029573561649502, + "grad_norm": 626.9385986328125, + "learning_rate": 6.017129970441756e-06, + "loss": 85.6963, + "step": 60800 + }, + { + "epoch": 0.5030400794143194, + "grad_norm": 1487.6356201171875, + "learning_rate": 6.015749053240378e-06, + "loss": 97.1684, + "step": 60810 + }, + { + "epoch": 0.5031228026636886, + "grad_norm": 789.6949462890625, + "learning_rate": 6.0143680552205075e-06, + "loss": 103.032, + "step": 60820 + }, + { + "epoch": 0.5032055259130579, + "grad_norm": 737.3887329101562, + "learning_rate": 6.012986976492025e-06, + "loss": 86.7182, + "step": 60830 + }, + { + "epoch": 0.5032882491624271, + "grad_norm": 600.4498291015625, + "learning_rate": 6.011605817164822e-06, + "loss": 94.8057, + "step": 60840 + }, + { + "epoch": 0.5033709724117963, + "grad_norm": 593.8978271484375, + "learning_rate": 6.0102245773487855e-06, + "loss": 107.6144, + "step": 60850 + }, + { + "epoch": 0.5034536956611656, + "grad_norm": 1065.4730224609375, + "learning_rate": 6.008843257153815e-06, + "loss": 90.4793, + "step": 60860 + }, + { + "epoch": 0.5035364189105348, + "grad_norm": 824.6838989257812, + "learning_rate": 6.007461856689815e-06, + "loss": 75.2064, + "step": 60870 + }, + { + "epoch": 0.503619142159904, + "grad_norm": 450.9040832519531, + "learning_rate": 6.0060803760667e-06, + "loss": 88.3228, + "step": 60880 + }, + { + "epoch": 0.5037018654092733, + "grad_norm": 829.0480346679688, + "learning_rate": 6.004698815394389e-06, + "loss": 124.8127, + "step": 60890 + }, + { + "epoch": 0.5037845886586425, + "grad_norm": 1101.244384765625, + "learning_rate": 6.003317174782801e-06, + "loss": 77.5046, + "step": 60900 + }, + { + "epoch": 0.5038673119080117, + "grad_norm": 1373.401611328125, + "learning_rate": 6.001935454341872e-06, + "loss": 92.4626, + "step": 60910 + }, + { + "epoch": 0.503950035157381, + "grad_norm": 489.91644287109375, + "learning_rate": 6.000553654181536e-06, + "loss": 92.368, + "step": 60920 + }, + { + "epoch": 0.5040327584067502, + "grad_norm": 813.798583984375, + "learning_rate": 5.999171774411737e-06, + "loss": 96.8709, + "step": 60930 + }, + { + "epoch": 0.5041154816561194, + "grad_norm": 1986.8172607421875, + "learning_rate": 5.997789815142427e-06, + "loss": 83.3268, + "step": 60940 + }, + { + "epoch": 0.5041982049054887, + "grad_norm": 721.0343017578125, + "learning_rate": 5.99640777648356e-06, + "loss": 111.3407, + "step": 60950 + }, + { + "epoch": 0.5042809281548579, + "grad_norm": 1814.00146484375, + "learning_rate": 5.9950256585450995e-06, + "loss": 101.1115, + "step": 60960 + }, + { + "epoch": 0.5043636514042271, + "grad_norm": 1539.85009765625, + "learning_rate": 5.993643461437013e-06, + "loss": 109.2939, + "step": 60970 + }, + { + "epoch": 0.5044463746535964, + "grad_norm": 1455.0811767578125, + "learning_rate": 5.992261185269278e-06, + "loss": 116.5055, + "step": 60980 + }, + { + "epoch": 0.5045290979029656, + "grad_norm": 673.0990600585938, + "learning_rate": 5.990878830151873e-06, + "loss": 84.7869, + "step": 60990 + }, + { + 
"epoch": 0.5046118211523348, + "grad_norm": 990.6354370117188, + "learning_rate": 5.989496396194787e-06, + "loss": 117.0743, + "step": 61000 + }, + { + "epoch": 0.5046945444017041, + "grad_norm": 659.1134033203125, + "learning_rate": 5.988113883508016e-06, + "loss": 85.4938, + "step": 61010 + }, + { + "epoch": 0.5047772676510733, + "grad_norm": 1276.8919677734375, + "learning_rate": 5.986731292201555e-06, + "loss": 111.5496, + "step": 61020 + }, + { + "epoch": 0.5048599909004425, + "grad_norm": 1091.1859130859375, + "learning_rate": 5.985348622385415e-06, + "loss": 139.6149, + "step": 61030 + }, + { + "epoch": 0.5049427141498118, + "grad_norm": 1123.3651123046875, + "learning_rate": 5.9839658741696085e-06, + "loss": 60.2066, + "step": 61040 + }, + { + "epoch": 0.505025437399181, + "grad_norm": 405.6517028808594, + "learning_rate": 5.982583047664151e-06, + "loss": 89.4623, + "step": 61050 + }, + { + "epoch": 0.5051081606485502, + "grad_norm": 489.998046875, + "learning_rate": 5.981200142979071e-06, + "loss": 70.606, + "step": 61060 + }, + { + "epoch": 0.5051908838979196, + "grad_norm": 611.7427368164062, + "learning_rate": 5.9798171602244e-06, + "loss": 90.265, + "step": 61070 + }, + { + "epoch": 0.5052736071472888, + "grad_norm": 500.9188537597656, + "learning_rate": 5.978434099510172e-06, + "loss": 102.3768, + "step": 61080 + }, + { + "epoch": 0.505356330396658, + "grad_norm": 939.5604858398438, + "learning_rate": 5.977050960946433e-06, + "loss": 95.7826, + "step": 61090 + }, + { + "epoch": 0.5054390536460273, + "grad_norm": 1079.417724609375, + "learning_rate": 5.975667744643235e-06, + "loss": 57.3004, + "step": 61100 + }, + { + "epoch": 0.5055217768953965, + "grad_norm": 801.0790405273438, + "learning_rate": 5.974284450710631e-06, + "loss": 100.4074, + "step": 61110 + }, + { + "epoch": 0.5056045001447657, + "grad_norm": 1189.5526123046875, + "learning_rate": 5.972901079258685e-06, + "loss": 89.206, + "step": 61120 + }, + { + "epoch": 0.505687223394135, + "grad_norm": 544.3245239257812, + "learning_rate": 5.971517630397465e-06, + "loss": 87.3128, + "step": 61130 + }, + { + "epoch": 0.5057699466435042, + "grad_norm": 804.9583129882812, + "learning_rate": 5.970134104237046e-06, + "loss": 118.5412, + "step": 61140 + }, + { + "epoch": 0.5058526698928734, + "grad_norm": 878.0859985351562, + "learning_rate": 5.96875050088751e-06, + "loss": 151.7707, + "step": 61150 + }, + { + "epoch": 0.5059353931422427, + "grad_norm": 555.1686401367188, + "learning_rate": 5.9673668204589396e-06, + "loss": 80.5509, + "step": 61160 + }, + { + "epoch": 0.5060181163916119, + "grad_norm": 887.9091186523438, + "learning_rate": 5.965983063061432e-06, + "loss": 97.7346, + "step": 61170 + }, + { + "epoch": 0.5061008396409811, + "grad_norm": 2689.885498046875, + "learning_rate": 5.964599228805087e-06, + "loss": 109.0754, + "step": 61180 + }, + { + "epoch": 0.5061835628903504, + "grad_norm": 851.7760009765625, + "learning_rate": 5.963215317800008e-06, + "loss": 111.0486, + "step": 61190 + }, + { + "epoch": 0.5062662861397196, + "grad_norm": 912.8546142578125, + "learning_rate": 5.961831330156306e-06, + "loss": 131.0121, + "step": 61200 + }, + { + "epoch": 0.5063490093890888, + "grad_norm": 641.365478515625, + "learning_rate": 5.960447265984098e-06, + "loss": 88.7224, + "step": 61210 + }, + { + "epoch": 0.5064317326384581, + "grad_norm": 1167.5374755859375, + "learning_rate": 5.95906312539351e-06, + "loss": 90.7471, + "step": 61220 + }, + { + "epoch": 0.5065144558878273, + "grad_norm": 808.2752685546875, + 
"learning_rate": 5.9576789084946705e-06, + "loss": 74.6994, + "step": 61230 + }, + { + "epoch": 0.5065971791371965, + "grad_norm": 1379.4791259765625, + "learning_rate": 5.956294615397716e-06, + "loss": 105.455, + "step": 61240 + }, + { + "epoch": 0.5066799023865658, + "grad_norm": 1430.2303466796875, + "learning_rate": 5.954910246212787e-06, + "loss": 126.3884, + "step": 61250 + }, + { + "epoch": 0.506762625635935, + "grad_norm": 1149.9755859375, + "learning_rate": 5.953525801050032e-06, + "loss": 83.9256, + "step": 61260 + }, + { + "epoch": 0.5068453488853042, + "grad_norm": 558.4307861328125, + "learning_rate": 5.952141280019605e-06, + "loss": 96.2179, + "step": 61270 + }, + { + "epoch": 0.5069280721346735, + "grad_norm": 742.656494140625, + "learning_rate": 5.950756683231667e-06, + "loss": 74.2085, + "step": 61280 + }, + { + "epoch": 0.5070107953840427, + "grad_norm": 596.1998291015625, + "learning_rate": 5.949372010796384e-06, + "loss": 52.8268, + "step": 61290 + }, + { + "epoch": 0.5070935186334119, + "grad_norm": 881.3981323242188, + "learning_rate": 5.947987262823924e-06, + "loss": 107.5389, + "step": 61300 + }, + { + "epoch": 0.5071762418827812, + "grad_norm": 675.0526123046875, + "learning_rate": 5.94660243942447e-06, + "loss": 123.2516, + "step": 61310 + }, + { + "epoch": 0.5072589651321504, + "grad_norm": 889.6369018554688, + "learning_rate": 5.945217540708206e-06, + "loss": 86.9526, + "step": 61320 + }, + { + "epoch": 0.5073416883815196, + "grad_norm": 736.0260620117188, + "learning_rate": 5.9438325667853185e-06, + "loss": 94.7347, + "step": 61330 + }, + { + "epoch": 0.5074244116308889, + "grad_norm": 899.1821899414062, + "learning_rate": 5.942447517766005e-06, + "loss": 106.1979, + "step": 61340 + }, + { + "epoch": 0.5075071348802581, + "grad_norm": 760.8129272460938, + "learning_rate": 5.941062393760467e-06, + "loss": 94.1305, + "step": 61350 + }, + { + "epoch": 0.5075898581296273, + "grad_norm": 434.6575622558594, + "learning_rate": 5.939677194878915e-06, + "loss": 140.0839, + "step": 61360 + }, + { + "epoch": 0.5076725813789965, + "grad_norm": 577.9569091796875, + "learning_rate": 5.93829192123156e-06, + "loss": 74.5071, + "step": 61370 + }, + { + "epoch": 0.5077553046283658, + "grad_norm": 694.65625, + "learning_rate": 5.936906572928625e-06, + "loss": 120.0574, + "step": 61380 + }, + { + "epoch": 0.507838027877735, + "grad_norm": 772.7642211914062, + "learning_rate": 5.935521150080331e-06, + "loss": 75.3494, + "step": 61390 + }, + { + "epoch": 0.5079207511271042, + "grad_norm": 790.8641967773438, + "learning_rate": 5.934135652796914e-06, + "loss": 87.367, + "step": 61400 + }, + { + "epoch": 0.5080034743764735, + "grad_norm": 813.5361938476562, + "learning_rate": 5.9327500811886095e-06, + "loss": 83.0071, + "step": 61410 + }, + { + "epoch": 0.5080861976258427, + "grad_norm": 437.74139404296875, + "learning_rate": 5.931364435365663e-06, + "loss": 115.0982, + "step": 61420 + }, + { + "epoch": 0.5081689208752119, + "grad_norm": 890.75927734375, + "learning_rate": 5.929978715438322e-06, + "loss": 75.1045, + "step": 61430 + }, + { + "epoch": 0.5082516441245812, + "grad_norm": 861.6392822265625, + "learning_rate": 5.928592921516843e-06, + "loss": 87.0132, + "step": 61440 + }, + { + "epoch": 0.5083343673739504, + "grad_norm": 401.1084289550781, + "learning_rate": 5.9272070537114855e-06, + "loss": 111.1549, + "step": 61450 + }, + { + "epoch": 0.5084170906233196, + "grad_norm": 886.0931396484375, + "learning_rate": 5.92582111213252e-06, + "loss": 130.5894, + "step": 61460 + }, + { 
+ "epoch": 0.5084998138726889, + "grad_norm": 573.5504150390625, + "learning_rate": 5.924435096890216e-06, + "loss": 96.3187, + "step": 61470 + }, + { + "epoch": 0.5085825371220581, + "grad_norm": 915.1154174804688, + "learning_rate": 5.923049008094855e-06, + "loss": 102.2801, + "step": 61480 + }, + { + "epoch": 0.5086652603714273, + "grad_norm": 789.848876953125, + "learning_rate": 5.921662845856719e-06, + "loss": 93.1844, + "step": 61490 + }, + { + "epoch": 0.5087479836207967, + "grad_norm": 1229.4134521484375, + "learning_rate": 5.920276610286102e-06, + "loss": 119.4875, + "step": 61500 + }, + { + "epoch": 0.5088307068701658, + "grad_norm": 1035.0843505859375, + "learning_rate": 5.918890301493299e-06, + "loss": 95.7163, + "step": 61510 + }, + { + "epoch": 0.508913430119535, + "grad_norm": 880.9725952148438, + "learning_rate": 5.91750391958861e-06, + "loss": 73.2856, + "step": 61520 + }, + { + "epoch": 0.5089961533689044, + "grad_norm": 646.2332763671875, + "learning_rate": 5.916117464682346e-06, + "loss": 106.2007, + "step": 61530 + }, + { + "epoch": 0.5090788766182736, + "grad_norm": 485.7245178222656, + "learning_rate": 5.914730936884819e-06, + "loss": 112.5809, + "step": 61540 + }, + { + "epoch": 0.5091615998676428, + "grad_norm": 1304.14794921875, + "learning_rate": 5.91334433630635e-06, + "loss": 124.1074, + "step": 61550 + }, + { + "epoch": 0.5092443231170121, + "grad_norm": 911.8284912109375, + "learning_rate": 5.911957663057264e-06, + "loss": 87.838, + "step": 61560 + }, + { + "epoch": 0.5093270463663813, + "grad_norm": 863.0823364257812, + "learning_rate": 5.910570917247892e-06, + "loss": 79.3177, + "step": 61570 + }, + { + "epoch": 0.5094097696157505, + "grad_norm": 518.0543823242188, + "learning_rate": 5.909184098988571e-06, + "loss": 97.5815, + "step": 61580 + }, + { + "epoch": 0.5094924928651198, + "grad_norm": 641.208984375, + "learning_rate": 5.907797208389644e-06, + "loss": 112.5585, + "step": 61590 + }, + { + "epoch": 0.509575216114489, + "grad_norm": 644.5621337890625, + "learning_rate": 5.906410245561459e-06, + "loss": 80.3052, + "step": 61600 + }, + { + "epoch": 0.5096579393638582, + "grad_norm": 1210.9039306640625, + "learning_rate": 5.90502321061437e-06, + "loss": 103.0409, + "step": 61610 + }, + { + "epoch": 0.5097406626132275, + "grad_norm": 1110.852294921875, + "learning_rate": 5.90363610365874e-06, + "loss": 120.3473, + "step": 61620 + }, + { + "epoch": 0.5098233858625967, + "grad_norm": 892.0844116210938, + "learning_rate": 5.9022489248049295e-06, + "loss": 101.4571, + "step": 61630 + }, + { + "epoch": 0.5099061091119659, + "grad_norm": 456.140625, + "learning_rate": 5.900861674163314e-06, + "loss": 74.4327, + "step": 61640 + }, + { + "epoch": 0.5099888323613352, + "grad_norm": 755.904541015625, + "learning_rate": 5.89947435184427e-06, + "loss": 90.8847, + "step": 61650 + }, + { + "epoch": 0.5100715556107044, + "grad_norm": 1451.006591796875, + "learning_rate": 5.89808695795818e-06, + "loss": 101.8353, + "step": 61660 + }, + { + "epoch": 0.5101542788600736, + "grad_norm": 1081.45263671875, + "learning_rate": 5.896699492615432e-06, + "loss": 86.6635, + "step": 61670 + }, + { + "epoch": 0.5102370021094429, + "grad_norm": 497.5079040527344, + "learning_rate": 5.895311955926419e-06, + "loss": 83.6076, + "step": 61680 + }, + { + "epoch": 0.5103197253588121, + "grad_norm": 643.5697631835938, + "learning_rate": 5.893924348001544e-06, + "loss": 97.4833, + "step": 61690 + }, + { + "epoch": 0.5104024486081813, + "grad_norm": 956.6340942382812, + "learning_rate": 
5.8925366689512124e-06, + "loss": 103.3445, + "step": 61700 + }, + { + "epoch": 0.5104851718575506, + "grad_norm": 665.5700073242188, + "learning_rate": 5.891148918885834e-06, + "loss": 65.6439, + "step": 61710 + }, + { + "epoch": 0.5105678951069198, + "grad_norm": 1049.6845703125, + "learning_rate": 5.8897610979158245e-06, + "loss": 89.6039, + "step": 61720 + }, + { + "epoch": 0.510650618356289, + "grad_norm": 814.806396484375, + "learning_rate": 5.888373206151608e-06, + "loss": 102.2241, + "step": 61730 + }, + { + "epoch": 0.5107333416056583, + "grad_norm": 841.8108520507812, + "learning_rate": 5.886985243703612e-06, + "loss": 103.3497, + "step": 61740 + }, + { + "epoch": 0.5108160648550275, + "grad_norm": 488.0580749511719, + "learning_rate": 5.885597210682273e-06, + "loss": 117.5541, + "step": 61750 + }, + { + "epoch": 0.5108987881043967, + "grad_norm": 780.1454467773438, + "learning_rate": 5.884209107198027e-06, + "loss": 118.4064, + "step": 61760 + }, + { + "epoch": 0.510981511353766, + "grad_norm": 1028.7705078125, + "learning_rate": 5.882820933361321e-06, + "loss": 127.9731, + "step": 61770 + }, + { + "epoch": 0.5110642346031352, + "grad_norm": 684.5718383789062, + "learning_rate": 5.881432689282604e-06, + "loss": 121.3453, + "step": 61780 + }, + { + "epoch": 0.5111469578525044, + "grad_norm": 910.465087890625, + "learning_rate": 5.880044375072333e-06, + "loss": 135.5772, + "step": 61790 + }, + { + "epoch": 0.5112296811018737, + "grad_norm": 1077.9769287109375, + "learning_rate": 5.8786559908409715e-06, + "loss": 110.6625, + "step": 61800 + }, + { + "epoch": 0.5113124043512429, + "grad_norm": 1104.657470703125, + "learning_rate": 5.877267536698984e-06, + "loss": 91.3742, + "step": 61810 + }, + { + "epoch": 0.5113951276006121, + "grad_norm": 924.7949829101562, + "learning_rate": 5.875879012756845e-06, + "loss": 93.8301, + "step": 61820 + }, + { + "epoch": 0.5114778508499814, + "grad_norm": 830.5784301757812, + "learning_rate": 5.8744904191250326e-06, + "loss": 135.9706, + "step": 61830 + }, + { + "epoch": 0.5115605740993506, + "grad_norm": 1287.56884765625, + "learning_rate": 5.873101755914031e-06, + "loss": 113.9375, + "step": 61840 + }, + { + "epoch": 0.5116432973487198, + "grad_norm": 2128.367431640625, + "learning_rate": 5.87171302323433e-06, + "loss": 91.666, + "step": 61850 + }, + { + "epoch": 0.5117260205980891, + "grad_norm": 1100.2708740234375, + "learning_rate": 5.870324221196424e-06, + "loss": 91.6138, + "step": 61860 + }, + { + "epoch": 0.5118087438474583, + "grad_norm": 636.7507934570312, + "learning_rate": 5.868935349910814e-06, + "loss": 105.8906, + "step": 61870 + }, + { + "epoch": 0.5118914670968275, + "grad_norm": 928.61083984375, + "learning_rate": 5.867546409488006e-06, + "loss": 99.2666, + "step": 61880 + }, + { + "epoch": 0.5119741903461968, + "grad_norm": 562.5609130859375, + "learning_rate": 5.8661574000385115e-06, + "loss": 76.9907, + "step": 61890 + }, + { + "epoch": 0.512056913595566, + "grad_norm": 390.1648864746094, + "learning_rate": 5.864768321672848e-06, + "loss": 76.5244, + "step": 61900 + }, + { + "epoch": 0.5121396368449352, + "grad_norm": 944.2666625976562, + "learning_rate": 5.863379174501538e-06, + "loss": 98.3042, + "step": 61910 + }, + { + "epoch": 0.5122223600943046, + "grad_norm": 1686.9356689453125, + "learning_rate": 5.861989958635109e-06, + "loss": 121.1383, + "step": 61920 + }, + { + "epoch": 0.5123050833436738, + "grad_norm": 878.6798095703125, + "learning_rate": 5.860600674184096e-06, + "loss": 105.9373, + "step": 61930 + }, + { + 
"epoch": 0.512387806593043, + "grad_norm": 528.4129638671875, + "learning_rate": 5.859211321259036e-06, + "loss": 83.7999, + "step": 61940 + }, + { + "epoch": 0.5124705298424123, + "grad_norm": 1257.8131103515625, + "learning_rate": 5.857821899970475e-06, + "loss": 112.0766, + "step": 61950 + }, + { + "epoch": 0.5125532530917815, + "grad_norm": 1426.787353515625, + "learning_rate": 5.856432410428963e-06, + "loss": 127.3927, + "step": 61960 + }, + { + "epoch": 0.5126359763411507, + "grad_norm": 1306.6192626953125, + "learning_rate": 5.8550428527450534e-06, + "loss": 63.1702, + "step": 61970 + }, + { + "epoch": 0.51271869959052, + "grad_norm": 1105.7908935546875, + "learning_rate": 5.8536532270293076e-06, + "loss": 80.9909, + "step": 61980 + }, + { + "epoch": 0.5128014228398892, + "grad_norm": 1267.3624267578125, + "learning_rate": 5.852263533392294e-06, + "loss": 94.7298, + "step": 61990 + }, + { + "epoch": 0.5128841460892584, + "grad_norm": 594.2600708007812, + "learning_rate": 5.850873771944581e-06, + "loss": 91.9315, + "step": 62000 + }, + { + "epoch": 0.5129668693386277, + "grad_norm": 652.3613891601562, + "learning_rate": 5.849483942796747e-06, + "loss": 87.1766, + "step": 62010 + }, + { + "epoch": 0.5130495925879969, + "grad_norm": 602.3596801757812, + "learning_rate": 5.848094046059375e-06, + "loss": 124.0294, + "step": 62020 + }, + { + "epoch": 0.5131323158373661, + "grad_norm": 573.1605224609375, + "learning_rate": 5.846704081843052e-06, + "loss": 111.6119, + "step": 62030 + }, + { + "epoch": 0.5132150390867354, + "grad_norm": 780.5606689453125, + "learning_rate": 5.84531405025837e-06, + "loss": 82.9944, + "step": 62040 + }, + { + "epoch": 0.5132977623361046, + "grad_norm": 913.4967041015625, + "learning_rate": 5.843923951415931e-06, + "loss": 108.7144, + "step": 62050 + }, + { + "epoch": 0.5133804855854738, + "grad_norm": 1110.280029296875, + "learning_rate": 5.842533785426334e-06, + "loss": 101.928, + "step": 62060 + }, + { + "epoch": 0.5134632088348431, + "grad_norm": 1380.97314453125, + "learning_rate": 5.84114355240019e-06, + "loss": 95.0093, + "step": 62070 + }, + { + "epoch": 0.5135459320842123, + "grad_norm": 570.7285766601562, + "learning_rate": 5.839753252448115e-06, + "loss": 88.7862, + "step": 62080 + }, + { + "epoch": 0.5136286553335815, + "grad_norm": 1217.79931640625, + "learning_rate": 5.838362885680728e-06, + "loss": 94.9879, + "step": 62090 + }, + { + "epoch": 0.5137113785829507, + "grad_norm": 1030.6907958984375, + "learning_rate": 5.8369724522086545e-06, + "loss": 88.0829, + "step": 62100 + }, + { + "epoch": 0.51379410183232, + "grad_norm": 2263.951416015625, + "learning_rate": 5.835581952142522e-06, + "loss": 147.4353, + "step": 62110 + }, + { + "epoch": 0.5138768250816892, + "grad_norm": 499.37347412109375, + "learning_rate": 5.834191385592969e-06, + "loss": 119.7903, + "step": 62120 + }, + { + "epoch": 0.5139595483310584, + "grad_norm": 1055.9906005859375, + "learning_rate": 5.8328007526706354e-06, + "loss": 87.058, + "step": 62130 + }, + { + "epoch": 0.5140422715804277, + "grad_norm": 1002.3789672851562, + "learning_rate": 5.83141005348617e-06, + "loss": 107.1515, + "step": 62140 + }, + { + "epoch": 0.5141249948297969, + "grad_norm": 859.0476684570312, + "learning_rate": 5.830019288150222e-06, + "loss": 90.1526, + "step": 62150 + }, + { + "epoch": 0.5142077180791661, + "grad_norm": 765.904052734375, + "learning_rate": 5.8286284567734456e-06, + "loss": 81.6313, + "step": 62160 + }, + { + "epoch": 0.5142904413285354, + "grad_norm": 676.10009765625, + 
"learning_rate": 5.827237559466508e-06, + "loss": 84.7288, + "step": 62170 + }, + { + "epoch": 0.5143731645779046, + "grad_norm": 1681.114990234375, + "learning_rate": 5.825846596340075e-06, + "loss": 122.0725, + "step": 62180 + }, + { + "epoch": 0.5144558878272738, + "grad_norm": 708.9700927734375, + "learning_rate": 5.824455567504817e-06, + "loss": 123.205, + "step": 62190 + }, + { + "epoch": 0.5145386110766431, + "grad_norm": 853.5513305664062, + "learning_rate": 5.823064473071414e-06, + "loss": 96.5516, + "step": 62200 + }, + { + "epoch": 0.5146213343260123, + "grad_norm": 937.1123046875, + "learning_rate": 5.821673313150546e-06, + "loss": 98.6421, + "step": 62210 + }, + { + "epoch": 0.5147040575753815, + "grad_norm": 802.1367797851562, + "learning_rate": 5.820282087852906e-06, + "loss": 99.0799, + "step": 62220 + }, + { + "epoch": 0.5147867808247508, + "grad_norm": 451.8292236328125, + "learning_rate": 5.818890797289185e-06, + "loss": 90.0844, + "step": 62230 + }, + { + "epoch": 0.51486950407412, + "grad_norm": 1798.909912109375, + "learning_rate": 5.81749944157008e-06, + "loss": 137.7885, + "step": 62240 + }, + { + "epoch": 0.5149522273234892, + "grad_norm": 985.250732421875, + "learning_rate": 5.816108020806297e-06, + "loss": 97.535, + "step": 62250 + }, + { + "epoch": 0.5150349505728585, + "grad_norm": 672.7926635742188, + "learning_rate": 5.814716535108545e-06, + "loss": 88.2172, + "step": 62260 + }, + { + "epoch": 0.5151176738222277, + "grad_norm": 716.781982421875, + "learning_rate": 5.813324984587536e-06, + "loss": 106.1954, + "step": 62270 + }, + { + "epoch": 0.5152003970715969, + "grad_norm": 1432.325439453125, + "learning_rate": 5.811933369353992e-06, + "loss": 111.049, + "step": 62280 + }, + { + "epoch": 0.5152831203209662, + "grad_norm": 760.8700561523438, + "learning_rate": 5.810541689518634e-06, + "loss": 94.8973, + "step": 62290 + }, + { + "epoch": 0.5153658435703354, + "grad_norm": 845.1988525390625, + "learning_rate": 5.809149945192194e-06, + "loss": 62.3175, + "step": 62300 + }, + { + "epoch": 0.5154485668197046, + "grad_norm": 770.4382934570312, + "learning_rate": 5.807758136485409e-06, + "loss": 106.6364, + "step": 62310 + }, + { + "epoch": 0.5155312900690739, + "grad_norm": 1148.9901123046875, + "learning_rate": 5.8063662635090136e-06, + "loss": 98.8217, + "step": 62320 + }, + { + "epoch": 0.5156140133184431, + "grad_norm": 1040.4677734375, + "learning_rate": 5.804974326373756e-06, + "loss": 92.393, + "step": 62330 + }, + { + "epoch": 0.5156967365678123, + "grad_norm": 609.6980590820312, + "learning_rate": 5.803582325190387e-06, + "loss": 97.1185, + "step": 62340 + }, + { + "epoch": 0.5157794598171817, + "grad_norm": 799.3777465820312, + "learning_rate": 5.802190260069657e-06, + "loss": 87.1282, + "step": 62350 + }, + { + "epoch": 0.5158621830665509, + "grad_norm": 968.39990234375, + "learning_rate": 5.800798131122332e-06, + "loss": 86.2219, + "step": 62360 + }, + { + "epoch": 0.51594490631592, + "grad_norm": 1028.2894287109375, + "learning_rate": 5.799405938459175e-06, + "loss": 76.0357, + "step": 62370 + }, + { + "epoch": 0.5160276295652894, + "grad_norm": 1118.3125, + "learning_rate": 5.7980136821909565e-06, + "loss": 91.065, + "step": 62380 + }, + { + "epoch": 0.5161103528146586, + "grad_norm": 817.6798706054688, + "learning_rate": 5.79662136242845e-06, + "loss": 112.0168, + "step": 62390 + }, + { + "epoch": 0.5161930760640278, + "grad_norm": 847.1541748046875, + "learning_rate": 5.795228979282439e-06, + "loss": 78.081, + "step": 62400 + }, + { + "epoch": 
0.5162757993133971, + "grad_norm": 474.493896484375, + "learning_rate": 5.793836532863707e-06, + "loss": 70.5183, + "step": 62410 + }, + { + "epoch": 0.5163585225627663, + "grad_norm": 720.2383422851562, + "learning_rate": 5.792444023283046e-06, + "loss": 87.0467, + "step": 62420 + }, + { + "epoch": 0.5164412458121355, + "grad_norm": 847.5111083984375, + "learning_rate": 5.791051450651251e-06, + "loss": 80.678, + "step": 62430 + }, + { + "epoch": 0.5165239690615048, + "grad_norm": 1734.1534423828125, + "learning_rate": 5.789658815079121e-06, + "loss": 106.6263, + "step": 62440 + }, + { + "epoch": 0.516606692310874, + "grad_norm": 418.1882019042969, + "learning_rate": 5.788266116677464e-06, + "loss": 88.7378, + "step": 62450 + }, + { + "epoch": 0.5166894155602432, + "grad_norm": 1164.404541015625, + "learning_rate": 5.78687335555709e-06, + "loss": 109.2202, + "step": 62460 + }, + { + "epoch": 0.5167721388096125, + "grad_norm": 843.881103515625, + "learning_rate": 5.785480531828815e-06, + "loss": 95.9288, + "step": 62470 + }, + { + "epoch": 0.5168548620589817, + "grad_norm": 753.1651000976562, + "learning_rate": 5.784087645603459e-06, + "loss": 88.32, + "step": 62480 + }, + { + "epoch": 0.5169375853083509, + "grad_norm": 488.68487548828125, + "learning_rate": 5.782694696991845e-06, + "loss": 77.3577, + "step": 62490 + }, + { + "epoch": 0.5170203085577202, + "grad_norm": 748.185546875, + "learning_rate": 5.781301686104808e-06, + "loss": 99.6195, + "step": 62500 + }, + { + "epoch": 0.5171030318070894, + "grad_norm": 1028.9573974609375, + "learning_rate": 5.779908613053181e-06, + "loss": 89.3196, + "step": 62510 + }, + { + "epoch": 0.5171857550564586, + "grad_norm": 823.2115478515625, + "learning_rate": 5.778515477947807e-06, + "loss": 85.4209, + "step": 62520 + }, + { + "epoch": 0.5172684783058279, + "grad_norm": 720.9722900390625, + "learning_rate": 5.777122280899527e-06, + "loss": 87.5316, + "step": 62530 + }, + { + "epoch": 0.5173512015551971, + "grad_norm": 393.12646484375, + "learning_rate": 5.775729022019193e-06, + "loss": 132.2376, + "step": 62540 + }, + { + "epoch": 0.5174339248045663, + "grad_norm": 517.8870849609375, + "learning_rate": 5.774335701417662e-06, + "loss": 74.0599, + "step": 62550 + }, + { + "epoch": 0.5175166480539356, + "grad_norm": 779.7863159179688, + "learning_rate": 5.7729423192057936e-06, + "loss": 104.9648, + "step": 62560 + }, + { + "epoch": 0.5175993713033048, + "grad_norm": 1225.2161865234375, + "learning_rate": 5.771548875494453e-06, + "loss": 73.9343, + "step": 62570 + }, + { + "epoch": 0.517682094552674, + "grad_norm": 1787.88232421875, + "learning_rate": 5.7701553703945055e-06, + "loss": 99.922, + "step": 62580 + }, + { + "epoch": 0.5177648178020433, + "grad_norm": 761.9515380859375, + "learning_rate": 5.768761804016833e-06, + "loss": 126.8296, + "step": 62590 + }, + { + "epoch": 0.5178475410514125, + "grad_norm": 700.2348022460938, + "learning_rate": 5.767368176472311e-06, + "loss": 77.3953, + "step": 62600 + }, + { + "epoch": 0.5179302643007817, + "grad_norm": 1287.00732421875, + "learning_rate": 5.765974487871826e-06, + "loss": 110.9408, + "step": 62610 + }, + { + "epoch": 0.518012987550151, + "grad_norm": 1209.4278564453125, + "learning_rate": 5.764580738326265e-06, + "loss": 122.3806, + "step": 62620 + }, + { + "epoch": 0.5180957107995202, + "grad_norm": 972.08251953125, + "learning_rate": 5.763186927946523e-06, + "loss": 96.5278, + "step": 62630 + }, + { + "epoch": 0.5181784340488894, + "grad_norm": 939.2523193359375, + "learning_rate": 
5.761793056843501e-06, + "loss": 96.9909, + "step": 62640 + }, + { + "epoch": 0.5182611572982587, + "grad_norm": 729.4473876953125, + "learning_rate": 5.760399125128102e-06, + "loss": 98.4378, + "step": 62650 + }, + { + "epoch": 0.5183438805476279, + "grad_norm": 780.3011474609375, + "learning_rate": 5.759005132911233e-06, + "loss": 78.8927, + "step": 62660 + }, + { + "epoch": 0.5184266037969971, + "grad_norm": 546.9750366210938, + "learning_rate": 5.75761108030381e-06, + "loss": 97.8188, + "step": 62670 + }, + { + "epoch": 0.5185093270463664, + "grad_norm": 898.6033325195312, + "learning_rate": 5.756216967416749e-06, + "loss": 73.9803, + "step": 62680 + }, + { + "epoch": 0.5185920502957356, + "grad_norm": 1270.0867919921875, + "learning_rate": 5.754822794360976e-06, + "loss": 100.2555, + "step": 62690 + }, + { + "epoch": 0.5186747735451048, + "grad_norm": 799.2184448242188, + "learning_rate": 5.753428561247416e-06, + "loss": 104.4311, + "step": 62700 + }, + { + "epoch": 0.5187574967944741, + "grad_norm": 756.2783813476562, + "learning_rate": 5.752034268187005e-06, + "loss": 90.7448, + "step": 62710 + }, + { + "epoch": 0.5188402200438433, + "grad_norm": 673.90576171875, + "learning_rate": 5.750639915290677e-06, + "loss": 100.3478, + "step": 62720 + }, + { + "epoch": 0.5189229432932125, + "grad_norm": 961.5318603515625, + "learning_rate": 5.749245502669375e-06, + "loss": 88.5219, + "step": 62730 + }, + { + "epoch": 0.5190056665425818, + "grad_norm": 700.3480834960938, + "learning_rate": 5.747851030434049e-06, + "loss": 85.1043, + "step": 62740 + }, + { + "epoch": 0.519088389791951, + "grad_norm": 508.70166015625, + "learning_rate": 5.746456498695648e-06, + "loss": 95.531, + "step": 62750 + }, + { + "epoch": 0.5191711130413202, + "grad_norm": 490.99737548828125, + "learning_rate": 5.7450619075651305e-06, + "loss": 87.5613, + "step": 62760 + }, + { + "epoch": 0.5192538362906896, + "grad_norm": 1027.1727294921875, + "learning_rate": 5.743667257153454e-06, + "loss": 81.3641, + "step": 62770 + }, + { + "epoch": 0.5193365595400588, + "grad_norm": 1106.285400390625, + "learning_rate": 5.742272547571588e-06, + "loss": 113.6713, + "step": 62780 + }, + { + "epoch": 0.519419282789428, + "grad_norm": 657.1253662109375, + "learning_rate": 5.740877778930503e-06, + "loss": 92.9361, + "step": 62790 + }, + { + "epoch": 0.5195020060387973, + "grad_norm": 831.34521484375, + "learning_rate": 5.739482951341172e-06, + "loss": 120.2162, + "step": 62800 + }, + { + "epoch": 0.5195847292881665, + "grad_norm": 709.2473754882812, + "learning_rate": 5.738088064914576e-06, + "loss": 112.4378, + "step": 62810 + }, + { + "epoch": 0.5196674525375357, + "grad_norm": 847.8458862304688, + "learning_rate": 5.7366931197617e-06, + "loss": 85.5061, + "step": 62820 + }, + { + "epoch": 0.5197501757869049, + "grad_norm": 1197.6585693359375, + "learning_rate": 5.735298115993535e-06, + "loss": 93.4385, + "step": 62830 + }, + { + "epoch": 0.5198328990362742, + "grad_norm": 1183.1817626953125, + "learning_rate": 5.733903053721072e-06, + "loss": 123.1538, + "step": 62840 + }, + { + "epoch": 0.5199156222856434, + "grad_norm": 1146.695068359375, + "learning_rate": 5.732507933055311e-06, + "loss": 133.2361, + "step": 62850 + }, + { + "epoch": 0.5199983455350126, + "grad_norm": 830.4008178710938, + "learning_rate": 5.731112754107257e-06, + "loss": 92.5354, + "step": 62860 + }, + { + "epoch": 0.5200810687843819, + "grad_norm": 1141.0968017578125, + "learning_rate": 5.729717516987916e-06, + "loss": 91.7082, + "step": 62870 + }, + { + "epoch": 
0.5201637920337511, + "grad_norm": 860.5159301757812, + "learning_rate": 5.7283222218083e-06, + "loss": 84.899, + "step": 62880 + }, + { + "epoch": 0.5202465152831203, + "grad_norm": 632.0242309570312, + "learning_rate": 5.726926868679429e-06, + "loss": 127.0738, + "step": 62890 + }, + { + "epoch": 0.5203292385324896, + "grad_norm": 877.5741577148438, + "learning_rate": 5.725531457712321e-06, + "loss": 98.0383, + "step": 62900 + }, + { + "epoch": 0.5204119617818588, + "grad_norm": 955.6021728515625, + "learning_rate": 5.724135989018007e-06, + "loss": 93.8473, + "step": 62910 + }, + { + "epoch": 0.520494685031228, + "grad_norm": 1114.900634765625, + "learning_rate": 5.722740462707515e-06, + "loss": 97.0482, + "step": 62920 + }, + { + "epoch": 0.5205774082805973, + "grad_norm": 1169.9739990234375, + "learning_rate": 5.72134487889188e-06, + "loss": 75.5919, + "step": 62930 + }, + { + "epoch": 0.5206601315299665, + "grad_norm": 1042.204833984375, + "learning_rate": 5.719949237682145e-06, + "loss": 96.2548, + "step": 62940 + }, + { + "epoch": 0.5207428547793357, + "grad_norm": 923.940185546875, + "learning_rate": 5.718553539189353e-06, + "loss": 120.747, + "step": 62950 + }, + { + "epoch": 0.520825578028705, + "grad_norm": 773.2394409179688, + "learning_rate": 5.717157783524553e-06, + "loss": 107.0883, + "step": 62960 + }, + { + "epoch": 0.5209083012780742, + "grad_norm": 1147.978759765625, + "learning_rate": 5.7157619707988e-06, + "loss": 112.3523, + "step": 62970 + }, + { + "epoch": 0.5209910245274434, + "grad_norm": 869.9029541015625, + "learning_rate": 5.714366101123152e-06, + "loss": 91.1115, + "step": 62980 + }, + { + "epoch": 0.5210737477768127, + "grad_norm": 503.1574401855469, + "learning_rate": 5.712970174608671e-06, + "loss": 62.8698, + "step": 62990 + }, + { + "epoch": 0.5211564710261819, + "grad_norm": 567.345947265625, + "learning_rate": 5.711574191366427e-06, + "loss": 87.9423, + "step": 63000 + }, + { + "epoch": 0.5212391942755511, + "grad_norm": 706.41748046875, + "learning_rate": 5.710178151507488e-06, + "loss": 73.9817, + "step": 63010 + }, + { + "epoch": 0.5213219175249204, + "grad_norm": 1259.374755859375, + "learning_rate": 5.708782055142934e-06, + "loss": 91.3289, + "step": 63020 + }, + { + "epoch": 0.5214046407742896, + "grad_norm": 1063.552001953125, + "learning_rate": 5.707385902383845e-06, + "loss": 120.5428, + "step": 63030 + }, + { + "epoch": 0.5214873640236588, + "grad_norm": 910.1624145507812, + "learning_rate": 5.7059896933413076e-06, + "loss": 83.0945, + "step": 63040 + }, + { + "epoch": 0.5215700872730281, + "grad_norm": 2553.767333984375, + "learning_rate": 5.7045934281264085e-06, + "loss": 102.2294, + "step": 63050 + }, + { + "epoch": 0.5216528105223973, + "grad_norm": 1032.9830322265625, + "learning_rate": 5.7031971068502425e-06, + "loss": 98.3443, + "step": 63060 + }, + { + "epoch": 0.5217355337717665, + "grad_norm": 837.3251342773438, + "learning_rate": 5.701800729623911e-06, + "loss": 105.6823, + "step": 63070 + }, + { + "epoch": 0.5218182570211358, + "grad_norm": 470.49658203125, + "learning_rate": 5.700404296558518e-06, + "loss": 94.3055, + "step": 63080 + }, + { + "epoch": 0.521900980270505, + "grad_norm": 722.3120727539062, + "learning_rate": 5.699007807765169e-06, + "loss": 96.7037, + "step": 63090 + }, + { + "epoch": 0.5219837035198742, + "grad_norm": 1043.8006591796875, + "learning_rate": 5.6976112633549764e-06, + "loss": 63.1547, + "step": 63100 + }, + { + "epoch": 0.5220664267692435, + "grad_norm": 559.216552734375, + "learning_rate": 
5.696214663439055e-06, + "loss": 89.2568, + "step": 63110 + }, + { + "epoch": 0.5221491500186127, + "grad_norm": 1489.68994140625, + "learning_rate": 5.694818008128531e-06, + "loss": 106.3069, + "step": 63120 + }, + { + "epoch": 0.5222318732679819, + "grad_norm": 1087.22802734375, + "learning_rate": 5.693421297534526e-06, + "loss": 116.6925, + "step": 63130 + }, + { + "epoch": 0.5223145965173512, + "grad_norm": 1571.4100341796875, + "learning_rate": 5.69202453176817e-06, + "loss": 104.0308, + "step": 63140 + }, + { + "epoch": 0.5223973197667204, + "grad_norm": 1171.318359375, + "learning_rate": 5.6906277109406e-06, + "loss": 85.1603, + "step": 63150 + }, + { + "epoch": 0.5224800430160896, + "grad_norm": 1105.7423095703125, + "learning_rate": 5.689230835162949e-06, + "loss": 92.7724, + "step": 63160 + }, + { + "epoch": 0.5225627662654589, + "grad_norm": 2501.460693359375, + "learning_rate": 5.687833904546367e-06, + "loss": 124.3585, + "step": 63170 + }, + { + "epoch": 0.5226454895148281, + "grad_norm": 673.11328125, + "learning_rate": 5.686436919201996e-06, + "loss": 95.2214, + "step": 63180 + }, + { + "epoch": 0.5227282127641973, + "grad_norm": 803.44970703125, + "learning_rate": 5.68503987924099e-06, + "loss": 96.271, + "step": 63190 + }, + { + "epoch": 0.5228109360135667, + "grad_norm": 733.796875, + "learning_rate": 5.683642784774506e-06, + "loss": 94.5968, + "step": 63200 + }, + { + "epoch": 0.5228936592629359, + "grad_norm": 893.05615234375, + "learning_rate": 5.682245635913701e-06, + "loss": 93.2139, + "step": 63210 + }, + { + "epoch": 0.522976382512305, + "grad_norm": 806.1270751953125, + "learning_rate": 5.680848432769743e-06, + "loss": 83.2208, + "step": 63220 + }, + { + "epoch": 0.5230591057616744, + "grad_norm": 950.8448486328125, + "learning_rate": 5.6794511754538005e-06, + "loss": 102.1387, + "step": 63230 + }, + { + "epoch": 0.5231418290110436, + "grad_norm": 994.8265991210938, + "learning_rate": 5.6780538640770455e-06, + "loss": 94.9941, + "step": 63240 + }, + { + "epoch": 0.5232245522604128, + "grad_norm": 805.738037109375, + "learning_rate": 5.6766564987506564e-06, + "loss": 97.3443, + "step": 63250 + }, + { + "epoch": 0.5233072755097821, + "grad_norm": 1825.2171630859375, + "learning_rate": 5.675259079585816e-06, + "loss": 93.6461, + "step": 63260 + }, + { + "epoch": 0.5233899987591513, + "grad_norm": 899.5811767578125, + "learning_rate": 5.673861606693708e-06, + "loss": 124.3475, + "step": 63270 + }, + { + "epoch": 0.5234727220085205, + "grad_norm": 738.1337890625, + "learning_rate": 5.672464080185526e-06, + "loss": 82.6199, + "step": 63280 + }, + { + "epoch": 0.5235554452578898, + "grad_norm": 1002.6314086914062, + "learning_rate": 5.671066500172462e-06, + "loss": 118.9046, + "step": 63290 + }, + { + "epoch": 0.523638168507259, + "grad_norm": 644.3387451171875, + "learning_rate": 5.669668866765717e-06, + "loss": 127.9811, + "step": 63300 + }, + { + "epoch": 0.5237208917566282, + "grad_norm": 654.7725219726562, + "learning_rate": 5.6682711800764935e-06, + "loss": 93.9227, + "step": 63310 + }, + { + "epoch": 0.5238036150059975, + "grad_norm": 973.210205078125, + "learning_rate": 5.6668734402159994e-06, + "loss": 84.845, + "step": 63320 + }, + { + "epoch": 0.5238863382553667, + "grad_norm": 1666.18017578125, + "learning_rate": 5.6654756472954464e-06, + "loss": 133.6506, + "step": 63330 + }, + { + "epoch": 0.5239690615047359, + "grad_norm": 1285.848388671875, + "learning_rate": 5.66407780142605e-06, + "loss": 102.1989, + "step": 63340 + }, + { + "epoch": 0.5240517847541052, 
+ "grad_norm": 1405.201904296875, + "learning_rate": 5.66267990271903e-06, + "loss": 101.3061, + "step": 63350 + }, + { + "epoch": 0.5241345080034744, + "grad_norm": 995.1788940429688, + "learning_rate": 5.661281951285613e-06, + "loss": 80.424, + "step": 63360 + }, + { + "epoch": 0.5242172312528436, + "grad_norm": 1280.261962890625, + "learning_rate": 5.6598839472370245e-06, + "loss": 129.4751, + "step": 63370 + }, + { + "epoch": 0.5242999545022129, + "grad_norm": 1111.2105712890625, + "learning_rate": 5.6584858906845e-06, + "loss": 111.6386, + "step": 63380 + }, + { + "epoch": 0.5243826777515821, + "grad_norm": 438.5060119628906, + "learning_rate": 5.657087781739274e-06, + "loss": 88.3789, + "step": 63390 + }, + { + "epoch": 0.5244654010009513, + "grad_norm": 1246.3992919921875, + "learning_rate": 5.6556896205125896e-06, + "loss": 113.5103, + "step": 63400 + }, + { + "epoch": 0.5245481242503206, + "grad_norm": 867.26220703125, + "learning_rate": 5.654291407115692e-06, + "loss": 90.075, + "step": 63410 + }, + { + "epoch": 0.5246308474996898, + "grad_norm": 922.8386840820312, + "learning_rate": 5.652893141659829e-06, + "loss": 103.2854, + "step": 63420 + }, + { + "epoch": 0.524713570749059, + "grad_norm": 1118.366455078125, + "learning_rate": 5.651494824256256e-06, + "loss": 92.4039, + "step": 63430 + }, + { + "epoch": 0.5247962939984283, + "grad_norm": 1036.559326171875, + "learning_rate": 5.650096455016227e-06, + "loss": 77.7015, + "step": 63440 + }, + { + "epoch": 0.5248790172477975, + "grad_norm": 649.5186157226562, + "learning_rate": 5.648698034051009e-06, + "loss": 93.3135, + "step": 63450 + }, + { + "epoch": 0.5249617404971667, + "grad_norm": 685.0455322265625, + "learning_rate": 5.647299561471865e-06, + "loss": 88.9663, + "step": 63460 + }, + { + "epoch": 0.525044463746536, + "grad_norm": 786.32470703125, + "learning_rate": 5.645901037390067e-06, + "loss": 108.5513, + "step": 63470 + }, + { + "epoch": 0.5251271869959052, + "grad_norm": 863.156005859375, + "learning_rate": 5.644502461916886e-06, + "loss": 86.3591, + "step": 63480 + }, + { + "epoch": 0.5252099102452744, + "grad_norm": 911.6554565429688, + "learning_rate": 5.643103835163602e-06, + "loss": 92.7152, + "step": 63490 + }, + { + "epoch": 0.5252926334946437, + "grad_norm": 1143.3475341796875, + "learning_rate": 5.641705157241497e-06, + "loss": 126.4204, + "step": 63500 + }, + { + "epoch": 0.5253753567440129, + "grad_norm": 1113.570556640625, + "learning_rate": 5.64030642826186e-06, + "loss": 106.2012, + "step": 63510 + }, + { + "epoch": 0.5254580799933821, + "grad_norm": 734.0611572265625, + "learning_rate": 5.6389076483359774e-06, + "loss": 85.6654, + "step": 63520 + }, + { + "epoch": 0.5255408032427513, + "grad_norm": 762.94677734375, + "learning_rate": 5.637508817575145e-06, + "loss": 111.5038, + "step": 63530 + }, + { + "epoch": 0.5256235264921206, + "grad_norm": 730.315673828125, + "learning_rate": 5.636109936090661e-06, + "loss": 118.9952, + "step": 63540 + }, + { + "epoch": 0.5257062497414898, + "grad_norm": 1400.8131103515625, + "learning_rate": 5.634711003993832e-06, + "loss": 109.5317, + "step": 63550 + }, + { + "epoch": 0.525788972990859, + "grad_norm": 822.1387329101562, + "learning_rate": 5.633312021395959e-06, + "loss": 115.5602, + "step": 63560 + }, + { + "epoch": 0.5258716962402283, + "grad_norm": 836.3711547851562, + "learning_rate": 5.631912988408356e-06, + "loss": 110.6894, + "step": 63570 + }, + { + "epoch": 0.5259544194895975, + "grad_norm": 709.7254028320312, + "learning_rate": 5.630513905142334e-06, + 
"loss": 92.7573, + "step": 63580 + }, + { + "epoch": 0.5260371427389667, + "grad_norm": 633.5755004882812, + "learning_rate": 5.629114771709217e-06, + "loss": 97.9029, + "step": 63590 + }, + { + "epoch": 0.526119865988336, + "grad_norm": 1466.9072265625, + "learning_rate": 5.627715588220325e-06, + "loss": 82.6277, + "step": 63600 + }, + { + "epoch": 0.5262025892377052, + "grad_norm": 1547.8734130859375, + "learning_rate": 5.626316354786982e-06, + "loss": 113.497, + "step": 63610 + }, + { + "epoch": 0.5262853124870744, + "grad_norm": 691.7115478515625, + "learning_rate": 5.624917071520524e-06, + "loss": 99.4881, + "step": 63620 + }, + { + "epoch": 0.5263680357364438, + "grad_norm": 1155.3173828125, + "learning_rate": 5.62351773853228e-06, + "loss": 85.3007, + "step": 63630 + }, + { + "epoch": 0.526450758985813, + "grad_norm": 1123.159423828125, + "learning_rate": 5.6221183559335935e-06, + "loss": 103.121, + "step": 63640 + }, + { + "epoch": 0.5265334822351821, + "grad_norm": 1173.2393798828125, + "learning_rate": 5.6207189238358025e-06, + "loss": 88.9635, + "step": 63650 + }, + { + "epoch": 0.5266162054845515, + "grad_norm": 1155.658935546875, + "learning_rate": 5.619319442350256e-06, + "loss": 100.2915, + "step": 63660 + }, + { + "epoch": 0.5266989287339207, + "grad_norm": 1091.1961669921875, + "learning_rate": 5.617919911588304e-06, + "loss": 92.6392, + "step": 63670 + }, + { + "epoch": 0.5267816519832899, + "grad_norm": 922.4959106445312, + "learning_rate": 5.616520331661301e-06, + "loss": 112.2287, + "step": 63680 + }, + { + "epoch": 0.5268643752326592, + "grad_norm": 765.5256958007812, + "learning_rate": 5.615120702680604e-06, + "loss": 98.105, + "step": 63690 + }, + { + "epoch": 0.5269470984820284, + "grad_norm": 1066.0155029296875, + "learning_rate": 5.6137210247575754e-06, + "loss": 103.8472, + "step": 63700 + }, + { + "epoch": 0.5270298217313976, + "grad_norm": 1942.541015625, + "learning_rate": 5.6123212980035825e-06, + "loss": 109.4006, + "step": 63710 + }, + { + "epoch": 0.5271125449807669, + "grad_norm": 926.7212524414062, + "learning_rate": 5.610921522529994e-06, + "loss": 103.9244, + "step": 63720 + }, + { + "epoch": 0.5271952682301361, + "grad_norm": 811.9002685546875, + "learning_rate": 5.609521698448183e-06, + "loss": 96.8324, + "step": 63730 + }, + { + "epoch": 0.5272779914795053, + "grad_norm": 774.9049682617188, + "learning_rate": 5.608121825869528e-06, + "loss": 173.0452, + "step": 63740 + }, + { + "epoch": 0.5273607147288746, + "grad_norm": 1026.793212890625, + "learning_rate": 5.60672190490541e-06, + "loss": 94.8755, + "step": 63750 + }, + { + "epoch": 0.5274434379782438, + "grad_norm": 405.6708679199219, + "learning_rate": 5.6053219356672155e-06, + "loss": 86.6626, + "step": 63760 + }, + { + "epoch": 0.527526161227613, + "grad_norm": 890.8868408203125, + "learning_rate": 5.603921918266332e-06, + "loss": 87.5938, + "step": 63770 + }, + { + "epoch": 0.5276088844769823, + "grad_norm": 737.17236328125, + "learning_rate": 5.602521852814152e-06, + "loss": 73.9403, + "step": 63780 + }, + { + "epoch": 0.5276916077263515, + "grad_norm": 809.0032348632812, + "learning_rate": 5.6011217394220755e-06, + "loss": 79.7671, + "step": 63790 + }, + { + "epoch": 0.5277743309757207, + "grad_norm": 1906.908203125, + "learning_rate": 5.599721578201499e-06, + "loss": 82.5073, + "step": 63800 + }, + { + "epoch": 0.52785705422509, + "grad_norm": 1029.4927978515625, + "learning_rate": 5.59832136926383e-06, + "loss": 106.927, + "step": 63810 + }, + { + "epoch": 0.5279397774744592, + 
"grad_norm": 864.1856079101562, + "learning_rate": 5.5969211127204744e-06, + "loss": 100.7081, + "step": 63820 + }, + { + "epoch": 0.5280225007238284, + "grad_norm": 958.0577392578125, + "learning_rate": 5.595520808682848e-06, + "loss": 81.9101, + "step": 63830 + }, + { + "epoch": 0.5281052239731977, + "grad_norm": 1057.2666015625, + "learning_rate": 5.594120457262361e-06, + "loss": 109.2872, + "step": 63840 + }, + { + "epoch": 0.5281879472225669, + "grad_norm": 963.5335083007812, + "learning_rate": 5.592720058570438e-06, + "loss": 95.8562, + "step": 63850 + }, + { + "epoch": 0.5282706704719361, + "grad_norm": 841.0330810546875, + "learning_rate": 5.591319612718498e-06, + "loss": 76.5216, + "step": 63860 + }, + { + "epoch": 0.5283533937213054, + "grad_norm": 1039.90771484375, + "learning_rate": 5.589919119817971e-06, + "loss": 107.0152, + "step": 63870 + }, + { + "epoch": 0.5284361169706746, + "grad_norm": 1642.884033203125, + "learning_rate": 5.588518579980288e-06, + "loss": 140.7489, + "step": 63880 + }, + { + "epoch": 0.5285188402200438, + "grad_norm": 1069.137451171875, + "learning_rate": 5.587117993316882e-06, + "loss": 101.0652, + "step": 63890 + }, + { + "epoch": 0.5286015634694131, + "grad_norm": 494.1618957519531, + "learning_rate": 5.585717359939192e-06, + "loss": 85.4378, + "step": 63900 + }, + { + "epoch": 0.5286842867187823, + "grad_norm": 1175.2783203125, + "learning_rate": 5.584316679958659e-06, + "loss": 103.4738, + "step": 63910 + }, + { + "epoch": 0.5287670099681515, + "grad_norm": 927.4609375, + "learning_rate": 5.58291595348673e-06, + "loss": 104.5927, + "step": 63920 + }, + { + "epoch": 0.5288497332175208, + "grad_norm": 832.366455078125, + "learning_rate": 5.581515180634853e-06, + "loss": 80.5153, + "step": 63930 + }, + { + "epoch": 0.52893245646689, + "grad_norm": 1159.866943359375, + "learning_rate": 5.580114361514484e-06, + "loss": 90.9927, + "step": 63940 + }, + { + "epoch": 0.5290151797162592, + "grad_norm": 938.1612548828125, + "learning_rate": 5.5787134962370755e-06, + "loss": 67.0976, + "step": 63950 + }, + { + "epoch": 0.5290979029656285, + "grad_norm": 965.9879150390625, + "learning_rate": 5.57731258491409e-06, + "loss": 91.4234, + "step": 63960 + }, + { + "epoch": 0.5291806262149977, + "grad_norm": 816.1832885742188, + "learning_rate": 5.575911627656993e-06, + "loss": 87.2834, + "step": 63970 + }, + { + "epoch": 0.5292633494643669, + "grad_norm": 424.278564453125, + "learning_rate": 5.5745106245772506e-06, + "loss": 82.5088, + "step": 63980 + }, + { + "epoch": 0.5293460727137362, + "grad_norm": 1383.386962890625, + "learning_rate": 5.573109575786334e-06, + "loss": 99.203, + "step": 63990 + }, + { + "epoch": 0.5294287959631054, + "grad_norm": 1369.8514404296875, + "learning_rate": 5.571708481395719e-06, + "loss": 87.6147, + "step": 64000 + }, + { + "epoch": 0.5295115192124746, + "grad_norm": 679.2786865234375, + "learning_rate": 5.570307341516882e-06, + "loss": 109.6216, + "step": 64010 + }, + { + "epoch": 0.529594242461844, + "grad_norm": 688.9489135742188, + "learning_rate": 5.568906156261309e-06, + "loss": 78.812, + "step": 64020 + }, + { + "epoch": 0.5296769657112131, + "grad_norm": 716.52880859375, + "learning_rate": 5.567504925740484e-06, + "loss": 85.4848, + "step": 64030 + }, + { + "epoch": 0.5297596889605823, + "grad_norm": 1037.4881591796875, + "learning_rate": 5.566103650065897e-06, + "loss": 98.9546, + "step": 64040 + }, + { + "epoch": 0.5298424122099517, + "grad_norm": 835.7286376953125, + "learning_rate": 5.564702329349041e-06, + "loss": 
104.4599, + "step": 64050 + }, + { + "epoch": 0.5299251354593209, + "grad_norm": 1021.035888671875, + "learning_rate": 5.56330096370141e-06, + "loss": 82.6299, + "step": 64060 + }, + { + "epoch": 0.53000785870869, + "grad_norm": 469.7624816894531, + "learning_rate": 5.561899553234509e-06, + "loss": 90.2693, + "step": 64070 + }, + { + "epoch": 0.5300905819580594, + "grad_norm": 698.7485961914062, + "learning_rate": 5.560498098059838e-06, + "loss": 116.0395, + "step": 64080 + }, + { + "epoch": 0.5301733052074286, + "grad_norm": 1172.5970458984375, + "learning_rate": 5.559096598288906e-06, + "loss": 70.6558, + "step": 64090 + }, + { + "epoch": 0.5302560284567978, + "grad_norm": 901.9520263671875, + "learning_rate": 5.557695054033223e-06, + "loss": 85.0805, + "step": 64100 + }, + { + "epoch": 0.5303387517061671, + "grad_norm": 1102.83056640625, + "learning_rate": 5.556293465404304e-06, + "loss": 104.11, + "step": 64110 + }, + { + "epoch": 0.5304214749555363, + "grad_norm": 862.3082275390625, + "learning_rate": 5.554891832513668e-06, + "loss": 69.0226, + "step": 64120 + }, + { + "epoch": 0.5305041982049055, + "grad_norm": 850.2838745117188, + "learning_rate": 5.553490155472835e-06, + "loss": 88.536, + "step": 64130 + }, + { + "epoch": 0.5305869214542748, + "grad_norm": 1342.24951171875, + "learning_rate": 5.55208843439333e-06, + "loss": 83.8514, + "step": 64140 + }, + { + "epoch": 0.530669644703644, + "grad_norm": 704.4754028320312, + "learning_rate": 5.550686669386683e-06, + "loss": 52.0388, + "step": 64150 + }, + { + "epoch": 0.5307523679530132, + "grad_norm": 709.6826171875, + "learning_rate": 5.549284860564425e-06, + "loss": 151.5818, + "step": 64160 + }, + { + "epoch": 0.5308350912023825, + "grad_norm": 891.2298583984375, + "learning_rate": 5.547883008038091e-06, + "loss": 132.4302, + "step": 64170 + }, + { + "epoch": 0.5309178144517517, + "grad_norm": 668.7271728515625, + "learning_rate": 5.54648111191922e-06, + "loss": 102.1614, + "step": 64180 + }, + { + "epoch": 0.5310005377011209, + "grad_norm": 1067.2647705078125, + "learning_rate": 5.545079172319355e-06, + "loss": 157.2434, + "step": 64190 + }, + { + "epoch": 0.5310832609504902, + "grad_norm": 731.7130737304688, + "learning_rate": 5.543677189350043e-06, + "loss": 86.4254, + "step": 64200 + }, + { + "epoch": 0.5311659841998594, + "grad_norm": 895.8660888671875, + "learning_rate": 5.542275163122831e-06, + "loss": 81.3476, + "step": 64210 + }, + { + "epoch": 0.5312487074492286, + "grad_norm": 549.8636474609375, + "learning_rate": 5.540873093749274e-06, + "loss": 91.9869, + "step": 64220 + }, + { + "epoch": 0.5313314306985979, + "grad_norm": 711.4556274414062, + "learning_rate": 5.539470981340926e-06, + "loss": 80.405, + "step": 64230 + }, + { + "epoch": 0.5314141539479671, + "grad_norm": 565.5111083984375, + "learning_rate": 5.538068826009349e-06, + "loss": 95.2233, + "step": 64240 + }, + { + "epoch": 0.5314968771973363, + "grad_norm": 1099.053955078125, + "learning_rate": 5.536666627866104e-06, + "loss": 78.3078, + "step": 64250 + }, + { + "epoch": 0.5315796004467055, + "grad_norm": 1019.4757080078125, + "learning_rate": 5.53526438702276e-06, + "loss": 100.4306, + "step": 64260 + }, + { + "epoch": 0.5316623236960748, + "grad_norm": 684.2471313476562, + "learning_rate": 5.533862103590883e-06, + "loss": 81.8137, + "step": 64270 + }, + { + "epoch": 0.531745046945444, + "grad_norm": 1522.2894287109375, + "learning_rate": 5.532459777682051e-06, + "loss": 85.9613, + "step": 64280 + }, + { + "epoch": 0.5318277701948132, + "grad_norm": 
403.1702880859375, + "learning_rate": 5.5310574094078365e-06, + "loss": 84.0117, + "step": 64290 + }, + { + "epoch": 0.5319104934441825, + "grad_norm": 679.674072265625, + "learning_rate": 5.529654998879821e-06, + "loss": 59.2508, + "step": 64300 + }, + { + "epoch": 0.5319932166935517, + "grad_norm": 565.5519409179688, + "learning_rate": 5.528252546209588e-06, + "loss": 76.0347, + "step": 64310 + }, + { + "epoch": 0.5320759399429209, + "grad_norm": 939.0302124023438, + "learning_rate": 5.526850051508725e-06, + "loss": 84.2198, + "step": 64320 + }, + { + "epoch": 0.5321586631922902, + "grad_norm": 1065.8779296875, + "learning_rate": 5.525447514888822e-06, + "loss": 146.9439, + "step": 64330 + }, + { + "epoch": 0.5322413864416594, + "grad_norm": 1845.5435791015625, + "learning_rate": 5.52404493646147e-06, + "loss": 111.1832, + "step": 64340 + }, + { + "epoch": 0.5323241096910286, + "grad_norm": 1164.8885498046875, + "learning_rate": 5.522642316338268e-06, + "loss": 89.9883, + "step": 64350 + }, + { + "epoch": 0.5324068329403979, + "grad_norm": 661.2352294921875, + "learning_rate": 5.521239654630816e-06, + "loss": 92.0326, + "step": 64360 + }, + { + "epoch": 0.5324895561897671, + "grad_norm": 995.6377563476562, + "learning_rate": 5.519836951450716e-06, + "loss": 120.3884, + "step": 64370 + }, + { + "epoch": 0.5325722794391363, + "grad_norm": 664.2215576171875, + "learning_rate": 5.518434206909577e-06, + "loss": 102.107, + "step": 64380 + }, + { + "epoch": 0.5326550026885056, + "grad_norm": 934.103759765625, + "learning_rate": 5.517031421119006e-06, + "loss": 73.7082, + "step": 64390 + }, + { + "epoch": 0.5327377259378748, + "grad_norm": 702.5838012695312, + "learning_rate": 5.5156285941906175e-06, + "loss": 93.3802, + "step": 64400 + }, + { + "epoch": 0.532820449187244, + "grad_norm": 627.2533569335938, + "learning_rate": 5.51422572623603e-06, + "loss": 101.0479, + "step": 64410 + }, + { + "epoch": 0.5329031724366133, + "grad_norm": 362.7051696777344, + "learning_rate": 5.512822817366859e-06, + "loss": 79.3406, + "step": 64420 + }, + { + "epoch": 0.5329858956859825, + "grad_norm": 895.7844848632812, + "learning_rate": 5.511419867694733e-06, + "loss": 91.442, + "step": 64430 + }, + { + "epoch": 0.5330686189353517, + "grad_norm": 726.357421875, + "learning_rate": 5.510016877331271e-06, + "loss": 93.3173, + "step": 64440 + }, + { + "epoch": 0.533151342184721, + "grad_norm": 779.744873046875, + "learning_rate": 5.50861384638811e-06, + "loss": 88.6204, + "step": 64450 + }, + { + "epoch": 0.5332340654340902, + "grad_norm": 591.9418334960938, + "learning_rate": 5.50721077497688e-06, + "loss": 72.3884, + "step": 64460 + }, + { + "epoch": 0.5333167886834594, + "grad_norm": 1127.8675537109375, + "learning_rate": 5.505807663209215e-06, + "loss": 124.7344, + "step": 64470 + }, + { + "epoch": 0.5333995119328288, + "grad_norm": 932.3980102539062, + "learning_rate": 5.504404511196755e-06, + "loss": 89.4976, + "step": 64480 + }, + { + "epoch": 0.533482235182198, + "grad_norm": 972.6319580078125, + "learning_rate": 5.503001319051142e-06, + "loss": 84.1025, + "step": 64490 + }, + { + "epoch": 0.5335649584315671, + "grad_norm": 898.1183471679688, + "learning_rate": 5.5015980868840254e-06, + "loss": 126.5537, + "step": 64500 + }, + { + "epoch": 0.5336476816809365, + "grad_norm": 1008.814208984375, + "learning_rate": 5.500194814807051e-06, + "loss": 94.5552, + "step": 64510 + }, + { + "epoch": 0.5337304049303057, + "grad_norm": 1275.499267578125, + "learning_rate": 5.498791502931868e-06, + "loss": 98.7025, + 
"step": 64520 + }, + { + "epoch": 0.5338131281796749, + "grad_norm": 939.2733154296875, + "learning_rate": 5.497388151370136e-06, + "loss": 120.4424, + "step": 64530 + }, + { + "epoch": 0.5338958514290442, + "grad_norm": 2313.487548828125, + "learning_rate": 5.495984760233511e-06, + "loss": 104.2078, + "step": 64540 + }, + { + "epoch": 0.5339785746784134, + "grad_norm": 2875.86474609375, + "learning_rate": 5.494581329633656e-06, + "loss": 118.5582, + "step": 64550 + }, + { + "epoch": 0.5340612979277826, + "grad_norm": 1355.4534912109375, + "learning_rate": 5.493177859682234e-06, + "loss": 119.3427, + "step": 64560 + }, + { + "epoch": 0.5341440211771519, + "grad_norm": 643.4739990234375, + "learning_rate": 5.491774350490912e-06, + "loss": 76.2629, + "step": 64570 + }, + { + "epoch": 0.5342267444265211, + "grad_norm": 814.0101928710938, + "learning_rate": 5.490370802171362e-06, + "loss": 102.4891, + "step": 64580 + }, + { + "epoch": 0.5343094676758903, + "grad_norm": 653.1177978515625, + "learning_rate": 5.488967214835259e-06, + "loss": 104.6328, + "step": 64590 + }, + { + "epoch": 0.5343921909252596, + "grad_norm": 650.8169555664062, + "learning_rate": 5.487563588594278e-06, + "loss": 106.7863, + "step": 64600 + }, + { + "epoch": 0.5344749141746288, + "grad_norm": 875.9237060546875, + "learning_rate": 5.4861599235601e-06, + "loss": 78.3624, + "step": 64610 + }, + { + "epoch": 0.534557637423998, + "grad_norm": 1083.6778564453125, + "learning_rate": 5.484756219844408e-06, + "loss": 117.263, + "step": 64620 + }, + { + "epoch": 0.5346403606733673, + "grad_norm": 751.8464965820312, + "learning_rate": 5.483352477558889e-06, + "loss": 113.5279, + "step": 64630 + }, + { + "epoch": 0.5347230839227365, + "grad_norm": 1062.31982421875, + "learning_rate": 5.48194869681523e-06, + "loss": 103.9293, + "step": 64640 + }, + { + "epoch": 0.5348058071721057, + "grad_norm": 515.4086303710938, + "learning_rate": 5.480544877725127e-06, + "loss": 97.1701, + "step": 64650 + }, + { + "epoch": 0.534888530421475, + "grad_norm": 725.5648193359375, + "learning_rate": 5.479141020400271e-06, + "loss": 76.7176, + "step": 64660 + }, + { + "epoch": 0.5349712536708442, + "grad_norm": 719.1748657226562, + "learning_rate": 5.477737124952366e-06, + "loss": 96.5835, + "step": 64670 + }, + { + "epoch": 0.5350539769202134, + "grad_norm": 916.6452026367188, + "learning_rate": 5.476333191493108e-06, + "loss": 85.3995, + "step": 64680 + }, + { + "epoch": 0.5351367001695827, + "grad_norm": 436.0997619628906, + "learning_rate": 5.474929220134205e-06, + "loss": 86.9272, + "step": 64690 + }, + { + "epoch": 0.5352194234189519, + "grad_norm": 1398.6942138671875, + "learning_rate": 5.473525210987363e-06, + "loss": 91.1325, + "step": 64700 + }, + { + "epoch": 0.5353021466683211, + "grad_norm": 1058.8551025390625, + "learning_rate": 5.472121164164295e-06, + "loss": 80.0483, + "step": 64710 + }, + { + "epoch": 0.5353848699176904, + "grad_norm": 784.1776123046875, + "learning_rate": 5.47071707977671e-06, + "loss": 115.0421, + "step": 64720 + }, + { + "epoch": 0.5354675931670596, + "grad_norm": 1069.0155029296875, + "learning_rate": 5.46931295793633e-06, + "loss": 97.4913, + "step": 64730 + }, + { + "epoch": 0.5355503164164288, + "grad_norm": 1227.8470458984375, + "learning_rate": 5.46790879875487e-06, + "loss": 98.9259, + "step": 64740 + }, + { + "epoch": 0.5356330396657981, + "grad_norm": 632.8845825195312, + "learning_rate": 5.466504602344055e-06, + "loss": 74.2294, + "step": 64750 + }, + { + "epoch": 0.5357157629151673, + "grad_norm": 
1411.2496337890625, + "learning_rate": 5.465100368815609e-06, + "loss": 109.9609, + "step": 64760 + }, + { + "epoch": 0.5357984861645365, + "grad_norm": 819.7656860351562, + "learning_rate": 5.463696098281262e-06, + "loss": 113.5461, + "step": 64770 + }, + { + "epoch": 0.5358812094139058, + "grad_norm": 576.60546875, + "learning_rate": 5.462291790852744e-06, + "loss": 83.2793, + "step": 64780 + }, + { + "epoch": 0.535963932663275, + "grad_norm": 563.931640625, + "learning_rate": 5.46088744664179e-06, + "loss": 112.9462, + "step": 64790 + }, + { + "epoch": 0.5360466559126442, + "grad_norm": 1462.51416015625, + "learning_rate": 5.459483065760138e-06, + "loss": 93.2692, + "step": 64800 + }, + { + "epoch": 0.5361293791620135, + "grad_norm": 717.8792114257812, + "learning_rate": 5.458078648319526e-06, + "loss": 72.0933, + "step": 64810 + }, + { + "epoch": 0.5362121024113827, + "grad_norm": 873.50341796875, + "learning_rate": 5.456674194431698e-06, + "loss": 112.2229, + "step": 64820 + }, + { + "epoch": 0.5362948256607519, + "grad_norm": 710.7631225585938, + "learning_rate": 5.455269704208401e-06, + "loss": 66.035, + "step": 64830 + }, + { + "epoch": 0.5363775489101212, + "grad_norm": 909.28564453125, + "learning_rate": 5.453865177761384e-06, + "loss": 100.6332, + "step": 64840 + }, + { + "epoch": 0.5364602721594904, + "grad_norm": 1102.3038330078125, + "learning_rate": 5.4524606152023975e-06, + "loss": 100.337, + "step": 64850 + }, + { + "epoch": 0.5365429954088596, + "grad_norm": 2068.275390625, + "learning_rate": 5.4510560166431935e-06, + "loss": 116.3672, + "step": 64860 + }, + { + "epoch": 0.536625718658229, + "grad_norm": 770.456787109375, + "learning_rate": 5.449651382195535e-06, + "loss": 93.339, + "step": 64870 + }, + { + "epoch": 0.5367084419075981, + "grad_norm": 614.5863037109375, + "learning_rate": 5.448246711971178e-06, + "loss": 112.4163, + "step": 64880 + }, + { + "epoch": 0.5367911651569673, + "grad_norm": 853.5326538085938, + "learning_rate": 5.44684200608189e-06, + "loss": 110.5457, + "step": 64890 + }, + { + "epoch": 0.5368738884063367, + "grad_norm": 718.2283935546875, + "learning_rate": 5.445437264639433e-06, + "loss": 94.2823, + "step": 64900 + }, + { + "epoch": 0.5369566116557059, + "grad_norm": 723.5383911132812, + "learning_rate": 5.444032487755575e-06, + "loss": 93.2752, + "step": 64910 + }, + { + "epoch": 0.537039334905075, + "grad_norm": 694.3563842773438, + "learning_rate": 5.442627675542092e-06, + "loss": 79.1228, + "step": 64920 + }, + { + "epoch": 0.5371220581544444, + "grad_norm": 1094.6685791015625, + "learning_rate": 5.441222828110756e-06, + "loss": 97.7163, + "step": 64930 + }, + { + "epoch": 0.5372047814038136, + "grad_norm": 809.086669921875, + "learning_rate": 5.439817945573345e-06, + "loss": 78.5984, + "step": 64940 + }, + { + "epoch": 0.5372875046531828, + "grad_norm": 1463.13037109375, + "learning_rate": 5.438413028041637e-06, + "loss": 137.7098, + "step": 64950 + }, + { + "epoch": 0.5373702279025521, + "grad_norm": 686.939697265625, + "learning_rate": 5.4370080756274155e-06, + "loss": 119.1664, + "step": 64960 + }, + { + "epoch": 0.5374529511519213, + "grad_norm": 1107.8900146484375, + "learning_rate": 5.435603088442471e-06, + "loss": 95.0079, + "step": 64970 + }, + { + "epoch": 0.5375356744012905, + "grad_norm": 705.9915161132812, + "learning_rate": 5.434198066598585e-06, + "loss": 78.4787, + "step": 64980 + }, + { + "epoch": 0.5376183976506597, + "grad_norm": 954.3821411132812, + "learning_rate": 5.4327930102075525e-06, + "loss": 89.3804, + "step": 
64990 + }, + { + "epoch": 0.537701120900029, + "grad_norm": 947.5150146484375, + "learning_rate": 5.431387919381166e-06, + "loss": 84.6999, + "step": 65000 + }, + { + "epoch": 0.5377838441493982, + "grad_norm": 356.02276611328125, + "learning_rate": 5.429982794231221e-06, + "loss": 107.2157, + "step": 65010 + }, + { + "epoch": 0.5378665673987674, + "grad_norm": 1482.7928466796875, + "learning_rate": 5.428577634869521e-06, + "loss": 92.7534, + "step": 65020 + }, + { + "epoch": 0.5379492906481367, + "grad_norm": 966.0228271484375, + "learning_rate": 5.427172441407864e-06, + "loss": 112.3402, + "step": 65030 + }, + { + "epoch": 0.5380320138975059, + "grad_norm": 688.48583984375, + "learning_rate": 5.425767213958057e-06, + "loss": 67.1227, + "step": 65040 + }, + { + "epoch": 0.5381147371468751, + "grad_norm": 847.5924072265625, + "learning_rate": 5.424361952631907e-06, + "loss": 125.7834, + "step": 65050 + }, + { + "epoch": 0.5381974603962444, + "grad_norm": 2318.634033203125, + "learning_rate": 5.422956657541224e-06, + "loss": 103.7072, + "step": 65060 + }, + { + "epoch": 0.5382801836456136, + "grad_norm": 1350.766357421875, + "learning_rate": 5.421551328797821e-06, + "loss": 96.6112, + "step": 65070 + }, + { + "epoch": 0.5383629068949828, + "grad_norm": 693.9425659179688, + "learning_rate": 5.420145966513513e-06, + "loss": 59.4491, + "step": 65080 + }, + { + "epoch": 0.5384456301443521, + "grad_norm": 1209.9505615234375, + "learning_rate": 5.418740570800117e-06, + "loss": 100.2799, + "step": 65090 + }, + { + "epoch": 0.5385283533937213, + "grad_norm": 918.7635498046875, + "learning_rate": 5.4173351417694575e-06, + "loss": 98.6169, + "step": 65100 + }, + { + "epoch": 0.5386110766430905, + "grad_norm": 681.4931640625, + "learning_rate": 5.415929679533356e-06, + "loss": 95.7693, + "step": 65110 + }, + { + "epoch": 0.5386937998924598, + "grad_norm": 583.2526245117188, + "learning_rate": 5.414524184203638e-06, + "loss": 91.5053, + "step": 65120 + }, + { + "epoch": 0.538776523141829, + "grad_norm": 2256.79736328125, + "learning_rate": 5.4131186558921335e-06, + "loss": 111.124, + "step": 65130 + }, + { + "epoch": 0.5388592463911982, + "grad_norm": 1016.837890625, + "learning_rate": 5.411713094710673e-06, + "loss": 98.1128, + "step": 65140 + }, + { + "epoch": 0.5389419696405675, + "grad_norm": 914.6116943359375, + "learning_rate": 5.410307500771092e-06, + "loss": 74.292, + "step": 65150 + }, + { + "epoch": 0.5390246928899367, + "grad_norm": 1791.11962890625, + "learning_rate": 5.4089018741852264e-06, + "loss": 103.5447, + "step": 65160 + }, + { + "epoch": 0.5391074161393059, + "grad_norm": 1446.0499267578125, + "learning_rate": 5.407496215064915e-06, + "loss": 91.7238, + "step": 65170 + }, + { + "epoch": 0.5391901393886752, + "grad_norm": 865.380859375, + "learning_rate": 5.406090523521999e-06, + "loss": 127.9576, + "step": 65180 + }, + { + "epoch": 0.5392728626380444, + "grad_norm": 1105.864013671875, + "learning_rate": 5.404684799668325e-06, + "loss": 94.1973, + "step": 65190 + }, + { + "epoch": 0.5393555858874136, + "grad_norm": 675.471435546875, + "learning_rate": 5.403279043615738e-06, + "loss": 110.8662, + "step": 65200 + }, + { + "epoch": 0.5394383091367829, + "grad_norm": 0.0, + "learning_rate": 5.4018732554760875e-06, + "loss": 74.0708, + "step": 65210 + }, + { + "epoch": 0.5395210323861521, + "grad_norm": 609.0394897460938, + "learning_rate": 5.400467435361227e-06, + "loss": 110.9384, + "step": 65220 + }, + { + "epoch": 0.5396037556355213, + "grad_norm": 674.0571899414062, + 
"learning_rate": 5.399061583383013e-06, + "loss": 129.5516, + "step": 65230 + }, + { + "epoch": 0.5396864788848906, + "grad_norm": 1786.3568115234375, + "learning_rate": 5.3976556996532965e-06, + "loss": 100.5763, + "step": 65240 + }, + { + "epoch": 0.5397692021342598, + "grad_norm": 1053.052001953125, + "learning_rate": 5.396249784283943e-06, + "loss": 79.2251, + "step": 65250 + }, + { + "epoch": 0.539851925383629, + "grad_norm": 659.5496215820312, + "learning_rate": 5.394843837386812e-06, + "loss": 93.9248, + "step": 65260 + }, + { + "epoch": 0.5399346486329983, + "grad_norm": 1476.619140625, + "learning_rate": 5.39343785907377e-06, + "loss": 69.9831, + "step": 65270 + }, + { + "epoch": 0.5400173718823675, + "grad_norm": 873.2113647460938, + "learning_rate": 5.392031849456683e-06, + "loss": 97.2353, + "step": 65280 + }, + { + "epoch": 0.5401000951317367, + "grad_norm": 674.3704223632812, + "learning_rate": 5.39062580864742e-06, + "loss": 83.2568, + "step": 65290 + }, + { + "epoch": 0.540182818381106, + "grad_norm": 2080.84765625, + "learning_rate": 5.3892197367578535e-06, + "loss": 76.2265, + "step": 65300 + }, + { + "epoch": 0.5402655416304752, + "grad_norm": 442.13330078125, + "learning_rate": 5.38781363389986e-06, + "loss": 95.2125, + "step": 65310 + }, + { + "epoch": 0.5403482648798444, + "grad_norm": 638.2518920898438, + "learning_rate": 5.386407500185316e-06, + "loss": 72.7291, + "step": 65320 + }, + { + "epoch": 0.5404309881292138, + "grad_norm": 841.8578491210938, + "learning_rate": 5.3850013357261e-06, + "loss": 85.5195, + "step": 65330 + }, + { + "epoch": 0.540513711378583, + "grad_norm": 486.00140380859375, + "learning_rate": 5.383595140634093e-06, + "loss": 103.8048, + "step": 65340 + }, + { + "epoch": 0.5405964346279521, + "grad_norm": 945.3947143554688, + "learning_rate": 5.382188915021182e-06, + "loss": 105.8814, + "step": 65350 + }, + { + "epoch": 0.5406791578773215, + "grad_norm": 620.21435546875, + "learning_rate": 5.380782658999256e-06, + "loss": 61.2339, + "step": 65360 + }, + { + "epoch": 0.5407618811266907, + "grad_norm": 1061.0386962890625, + "learning_rate": 5.379376372680199e-06, + "loss": 87.4281, + "step": 65370 + }, + { + "epoch": 0.5408446043760599, + "grad_norm": 906.3760986328125, + "learning_rate": 5.377970056175905e-06, + "loss": 96.8989, + "step": 65380 + }, + { + "epoch": 0.5409273276254292, + "grad_norm": 621.90283203125, + "learning_rate": 5.376563709598267e-06, + "loss": 70.7825, + "step": 65390 + }, + { + "epoch": 0.5410100508747984, + "grad_norm": 818.9396362304688, + "learning_rate": 5.3751573330591855e-06, + "loss": 101.6279, + "step": 65400 + }, + { + "epoch": 0.5410927741241676, + "grad_norm": 859.1585693359375, + "learning_rate": 5.3737509266705555e-06, + "loss": 107.53, + "step": 65410 + }, + { + "epoch": 0.5411754973735369, + "grad_norm": 423.7478942871094, + "learning_rate": 5.37234449054428e-06, + "loss": 88.1217, + "step": 65420 + }, + { + "epoch": 0.5412582206229061, + "grad_norm": 1409.0037841796875, + "learning_rate": 5.370938024792262e-06, + "loss": 91.726, + "step": 65430 + }, + { + "epoch": 0.5413409438722753, + "grad_norm": 1655.0394287109375, + "learning_rate": 5.369531529526406e-06, + "loss": 95.8715, + "step": 65440 + }, + { + "epoch": 0.5414236671216446, + "grad_norm": 825.59326171875, + "learning_rate": 5.3681250048586246e-06, + "loss": 93.8956, + "step": 65450 + }, + { + "epoch": 0.5415063903710138, + "grad_norm": 1459.590576171875, + "learning_rate": 5.366718450900825e-06, + "loss": 122.376, + "step": 65460 + }, + { + 
"epoch": 0.541589113620383, + "grad_norm": 586.8179321289062, + "learning_rate": 5.365311867764922e-06, + "loss": 80.8746, + "step": 65470 + }, + { + "epoch": 0.5416718368697523, + "grad_norm": 973.2362060546875, + "learning_rate": 5.363905255562828e-06, + "loss": 163.7341, + "step": 65480 + }, + { + "epoch": 0.5417545601191215, + "grad_norm": 664.8019409179688, + "learning_rate": 5.362498614406466e-06, + "loss": 145.3861, + "step": 65490 + }, + { + "epoch": 0.5418372833684907, + "grad_norm": 776.0678100585938, + "learning_rate": 5.361091944407751e-06, + "loss": 91.7549, + "step": 65500 + }, + { + "epoch": 0.54192000661786, + "grad_norm": 998.9888305664062, + "learning_rate": 5.3596852456786075e-06, + "loss": 107.9666, + "step": 65510 + }, + { + "epoch": 0.5420027298672292, + "grad_norm": 866.9566040039062, + "learning_rate": 5.35827851833096e-06, + "loss": 93.1625, + "step": 65520 + }, + { + "epoch": 0.5420854531165984, + "grad_norm": 372.51800537109375, + "learning_rate": 5.356871762476735e-06, + "loss": 89.3893, + "step": 65530 + }, + { + "epoch": 0.5421681763659677, + "grad_norm": 1260.4573974609375, + "learning_rate": 5.355464978227861e-06, + "loss": 117.8481, + "step": 65540 + }, + { + "epoch": 0.5422508996153369, + "grad_norm": 911.54296875, + "learning_rate": 5.354058165696271e-06, + "loss": 107.3121, + "step": 65550 + }, + { + "epoch": 0.5423336228647061, + "grad_norm": 1118.6298828125, + "learning_rate": 5.352651324993897e-06, + "loss": 120.4601, + "step": 65560 + }, + { + "epoch": 0.5424163461140754, + "grad_norm": 873.2643432617188, + "learning_rate": 5.351244456232676e-06, + "loss": 133.8039, + "step": 65570 + }, + { + "epoch": 0.5424990693634446, + "grad_norm": 803.547119140625, + "learning_rate": 5.349837559524546e-06, + "loss": 113.3396, + "step": 65580 + }, + { + "epoch": 0.5425817926128138, + "grad_norm": 838.561767578125, + "learning_rate": 5.3484306349814455e-06, + "loss": 106.6709, + "step": 65590 + }, + { + "epoch": 0.5426645158621831, + "grad_norm": 929.80029296875, + "learning_rate": 5.34702368271532e-06, + "loss": 93.5446, + "step": 65600 + }, + { + "epoch": 0.5427472391115523, + "grad_norm": 1133.1529541015625, + "learning_rate": 5.345616702838111e-06, + "loss": 77.5028, + "step": 65610 + }, + { + "epoch": 0.5428299623609215, + "grad_norm": 945.4187622070312, + "learning_rate": 5.344209695461768e-06, + "loss": 85.2792, + "step": 65620 + }, + { + "epoch": 0.5429126856102908, + "grad_norm": 798.3004150390625, + "learning_rate": 5.3428026606982396e-06, + "loss": 75.1708, + "step": 65630 + }, + { + "epoch": 0.54299540885966, + "grad_norm": 840.9337158203125, + "learning_rate": 5.341395598659477e-06, + "loss": 139.3945, + "step": 65640 + }, + { + "epoch": 0.5430781321090292, + "grad_norm": 7416.7548828125, + "learning_rate": 5.339988509457432e-06, + "loss": 101.7888, + "step": 65650 + }, + { + "epoch": 0.5431608553583985, + "grad_norm": 914.0125732421875, + "learning_rate": 5.338581393204064e-06, + "loss": 94.0368, + "step": 65660 + }, + { + "epoch": 0.5432435786077677, + "grad_norm": 1436.60888671875, + "learning_rate": 5.337174250011326e-06, + "loss": 123.4113, + "step": 65670 + }, + { + "epoch": 0.5433263018571369, + "grad_norm": 1032.3555908203125, + "learning_rate": 5.3357670799911805e-06, + "loss": 87.8645, + "step": 65680 + }, + { + "epoch": 0.5434090251065062, + "grad_norm": 958.6307983398438, + "learning_rate": 5.334359883255591e-06, + "loss": 83.8339, + "step": 65690 + }, + { + "epoch": 0.5434917483558754, + "grad_norm": 968.6178588867188, + "learning_rate": 
5.33295265991652e-06, + "loss": 102.6183, + "step": 65700 + }, + { + "epoch": 0.5435744716052446, + "grad_norm": 601.3204345703125, + "learning_rate": 5.331545410085933e-06, + "loss": 74.607, + "step": 65710 + }, + { + "epoch": 0.5436571948546138, + "grad_norm": 1049.9119873046875, + "learning_rate": 5.330138133875799e-06, + "loss": 78.9104, + "step": 65720 + }, + { + "epoch": 0.5437399181039831, + "grad_norm": 879.0147705078125, + "learning_rate": 5.328730831398089e-06, + "loss": 92.8446, + "step": 65730 + }, + { + "epoch": 0.5438226413533523, + "grad_norm": 1260.738525390625, + "learning_rate": 5.3273235027647764e-06, + "loss": 81.406, + "step": 65740 + }, + { + "epoch": 0.5439053646027215, + "grad_norm": 807.532958984375, + "learning_rate": 5.3259161480878354e-06, + "loss": 80.0483, + "step": 65750 + }, + { + "epoch": 0.5439880878520909, + "grad_norm": 628.903076171875, + "learning_rate": 5.324508767479239e-06, + "loss": 111.094, + "step": 65760 + }, + { + "epoch": 0.54407081110146, + "grad_norm": 1162.60888671875, + "learning_rate": 5.323101361050972e-06, + "loss": 102.2003, + "step": 65770 + }, + { + "epoch": 0.5441535343508292, + "grad_norm": 840.9710693359375, + "learning_rate": 5.321693928915012e-06, + "loss": 85.5873, + "step": 65780 + }, + { + "epoch": 0.5442362576001986, + "grad_norm": 588.482666015625, + "learning_rate": 5.320286471183343e-06, + "loss": 88.2541, + "step": 65790 + }, + { + "epoch": 0.5443189808495678, + "grad_norm": 826.7291259765625, + "learning_rate": 5.3188789879679496e-06, + "loss": 113.2529, + "step": 65800 + }, + { + "epoch": 0.544401704098937, + "grad_norm": 988.3714599609375, + "learning_rate": 5.317471479380816e-06, + "loss": 92.3975, + "step": 65810 + }, + { + "epoch": 0.5444844273483063, + "grad_norm": 1229.006103515625, + "learning_rate": 5.3160639455339355e-06, + "loss": 99.8077, + "step": 65820 + }, + { + "epoch": 0.5445671505976755, + "grad_norm": 558.47802734375, + "learning_rate": 5.314656386539298e-06, + "loss": 98.0959, + "step": 65830 + }, + { + "epoch": 0.5446498738470447, + "grad_norm": 1046.564697265625, + "learning_rate": 5.313248802508896e-06, + "loss": 94.008, + "step": 65840 + }, + { + "epoch": 0.544732597096414, + "grad_norm": 943.0609741210938, + "learning_rate": 5.311841193554723e-06, + "loss": 94.6098, + "step": 65850 + }, + { + "epoch": 0.5448153203457832, + "grad_norm": 465.5363464355469, + "learning_rate": 5.310433559788778e-06, + "loss": 100.7524, + "step": 65860 + }, + { + "epoch": 0.5448980435951524, + "grad_norm": 902.67333984375, + "learning_rate": 5.309025901323059e-06, + "loss": 67.7871, + "step": 65870 + }, + { + "epoch": 0.5449807668445217, + "grad_norm": 641.2943725585938, + "learning_rate": 5.307618218269569e-06, + "loss": 89.4344, + "step": 65880 + }, + { + "epoch": 0.5450634900938909, + "grad_norm": 730.5675048828125, + "learning_rate": 5.306210510740307e-06, + "loss": 78.7692, + "step": 65890 + }, + { + "epoch": 0.5451462133432601, + "grad_norm": 562.7182006835938, + "learning_rate": 5.304802778847281e-06, + "loss": 62.6107, + "step": 65900 + }, + { + "epoch": 0.5452289365926294, + "grad_norm": 776.1548461914062, + "learning_rate": 5.303395022702495e-06, + "loss": 83.2003, + "step": 65910 + }, + { + "epoch": 0.5453116598419986, + "grad_norm": 716.1808471679688, + "learning_rate": 5.301987242417963e-06, + "loss": 77.2049, + "step": 65920 + }, + { + "epoch": 0.5453943830913678, + "grad_norm": 1019.53759765625, + "learning_rate": 5.300579438105689e-06, + "loss": 108.0206, + "step": 65930 + }, + { + "epoch": 
0.5454771063407371, + "grad_norm": 559.3324584960938, + "learning_rate": 5.29917160987769e-06, + "loss": 83.0377, + "step": 65940 + }, + { + "epoch": 0.5455598295901063, + "grad_norm": 785.0572509765625, + "learning_rate": 5.297763757845979e-06, + "loss": 88.7534, + "step": 65950 + }, + { + "epoch": 0.5456425528394755, + "grad_norm": 606.4642944335938, + "learning_rate": 5.296355882122572e-06, + "loss": 82.7635, + "step": 65960 + }, + { + "epoch": 0.5457252760888448, + "grad_norm": 1158.042236328125, + "learning_rate": 5.294947982819488e-06, + "loss": 108.4545, + "step": 65970 + }, + { + "epoch": 0.545807999338214, + "grad_norm": 412.3054504394531, + "learning_rate": 5.293540060048746e-06, + "loss": 116.0526, + "step": 65980 + }, + { + "epoch": 0.5458907225875832, + "grad_norm": 661.3358154296875, + "learning_rate": 5.292132113922369e-06, + "loss": 74.4473, + "step": 65990 + }, + { + "epoch": 0.5459734458369525, + "grad_norm": 637.5306396484375, + "learning_rate": 5.290724144552379e-06, + "loss": 104.4344, + "step": 66000 + }, + { + "epoch": 0.5460561690863217, + "grad_norm": 1194.7322998046875, + "learning_rate": 5.2893161520508055e-06, + "loss": 93.1337, + "step": 66010 + }, + { + "epoch": 0.5461388923356909, + "grad_norm": 1273.340576171875, + "learning_rate": 5.287908136529671e-06, + "loss": 102.8886, + "step": 66020 + }, + { + "epoch": 0.5462216155850602, + "grad_norm": 923.6551513671875, + "learning_rate": 5.28650009810101e-06, + "loss": 125.1421, + "step": 66030 + }, + { + "epoch": 0.5463043388344294, + "grad_norm": 996.6854248046875, + "learning_rate": 5.28509203687685e-06, + "loss": 93.7317, + "step": 66040 + }, + { + "epoch": 0.5463870620837986, + "grad_norm": 655.3250122070312, + "learning_rate": 5.283683952969224e-06, + "loss": 98.5502, + "step": 66050 + }, + { + "epoch": 0.5464697853331679, + "grad_norm": 724.341064453125, + "learning_rate": 5.282275846490169e-06, + "loss": 76.2478, + "step": 66060 + }, + { + "epoch": 0.5465525085825371, + "grad_norm": 665.1072998046875, + "learning_rate": 5.280867717551719e-06, + "loss": 90.0001, + "step": 66070 + }, + { + "epoch": 0.5466352318319063, + "grad_norm": 774.87451171875, + "learning_rate": 5.279459566265915e-06, + "loss": 83.016, + "step": 66080 + }, + { + "epoch": 0.5467179550812756, + "grad_norm": 1207.277587890625, + "learning_rate": 5.278051392744796e-06, + "loss": 115.3367, + "step": 66090 + }, + { + "epoch": 0.5468006783306448, + "grad_norm": 846.2992553710938, + "learning_rate": 5.2766431971004025e-06, + "loss": 76.7401, + "step": 66100 + }, + { + "epoch": 0.546883401580014, + "grad_norm": 794.3187866210938, + "learning_rate": 5.275234979444781e-06, + "loss": 104.4632, + "step": 66110 + }, + { + "epoch": 0.5469661248293833, + "grad_norm": 609.81787109375, + "learning_rate": 5.273826739889975e-06, + "loss": 93.3402, + "step": 66120 + }, + { + "epoch": 0.5470488480787525, + "grad_norm": 818.2113647460938, + "learning_rate": 5.272418478548031e-06, + "loss": 77.1904, + "step": 66130 + }, + { + "epoch": 0.5471315713281217, + "grad_norm": 2090.50390625, + "learning_rate": 5.271010195530999e-06, + "loss": 101.5279, + "step": 66140 + }, + { + "epoch": 0.547214294577491, + "grad_norm": 1038.06103515625, + "learning_rate": 5.26960189095093e-06, + "loss": 95.1865, + "step": 66150 + }, + { + "epoch": 0.5472970178268602, + "grad_norm": 984.3367309570312, + "learning_rate": 5.268193564919876e-06, + "loss": 91.2734, + "step": 66160 + }, + { + "epoch": 0.5473797410762294, + "grad_norm": 1194.6197509765625, + "learning_rate": 
5.26678521754989e-06, + "loss": 116.3886, + "step": 66170 + }, + { + "epoch": 0.5474624643255988, + "grad_norm": 978.132080078125, + "learning_rate": 5.265376848953031e-06, + "loss": 114.3536, + "step": 66180 + }, + { + "epoch": 0.547545187574968, + "grad_norm": 1035.0242919921875, + "learning_rate": 5.263968459241351e-06, + "loss": 89.8195, + "step": 66190 + }, + { + "epoch": 0.5476279108243371, + "grad_norm": 1031.2353515625, + "learning_rate": 5.262560048526913e-06, + "loss": 80.5158, + "step": 66200 + }, + { + "epoch": 0.5477106340737065, + "grad_norm": 308.54156494140625, + "learning_rate": 5.261151616921778e-06, + "loss": 95.6141, + "step": 66210 + }, + { + "epoch": 0.5477933573230757, + "grad_norm": 1179.9146728515625, + "learning_rate": 5.259743164538008e-06, + "loss": 115.8417, + "step": 66220 + }, + { + "epoch": 0.5478760805724449, + "grad_norm": 1147.329345703125, + "learning_rate": 5.2583346914876655e-06, + "loss": 81.3131, + "step": 66230 + }, + { + "epoch": 0.5479588038218142, + "grad_norm": 823.7957153320312, + "learning_rate": 5.2569261978828155e-06, + "loss": 151.2819, + "step": 66240 + }, + { + "epoch": 0.5480415270711834, + "grad_norm": 466.2208557128906, + "learning_rate": 5.255517683835528e-06, + "loss": 84.0521, + "step": 66250 + }, + { + "epoch": 0.5481242503205526, + "grad_norm": 1068.723876953125, + "learning_rate": 5.254109149457873e-06, + "loss": 93.1827, + "step": 66260 + }, + { + "epoch": 0.5482069735699219, + "grad_norm": 1123.1505126953125, + "learning_rate": 5.252700594861918e-06, + "loss": 107.1272, + "step": 66270 + }, + { + "epoch": 0.5482896968192911, + "grad_norm": 1002.3335571289062, + "learning_rate": 5.251292020159736e-06, + "loss": 88.9466, + "step": 66280 + }, + { + "epoch": 0.5483724200686603, + "grad_norm": 658.9024047851562, + "learning_rate": 5.2498834254634005e-06, + "loss": 87.6717, + "step": 66290 + }, + { + "epoch": 0.5484551433180296, + "grad_norm": 1128.9300537109375, + "learning_rate": 5.248474810884988e-06, + "loss": 105.05, + "step": 66300 + }, + { + "epoch": 0.5485378665673988, + "grad_norm": 853.0302734375, + "learning_rate": 5.247066176536577e-06, + "loss": 136.7064, + "step": 66310 + }, + { + "epoch": 0.548620589816768, + "grad_norm": 1292.0511474609375, + "learning_rate": 5.245657522530243e-06, + "loss": 88.9545, + "step": 66320 + }, + { + "epoch": 0.5487033130661373, + "grad_norm": 1117.95947265625, + "learning_rate": 5.244248848978067e-06, + "loss": 74.3471, + "step": 66330 + }, + { + "epoch": 0.5487860363155065, + "grad_norm": 1050.1473388671875, + "learning_rate": 5.242840155992131e-06, + "loss": 104.2194, + "step": 66340 + }, + { + "epoch": 0.5488687595648757, + "grad_norm": 462.470458984375, + "learning_rate": 5.24143144368452e-06, + "loss": 78.9187, + "step": 66350 + }, + { + "epoch": 0.548951482814245, + "grad_norm": 1257.4759521484375, + "learning_rate": 5.240022712167315e-06, + "loss": 83.4133, + "step": 66360 + }, + { + "epoch": 0.5490342060636142, + "grad_norm": 1386.3408203125, + "learning_rate": 5.2386139615526046e-06, + "loss": 116.1773, + "step": 66370 + }, + { + "epoch": 0.5491169293129834, + "grad_norm": 742.0828247070312, + "learning_rate": 5.237205191952477e-06, + "loss": 102.3286, + "step": 66380 + }, + { + "epoch": 0.5491996525623527, + "grad_norm": 1096.52392578125, + "learning_rate": 5.235796403479021e-06, + "loss": 80.9894, + "step": 66390 + }, + { + "epoch": 0.5492823758117219, + "grad_norm": 466.41204833984375, + "learning_rate": 5.2343875962443255e-06, + "loss": 75.4198, + "step": 66400 + }, + { + 
"epoch": 0.5493650990610911, + "grad_norm": 1372.77685546875, + "learning_rate": 5.2329787703604875e-06, + "loss": 86.9037, + "step": 66410 + }, + { + "epoch": 0.5494478223104604, + "grad_norm": 733.8797607421875, + "learning_rate": 5.231569925939596e-06, + "loss": 84.0945, + "step": 66420 + }, + { + "epoch": 0.5495305455598296, + "grad_norm": 815.2667236328125, + "learning_rate": 5.230161063093749e-06, + "loss": 83.4252, + "step": 66430 + }, + { + "epoch": 0.5496132688091988, + "grad_norm": 1322.669921875, + "learning_rate": 5.228752181935042e-06, + "loss": 100.3188, + "step": 66440 + }, + { + "epoch": 0.549695992058568, + "grad_norm": 1326.2945556640625, + "learning_rate": 5.227343282575574e-06, + "loss": 90.2418, + "step": 66450 + }, + { + "epoch": 0.5497787153079373, + "grad_norm": 1287.527099609375, + "learning_rate": 5.225934365127445e-06, + "loss": 82.3157, + "step": 66460 + }, + { + "epoch": 0.5498614385573065, + "grad_norm": 564.8834228515625, + "learning_rate": 5.224525429702755e-06, + "loss": 67.5519, + "step": 66470 + }, + { + "epoch": 0.5499441618066757, + "grad_norm": 1590.8048095703125, + "learning_rate": 5.223116476413606e-06, + "loss": 145.7727, + "step": 66480 + }, + { + "epoch": 0.550026885056045, + "grad_norm": 424.3537292480469, + "learning_rate": 5.221707505372105e-06, + "loss": 89.2432, + "step": 66490 + }, + { + "epoch": 0.5501096083054142, + "grad_norm": 562.13330078125, + "learning_rate": 5.220298516690353e-06, + "loss": 91.3321, + "step": 66500 + }, + { + "epoch": 0.5501923315547834, + "grad_norm": 416.3981018066406, + "learning_rate": 5.21888951048046e-06, + "loss": 114.0917, + "step": 66510 + }, + { + "epoch": 0.5502750548041527, + "grad_norm": 935.5563354492188, + "learning_rate": 5.217480486854534e-06, + "loss": 112.0527, + "step": 66520 + }, + { + "epoch": 0.5503577780535219, + "grad_norm": 448.1026916503906, + "learning_rate": 5.216071445924683e-06, + "loss": 71.1966, + "step": 66530 + }, + { + "epoch": 0.5504405013028911, + "grad_norm": 715.922119140625, + "learning_rate": 5.214662387803019e-06, + "loss": 69.2816, + "step": 66540 + }, + { + "epoch": 0.5505232245522604, + "grad_norm": 816.540771484375, + "learning_rate": 5.213253312601654e-06, + "loss": 115.1411, + "step": 66550 + }, + { + "epoch": 0.5506059478016296, + "grad_norm": 613.8909912109375, + "learning_rate": 5.211844220432702e-06, + "loss": 74.3888, + "step": 66560 + }, + { + "epoch": 0.5506886710509988, + "grad_norm": 1052.91064453125, + "learning_rate": 5.210435111408276e-06, + "loss": 91.0357, + "step": 66570 + }, + { + "epoch": 0.5507713943003681, + "grad_norm": 1757.2353515625, + "learning_rate": 5.209025985640496e-06, + "loss": 101.9064, + "step": 66580 + }, + { + "epoch": 0.5508541175497373, + "grad_norm": 1139.6336669921875, + "learning_rate": 5.207616843241476e-06, + "loss": 102.2799, + "step": 66590 + }, + { + "epoch": 0.5509368407991065, + "grad_norm": 1313.9873046875, + "learning_rate": 5.206207684323337e-06, + "loss": 93.977, + "step": 66600 + }, + { + "epoch": 0.5510195640484759, + "grad_norm": 574.5587158203125, + "learning_rate": 5.2047985089982e-06, + "loss": 88.6549, + "step": 66610 + }, + { + "epoch": 0.551102287297845, + "grad_norm": 881.9164428710938, + "learning_rate": 5.203389317378183e-06, + "loss": 108.2257, + "step": 66620 + }, + { + "epoch": 0.5511850105472142, + "grad_norm": 702.6726684570312, + "learning_rate": 5.201980109575414e-06, + "loss": 102.8949, + "step": 66630 + }, + { + "epoch": 0.5512677337965836, + "grad_norm": 975.09375, + "learning_rate": 
5.200570885702013e-06, + "loss": 95.4903, + "step": 66640 + }, + { + "epoch": 0.5513504570459528, + "grad_norm": 502.3410339355469, + "learning_rate": 5.19916164587011e-06, + "loss": 116.9168, + "step": 66650 + }, + { + "epoch": 0.551433180295322, + "grad_norm": 2496.73193359375, + "learning_rate": 5.197752390191827e-06, + "loss": 95.1221, + "step": 66660 + }, + { + "epoch": 0.5515159035446913, + "grad_norm": 1161.61572265625, + "learning_rate": 5.196343118779292e-06, + "loss": 103.2815, + "step": 66670 + }, + { + "epoch": 0.5515986267940605, + "grad_norm": 501.6972961425781, + "learning_rate": 5.194933831744637e-06, + "loss": 89.6646, + "step": 66680 + }, + { + "epoch": 0.5516813500434297, + "grad_norm": 808.2440795898438, + "learning_rate": 5.1935245291999945e-06, + "loss": 132.3228, + "step": 66690 + }, + { + "epoch": 0.551764073292799, + "grad_norm": 602.8800048828125, + "learning_rate": 5.192115211257491e-06, + "loss": 100.7651, + "step": 66700 + }, + { + "epoch": 0.5518467965421682, + "grad_norm": 705.0828247070312, + "learning_rate": 5.19070587802926e-06, + "loss": 94.9037, + "step": 66710 + }, + { + "epoch": 0.5519295197915374, + "grad_norm": 685.058349609375, + "learning_rate": 5.189296529627441e-06, + "loss": 101.691, + "step": 66720 + }, + { + "epoch": 0.5520122430409067, + "grad_norm": 765.4905395507812, + "learning_rate": 5.187887166164165e-06, + "loss": 89.7552, + "step": 66730 + }, + { + "epoch": 0.5520949662902759, + "grad_norm": 695.8983154296875, + "learning_rate": 5.186477787751569e-06, + "loss": 77.7547, + "step": 66740 + }, + { + "epoch": 0.5521776895396451, + "grad_norm": 633.330810546875, + "learning_rate": 5.185068394501791e-06, + "loss": 93.5725, + "step": 66750 + }, + { + "epoch": 0.5522604127890144, + "grad_norm": 566.3089599609375, + "learning_rate": 5.183658986526969e-06, + "loss": 86.7824, + "step": 66760 + }, + { + "epoch": 0.5523431360383836, + "grad_norm": 870.3569946289062, + "learning_rate": 5.1822495639392465e-06, + "loss": 137.4197, + "step": 66770 + }, + { + "epoch": 0.5524258592877528, + "grad_norm": 1036.458251953125, + "learning_rate": 5.180840126850764e-06, + "loss": 80.6906, + "step": 66780 + }, + { + "epoch": 0.5525085825371221, + "grad_norm": 792.98779296875, + "learning_rate": 5.179430675373659e-06, + "loss": 91.3037, + "step": 66790 + }, + { + "epoch": 0.5525913057864913, + "grad_norm": 1176.4154052734375, + "learning_rate": 5.17802120962008e-06, + "loss": 129.9155, + "step": 66800 + }, + { + "epoch": 0.5526740290358605, + "grad_norm": 1051.582275390625, + "learning_rate": 5.17661172970217e-06, + "loss": 73.2745, + "step": 66810 + }, + { + "epoch": 0.5527567522852298, + "grad_norm": 1047.6094970703125, + "learning_rate": 5.175202235732077e-06, + "loss": 99.0051, + "step": 66820 + }, + { + "epoch": 0.552839475534599, + "grad_norm": 871.4730834960938, + "learning_rate": 5.1737927278219446e-06, + "loss": 86.3209, + "step": 66830 + }, + { + "epoch": 0.5529221987839682, + "grad_norm": 644.1160888671875, + "learning_rate": 5.1723832060839216e-06, + "loss": 90.1693, + "step": 66840 + }, + { + "epoch": 0.5530049220333375, + "grad_norm": 688.15576171875, + "learning_rate": 5.170973670630159e-06, + "loss": 126.0678, + "step": 66850 + }, + { + "epoch": 0.5530876452827067, + "grad_norm": 737.1720581054688, + "learning_rate": 5.169564121572806e-06, + "loss": 83.4195, + "step": 66860 + }, + { + "epoch": 0.5531703685320759, + "grad_norm": 719.3773803710938, + "learning_rate": 5.168154559024014e-06, + "loss": 91.4589, + "step": 66870 + }, + { + "epoch": 
0.5532530917814452, + "grad_norm": 1081.61572265625, + "learning_rate": 5.166744983095937e-06, + "loss": 115.1463, + "step": 66880 + }, + { + "epoch": 0.5533358150308144, + "grad_norm": 863.379150390625, + "learning_rate": 5.165335393900726e-06, + "loss": 91.273, + "step": 66890 + }, + { + "epoch": 0.5534185382801836, + "grad_norm": 1196.47509765625, + "learning_rate": 5.163925791550536e-06, + "loss": 74.5763, + "step": 66900 + }, + { + "epoch": 0.5535012615295529, + "grad_norm": 754.5050659179688, + "learning_rate": 5.162516176157523e-06, + "loss": 78.6246, + "step": 66910 + }, + { + "epoch": 0.5535839847789221, + "grad_norm": 332.3321228027344, + "learning_rate": 5.161106547833843e-06, + "loss": 78.1392, + "step": 66920 + }, + { + "epoch": 0.5536667080282913, + "grad_norm": 459.50701904296875, + "learning_rate": 5.159696906691656e-06, + "loss": 78.0986, + "step": 66930 + }, + { + "epoch": 0.5537494312776606, + "grad_norm": 1238.329833984375, + "learning_rate": 5.158287252843118e-06, + "loss": 105.3073, + "step": 66940 + }, + { + "epoch": 0.5538321545270298, + "grad_norm": 885.58251953125, + "learning_rate": 5.1568775864003894e-06, + "loss": 102.8519, + "step": 66950 + }, + { + "epoch": 0.553914877776399, + "grad_norm": 634.6365966796875, + "learning_rate": 5.155467907475632e-06, + "loss": 114.2997, + "step": 66960 + }, + { + "epoch": 0.5539976010257683, + "grad_norm": 957.9181518554688, + "learning_rate": 5.154058216181007e-06, + "loss": 86.0915, + "step": 66970 + }, + { + "epoch": 0.5540803242751375, + "grad_norm": 1111.42919921875, + "learning_rate": 5.1526485126286766e-06, + "loss": 98.9156, + "step": 66980 + }, + { + "epoch": 0.5541630475245067, + "grad_norm": 1166.4410400390625, + "learning_rate": 5.151238796930804e-06, + "loss": 125.5735, + "step": 66990 + }, + { + "epoch": 0.554245770773876, + "grad_norm": 650.7413330078125, + "learning_rate": 5.149829069199555e-06, + "loss": 83.7539, + "step": 67000 + }, + { + "epoch": 0.5543284940232452, + "grad_norm": 654.6986694335938, + "learning_rate": 5.148419329547094e-06, + "loss": 85.525, + "step": 67010 + }, + { + "epoch": 0.5544112172726144, + "grad_norm": 713.3673706054688, + "learning_rate": 5.147009578085589e-06, + "loss": 96.1334, + "step": 67020 + }, + { + "epoch": 0.5544939405219838, + "grad_norm": 1134.079345703125, + "learning_rate": 5.145599814927205e-06, + "loss": 106.5868, + "step": 67030 + }, + { + "epoch": 0.554576663771353, + "grad_norm": 782.4326171875, + "learning_rate": 5.144190040184114e-06, + "loss": 81.4991, + "step": 67040 + }, + { + "epoch": 0.5546593870207221, + "grad_norm": 885.5360107421875, + "learning_rate": 5.142780253968481e-06, + "loss": 106.0122, + "step": 67050 + }, + { + "epoch": 0.5547421102700915, + "grad_norm": 761.8356323242188, + "learning_rate": 5.14137045639248e-06, + "loss": 106.5318, + "step": 67060 + }, + { + "epoch": 0.5548248335194607, + "grad_norm": 796.8743896484375, + "learning_rate": 5.13996064756828e-06, + "loss": 70.0908, + "step": 67070 + }, + { + "epoch": 0.5549075567688299, + "grad_norm": 1602.5904541015625, + "learning_rate": 5.138550827608055e-06, + "loss": 97.1062, + "step": 67080 + }, + { + "epoch": 0.5549902800181992, + "grad_norm": 1278.1387939453125, + "learning_rate": 5.137140996623975e-06, + "loss": 85.0867, + "step": 67090 + }, + { + "epoch": 0.5550730032675684, + "grad_norm": 1092.7744140625, + "learning_rate": 5.135731154728215e-06, + "loss": 95.7226, + "step": 67100 + }, + { + "epoch": 0.5551557265169376, + "grad_norm": 897.9187622070312, + "learning_rate": 
5.134321302032951e-06, + "loss": 118.3043, + "step": 67110 + }, + { + "epoch": 0.5552384497663069, + "grad_norm": 1308.584716796875, + "learning_rate": 5.1329114386503585e-06, + "loss": 136.3629, + "step": 67120 + }, + { + "epoch": 0.5553211730156761, + "grad_norm": 1024.6846923828125, + "learning_rate": 5.131501564692611e-06, + "loss": 121.1511, + "step": 67130 + }, + { + "epoch": 0.5554038962650453, + "grad_norm": 894.513671875, + "learning_rate": 5.130091680271887e-06, + "loss": 98.0504, + "step": 67140 + }, + { + "epoch": 0.5554866195144146, + "grad_norm": 1355.4970703125, + "learning_rate": 5.128681785500365e-06, + "loss": 91.5048, + "step": 67150 + }, + { + "epoch": 0.5555693427637838, + "grad_norm": 660.9675903320312, + "learning_rate": 5.127271880490227e-06, + "loss": 107.9526, + "step": 67160 + }, + { + "epoch": 0.555652066013153, + "grad_norm": 639.190185546875, + "learning_rate": 5.125861965353647e-06, + "loss": 95.762, + "step": 67170 + }, + { + "epoch": 0.5557347892625222, + "grad_norm": 1066.5399169921875, + "learning_rate": 5.124452040202809e-06, + "loss": 72.5684, + "step": 67180 + }, + { + "epoch": 0.5558175125118915, + "grad_norm": 1595.59033203125, + "learning_rate": 5.1230421051498914e-06, + "loss": 101.4106, + "step": 67190 + }, + { + "epoch": 0.5559002357612607, + "grad_norm": 700.05419921875, + "learning_rate": 5.121632160307078e-06, + "loss": 110.3626, + "step": 67200 + }, + { + "epoch": 0.5559829590106299, + "grad_norm": 1288.469482421875, + "learning_rate": 5.120222205786556e-06, + "loss": 134.359, + "step": 67210 + }, + { + "epoch": 0.5560656822599992, + "grad_norm": 1454.6519775390625, + "learning_rate": 5.118812241700501e-06, + "loss": 92.2554, + "step": 67220 + }, + { + "epoch": 0.5561484055093684, + "grad_norm": 937.5780029296875, + "learning_rate": 5.117402268161103e-06, + "loss": 78.8586, + "step": 67230 + }, + { + "epoch": 0.5562311287587376, + "grad_norm": 1452.914306640625, + "learning_rate": 5.115992285280543e-06, + "loss": 90.6466, + "step": 67240 + }, + { + "epoch": 0.5563138520081069, + "grad_norm": 808.0802612304688, + "learning_rate": 5.114582293171012e-06, + "loss": 84.3542, + "step": 67250 + }, + { + "epoch": 0.5563965752574761, + "grad_norm": 1291.392333984375, + "learning_rate": 5.113172291944693e-06, + "loss": 80.4222, + "step": 67260 + }, + { + "epoch": 0.5564792985068453, + "grad_norm": 0.0, + "learning_rate": 5.111762281713773e-06, + "loss": 96.9373, + "step": 67270 + }, + { + "epoch": 0.5565620217562146, + "grad_norm": 460.95623779296875, + "learning_rate": 5.110352262590442e-06, + "loss": 98.4892, + "step": 67280 + }, + { + "epoch": 0.5566447450055838, + "grad_norm": 921.4959106445312, + "learning_rate": 5.108942234686889e-06, + "loss": 85.5142, + "step": 67290 + }, + { + "epoch": 0.556727468254953, + "grad_norm": 909.4285278320312, + "learning_rate": 5.1075321981153014e-06, + "loss": 79.041, + "step": 67300 + }, + { + "epoch": 0.5568101915043223, + "grad_norm": 786.9864501953125, + "learning_rate": 5.106122152987869e-06, + "loss": 131.3443, + "step": 67310 + }, + { + "epoch": 0.5568929147536915, + "grad_norm": 837.8712158203125, + "learning_rate": 5.1047120994167855e-06, + "loss": 111.7526, + "step": 67320 + }, + { + "epoch": 0.5569756380030607, + "grad_norm": 748.6630859375, + "learning_rate": 5.103302037514241e-06, + "loss": 83.1395, + "step": 67330 + }, + { + "epoch": 0.55705836125243, + "grad_norm": 422.8838195800781, + "learning_rate": 5.101891967392426e-06, + "loss": 132.004, + "step": 67340 + }, + { + "epoch": 0.5571410845017992, 
+ "grad_norm": 784.1287231445312, + "learning_rate": 5.100481889163535e-06, + "loss": 84.0244, + "step": 67350 + }, + { + "epoch": 0.5572238077511684, + "grad_norm": 871.6853637695312, + "learning_rate": 5.099071802939763e-06, + "loss": 96.8194, + "step": 67360 + }, + { + "epoch": 0.5573065310005377, + "grad_norm": 1200.704833984375, + "learning_rate": 5.097661708833302e-06, + "loss": 101.7852, + "step": 67370 + }, + { + "epoch": 0.5573892542499069, + "grad_norm": 605.1507568359375, + "learning_rate": 5.096251606956345e-06, + "loss": 114.6154, + "step": 67380 + }, + { + "epoch": 0.5574719774992761, + "grad_norm": 1629.7506103515625, + "learning_rate": 5.0948414974210906e-06, + "loss": 108.8764, + "step": 67390 + }, + { + "epoch": 0.5575547007486454, + "grad_norm": 963.1193237304688, + "learning_rate": 5.093431380339734e-06, + "loss": 83.0829, + "step": 67400 + }, + { + "epoch": 0.5576374239980146, + "grad_norm": 984.2836303710938, + "learning_rate": 5.092021255824471e-06, + "loss": 90.3172, + "step": 67410 + }, + { + "epoch": 0.5577201472473838, + "grad_norm": 846.0245971679688, + "learning_rate": 5.090611123987498e-06, + "loss": 75.3123, + "step": 67420 + }, + { + "epoch": 0.5578028704967531, + "grad_norm": 880.2362670898438, + "learning_rate": 5.089200984941014e-06, + "loss": 86.5572, + "step": 67430 + }, + { + "epoch": 0.5578855937461223, + "grad_norm": 1226.6094970703125, + "learning_rate": 5.087790838797217e-06, + "loss": 76.6975, + "step": 67440 + }, + { + "epoch": 0.5579683169954915, + "grad_norm": 1174.342529296875, + "learning_rate": 5.0863806856683076e-06, + "loss": 97.9613, + "step": 67450 + }, + { + "epoch": 0.5580510402448609, + "grad_norm": 832.6563110351562, + "learning_rate": 5.084970525666481e-06, + "loss": 77.5357, + "step": 67460 + }, + { + "epoch": 0.55813376349423, + "grad_norm": 1207.7735595703125, + "learning_rate": 5.083560358903942e-06, + "loss": 108.6904, + "step": 67470 + }, + { + "epoch": 0.5582164867435992, + "grad_norm": 1580.60791015625, + "learning_rate": 5.082150185492887e-06, + "loss": 123.0784, + "step": 67480 + }, + { + "epoch": 0.5582992099929686, + "grad_norm": 911.670654296875, + "learning_rate": 5.080740005545519e-06, + "loss": 81.2963, + "step": 67490 + }, + { + "epoch": 0.5583819332423378, + "grad_norm": 1346.0203857421875, + "learning_rate": 5.07932981917404e-06, + "loss": 105.0922, + "step": 67500 + }, + { + "epoch": 0.558464656491707, + "grad_norm": 1385.7469482421875, + "learning_rate": 5.077919626490651e-06, + "loss": 111.0049, + "step": 67510 + }, + { + "epoch": 0.5585473797410763, + "grad_norm": 2228.890380859375, + "learning_rate": 5.076509427607555e-06, + "loss": 89.5055, + "step": 67520 + }, + { + "epoch": 0.5586301029904455, + "grad_norm": 507.3185119628906, + "learning_rate": 5.075099222636954e-06, + "loss": 78.911, + "step": 67530 + }, + { + "epoch": 0.5587128262398147, + "grad_norm": 551.4053955078125, + "learning_rate": 5.073689011691054e-06, + "loss": 150.896, + "step": 67540 + }, + { + "epoch": 0.558795549489184, + "grad_norm": 350.7704162597656, + "learning_rate": 5.072278794882058e-06, + "loss": 78.5772, + "step": 67550 + }, + { + "epoch": 0.5588782727385532, + "grad_norm": 682.6192626953125, + "learning_rate": 5.07086857232217e-06, + "loss": 93.3245, + "step": 67560 + }, + { + "epoch": 0.5589609959879224, + "grad_norm": 699.5905151367188, + "learning_rate": 5.069458344123592e-06, + "loss": 109.2068, + "step": 67570 + }, + { + "epoch": 0.5590437192372917, + "grad_norm": 1194.6055908203125, + "learning_rate": 
5.068048110398535e-06, + "loss": 103.4686, + "step": 67580 + }, + { + "epoch": 0.5591264424866609, + "grad_norm": 671.2708740234375, + "learning_rate": 5.066637871259201e-06, + "loss": 69.1066, + "step": 67590 + }, + { + "epoch": 0.5592091657360301, + "grad_norm": 817.7105102539062, + "learning_rate": 5.065227626817798e-06, + "loss": 105.476, + "step": 67600 + }, + { + "epoch": 0.5592918889853994, + "grad_norm": 664.9489135742188, + "learning_rate": 5.063817377186531e-06, + "loss": 67.6632, + "step": 67610 + }, + { + "epoch": 0.5593746122347686, + "grad_norm": 1241.629638671875, + "learning_rate": 5.062407122477609e-06, + "loss": 87.1826, + "step": 67620 + }, + { + "epoch": 0.5594573354841378, + "grad_norm": 1491.961181640625, + "learning_rate": 5.060996862803239e-06, + "loss": 78.7851, + "step": 67630 + }, + { + "epoch": 0.5595400587335071, + "grad_norm": 981.352294921875, + "learning_rate": 5.0595865982756284e-06, + "loss": 106.6009, + "step": 67640 + }, + { + "epoch": 0.5596227819828763, + "grad_norm": 695.9772338867188, + "learning_rate": 5.0581763290069865e-06, + "loss": 85.9514, + "step": 67650 + }, + { + "epoch": 0.5597055052322455, + "grad_norm": 1229.713134765625, + "learning_rate": 5.05676605510952e-06, + "loss": 105.0807, + "step": 67660 + }, + { + "epoch": 0.5597882284816148, + "grad_norm": 534.1005859375, + "learning_rate": 5.055355776695437e-06, + "loss": 93.8244, + "step": 67670 + }, + { + "epoch": 0.559870951730984, + "grad_norm": 911.0359497070312, + "learning_rate": 5.0539454938769525e-06, + "loss": 101.3288, + "step": 67680 + }, + { + "epoch": 0.5599536749803532, + "grad_norm": 918.2195434570312, + "learning_rate": 5.052535206766271e-06, + "loss": 89.4984, + "step": 67690 + }, + { + "epoch": 0.5600363982297225, + "grad_norm": 1059.2137451171875, + "learning_rate": 5.051124915475604e-06, + "loss": 99.4896, + "step": 67700 + }, + { + "epoch": 0.5601191214790917, + "grad_norm": 1157.1102294921875, + "learning_rate": 5.049714620117162e-06, + "loss": 105.2878, + "step": 67710 + }, + { + "epoch": 0.5602018447284609, + "grad_norm": 666.695068359375, + "learning_rate": 5.0483043208031575e-06, + "loss": 109.8217, + "step": 67720 + }, + { + "epoch": 0.5602845679778302, + "grad_norm": 1238.229736328125, + "learning_rate": 5.0468940176458e-06, + "loss": 94.0387, + "step": 67730 + }, + { + "epoch": 0.5603672912271994, + "grad_norm": 2138.0751953125, + "learning_rate": 5.045483710757298e-06, + "loss": 95.2673, + "step": 67740 + }, + { + "epoch": 0.5604500144765686, + "grad_norm": 914.434326171875, + "learning_rate": 5.044073400249867e-06, + "loss": 92.1393, + "step": 67750 + }, + { + "epoch": 0.5605327377259379, + "grad_norm": 1934.4398193359375, + "learning_rate": 5.0426630862357176e-06, + "loss": 142.6514, + "step": 67760 + }, + { + "epoch": 0.5606154609753071, + "grad_norm": 1084.5147705078125, + "learning_rate": 5.041252768827064e-06, + "loss": 133.2949, + "step": 67770 + }, + { + "epoch": 0.5606981842246763, + "grad_norm": 748.2398071289062, + "learning_rate": 5.039842448136115e-06, + "loss": 69.7304, + "step": 67780 + }, + { + "epoch": 0.5607809074740456, + "grad_norm": 804.4089965820312, + "learning_rate": 5.038432124275087e-06, + "loss": 88.9984, + "step": 67790 + }, + { + "epoch": 0.5608636307234148, + "grad_norm": 535.5092163085938, + "learning_rate": 5.03702179735619e-06, + "loss": 88.9917, + "step": 67800 + }, + { + "epoch": 0.560946353972784, + "grad_norm": 635.5907592773438, + "learning_rate": 5.035611467491638e-06, + "loss": 92.2593, + "step": 67810 + }, + { + "epoch": 
0.5610290772221533, + "grad_norm": 851.3953247070312, + "learning_rate": 5.034201134793646e-06, + "loss": 126.7755, + "step": 67820 + }, + { + "epoch": 0.5611118004715225, + "grad_norm": 780.0504760742188, + "learning_rate": 5.032790799374426e-06, + "loss": 131.121, + "step": 67830 + }, + { + "epoch": 0.5611945237208917, + "grad_norm": 1291.3529052734375, + "learning_rate": 5.0313804613461925e-06, + "loss": 110.4924, + "step": 67840 + }, + { + "epoch": 0.561277246970261, + "grad_norm": 860.24072265625, + "learning_rate": 5.0299701208211605e-06, + "loss": 83.46, + "step": 67850 + }, + { + "epoch": 0.5613599702196302, + "grad_norm": 1199.1927490234375, + "learning_rate": 5.028559777911543e-06, + "loss": 107.3534, + "step": 67860 + }, + { + "epoch": 0.5614426934689994, + "grad_norm": 613.6046752929688, + "learning_rate": 5.027149432729555e-06, + "loss": 88.3334, + "step": 67870 + }, + { + "epoch": 0.5615254167183688, + "grad_norm": 1034.6842041015625, + "learning_rate": 5.025739085387411e-06, + "loss": 106.7181, + "step": 67880 + }, + { + "epoch": 0.561608139967738, + "grad_norm": 968.4409790039062, + "learning_rate": 5.024328735997327e-06, + "loss": 73.4767, + "step": 67890 + }, + { + "epoch": 0.5616908632171072, + "grad_norm": 636.9981689453125, + "learning_rate": 5.0229183846715154e-06, + "loss": 80.4445, + "step": 67900 + }, + { + "epoch": 0.5617735864664763, + "grad_norm": 762.04931640625, + "learning_rate": 5.021508031522195e-06, + "loss": 106.2328, + "step": 67910 + }, + { + "epoch": 0.5618563097158457, + "grad_norm": 1687.2122802734375, + "learning_rate": 5.0200976766615785e-06, + "loss": 103.8904, + "step": 67920 + }, + { + "epoch": 0.5619390329652149, + "grad_norm": 1144.8087158203125, + "learning_rate": 5.018687320201882e-06, + "loss": 88.8534, + "step": 67930 + }, + { + "epoch": 0.5620217562145841, + "grad_norm": 1060.1658935546875, + "learning_rate": 5.017276962255323e-06, + "loss": 117.6395, + "step": 67940 + }, + { + "epoch": 0.5621044794639534, + "grad_norm": 541.5599975585938, + "learning_rate": 5.015866602934112e-06, + "loss": 81.2837, + "step": 67950 + }, + { + "epoch": 0.5621872027133226, + "grad_norm": 1171.819091796875, + "learning_rate": 5.01445624235047e-06, + "loss": 122.8472, + "step": 67960 + }, + { + "epoch": 0.5622699259626918, + "grad_norm": 569.9471435546875, + "learning_rate": 5.013045880616612e-06, + "loss": 99.2828, + "step": 67970 + }, + { + "epoch": 0.5623526492120611, + "grad_norm": 857.0740356445312, + "learning_rate": 5.011635517844753e-06, + "loss": 79.8719, + "step": 67980 + }, + { + "epoch": 0.5624353724614303, + "grad_norm": 1649.099853515625, + "learning_rate": 5.010225154147107e-06, + "loss": 116.2859, + "step": 67990 + }, + { + "epoch": 0.5625180957107995, + "grad_norm": 991.4071044921875, + "learning_rate": 5.008814789635894e-06, + "loss": 84.7862, + "step": 68000 + }, + { + "epoch": 0.5626008189601688, + "grad_norm": 752.6597900390625, + "learning_rate": 5.007404424423329e-06, + "loss": 98.8501, + "step": 68010 + }, + { + "epoch": 0.562683542209538, + "grad_norm": 720.8966064453125, + "learning_rate": 5.0059940586216284e-06, + "loss": 105.6753, + "step": 68020 + }, + { + "epoch": 0.5627662654589072, + "grad_norm": 720.338134765625, + "learning_rate": 5.004583692343007e-06, + "loss": 104.9102, + "step": 68030 + }, + { + "epoch": 0.5628489887082765, + "grad_norm": 3147.158447265625, + "learning_rate": 5.003173325699682e-06, + "loss": 115.8981, + "step": 68040 + }, + { + "epoch": 0.5629317119576457, + "grad_norm": 649.7725830078125, + 
"learning_rate": 5.00176295880387e-06, + "loss": 99.7788, + "step": 68050 + }, + { + "epoch": 0.5630144352070149, + "grad_norm": 523.4342651367188, + "learning_rate": 5.000352591767787e-06, + "loss": 82.1472, + "step": 68060 + }, + { + "epoch": 0.5630971584563842, + "grad_norm": 704.3622436523438, + "learning_rate": 4.998942224703651e-06, + "loss": 95.6916, + "step": 68070 + }, + { + "epoch": 0.5631798817057534, + "grad_norm": 487.66796875, + "learning_rate": 4.997531857723678e-06, + "loss": 77.8491, + "step": 68080 + }, + { + "epoch": 0.5632626049551226, + "grad_norm": 959.2645874023438, + "learning_rate": 4.996121490940084e-06, + "loss": 106.0061, + "step": 68090 + }, + { + "epoch": 0.5633453282044919, + "grad_norm": 1130.561279296875, + "learning_rate": 4.994711124465084e-06, + "loss": 86.5281, + "step": 68100 + }, + { + "epoch": 0.5634280514538611, + "grad_norm": 675.5780639648438, + "learning_rate": 4.993300758410895e-06, + "loss": 110.0674, + "step": 68110 + }, + { + "epoch": 0.5635107747032303, + "grad_norm": 833.6633911132812, + "learning_rate": 4.991890392889735e-06, + "loss": 93.0616, + "step": 68120 + }, + { + "epoch": 0.5635934979525996, + "grad_norm": 497.2891845703125, + "learning_rate": 4.990480028013818e-06, + "loss": 102.913, + "step": 68130 + }, + { + "epoch": 0.5636762212019688, + "grad_norm": 976.9085693359375, + "learning_rate": 4.989069663895361e-06, + "loss": 94.7653, + "step": 68140 + }, + { + "epoch": 0.563758944451338, + "grad_norm": 1526.7978515625, + "learning_rate": 4.9876593006465825e-06, + "loss": 74.5033, + "step": 68150 + }, + { + "epoch": 0.5638416677007073, + "grad_norm": 903.107177734375, + "learning_rate": 4.986248938379696e-06, + "loss": 80.8739, + "step": 68160 + }, + { + "epoch": 0.5639243909500765, + "grad_norm": 1482.0433349609375, + "learning_rate": 4.984838577206921e-06, + "loss": 106.9815, + "step": 68170 + }, + { + "epoch": 0.5640071141994457, + "grad_norm": 4656.763671875, + "learning_rate": 4.9834282172404665e-06, + "loss": 115.4343, + "step": 68180 + }, + { + "epoch": 0.564089837448815, + "grad_norm": 661.7854614257812, + "learning_rate": 4.982017858592555e-06, + "loss": 98.6977, + "step": 68190 + }, + { + "epoch": 0.5641725606981842, + "grad_norm": 654.1882934570312, + "learning_rate": 4.980607501375399e-06, + "loss": 113.0739, + "step": 68200 + }, + { + "epoch": 0.5642552839475534, + "grad_norm": 1355.635498046875, + "learning_rate": 4.979197145701216e-06, + "loss": 125.4591, + "step": 68210 + }, + { + "epoch": 0.5643380071969227, + "grad_norm": 793.7406005859375, + "learning_rate": 4.977786791682221e-06, + "loss": 94.5731, + "step": 68220 + }, + { + "epoch": 0.5644207304462919, + "grad_norm": 819.9609985351562, + "learning_rate": 4.976376439430627e-06, + "loss": 94.1121, + "step": 68230 + }, + { + "epoch": 0.5645034536956611, + "grad_norm": 687.6693725585938, + "learning_rate": 4.974966089058652e-06, + "loss": 91.6705, + "step": 68240 + }, + { + "epoch": 0.5645861769450304, + "grad_norm": 645.421875, + "learning_rate": 4.973555740678512e-06, + "loss": 115.3419, + "step": 68250 + }, + { + "epoch": 0.5646689001943996, + "grad_norm": 675.6929931640625, + "learning_rate": 4.972145394402421e-06, + "loss": 83.4908, + "step": 68260 + }, + { + "epoch": 0.5647516234437688, + "grad_norm": 1000.2984008789062, + "learning_rate": 4.9707350503425905e-06, + "loss": 93.1049, + "step": 68270 + }, + { + "epoch": 0.5648343466931381, + "grad_norm": 996.8165283203125, + "learning_rate": 4.969324708611239e-06, + "loss": 87.6225, + "step": 68280 + }, + { + 
"epoch": 0.5649170699425073, + "grad_norm": 605.3995361328125, + "learning_rate": 4.9679143693205785e-06, + "loss": 92.8545, + "step": 68290 + }, + { + "epoch": 0.5649997931918765, + "grad_norm": 1098.21044921875, + "learning_rate": 4.966504032582826e-06, + "loss": 129.6778, + "step": 68300 + }, + { + "epoch": 0.5650825164412459, + "grad_norm": 822.2824096679688, + "learning_rate": 4.965093698510192e-06, + "loss": 96.9237, + "step": 68310 + }, + { + "epoch": 0.565165239690615, + "grad_norm": 818.29931640625, + "learning_rate": 4.963683367214895e-06, + "loss": 88.808, + "step": 68320 + }, + { + "epoch": 0.5652479629399842, + "grad_norm": 1037.8428955078125, + "learning_rate": 4.962273038809143e-06, + "loss": 108.7637, + "step": 68330 + }, + { + "epoch": 0.5653306861893536, + "grad_norm": 614.8590698242188, + "learning_rate": 4.960862713405153e-06, + "loss": 89.6115, + "step": 68340 + }, + { + "epoch": 0.5654134094387228, + "grad_norm": 389.6104736328125, + "learning_rate": 4.95945239111514e-06, + "loss": 93.0896, + "step": 68350 + }, + { + "epoch": 0.565496132688092, + "grad_norm": 1135.781005859375, + "learning_rate": 4.9580420720513115e-06, + "loss": 151.4895, + "step": 68360 + }, + { + "epoch": 0.5655788559374613, + "grad_norm": 816.8753051757812, + "learning_rate": 4.956631756325882e-06, + "loss": 98.8892, + "step": 68370 + }, + { + "epoch": 0.5656615791868305, + "grad_norm": 782.5609741210938, + "learning_rate": 4.955221444051066e-06, + "loss": 77.5715, + "step": 68380 + }, + { + "epoch": 0.5657443024361997, + "grad_norm": 545.4894409179688, + "learning_rate": 4.953811135339073e-06, + "loss": 98.956, + "step": 68390 + }, + { + "epoch": 0.565827025685569, + "grad_norm": 789.0388793945312, + "learning_rate": 4.952400830302117e-06, + "loss": 128.0591, + "step": 68400 + }, + { + "epoch": 0.5659097489349382, + "grad_norm": 688.3363037109375, + "learning_rate": 4.950990529052409e-06, + "loss": 79.7653, + "step": 68410 + }, + { + "epoch": 0.5659924721843074, + "grad_norm": 1568.041259765625, + "learning_rate": 4.949580231702158e-06, + "loss": 100.4565, + "step": 68420 + }, + { + "epoch": 0.5660751954336767, + "grad_norm": 828.0899658203125, + "learning_rate": 4.94816993836358e-06, + "loss": 102.3955, + "step": 68430 + }, + { + "epoch": 0.5661579186830459, + "grad_norm": 532.2860107421875, + "learning_rate": 4.946759649148879e-06, + "loss": 77.4463, + "step": 68440 + }, + { + "epoch": 0.5662406419324151, + "grad_norm": 925.8582763671875, + "learning_rate": 4.945349364170269e-06, + "loss": 89.3156, + "step": 68450 + }, + { + "epoch": 0.5663233651817844, + "grad_norm": 2182.206787109375, + "learning_rate": 4.94393908353996e-06, + "loss": 113.8303, + "step": 68460 + }, + { + "epoch": 0.5664060884311536, + "grad_norm": 785.1996459960938, + "learning_rate": 4.942528807370158e-06, + "loss": 98.9256, + "step": 68470 + }, + { + "epoch": 0.5664888116805228, + "grad_norm": 636.3436279296875, + "learning_rate": 4.941118535773078e-06, + "loss": 89.0144, + "step": 68480 + }, + { + "epoch": 0.5665715349298921, + "grad_norm": 1138.821044921875, + "learning_rate": 4.9397082688609245e-06, + "loss": 65.4779, + "step": 68490 + }, + { + "epoch": 0.5666542581792613, + "grad_norm": 704.7152099609375, + "learning_rate": 4.938298006745909e-06, + "loss": 97.2847, + "step": 68500 + }, + { + "epoch": 0.5667369814286305, + "grad_norm": 1183.618896484375, + "learning_rate": 4.936887749540236e-06, + "loss": 85.178, + "step": 68510 + }, + { + "epoch": 0.5668197046779998, + "grad_norm": 960.0731201171875, + "learning_rate": 
4.935477497356118e-06, + "loss": 87.2744, + "step": 68520 + }, + { + "epoch": 0.566902427927369, + "grad_norm": 1119.224853515625, + "learning_rate": 4.934067250305757e-06, + "loss": 104.8437, + "step": 68530 + }, + { + "epoch": 0.5669851511767382, + "grad_norm": 606.4401245117188, + "learning_rate": 4.932657008501362e-06, + "loss": 114.7352, + "step": 68540 + }, + { + "epoch": 0.5670678744261075, + "grad_norm": 829.0963745117188, + "learning_rate": 4.931246772055141e-06, + "loss": 112.2022, + "step": 68550 + }, + { + "epoch": 0.5671505976754767, + "grad_norm": 1182.5452880859375, + "learning_rate": 4.9298365410792985e-06, + "loss": 118.4558, + "step": 68560 + }, + { + "epoch": 0.5672333209248459, + "grad_norm": 873.345947265625, + "learning_rate": 4.928426315686039e-06, + "loss": 66.0995, + "step": 68570 + }, + { + "epoch": 0.5673160441742152, + "grad_norm": 1000.1868896484375, + "learning_rate": 4.92701609598757e-06, + "loss": 99.3208, + "step": 68580 + }, + { + "epoch": 0.5673987674235844, + "grad_norm": 782.4760131835938, + "learning_rate": 4.925605882096096e-06, + "loss": 91.0728, + "step": 68590 + }, + { + "epoch": 0.5674814906729536, + "grad_norm": 884.9342651367188, + "learning_rate": 4.924195674123821e-06, + "loss": 89.9195, + "step": 68600 + }, + { + "epoch": 0.5675642139223229, + "grad_norm": 813.310546875, + "learning_rate": 4.922785472182948e-06, + "loss": 72.9367, + "step": 68610 + }, + { + "epoch": 0.5676469371716921, + "grad_norm": 566.2291259765625, + "learning_rate": 4.92137527638568e-06, + "loss": 67.9247, + "step": 68620 + }, + { + "epoch": 0.5677296604210613, + "grad_norm": 667.0711059570312, + "learning_rate": 4.919965086844221e-06, + "loss": 102.4399, + "step": 68630 + }, + { + "epoch": 0.5678123836704305, + "grad_norm": 680.4052124023438, + "learning_rate": 4.9185549036707715e-06, + "loss": 74.7348, + "step": 68640 + }, + { + "epoch": 0.5678951069197998, + "grad_norm": 801.6771240234375, + "learning_rate": 4.917144726977535e-06, + "loss": 124.5145, + "step": 68650 + }, + { + "epoch": 0.567977830169169, + "grad_norm": 318.88824462890625, + "learning_rate": 4.915734556876713e-06, + "loss": 72.6641, + "step": 68660 + }, + { + "epoch": 0.5680605534185382, + "grad_norm": 822.7666625976562, + "learning_rate": 4.914324393480504e-06, + "loss": 89.2944, + "step": 68670 + }, + { + "epoch": 0.5681432766679075, + "grad_norm": 956.2263793945312, + "learning_rate": 4.9129142369011105e-06, + "loss": 106.6015, + "step": 68680 + }, + { + "epoch": 0.5682259999172767, + "grad_norm": 811.5741577148438, + "learning_rate": 4.911504087250735e-06, + "loss": 95.9006, + "step": 68690 + }, + { + "epoch": 0.5683087231666459, + "grad_norm": 606.1180419921875, + "learning_rate": 4.910093944641569e-06, + "loss": 75.9035, + "step": 68700 + }, + { + "epoch": 0.5683914464160152, + "grad_norm": 671.3385620117188, + "learning_rate": 4.9086838091858155e-06, + "loss": 130.6404, + "step": 68710 + }, + { + "epoch": 0.5684741696653844, + "grad_norm": 775.1796875, + "learning_rate": 4.9072736809956735e-06, + "loss": 73.8619, + "step": 68720 + }, + { + "epoch": 0.5685568929147536, + "grad_norm": 940.5592651367188, + "learning_rate": 4.9058635601833384e-06, + "loss": 64.6535, + "step": 68730 + }, + { + "epoch": 0.568639616164123, + "grad_norm": 385.8565673828125, + "learning_rate": 4.904453446861008e-06, + "loss": 84.9902, + "step": 68740 + }, + { + "epoch": 0.5687223394134922, + "grad_norm": 1663.734375, + "learning_rate": 4.903043341140879e-06, + "loss": 100.3515, + "step": 68750 + }, + { + "epoch": 
0.5688050626628613, + "grad_norm": 715.17626953125, + "learning_rate": 4.901633243135144e-06, + "loss": 88.3827, + "step": 68760 + }, + { + "epoch": 0.5688877859122307, + "grad_norm": 985.6260375976562, + "learning_rate": 4.900223152956003e-06, + "loss": 103.8463, + "step": 68770 + }, + { + "epoch": 0.5689705091615999, + "grad_norm": 981.5747680664062, + "learning_rate": 4.898813070715649e-06, + "loss": 105.5598, + "step": 68780 + }, + { + "epoch": 0.5690532324109691, + "grad_norm": 541.0383911132812, + "learning_rate": 4.897402996526273e-06, + "loss": 91.828, + "step": 68790 + }, + { + "epoch": 0.5691359556603384, + "grad_norm": 378.74261474609375, + "learning_rate": 4.895992930500068e-06, + "loss": 94.0054, + "step": 68800 + }, + { + "epoch": 0.5692186789097076, + "grad_norm": 524.0851440429688, + "learning_rate": 4.894582872749229e-06, + "loss": 80.3705, + "step": 68810 + }, + { + "epoch": 0.5693014021590768, + "grad_norm": 640.1279907226562, + "learning_rate": 4.893172823385947e-06, + "loss": 74.0081, + "step": 68820 + }, + { + "epoch": 0.5693841254084461, + "grad_norm": 1020.3170776367188, + "learning_rate": 4.8917627825224146e-06, + "loss": 96.6822, + "step": 68830 + }, + { + "epoch": 0.5694668486578153, + "grad_norm": 511.52783203125, + "learning_rate": 4.89035275027082e-06, + "loss": 93.7879, + "step": 68840 + }, + { + "epoch": 0.5695495719071845, + "grad_norm": 881.3643188476562, + "learning_rate": 4.888942726743353e-06, + "loss": 113.874, + "step": 68850 + }, + { + "epoch": 0.5696322951565538, + "grad_norm": 638.2870483398438, + "learning_rate": 4.887532712052206e-06, + "loss": 95.4349, + "step": 68860 + }, + { + "epoch": 0.569715018405923, + "grad_norm": 943.398681640625, + "learning_rate": 4.886122706309563e-06, + "loss": 90.6776, + "step": 68870 + }, + { + "epoch": 0.5697977416552922, + "grad_norm": 827.296142578125, + "learning_rate": 4.884712709627614e-06, + "loss": 106.1661, + "step": 68880 + }, + { + "epoch": 0.5698804649046615, + "grad_norm": 942.6498413085938, + "learning_rate": 4.8833027221185455e-06, + "loss": 101.3283, + "step": 68890 + }, + { + "epoch": 0.5699631881540307, + "grad_norm": 805.362060546875, + "learning_rate": 4.881892743894543e-06, + "loss": 63.2547, + "step": 68900 + }, + { + "epoch": 0.5700459114033999, + "grad_norm": 812.5333251953125, + "learning_rate": 4.880482775067794e-06, + "loss": 80.0272, + "step": 68910 + }, + { + "epoch": 0.5701286346527692, + "grad_norm": 1515.912109375, + "learning_rate": 4.879072815750481e-06, + "loss": 102.9636, + "step": 68920 + }, + { + "epoch": 0.5702113579021384, + "grad_norm": 705.251953125, + "learning_rate": 4.87766286605479e-06, + "loss": 78.7777, + "step": 68930 + }, + { + "epoch": 0.5702940811515076, + "grad_norm": 1174.0625, + "learning_rate": 4.876252926092903e-06, + "loss": 75.163, + "step": 68940 + }, + { + "epoch": 0.5703768044008769, + "grad_norm": 731.8579711914062, + "learning_rate": 4.874842995977004e-06, + "loss": 100.2167, + "step": 68950 + }, + { + "epoch": 0.5704595276502461, + "grad_norm": 615.9132690429688, + "learning_rate": 4.873433075819272e-06, + "loss": 74.5286, + "step": 68960 + }, + { + "epoch": 0.5705422508996153, + "grad_norm": 1427.2105712890625, + "learning_rate": 4.87202316573189e-06, + "loss": 115.191, + "step": 68970 + }, + { + "epoch": 0.5706249741489846, + "grad_norm": 464.9676208496094, + "learning_rate": 4.870613265827037e-06, + "loss": 107.5646, + "step": 68980 + }, + { + "epoch": 0.5707076973983538, + "grad_norm": 664.8630981445312, + "learning_rate": 4.869203376216891e-06, + 
"loss": 89.4376, + "step": 68990 + }, + { + "epoch": 0.570790420647723, + "grad_norm": 952.9496459960938, + "learning_rate": 4.867793497013634e-06, + "loss": 87.7081, + "step": 69000 + }, + { + "epoch": 0.5708731438970923, + "grad_norm": 864.7783203125, + "learning_rate": 4.866383628329442e-06, + "loss": 103.678, + "step": 69010 + }, + { + "epoch": 0.5709558671464615, + "grad_norm": 616.1771850585938, + "learning_rate": 4.86497377027649e-06, + "loss": 74.9962, + "step": 69020 + }, + { + "epoch": 0.5710385903958307, + "grad_norm": 1259.6622314453125, + "learning_rate": 4.863563922966957e-06, + "loss": 98.8335, + "step": 69030 + }, + { + "epoch": 0.5711213136452, + "grad_norm": 562.259765625, + "learning_rate": 4.862154086513016e-06, + "loss": 79.6309, + "step": 69040 + }, + { + "epoch": 0.5712040368945692, + "grad_norm": 941.865966796875, + "learning_rate": 4.860744261026841e-06, + "loss": 91.4525, + "step": 69050 + }, + { + "epoch": 0.5712867601439384, + "grad_norm": 1558.767578125, + "learning_rate": 4.8593344466206075e-06, + "loss": 102.5667, + "step": 69060 + }, + { + "epoch": 0.5713694833933077, + "grad_norm": 677.3783569335938, + "learning_rate": 4.857924643406485e-06, + "loss": 121.3402, + "step": 69070 + }, + { + "epoch": 0.5714522066426769, + "grad_norm": 1102.9102783203125, + "learning_rate": 4.856514851496647e-06, + "loss": 89.2696, + "step": 69080 + }, + { + "epoch": 0.5715349298920461, + "grad_norm": 510.9609375, + "learning_rate": 4.8551050710032625e-06, + "loss": 120.3483, + "step": 69090 + }, + { + "epoch": 0.5716176531414154, + "grad_norm": 624.8123779296875, + "learning_rate": 4.853695302038504e-06, + "loss": 99.4614, + "step": 69100 + }, + { + "epoch": 0.5717003763907846, + "grad_norm": 953.24267578125, + "learning_rate": 4.8522855447145385e-06, + "loss": 99.8863, + "step": 69110 + }, + { + "epoch": 0.5717830996401538, + "grad_norm": 415.95648193359375, + "learning_rate": 4.850875799143537e-06, + "loss": 79.1049, + "step": 69120 + }, + { + "epoch": 0.5718658228895231, + "grad_norm": 1333.311279296875, + "learning_rate": 4.84946606543766e-06, + "loss": 77.1295, + "step": 69130 + }, + { + "epoch": 0.5719485461388923, + "grad_norm": 1068.9039306640625, + "learning_rate": 4.848056343709079e-06, + "loss": 63.2696, + "step": 69140 + }, + { + "epoch": 0.5720312693882615, + "grad_norm": 2413.59814453125, + "learning_rate": 4.846646634069957e-06, + "loss": 98.2298, + "step": 69150 + }, + { + "epoch": 0.5721139926376309, + "grad_norm": 919.4255981445312, + "learning_rate": 4.845236936632458e-06, + "loss": 105.757, + "step": 69160 + }, + { + "epoch": 0.572196715887, + "grad_norm": 1202.3345947265625, + "learning_rate": 4.843827251508747e-06, + "loss": 101.2527, + "step": 69170 + }, + { + "epoch": 0.5722794391363693, + "grad_norm": 699.7549438476562, + "learning_rate": 4.842417578810984e-06, + "loss": 76.5603, + "step": 69180 + }, + { + "epoch": 0.5723621623857386, + "grad_norm": 579.6343383789062, + "learning_rate": 4.841007918651329e-06, + "loss": 106.7701, + "step": 69190 + }, + { + "epoch": 0.5724448856351078, + "grad_norm": 625.5684204101562, + "learning_rate": 4.839598271141947e-06, + "loss": 91.3166, + "step": 69200 + }, + { + "epoch": 0.572527608884477, + "grad_norm": 773.6903076171875, + "learning_rate": 4.8381886363949956e-06, + "loss": 75.625, + "step": 69210 + }, + { + "epoch": 0.5726103321338463, + "grad_norm": 980.545166015625, + "learning_rate": 4.83677901452263e-06, + "loss": 93.6299, + "step": 69220 + }, + { + "epoch": 0.5726930553832155, + "grad_norm": 
606.6036987304688, + "learning_rate": 4.835369405637009e-06, + "loss": 102.3185, + "step": 69230 + }, + { + "epoch": 0.5727757786325847, + "grad_norm": 922.7852783203125, + "learning_rate": 4.833959809850288e-06, + "loss": 89.2718, + "step": 69240 + }, + { + "epoch": 0.572858501881954, + "grad_norm": 918.3201293945312, + "learning_rate": 4.832550227274624e-06, + "loss": 95.1504, + "step": 69250 + }, + { + "epoch": 0.5729412251313232, + "grad_norm": 972.9816284179688, + "learning_rate": 4.83114065802217e-06, + "loss": 106.1438, + "step": 69260 + }, + { + "epoch": 0.5730239483806924, + "grad_norm": 1341.721435546875, + "learning_rate": 4.829731102205079e-06, + "loss": 99.2695, + "step": 69270 + }, + { + "epoch": 0.5731066716300617, + "grad_norm": 927.3786010742188, + "learning_rate": 4.828321559935502e-06, + "loss": 92.5324, + "step": 69280 + }, + { + "epoch": 0.5731893948794309, + "grad_norm": 835.2514038085938, + "learning_rate": 4.826912031325592e-06, + "loss": 79.8504, + "step": 69290 + }, + { + "epoch": 0.5732721181288001, + "grad_norm": 480.0574951171875, + "learning_rate": 4.825502516487497e-06, + "loss": 68.6633, + "step": 69300 + }, + { + "epoch": 0.5733548413781694, + "grad_norm": 1109.6693115234375, + "learning_rate": 4.824093015533365e-06, + "loss": 91.0974, + "step": 69310 + }, + { + "epoch": 0.5734375646275386, + "grad_norm": 1023.72314453125, + "learning_rate": 4.822683528575344e-06, + "loss": 81.7075, + "step": 69320 + }, + { + "epoch": 0.5735202878769078, + "grad_norm": 689.5050048828125, + "learning_rate": 4.8212740557255815e-06, + "loss": 88.6784, + "step": 69330 + }, + { + "epoch": 0.573603011126277, + "grad_norm": 864.6280517578125, + "learning_rate": 4.819864597096222e-06, + "loss": 98.9668, + "step": 69340 + }, + { + "epoch": 0.5736857343756463, + "grad_norm": 502.53045654296875, + "learning_rate": 4.81845515279941e-06, + "loss": 80.9407, + "step": 69350 + }, + { + "epoch": 0.5737684576250155, + "grad_norm": 1652.8223876953125, + "learning_rate": 4.817045722947288e-06, + "loss": 88.5993, + "step": 69360 + }, + { + "epoch": 0.5738511808743847, + "grad_norm": 417.58795166015625, + "learning_rate": 4.815636307651998e-06, + "loss": 64.912, + "step": 69370 + }, + { + "epoch": 0.573933904123754, + "grad_norm": 2181.54443359375, + "learning_rate": 4.814226907025683e-06, + "loss": 93.8224, + "step": 69380 + }, + { + "epoch": 0.5740166273731232, + "grad_norm": 961.0506591796875, + "learning_rate": 4.812817521180479e-06, + "loss": 94.1438, + "step": 69390 + }, + { + "epoch": 0.5740993506224924, + "grad_norm": 783.2133178710938, + "learning_rate": 4.811408150228526e-06, + "loss": 79.3694, + "step": 69400 + }, + { + "epoch": 0.5741820738718617, + "grad_norm": 614.4639892578125, + "learning_rate": 4.80999879428196e-06, + "loss": 96.5799, + "step": 69410 + }, + { + "epoch": 0.5742647971212309, + "grad_norm": 1084.011474609375, + "learning_rate": 4.808589453452918e-06, + "loss": 112.4365, + "step": 69420 + }, + { + "epoch": 0.5743475203706001, + "grad_norm": 781.2046508789062, + "learning_rate": 4.807180127853535e-06, + "loss": 82.4409, + "step": 69430 + }, + { + "epoch": 0.5744302436199694, + "grad_norm": 884.5288696289062, + "learning_rate": 4.8057708175959446e-06, + "loss": 113.9509, + "step": 69440 + }, + { + "epoch": 0.5745129668693386, + "grad_norm": 927.2460327148438, + "learning_rate": 4.804361522792278e-06, + "loss": 82.8712, + "step": 69450 + }, + { + "epoch": 0.5745956901187078, + "grad_norm": 778.8930053710938, + "learning_rate": 4.8029522435546695e-06, + "loss": 71.4773, 
+ "step": 69460 + }, + { + "epoch": 0.5746784133680771, + "grad_norm": 631.3436889648438, + "learning_rate": 4.801542979995245e-06, + "loss": 81.125, + "step": 69470 + }, + { + "epoch": 0.5747611366174463, + "grad_norm": 812.0714111328125, + "learning_rate": 4.800133732226135e-06, + "loss": 82.7905, + "step": 69480 + }, + { + "epoch": 0.5748438598668155, + "grad_norm": 738.4343872070312, + "learning_rate": 4.798724500359467e-06, + "loss": 109.1766, + "step": 69490 + }, + { + "epoch": 0.5749265831161848, + "grad_norm": 1364.8973388671875, + "learning_rate": 4.7973152845073666e-06, + "loss": 74.5933, + "step": 69500 + }, + { + "epoch": 0.575009306365554, + "grad_norm": 1043.5596923828125, + "learning_rate": 4.795906084781958e-06, + "loss": 78.0541, + "step": 69510 + }, + { + "epoch": 0.5750920296149232, + "grad_norm": 1055.900146484375, + "learning_rate": 4.7944969012953656e-06, + "loss": 67.5411, + "step": 69520 + }, + { + "epoch": 0.5751747528642925, + "grad_norm": 1076.0845947265625, + "learning_rate": 4.793087734159711e-06, + "loss": 84.8314, + "step": 69530 + }, + { + "epoch": 0.5752574761136617, + "grad_norm": 731.822265625, + "learning_rate": 4.791678583487118e-06, + "loss": 116.2365, + "step": 69540 + }, + { + "epoch": 0.5753401993630309, + "grad_norm": 490.7128601074219, + "learning_rate": 4.790269449389703e-06, + "loss": 113.4962, + "step": 69550 + }, + { + "epoch": 0.5754229226124002, + "grad_norm": 4792.376953125, + "learning_rate": 4.788860331979586e-06, + "loss": 195.9267, + "step": 69560 + }, + { + "epoch": 0.5755056458617694, + "grad_norm": 718.0413208007812, + "learning_rate": 4.787451231368883e-06, + "loss": 83.7023, + "step": 69570 + }, + { + "epoch": 0.5755883691111386, + "grad_norm": 1221.1470947265625, + "learning_rate": 4.786042147669709e-06, + "loss": 99.3645, + "step": 69580 + }, + { + "epoch": 0.575671092360508, + "grad_norm": 642.6139526367188, + "learning_rate": 4.784633080994181e-06, + "loss": 74.7277, + "step": 69590 + }, + { + "epoch": 0.5757538156098772, + "grad_norm": 725.33349609375, + "learning_rate": 4.783224031454409e-06, + "loss": 108.2193, + "step": 69600 + }, + { + "epoch": 0.5758365388592463, + "grad_norm": 512.4448852539062, + "learning_rate": 4.781814999162507e-06, + "loss": 100.0694, + "step": 69610 + }, + { + "epoch": 0.5759192621086157, + "grad_norm": 540.7772216796875, + "learning_rate": 4.780405984230582e-06, + "loss": 76.5325, + "step": 69620 + }, + { + "epoch": 0.5760019853579849, + "grad_norm": 1100.6224365234375, + "learning_rate": 4.778996986770747e-06, + "loss": 125.3361, + "step": 69630 + }, + { + "epoch": 0.5760847086073541, + "grad_norm": 704.91748046875, + "learning_rate": 4.777588006895109e-06, + "loss": 84.2594, + "step": 69640 + }, + { + "epoch": 0.5761674318567234, + "grad_norm": 976.7535400390625, + "learning_rate": 4.77617904471577e-06, + "loss": 86.525, + "step": 69650 + }, + { + "epoch": 0.5762501551060926, + "grad_norm": 1758.328857421875, + "learning_rate": 4.774770100344838e-06, + "loss": 105.5501, + "step": 69660 + }, + { + "epoch": 0.5763328783554618, + "grad_norm": 717.3983764648438, + "learning_rate": 4.7733611738944155e-06, + "loss": 78.1173, + "step": 69670 + }, + { + "epoch": 0.5764156016048311, + "grad_norm": 1015.1489868164062, + "learning_rate": 4.7719522654766044e-06, + "loss": 93.4123, + "step": 69680 + }, + { + "epoch": 0.5764983248542003, + "grad_norm": 2092.849365234375, + "learning_rate": 4.7705433752035045e-06, + "loss": 120.9727, + "step": 69690 + }, + { + "epoch": 0.5765810481035695, + "grad_norm": 
491.3244323730469, + "learning_rate": 4.7691345031872156e-06, + "loss": 70.0393, + "step": 69700 + }, + { + "epoch": 0.5766637713529388, + "grad_norm": 954.6812133789062, + "learning_rate": 4.767725649539833e-06, + "loss": 87.8252, + "step": 69710 + }, + { + "epoch": 0.576746494602308, + "grad_norm": 780.2823486328125, + "learning_rate": 4.766316814373458e-06, + "loss": 125.7246, + "step": 69720 + }, + { + "epoch": 0.5768292178516772, + "grad_norm": 1444.184326171875, + "learning_rate": 4.76490799780018e-06, + "loss": 94.4206, + "step": 69730 + }, + { + "epoch": 0.5769119411010465, + "grad_norm": 838.7236328125, + "learning_rate": 4.763499199932093e-06, + "loss": 126.1872, + "step": 69740 + }, + { + "epoch": 0.5769946643504157, + "grad_norm": 1014.107421875, + "learning_rate": 4.762090420881289e-06, + "loss": 108.3225, + "step": 69750 + }, + { + "epoch": 0.5770773875997849, + "grad_norm": 613.5991821289062, + "learning_rate": 4.760681660759859e-06, + "loss": 101.5193, + "step": 69760 + }, + { + "epoch": 0.5771601108491542, + "grad_norm": 661.6652221679688, + "learning_rate": 4.7592729196798905e-06, + "loss": 78.5238, + "step": 69770 + }, + { + "epoch": 0.5772428340985234, + "grad_norm": 917.331298828125, + "learning_rate": 4.757864197753472e-06, + "loss": 94.9886, + "step": 69780 + }, + { + "epoch": 0.5773255573478926, + "grad_norm": 750.0388793945312, + "learning_rate": 4.7564554950926876e-06, + "loss": 68.4733, + "step": 69790 + }, + { + "epoch": 0.5774082805972619, + "grad_norm": 754.461181640625, + "learning_rate": 4.755046811809621e-06, + "loss": 68.0218, + "step": 69800 + }, + { + "epoch": 0.5774910038466311, + "grad_norm": 1071.869384765625, + "learning_rate": 4.7536381480163575e-06, + "loss": 147.5622, + "step": 69810 + }, + { + "epoch": 0.5775737270960003, + "grad_norm": 809.9874877929688, + "learning_rate": 4.752229503824974e-06, + "loss": 68.0273, + "step": 69820 + }, + { + "epoch": 0.5776564503453696, + "grad_norm": 1067.3671875, + "learning_rate": 4.7508208793475515e-06, + "loss": 77.8083, + "step": 69830 + }, + { + "epoch": 0.5777391735947388, + "grad_norm": 757.71435546875, + "learning_rate": 4.749412274696169e-06, + "loss": 62.3041, + "step": 69840 + }, + { + "epoch": 0.577821896844108, + "grad_norm": 619.0784912109375, + "learning_rate": 4.748003689982901e-06, + "loss": 79.2833, + "step": 69850 + }, + { + "epoch": 0.5779046200934773, + "grad_norm": 631.8236083984375, + "learning_rate": 4.746595125319823e-06, + "loss": 99.2251, + "step": 69860 + }, + { + "epoch": 0.5779873433428465, + "grad_norm": 1292.0946044921875, + "learning_rate": 4.745186580819008e-06, + "loss": 108.7097, + "step": 69870 + }, + { + "epoch": 0.5780700665922157, + "grad_norm": 2003.7862548828125, + "learning_rate": 4.743778056592528e-06, + "loss": 101.937, + "step": 69880 + }, + { + "epoch": 0.578152789841585, + "grad_norm": 978.4115600585938, + "learning_rate": 4.742369552752453e-06, + "loss": 106.5378, + "step": 69890 + }, + { + "epoch": 0.5782355130909542, + "grad_norm": 603.2467041015625, + "learning_rate": 4.740961069410848e-06, + "loss": 93.0756, + "step": 69900 + }, + { + "epoch": 0.5783182363403234, + "grad_norm": 951.097900390625, + "learning_rate": 4.7395526066797835e-06, + "loss": 85.2565, + "step": 69910 + }, + { + "epoch": 0.5784009595896927, + "grad_norm": 813.0645751953125, + "learning_rate": 4.738144164671322e-06, + "loss": 123.9502, + "step": 69920 + }, + { + "epoch": 0.5784836828390619, + "grad_norm": 1287.8382568359375, + "learning_rate": 4.736735743497528e-06, + "loss": 98.9144, + 
"step": 69930 + }, + { + "epoch": 0.5785664060884311, + "grad_norm": 976.73974609375, + "learning_rate": 4.735327343270461e-06, + "loss": 102.1547, + "step": 69940 + }, + { + "epoch": 0.5786491293378004, + "grad_norm": 874.163330078125, + "learning_rate": 4.733918964102185e-06, + "loss": 108.0046, + "step": 69950 + }, + { + "epoch": 0.5787318525871696, + "grad_norm": 493.4253845214844, + "learning_rate": 4.732510606104754e-06, + "loss": 81.0991, + "step": 69960 + }, + { + "epoch": 0.5788145758365388, + "grad_norm": 481.89520263671875, + "learning_rate": 4.731102269390227e-06, + "loss": 72.0266, + "step": 69970 + }, + { + "epoch": 0.5788972990859081, + "grad_norm": 1048.3861083984375, + "learning_rate": 4.729693954070661e-06, + "loss": 101.3536, + "step": 69980 + }, + { + "epoch": 0.5789800223352773, + "grad_norm": 514.9654541015625, + "learning_rate": 4.728285660258104e-06, + "loss": 84.173, + "step": 69990 + }, + { + "epoch": 0.5790627455846465, + "grad_norm": 1212.3302001953125, + "learning_rate": 4.726877388064609e-06, + "loss": 95.1656, + "step": 70000 + }, + { + "epoch": 0.5791454688340159, + "grad_norm": 594.7462768554688, + "learning_rate": 4.725469137602229e-06, + "loss": 77.2462, + "step": 70010 + }, + { + "epoch": 0.579228192083385, + "grad_norm": 1222.4024658203125, + "learning_rate": 4.724060908983008e-06, + "loss": 77.3384, + "step": 70020 + }, + { + "epoch": 0.5793109153327543, + "grad_norm": 953.1253051757812, + "learning_rate": 4.7226527023189954e-06, + "loss": 69.5169, + "step": 70030 + }, + { + "epoch": 0.5793936385821236, + "grad_norm": 474.82373046875, + "learning_rate": 4.721244517722233e-06, + "loss": 62.3261, + "step": 70040 + }, + { + "epoch": 0.5794763618314928, + "grad_norm": 958.0091552734375, + "learning_rate": 4.719836355304766e-06, + "loss": 114.5203, + "step": 70050 + }, + { + "epoch": 0.579559085080862, + "grad_norm": 977.0399780273438, + "learning_rate": 4.718428215178634e-06, + "loss": 92.8079, + "step": 70060 + }, + { + "epoch": 0.5796418083302312, + "grad_norm": 889.9998779296875, + "learning_rate": 4.717020097455879e-06, + "loss": 118.2158, + "step": 70070 + }, + { + "epoch": 0.5797245315796005, + "grad_norm": 730.516357421875, + "learning_rate": 4.715612002248533e-06, + "loss": 90.555, + "step": 70080 + }, + { + "epoch": 0.5798072548289697, + "grad_norm": 905.8795776367188, + "learning_rate": 4.714203929668637e-06, + "loss": 82.7224, + "step": 70090 + }, + { + "epoch": 0.5798899780783389, + "grad_norm": 925.4937744140625, + "learning_rate": 4.712795879828221e-06, + "loss": 84.4116, + "step": 70100 + }, + { + "epoch": 0.5799727013277082, + "grad_norm": 1022.7550659179688, + "learning_rate": 4.71138785283932e-06, + "loss": 79.103, + "step": 70110 + }, + { + "epoch": 0.5800554245770774, + "grad_norm": 765.399169921875, + "learning_rate": 4.709979848813963e-06, + "loss": 88.3323, + "step": 70120 + }, + { + "epoch": 0.5801381478264466, + "grad_norm": 806.2305297851562, + "learning_rate": 4.7085718678641776e-06, + "loss": 117.2993, + "step": 70130 + }, + { + "epoch": 0.5802208710758159, + "grad_norm": 770.2034301757812, + "learning_rate": 4.70716391010199e-06, + "loss": 96.3582, + "step": 70140 + }, + { + "epoch": 0.5803035943251851, + "grad_norm": 934.9890747070312, + "learning_rate": 4.70575597563943e-06, + "loss": 85.1314, + "step": 70150 + }, + { + "epoch": 0.5803863175745543, + "grad_norm": 1144.4893798828125, + "learning_rate": 4.704348064588514e-06, + "loss": 95.0485, + "step": 70160 + }, + { + "epoch": 0.5804690408239236, + "grad_norm": 
441.04132080078125, + "learning_rate": 4.702940177061266e-06, + "loss": 59.0982, + "step": 70170 + }, + { + "epoch": 0.5805517640732928, + "grad_norm": 883.7022705078125, + "learning_rate": 4.7015323131697035e-06, + "loss": 163.2424, + "step": 70180 + }, + { + "epoch": 0.580634487322662, + "grad_norm": 1098.4498291015625, + "learning_rate": 4.700124473025846e-06, + "loss": 87.0862, + "step": 70190 + }, + { + "epoch": 0.5807172105720313, + "grad_norm": 1009.8033447265625, + "learning_rate": 4.6987166567417085e-06, + "loss": 78.1907, + "step": 70200 + }, + { + "epoch": 0.5807999338214005, + "grad_norm": 1105.3592529296875, + "learning_rate": 4.697308864429303e-06, + "loss": 98.8104, + "step": 70210 + }, + { + "epoch": 0.5808826570707697, + "grad_norm": 786.5978393554688, + "learning_rate": 4.695901096200643e-06, + "loss": 79.2503, + "step": 70220 + }, + { + "epoch": 0.580965380320139, + "grad_norm": 480.7000427246094, + "learning_rate": 4.694493352167736e-06, + "loss": 107.3822, + "step": 70230 + }, + { + "epoch": 0.5810481035695082, + "grad_norm": 647.6439208984375, + "learning_rate": 4.693085632442593e-06, + "loss": 55.4946, + "step": 70240 + }, + { + "epoch": 0.5811308268188774, + "grad_norm": 1218.6649169921875, + "learning_rate": 4.691677937137217e-06, + "loss": 116.4978, + "step": 70250 + }, + { + "epoch": 0.5812135500682467, + "grad_norm": 511.2562561035156, + "learning_rate": 4.690270266363612e-06, + "loss": 120.063, + "step": 70260 + }, + { + "epoch": 0.5812962733176159, + "grad_norm": 822.0852661132812, + "learning_rate": 4.688862620233779e-06, + "loss": 100.5438, + "step": 70270 + }, + { + "epoch": 0.5813789965669851, + "grad_norm": 1118.2822265625, + "learning_rate": 4.687454998859721e-06, + "loss": 99.0379, + "step": 70280 + }, + { + "epoch": 0.5814617198163544, + "grad_norm": 733.8569946289062, + "learning_rate": 4.686047402353433e-06, + "loss": 78.6712, + "step": 70290 + }, + { + "epoch": 0.5815444430657236, + "grad_norm": 691.5249633789062, + "learning_rate": 4.684639830826913e-06, + "loss": 99.4106, + "step": 70300 + }, + { + "epoch": 0.5816271663150928, + "grad_norm": 542.6314086914062, + "learning_rate": 4.683232284392155e-06, + "loss": 96.8427, + "step": 70310 + }, + { + "epoch": 0.5817098895644621, + "grad_norm": 357.3701477050781, + "learning_rate": 4.681824763161151e-06, + "loss": 96.8162, + "step": 70320 + }, + { + "epoch": 0.5817926128138313, + "grad_norm": 951.8953857421875, + "learning_rate": 4.6804172672458905e-06, + "loss": 111.6947, + "step": 70330 + }, + { + "epoch": 0.5818753360632005, + "grad_norm": 743.7538452148438, + "learning_rate": 4.67900979675836e-06, + "loss": 97.8226, + "step": 70340 + }, + { + "epoch": 0.5819580593125698, + "grad_norm": 659.83056640625, + "learning_rate": 4.677602351810547e-06, + "loss": 96.5504, + "step": 70350 + }, + { + "epoch": 0.582040782561939, + "grad_norm": 643.2234497070312, + "learning_rate": 4.676194932514435e-06, + "loss": 70.3423, + "step": 70360 + }, + { + "epoch": 0.5821235058113082, + "grad_norm": 760.5200805664062, + "learning_rate": 4.674787538982006e-06, + "loss": 100.9573, + "step": 70370 + }, + { + "epoch": 0.5822062290606775, + "grad_norm": 504.2164306640625, + "learning_rate": 4.6733801713252405e-06, + "loss": 85.723, + "step": 70380 + }, + { + "epoch": 0.5822889523100467, + "grad_norm": 458.2579650878906, + "learning_rate": 4.671972829656116e-06, + "loss": 67.0866, + "step": 70390 + }, + { + "epoch": 0.5823716755594159, + "grad_norm": 1060.4288330078125, + "learning_rate": 4.670565514086607e-06, + "loss": 
86.1434, + "step": 70400 + }, + { + "epoch": 0.5824543988087852, + "grad_norm": 478.5907897949219, + "learning_rate": 4.669158224728691e-06, + "loss": 87.8763, + "step": 70410 + }, + { + "epoch": 0.5825371220581544, + "grad_norm": 1053.1619873046875, + "learning_rate": 4.667750961694334e-06, + "loss": 95.2859, + "step": 70420 + }, + { + "epoch": 0.5826198453075236, + "grad_norm": 752.842041015625, + "learning_rate": 4.666343725095509e-06, + "loss": 100.9934, + "step": 70430 + }, + { + "epoch": 0.582702568556893, + "grad_norm": 687.784912109375, + "learning_rate": 4.6649365150441825e-06, + "loss": 76.4099, + "step": 70440 + }, + { + "epoch": 0.5827852918062622, + "grad_norm": 836.1194458007812, + "learning_rate": 4.66352933165232e-06, + "loss": 91.4804, + "step": 70450 + }, + { + "epoch": 0.5828680150556313, + "grad_norm": 739.9501953125, + "learning_rate": 4.6621221750318835e-06, + "loss": 71.6738, + "step": 70460 + }, + { + "epoch": 0.5829507383050007, + "grad_norm": 392.7385559082031, + "learning_rate": 4.660715045294834e-06, + "loss": 110.5148, + "step": 70470 + }, + { + "epoch": 0.5830334615543699, + "grad_norm": 821.631103515625, + "learning_rate": 4.659307942553133e-06, + "loss": 89.2418, + "step": 70480 + }, + { + "epoch": 0.5831161848037391, + "grad_norm": 978.71142578125, + "learning_rate": 4.657900866918735e-06, + "loss": 94.3087, + "step": 70490 + }, + { + "epoch": 0.5831989080531084, + "grad_norm": 1593.7996826171875, + "learning_rate": 4.6564938185035954e-06, + "loss": 103.8715, + "step": 70500 + }, + { + "epoch": 0.5832816313024776, + "grad_norm": 825.0999145507812, + "learning_rate": 4.655086797419666e-06, + "loss": 99.5245, + "step": 70510 + }, + { + "epoch": 0.5833643545518468, + "grad_norm": 541.9385375976562, + "learning_rate": 4.653679803778897e-06, + "loss": 87.7526, + "step": 70520 + }, + { + "epoch": 0.5834470778012161, + "grad_norm": 1196.7366943359375, + "learning_rate": 4.652272837693237e-06, + "loss": 104.002, + "step": 70530 + }, + { + "epoch": 0.5835298010505853, + "grad_norm": 718.0545043945312, + "learning_rate": 4.650865899274632e-06, + "loss": 125.1911, + "step": 70540 + }, + { + "epoch": 0.5836125242999545, + "grad_norm": 612.04736328125, + "learning_rate": 4.649458988635023e-06, + "loss": 75.9752, + "step": 70550 + }, + { + "epoch": 0.5836952475493238, + "grad_norm": 1365.9921875, + "learning_rate": 4.6480521058863546e-06, + "loss": 114.9041, + "step": 70560 + }, + { + "epoch": 0.583777970798693, + "grad_norm": 1484.0291748046875, + "learning_rate": 4.646645251140564e-06, + "loss": 102.265, + "step": 70570 + }, + { + "epoch": 0.5838606940480622, + "grad_norm": 756.23974609375, + "learning_rate": 4.6452384245095924e-06, + "loss": 82.9127, + "step": 70580 + }, + { + "epoch": 0.5839434172974315, + "grad_norm": 841.9822387695312, + "learning_rate": 4.643831626105369e-06, + "loss": 107.5036, + "step": 70590 + }, + { + "epoch": 0.5840261405468007, + "grad_norm": 947.1768798828125, + "learning_rate": 4.642424856039827e-06, + "loss": 89.3748, + "step": 70600 + }, + { + "epoch": 0.5841088637961699, + "grad_norm": 939.9810791015625, + "learning_rate": 4.6410181144249e-06, + "loss": 113.3098, + "step": 70610 + }, + { + "epoch": 0.5841915870455392, + "grad_norm": 819.6731567382812, + "learning_rate": 4.639611401372514e-06, + "loss": 75.1481, + "step": 70620 + }, + { + "epoch": 0.5842743102949084, + "grad_norm": 1223.1297607421875, + "learning_rate": 4.638204716994594e-06, + "loss": 76.0569, + "step": 70630 + }, + { + "epoch": 0.5843570335442776, + "grad_norm": 
775.81689453125, + "learning_rate": 4.636798061403065e-06, + "loss": 79.0008, + "step": 70640 + }, + { + "epoch": 0.5844397567936469, + "grad_norm": 796.52978515625, + "learning_rate": 4.635391434709847e-06, + "loss": 87.8786, + "step": 70650 + }, + { + "epoch": 0.5845224800430161, + "grad_norm": 4887.2314453125, + "learning_rate": 4.6339848370268585e-06, + "loss": 128.6168, + "step": 70660 + }, + { + "epoch": 0.5846052032923853, + "grad_norm": 644.0697021484375, + "learning_rate": 4.63257826846602e-06, + "loss": 83.8805, + "step": 70670 + }, + { + "epoch": 0.5846879265417546, + "grad_norm": 866.1959838867188, + "learning_rate": 4.6311717291392396e-06, + "loss": 101.9503, + "step": 70680 + }, + { + "epoch": 0.5847706497911238, + "grad_norm": 1018.76025390625, + "learning_rate": 4.629765219158433e-06, + "loss": 82.65, + "step": 70690 + }, + { + "epoch": 0.584853373040493, + "grad_norm": 1179.1864013671875, + "learning_rate": 4.628358738635507e-06, + "loss": 134.8187, + "step": 70700 + }, + { + "epoch": 0.5849360962898623, + "grad_norm": 656.47900390625, + "learning_rate": 4.626952287682372e-06, + "loss": 117.6582, + "step": 70710 + }, + { + "epoch": 0.5850188195392315, + "grad_norm": 1300.9158935546875, + "learning_rate": 4.6255458664109306e-06, + "loss": 121.145, + "step": 70720 + }, + { + "epoch": 0.5851015427886007, + "grad_norm": 1486.7906494140625, + "learning_rate": 4.624139474933087e-06, + "loss": 94.2786, + "step": 70730 + }, + { + "epoch": 0.58518426603797, + "grad_norm": 759.8827514648438, + "learning_rate": 4.62273311336074e-06, + "loss": 88.7782, + "step": 70740 + }, + { + "epoch": 0.5852669892873392, + "grad_norm": 746.8486938476562, + "learning_rate": 4.62132678180579e-06, + "loss": 78.3264, + "step": 70750 + }, + { + "epoch": 0.5853497125367084, + "grad_norm": 837.21337890625, + "learning_rate": 4.619920480380127e-06, + "loss": 119.9007, + "step": 70760 + }, + { + "epoch": 0.5854324357860777, + "grad_norm": 10413.9169921875, + "learning_rate": 4.618514209195648e-06, + "loss": 153.6559, + "step": 70770 + }, + { + "epoch": 0.5855151590354469, + "grad_norm": 851.4038696289062, + "learning_rate": 4.617107968364243e-06, + "loss": 90.0972, + "step": 70780 + }, + { + "epoch": 0.5855978822848161, + "grad_norm": 551.212646484375, + "learning_rate": 4.615701757997799e-06, + "loss": 134.4456, + "step": 70790 + }, + { + "epoch": 0.5856806055341853, + "grad_norm": 1029.0963134765625, + "learning_rate": 4.614295578208202e-06, + "loss": 74.9261, + "step": 70800 + }, + { + "epoch": 0.5857633287835546, + "grad_norm": 1182.5975341796875, + "learning_rate": 4.612889429107337e-06, + "loss": 94.9276, + "step": 70810 + }, + { + "epoch": 0.5858460520329238, + "grad_norm": 879.9530639648438, + "learning_rate": 4.611483310807082e-06, + "loss": 109.8622, + "step": 70820 + }, + { + "epoch": 0.585928775282293, + "grad_norm": 756.7501831054688, + "learning_rate": 4.610077223419319e-06, + "loss": 111.9088, + "step": 70830 + }, + { + "epoch": 0.5860114985316623, + "grad_norm": 935.3068237304688, + "learning_rate": 4.608671167055922e-06, + "loss": 114.9015, + "step": 70840 + }, + { + "epoch": 0.5860942217810315, + "grad_norm": 764.818359375, + "learning_rate": 4.607265141828762e-06, + "loss": 95.3971, + "step": 70850 + }, + { + "epoch": 0.5861769450304007, + "grad_norm": 516.7097778320312, + "learning_rate": 4.605859147849713e-06, + "loss": 66.5408, + "step": 70860 + }, + { + "epoch": 0.58625966827977, + "grad_norm": 1200.6766357421875, + "learning_rate": 4.604453185230643e-06, + "loss": 94.5176, + "step": 
70870 + }, + { + "epoch": 0.5863423915291393, + "grad_norm": 1096.214599609375, + "learning_rate": 4.603047254083418e-06, + "loss": 123.1105, + "step": 70880 + }, + { + "epoch": 0.5864251147785084, + "grad_norm": 789.3768920898438, + "learning_rate": 4.601641354519901e-06, + "loss": 97.1097, + "step": 70890 + }, + { + "epoch": 0.5865078380278778, + "grad_norm": 483.42315673828125, + "learning_rate": 4.6002354866519526e-06, + "loss": 100.7335, + "step": 70900 + }, + { + "epoch": 0.586590561277247, + "grad_norm": 978.4453125, + "learning_rate": 4.598829650591432e-06, + "loss": 91.333, + "step": 70910 + }, + { + "epoch": 0.5866732845266162, + "grad_norm": 653.1732177734375, + "learning_rate": 4.597423846450196e-06, + "loss": 86.567, + "step": 70920 + }, + { + "epoch": 0.5867560077759855, + "grad_norm": 714.1948852539062, + "learning_rate": 4.596018074340097e-06, + "loss": 86.6666, + "step": 70930 + }, + { + "epoch": 0.5868387310253547, + "grad_norm": 635.244140625, + "learning_rate": 4.594612334372985e-06, + "loss": 81.3609, + "step": 70940 + }, + { + "epoch": 0.5869214542747239, + "grad_norm": 790.4684448242188, + "learning_rate": 4.59320662666071e-06, + "loss": 91.6696, + "step": 70950 + }, + { + "epoch": 0.5870041775240932, + "grad_norm": 636.0848388671875, + "learning_rate": 4.591800951315116e-06, + "loss": 78.6052, + "step": 70960 + }, + { + "epoch": 0.5870869007734624, + "grad_norm": 1063.884765625, + "learning_rate": 4.590395308448046e-06, + "loss": 88.4161, + "step": 70970 + }, + { + "epoch": 0.5871696240228316, + "grad_norm": 1092.5953369140625, + "learning_rate": 4.588989698171343e-06, + "loss": 89.8243, + "step": 70980 + }, + { + "epoch": 0.5872523472722009, + "grad_norm": 758.7847290039062, + "learning_rate": 4.587584120596842e-06, + "loss": 95.1261, + "step": 70990 + }, + { + "epoch": 0.5873350705215701, + "grad_norm": 634.198486328125, + "learning_rate": 4.58617857583638e-06, + "loss": 72.3683, + "step": 71000 + }, + { + "epoch": 0.5874177937709393, + "grad_norm": 774.2512817382812, + "learning_rate": 4.5847730640017926e-06, + "loss": 86.8859, + "step": 71010 + }, + { + "epoch": 0.5875005170203086, + "grad_norm": 682.7206420898438, + "learning_rate": 4.5833675852049045e-06, + "loss": 95.423, + "step": 71020 + }, + { + "epoch": 0.5875832402696778, + "grad_norm": 458.109130859375, + "learning_rate": 4.5819621395575445e-06, + "loss": 90.4719, + "step": 71030 + }, + { + "epoch": 0.587665963519047, + "grad_norm": 784.8795166015625, + "learning_rate": 4.5805567271715395e-06, + "loss": 93.2228, + "step": 71040 + }, + { + "epoch": 0.5877486867684163, + "grad_norm": 802.4894409179688, + "learning_rate": 4.5791513481587105e-06, + "loss": 68.1955, + "step": 71050 + }, + { + "epoch": 0.5878314100177855, + "grad_norm": 834.863525390625, + "learning_rate": 4.577746002630878e-06, + "loss": 81.5406, + "step": 71060 + }, + { + "epoch": 0.5879141332671547, + "grad_norm": 1166.91796875, + "learning_rate": 4.576340690699857e-06, + "loss": 88.6559, + "step": 71070 + }, + { + "epoch": 0.587996856516524, + "grad_norm": 426.1348876953125, + "learning_rate": 4.574935412477464e-06, + "loss": 92.7223, + "step": 71080 + }, + { + "epoch": 0.5880795797658932, + "grad_norm": 980.8372192382812, + "learning_rate": 4.573530168075508e-06, + "loss": 126.4129, + "step": 71090 + }, + { + "epoch": 0.5881623030152624, + "grad_norm": 789.66796875, + "learning_rate": 4.572124957605803e-06, + "loss": 80.9297, + "step": 71100 + }, + { + "epoch": 0.5882450262646317, + "grad_norm": 647.4459228515625, + "learning_rate": 
4.5707197811801484e-06, + "loss": 68.6454, + "step": 71110 + }, + { + "epoch": 0.5883277495140009, + "grad_norm": 1236.953369140625, + "learning_rate": 4.569314638910352e-06, + "loss": 79.3519, + "step": 71120 + }, + { + "epoch": 0.5884104727633701, + "grad_norm": 808.24462890625, + "learning_rate": 4.56790953090821e-06, + "loss": 126.6436, + "step": 71130 + }, + { + "epoch": 0.5884931960127394, + "grad_norm": 1413.169921875, + "learning_rate": 4.566504457285527e-06, + "loss": 102.8847, + "step": 71140 + }, + { + "epoch": 0.5885759192621086, + "grad_norm": 751.8162231445312, + "learning_rate": 4.565099418154093e-06, + "loss": 84.422, + "step": 71150 + }, + { + "epoch": 0.5886586425114778, + "grad_norm": 608.4118041992188, + "learning_rate": 4.563694413625703e-06, + "loss": 66.0182, + "step": 71160 + }, + { + "epoch": 0.5887413657608471, + "grad_norm": 413.452392578125, + "learning_rate": 4.5622894438121465e-06, + "loss": 75.759, + "step": 71170 + }, + { + "epoch": 0.5888240890102163, + "grad_norm": 522.5778198242188, + "learning_rate": 4.560884508825212e-06, + "loss": 99.5577, + "step": 71180 + }, + { + "epoch": 0.5889068122595855, + "grad_norm": 492.1758728027344, + "learning_rate": 4.559479608776679e-06, + "loss": 91.1286, + "step": 71190 + }, + { + "epoch": 0.5889895355089548, + "grad_norm": 694.331787109375, + "learning_rate": 4.558074743778333e-06, + "loss": 96.5787, + "step": 71200 + }, + { + "epoch": 0.589072258758324, + "grad_norm": 828.5624389648438, + "learning_rate": 4.556669913941951e-06, + "loss": 85.6967, + "step": 71210 + }, + { + "epoch": 0.5891549820076932, + "grad_norm": 963.8417358398438, + "learning_rate": 4.555265119379308e-06, + "loss": 84.3966, + "step": 71220 + }, + { + "epoch": 0.5892377052570625, + "grad_norm": 869.2294921875, + "learning_rate": 4.55386036020218e-06, + "loss": 124.0421, + "step": 71230 + }, + { + "epoch": 0.5893204285064317, + "grad_norm": 1175.8673095703125, + "learning_rate": 4.552455636522335e-06, + "loss": 127.9119, + "step": 71240 + }, + { + "epoch": 0.5894031517558009, + "grad_norm": 815.5340576171875, + "learning_rate": 4.551050948451542e-06, + "loss": 76.2241, + "step": 71250 + }, + { + "epoch": 0.5894858750051702, + "grad_norm": 769.5640869140625, + "learning_rate": 4.549646296101564e-06, + "loss": 106.5304, + "step": 71260 + }, + { + "epoch": 0.5895685982545394, + "grad_norm": 693.4418334960938, + "learning_rate": 4.548241679584165e-06, + "loss": 79.3208, + "step": 71270 + }, + { + "epoch": 0.5896513215039086, + "grad_norm": 1170.5986328125, + "learning_rate": 4.546837099011101e-06, + "loss": 126.9079, + "step": 71280 + }, + { + "epoch": 0.589734044753278, + "grad_norm": 750.7109375, + "learning_rate": 4.545432554494128e-06, + "loss": 109.4266, + "step": 71290 + }, + { + "epoch": 0.5898167680026472, + "grad_norm": 1614.88671875, + "learning_rate": 4.544028046145002e-06, + "loss": 115.2326, + "step": 71300 + }, + { + "epoch": 0.5898994912520164, + "grad_norm": 1244.8895263671875, + "learning_rate": 4.542623574075471e-06, + "loss": 115.9653, + "step": 71310 + }, + { + "epoch": 0.5899822145013857, + "grad_norm": 1223.9134521484375, + "learning_rate": 4.541219138397283e-06, + "loss": 107.9536, + "step": 71320 + }, + { + "epoch": 0.5900649377507549, + "grad_norm": 645.8054809570312, + "learning_rate": 4.539814739222182e-06, + "loss": 88.0276, + "step": 71330 + }, + { + "epoch": 0.5901476610001241, + "grad_norm": 906.608154296875, + "learning_rate": 4.538410376661912e-06, + "loss": 97.0788, + "step": 71340 + }, + { + "epoch": 
0.5902303842494934, + "grad_norm": 655.9107666015625, + "learning_rate": 4.537006050828209e-06, + "loss": 102.7088, + "step": 71350 + }, + { + "epoch": 0.5903131074988626, + "grad_norm": 881.0200805664062, + "learning_rate": 4.535601761832811e-06, + "loss": 94.3685, + "step": 71360 + }, + { + "epoch": 0.5903958307482318, + "grad_norm": 593.4007568359375, + "learning_rate": 4.534197509787448e-06, + "loss": 96.9625, + "step": 71370 + }, + { + "epoch": 0.5904785539976011, + "grad_norm": 362.5093688964844, + "learning_rate": 4.5327932948038525e-06, + "loss": 76.0315, + "step": 71380 + }, + { + "epoch": 0.5905612772469703, + "grad_norm": 724.7342529296875, + "learning_rate": 4.5313891169937495e-06, + "loss": 68.4613, + "step": 71390 + }, + { + "epoch": 0.5906440004963395, + "grad_norm": 320.0503845214844, + "learning_rate": 4.529984976468864e-06, + "loss": 59.5351, + "step": 71400 + }, + { + "epoch": 0.5907267237457088, + "grad_norm": 689.7297973632812, + "learning_rate": 4.528580873340916e-06, + "loss": 71.2971, + "step": 71410 + }, + { + "epoch": 0.590809446995078, + "grad_norm": 698.4686889648438, + "learning_rate": 4.5271768077216245e-06, + "loss": 81.2217, + "step": 71420 + }, + { + "epoch": 0.5908921702444472, + "grad_norm": 778.4169311523438, + "learning_rate": 4.525772779722705e-06, + "loss": 78.8058, + "step": 71430 + }, + { + "epoch": 0.5909748934938165, + "grad_norm": 723.3839721679688, + "learning_rate": 4.524368789455872e-06, + "loss": 140.3945, + "step": 71440 + }, + { + "epoch": 0.5910576167431857, + "grad_norm": 1784.62158203125, + "learning_rate": 4.5229648370328276e-06, + "loss": 99.9209, + "step": 71450 + }, + { + "epoch": 0.5911403399925549, + "grad_norm": 688.6642456054688, + "learning_rate": 4.521560922565282e-06, + "loss": 110.5966, + "step": 71460 + }, + { + "epoch": 0.5912230632419242, + "grad_norm": 1047.2376708984375, + "learning_rate": 4.52015704616494e-06, + "loss": 67.9934, + "step": 71470 + }, + { + "epoch": 0.5913057864912934, + "grad_norm": 956.0989379882812, + "learning_rate": 4.518753207943498e-06, + "loss": 77.9636, + "step": 71480 + }, + { + "epoch": 0.5913885097406626, + "grad_norm": 832.9586181640625, + "learning_rate": 4.517349408012656e-06, + "loss": 89.3105, + "step": 71490 + }, + { + "epoch": 0.5914712329900319, + "grad_norm": 1294.6611328125, + "learning_rate": 4.515945646484105e-06, + "loss": 108.4286, + "step": 71500 + }, + { + "epoch": 0.5915539562394011, + "grad_norm": 635.0994262695312, + "learning_rate": 4.514541923469538e-06, + "loss": 74.8464, + "step": 71510 + }, + { + "epoch": 0.5916366794887703, + "grad_norm": 1128.41748046875, + "learning_rate": 4.513138239080641e-06, + "loss": 65.9964, + "step": 71520 + }, + { + "epoch": 0.5917194027381395, + "grad_norm": 722.1233520507812, + "learning_rate": 4.511734593429104e-06, + "loss": 106.5479, + "step": 71530 + }, + { + "epoch": 0.5918021259875088, + "grad_norm": 948.386474609375, + "learning_rate": 4.510330986626602e-06, + "loss": 87.1451, + "step": 71540 + }, + { + "epoch": 0.591884849236878, + "grad_norm": 1609.5699462890625, + "learning_rate": 4.5089274187848144e-06, + "loss": 74.7854, + "step": 71550 + }, + { + "epoch": 0.5919675724862472, + "grad_norm": 1418.9605712890625, + "learning_rate": 4.507523890015421e-06, + "loss": 88.9027, + "step": 71560 + }, + { + "epoch": 0.5920502957356165, + "grad_norm": 553.1968383789062, + "learning_rate": 4.5061204004300905e-06, + "loss": 79.0763, + "step": 71570 + }, + { + "epoch": 0.5921330189849857, + "grad_norm": 1095.764892578125, + "learning_rate": 
4.504716950140492e-06, + "loss": 103.5799, + "step": 71580 + }, + { + "epoch": 0.5922157422343549, + "grad_norm": 527.2064819335938, + "learning_rate": 4.503313539258294e-06, + "loss": 88.4465, + "step": 71590 + }, + { + "epoch": 0.5922984654837242, + "grad_norm": 1063.0721435546875, + "learning_rate": 4.501910167895158e-06, + "loss": 98.2458, + "step": 71600 + }, + { + "epoch": 0.5923811887330934, + "grad_norm": 2767.47705078125, + "learning_rate": 4.500506836162746e-06, + "loss": 92.3343, + "step": 71610 + }, + { + "epoch": 0.5924639119824626, + "grad_norm": 455.406982421875, + "learning_rate": 4.499103544172711e-06, + "loss": 81.4368, + "step": 71620 + }, + { + "epoch": 0.5925466352318319, + "grad_norm": 850.3991088867188, + "learning_rate": 4.497700292036708e-06, + "loss": 70.8303, + "step": 71630 + }, + { + "epoch": 0.5926293584812011, + "grad_norm": 778.4962768554688, + "learning_rate": 4.4962970798663865e-06, + "loss": 66.9589, + "step": 71640 + }, + { + "epoch": 0.5927120817305703, + "grad_norm": 543.8009033203125, + "learning_rate": 4.494893907773394e-06, + "loss": 69.5775, + "step": 71650 + }, + { + "epoch": 0.5927948049799396, + "grad_norm": 799.34375, + "learning_rate": 4.493490775869377e-06, + "loss": 78.267, + "step": 71660 + }, + { + "epoch": 0.5928775282293088, + "grad_norm": 1260.3624267578125, + "learning_rate": 4.492087684265975e-06, + "loss": 91.3813, + "step": 71670 + }, + { + "epoch": 0.592960251478678, + "grad_norm": 714.5103149414062, + "learning_rate": 4.490684633074824e-06, + "loss": 87.9972, + "step": 71680 + }, + { + "epoch": 0.5930429747280473, + "grad_norm": 419.715087890625, + "learning_rate": 4.489281622407559e-06, + "loss": 85.3557, + "step": 71690 + }, + { + "epoch": 0.5931256979774165, + "grad_norm": 939.987060546875, + "learning_rate": 4.487878652375813e-06, + "loss": 132.4992, + "step": 71700 + }, + { + "epoch": 0.5932084212267857, + "grad_norm": 623.05712890625, + "learning_rate": 4.486475723091211e-06, + "loss": 97.7154, + "step": 71710 + }, + { + "epoch": 0.593291144476155, + "grad_norm": 852.7836303710938, + "learning_rate": 4.485072834665379e-06, + "loss": 133.3668, + "step": 71720 + }, + { + "epoch": 0.5933738677255243, + "grad_norm": 830.4113159179688, + "learning_rate": 4.483669987209938e-06, + "loss": 85.3274, + "step": 71730 + }, + { + "epoch": 0.5934565909748934, + "grad_norm": 1263.1221923828125, + "learning_rate": 4.482267180836508e-06, + "loss": 96.8079, + "step": 71740 + }, + { + "epoch": 0.5935393142242628, + "grad_norm": 605.0487060546875, + "learning_rate": 4.4808644156567e-06, + "loss": 86.8117, + "step": 71750 + }, + { + "epoch": 0.593622037473632, + "grad_norm": 524.0765380859375, + "learning_rate": 4.479461691782129e-06, + "loss": 74.157, + "step": 71760 + }, + { + "epoch": 0.5937047607230012, + "grad_norm": 708.2615966796875, + "learning_rate": 4.478059009324403e-06, + "loss": 106.5555, + "step": 71770 + }, + { + "epoch": 0.5937874839723705, + "grad_norm": 427.41162109375, + "learning_rate": 4.476656368395126e-06, + "loss": 91.2204, + "step": 71780 + }, + { + "epoch": 0.5938702072217397, + "grad_norm": 744.795654296875, + "learning_rate": 4.4752537691059e-06, + "loss": 78.5461, + "step": 71790 + }, + { + "epoch": 0.5939529304711089, + "grad_norm": 1040.6856689453125, + "learning_rate": 4.473851211568323e-06, + "loss": 105.2389, + "step": 71800 + }, + { + "epoch": 0.5940356537204782, + "grad_norm": 558.6827392578125, + "learning_rate": 4.472448695893991e-06, + "loss": 97.6422, + "step": 71810 + }, + { + "epoch": 0.5941183769698474, 
+ "grad_norm": 1102.7012939453125, + "learning_rate": 4.471046222194494e-06, + "loss": 104.5667, + "step": 71820 + }, + { + "epoch": 0.5942011002192166, + "grad_norm": 1997.569091796875, + "learning_rate": 4.469643790581422e-06, + "loss": 84.4513, + "step": 71830 + }, + { + "epoch": 0.5942838234685859, + "grad_norm": 1167.2252197265625, + "learning_rate": 4.468241401166359e-06, + "loss": 102.4033, + "step": 71840 + }, + { + "epoch": 0.5943665467179551, + "grad_norm": 1216.229736328125, + "learning_rate": 4.466839054060888e-06, + "loss": 73.6999, + "step": 71850 + }, + { + "epoch": 0.5944492699673243, + "grad_norm": 371.2035217285156, + "learning_rate": 4.465436749376586e-06, + "loss": 87.3126, + "step": 71860 + }, + { + "epoch": 0.5945319932166936, + "grad_norm": 993.4617309570312, + "learning_rate": 4.464034487225031e-06, + "loss": 84.5187, + "step": 71870 + }, + { + "epoch": 0.5946147164660628, + "grad_norm": 668.97802734375, + "learning_rate": 4.462632267717789e-06, + "loss": 93.8088, + "step": 71880 + }, + { + "epoch": 0.594697439715432, + "grad_norm": 723.9701538085938, + "learning_rate": 4.461230090966433e-06, + "loss": 92.2385, + "step": 71890 + }, + { + "epoch": 0.5947801629648013, + "grad_norm": 1200.336669921875, + "learning_rate": 4.4598279570825244e-06, + "loss": 108.3329, + "step": 71900 + }, + { + "epoch": 0.5948628862141705, + "grad_norm": 749.9968872070312, + "learning_rate": 4.458425866177628e-06, + "loss": 96.5593, + "step": 71910 + }, + { + "epoch": 0.5949456094635397, + "grad_norm": 984.7999877929688, + "learning_rate": 4.457023818363299e-06, + "loss": 105.6873, + "step": 71920 + }, + { + "epoch": 0.595028332712909, + "grad_norm": 679.338134765625, + "learning_rate": 4.455621813751093e-06, + "loss": 73.7637, + "step": 71930 + }, + { + "epoch": 0.5951110559622782, + "grad_norm": 1122.4134521484375, + "learning_rate": 4.45421985245256e-06, + "loss": 98.8108, + "step": 71940 + }, + { + "epoch": 0.5951937792116474, + "grad_norm": 988.9681396484375, + "learning_rate": 4.452817934579249e-06, + "loss": 84.0397, + "step": 71950 + }, + { + "epoch": 0.5952765024610167, + "grad_norm": 918.6084594726562, + "learning_rate": 4.451416060242707e-06, + "loss": 103.6519, + "step": 71960 + }, + { + "epoch": 0.5953592257103859, + "grad_norm": 676.3186645507812, + "learning_rate": 4.450014229554468e-06, + "loss": 80.6245, + "step": 71970 + }, + { + "epoch": 0.5954419489597551, + "grad_norm": 987.3781127929688, + "learning_rate": 4.448612442626073e-06, + "loss": 89.4239, + "step": 71980 + }, + { + "epoch": 0.5955246722091244, + "grad_norm": 1492.4959716796875, + "learning_rate": 4.447210699569055e-06, + "loss": 85.7063, + "step": 71990 + }, + { + "epoch": 0.5956073954584936, + "grad_norm": 614.7178955078125, + "learning_rate": 4.445809000494945e-06, + "loss": 101.3085, + "step": 72000 + }, + { + "epoch": 0.5956901187078628, + "grad_norm": 1345.1634521484375, + "learning_rate": 4.4444073455152705e-06, + "loss": 87.2096, + "step": 72010 + }, + { + "epoch": 0.5957728419572321, + "grad_norm": 536.4259033203125, + "learning_rate": 4.443005734741553e-06, + "loss": 71.8604, + "step": 72020 + }, + { + "epoch": 0.5958555652066013, + "grad_norm": 652.9493408203125, + "learning_rate": 4.441604168285313e-06, + "loss": 121.6692, + "step": 72030 + }, + { + "epoch": 0.5959382884559705, + "grad_norm": 1071.4775390625, + "learning_rate": 4.440202646258067e-06, + "loss": 125.7619, + "step": 72040 + }, + { + "epoch": 0.5960210117053398, + "grad_norm": 540.5606689453125, + "learning_rate": 
4.4388011687713274e-06, + "loss": 76.9964, + "step": 72050 + }, + { + "epoch": 0.596103734954709, + "grad_norm": 979.0853881835938, + "learning_rate": 4.437399735936603e-06, + "loss": 86.8726, + "step": 72060 + }, + { + "epoch": 0.5961864582040782, + "grad_norm": 1528.9964599609375, + "learning_rate": 4.435998347865399e-06, + "loss": 139.583, + "step": 72070 + }, + { + "epoch": 0.5962691814534475, + "grad_norm": 581.6967163085938, + "learning_rate": 4.4345970046692174e-06, + "loss": 79.9947, + "step": 72080 + }, + { + "epoch": 0.5963519047028167, + "grad_norm": 915.5377807617188, + "learning_rate": 4.433195706459558e-06, + "loss": 67.4258, + "step": 72090 + }, + { + "epoch": 0.5964346279521859, + "grad_norm": 1992.865234375, + "learning_rate": 4.431794453347915e-06, + "loss": 97.0232, + "step": 72100 + }, + { + "epoch": 0.5965173512015552, + "grad_norm": 653.1949462890625, + "learning_rate": 4.430393245445781e-06, + "loss": 99.4039, + "step": 72110 + }, + { + "epoch": 0.5966000744509244, + "grad_norm": 1475.4862060546875, + "learning_rate": 4.42899208286464e-06, + "loss": 134.1732, + "step": 72120 + }, + { + "epoch": 0.5966827977002936, + "grad_norm": 985.6160888671875, + "learning_rate": 4.427590965715981e-06, + "loss": 93.6811, + "step": 72130 + }, + { + "epoch": 0.596765520949663, + "grad_norm": 1121.754638671875, + "learning_rate": 4.426189894111281e-06, + "loss": 98.63, + "step": 72140 + }, + { + "epoch": 0.5968482441990322, + "grad_norm": 778.4381103515625, + "learning_rate": 4.4247888681620165e-06, + "loss": 95.378, + "step": 72150 + }, + { + "epoch": 0.5969309674484014, + "grad_norm": 1081.6031494140625, + "learning_rate": 4.423387887979663e-06, + "loss": 76.984, + "step": 72160 + }, + { + "epoch": 0.5970136906977707, + "grad_norm": 844.283203125, + "learning_rate": 4.421986953675687e-06, + "loss": 95.1035, + "step": 72170 + }, + { + "epoch": 0.5970964139471399, + "grad_norm": 1140.15869140625, + "learning_rate": 4.420586065361558e-06, + "loss": 98.3029, + "step": 72180 + }, + { + "epoch": 0.5971791371965091, + "grad_norm": 1047.5072021484375, + "learning_rate": 4.419185223148737e-06, + "loss": 71.558, + "step": 72190 + }, + { + "epoch": 0.5972618604458784, + "grad_norm": 512.5674438476562, + "learning_rate": 4.417784427148681e-06, + "loss": 118.2006, + "step": 72200 + }, + { + "epoch": 0.5973445836952476, + "grad_norm": 922.7410888671875, + "learning_rate": 4.4163836774728466e-06, + "loss": 88.5922, + "step": 72210 + }, + { + "epoch": 0.5974273069446168, + "grad_norm": 695.3885498046875, + "learning_rate": 4.414982974232686e-06, + "loss": 85.0964, + "step": 72220 + }, + { + "epoch": 0.5975100301939861, + "grad_norm": 756.3115844726562, + "learning_rate": 4.413582317539644e-06, + "loss": 84.8309, + "step": 72230 + }, + { + "epoch": 0.5975927534433553, + "grad_norm": 893.6836547851562, + "learning_rate": 4.412181707505167e-06, + "loss": 76.9076, + "step": 72240 + }, + { + "epoch": 0.5976754766927245, + "grad_norm": 830.41943359375, + "learning_rate": 4.410781144240692e-06, + "loss": 99.8375, + "step": 72250 + }, + { + "epoch": 0.5977581999420937, + "grad_norm": 520.194580078125, + "learning_rate": 4.409380627857658e-06, + "loss": 77.2131, + "step": 72260 + }, + { + "epoch": 0.597840923191463, + "grad_norm": 1167.1990966796875, + "learning_rate": 4.4079801584674955e-06, + "loss": 106.8954, + "step": 72270 + }, + { + "epoch": 0.5979236464408322, + "grad_norm": 987.2964477539062, + "learning_rate": 4.406579736181636e-06, + "loss": 109.5783, + "step": 72280 + }, + { + "epoch": 
0.5980063696902014, + "grad_norm": 755.127197265625, + "learning_rate": 4.405179361111503e-06, + "loss": 102.5628, + "step": 72290 + }, + { + "epoch": 0.5980890929395707, + "grad_norm": 858.6559448242188, + "learning_rate": 4.403779033368521e-06, + "loss": 83.2092, + "step": 72300 + }, + { + "epoch": 0.5981718161889399, + "grad_norm": 1175.3221435546875, + "learning_rate": 4.402378753064102e-06, + "loss": 120.481, + "step": 72310 + }, + { + "epoch": 0.5982545394383091, + "grad_norm": 1265.4703369140625, + "learning_rate": 4.400978520309663e-06, + "loss": 126.6442, + "step": 72320 + }, + { + "epoch": 0.5983372626876784, + "grad_norm": 837.8228759765625, + "learning_rate": 4.399578335216615e-06, + "loss": 83.0502, + "step": 72330 + }, + { + "epoch": 0.5984199859370476, + "grad_norm": 454.5643310546875, + "learning_rate": 4.3981781978963625e-06, + "loss": 195.8178, + "step": 72340 + }, + { + "epoch": 0.5985027091864168, + "grad_norm": 1586.3955078125, + "learning_rate": 4.39677810846031e-06, + "loss": 104.6424, + "step": 72350 + }, + { + "epoch": 0.5985854324357861, + "grad_norm": 472.3536376953125, + "learning_rate": 4.395378067019854e-06, + "loss": 103.8725, + "step": 72360 + }, + { + "epoch": 0.5986681556851553, + "grad_norm": 1100.48486328125, + "learning_rate": 4.39397807368639e-06, + "loss": 95.1332, + "step": 72370 + }, + { + "epoch": 0.5987508789345245, + "grad_norm": 836.9981079101562, + "learning_rate": 4.39257812857131e-06, + "loss": 88.9254, + "step": 72380 + }, + { + "epoch": 0.5988336021838938, + "grad_norm": 861.1360473632812, + "learning_rate": 4.391178231786003e-06, + "loss": 154.0604, + "step": 72390 + }, + { + "epoch": 0.598916325433263, + "grad_norm": 801.0167846679688, + "learning_rate": 4.389778383441847e-06, + "loss": 57.2983, + "step": 72400 + }, + { + "epoch": 0.5989990486826322, + "grad_norm": 737.025390625, + "learning_rate": 4.388378583650225e-06, + "loss": 106.2906, + "step": 72410 + }, + { + "epoch": 0.5990817719320015, + "grad_norm": 770.6280517578125, + "learning_rate": 4.386978832522512e-06, + "loss": 60.7984, + "step": 72420 + }, + { + "epoch": 0.5991644951813707, + "grad_norm": 454.4928894042969, + "learning_rate": 4.38557913017008e-06, + "loss": 85.5769, + "step": 72430 + }, + { + "epoch": 0.5992472184307399, + "grad_norm": 799.1620483398438, + "learning_rate": 4.384179476704297e-06, + "loss": 69.6634, + "step": 72440 + }, + { + "epoch": 0.5993299416801092, + "grad_norm": 1317.85693359375, + "learning_rate": 4.382779872236527e-06, + "loss": 96.343, + "step": 72450 + }, + { + "epoch": 0.5994126649294784, + "grad_norm": 550.043212890625, + "learning_rate": 4.3813803168781295e-06, + "loss": 66.2317, + "step": 72460 + }, + { + "epoch": 0.5994953881788476, + "grad_norm": 359.5018005371094, + "learning_rate": 4.379980810740463e-06, + "loss": 92.4579, + "step": 72470 + }, + { + "epoch": 0.5995781114282169, + "grad_norm": 773.148193359375, + "learning_rate": 4.378581353934876e-06, + "loss": 109.3504, + "step": 72480 + }, + { + "epoch": 0.5996608346775861, + "grad_norm": 994.1666259765625, + "learning_rate": 4.3771819465727185e-06, + "loss": 106.4414, + "step": 72490 + }, + { + "epoch": 0.5997435579269553, + "grad_norm": 1295.1717529296875, + "learning_rate": 4.3757825887653345e-06, + "loss": 86.4112, + "step": 72500 + }, + { + "epoch": 0.5998262811763246, + "grad_norm": 1010.71142578125, + "learning_rate": 4.374383280624066e-06, + "loss": 85.4114, + "step": 72510 + }, + { + "epoch": 0.5999090044256938, + "grad_norm": 731.9288940429688, + "learning_rate": 
4.372984022260249e-06, + "loss": 104.8326, + "step": 72520 + }, + { + "epoch": 0.599991727675063, + "grad_norm": 670.1849975585938, + "learning_rate": 4.371584813785216e-06, + "loss": 86.4142, + "step": 72530 + }, + { + "epoch": 0.6000744509244323, + "grad_norm": 669.2977905273438, + "learning_rate": 4.370185655310295e-06, + "loss": 116.1969, + "step": 72540 + }, + { + "epoch": 0.6001571741738015, + "grad_norm": 729.8284912109375, + "learning_rate": 4.368786546946811e-06, + "loss": 79.6399, + "step": 72550 + }, + { + "epoch": 0.6002398974231707, + "grad_norm": 600.2257690429688, + "learning_rate": 4.367387488806086e-06, + "loss": 77.0065, + "step": 72560 + }, + { + "epoch": 0.60032262067254, + "grad_norm": 879.3544311523438, + "learning_rate": 4.365988480999434e-06, + "loss": 83.5863, + "step": 72570 + }, + { + "epoch": 0.6004053439219093, + "grad_norm": 1029.636962890625, + "learning_rate": 4.364589523638168e-06, + "loss": 103.5862, + "step": 72580 + }, + { + "epoch": 0.6004880671712785, + "grad_norm": 1054.551513671875, + "learning_rate": 4.363190616833598e-06, + "loss": 81.133, + "step": 72590 + }, + { + "epoch": 0.6005707904206478, + "grad_norm": 1133.460693359375, + "learning_rate": 4.361791760697027e-06, + "loss": 104.2316, + "step": 72600 + }, + { + "epoch": 0.600653513670017, + "grad_norm": 795.6099243164062, + "learning_rate": 4.360392955339758e-06, + "loss": 114.3038, + "step": 72610 + }, + { + "epoch": 0.6007362369193862, + "grad_norm": 892.427001953125, + "learning_rate": 4.358994200873085e-06, + "loss": 90.4173, + "step": 72620 + }, + { + "epoch": 0.6008189601687555, + "grad_norm": 511.8495788574219, + "learning_rate": 4.357595497408303e-06, + "loss": 71.118, + "step": 72630 + }, + { + "epoch": 0.6009016834181247, + "grad_norm": 1011.441650390625, + "learning_rate": 4.356196845056699e-06, + "loss": 97.9198, + "step": 72640 + }, + { + "epoch": 0.6009844066674939, + "grad_norm": 1414.120361328125, + "learning_rate": 4.3547982439295576e-06, + "loss": 116.4794, + "step": 72650 + }, + { + "epoch": 0.6010671299168632, + "grad_norm": 1085.852294921875, + "learning_rate": 4.353399694138158e-06, + "loss": 69.8157, + "step": 72660 + }, + { + "epoch": 0.6011498531662324, + "grad_norm": 836.6179809570312, + "learning_rate": 4.352001195793778e-06, + "loss": 68.6478, + "step": 72670 + }, + { + "epoch": 0.6012325764156016, + "grad_norm": 1110.6588134765625, + "learning_rate": 4.350602749007688e-06, + "loss": 70.8156, + "step": 72680 + }, + { + "epoch": 0.6013152996649709, + "grad_norm": 648.1185302734375, + "learning_rate": 4.349204353891158e-06, + "loss": 92.2575, + "step": 72690 + }, + { + "epoch": 0.6013980229143401, + "grad_norm": 998.8807373046875, + "learning_rate": 4.347806010555448e-06, + "loss": 86.0575, + "step": 72700 + }, + { + "epoch": 0.6014807461637093, + "grad_norm": 579.22265625, + "learning_rate": 4.346407719111823e-06, + "loss": 122.89, + "step": 72710 + }, + { + "epoch": 0.6015634694130786, + "grad_norm": 1019.9585571289062, + "learning_rate": 4.3450094796715354e-06, + "loss": 119.5738, + "step": 72720 + }, + { + "epoch": 0.6016461926624478, + "grad_norm": 903.5271606445312, + "learning_rate": 4.343611292345839e-06, + "loss": 107.9593, + "step": 72730 + }, + { + "epoch": 0.601728915911817, + "grad_norm": 816.7589111328125, + "learning_rate": 4.342213157245979e-06, + "loss": 84.478, + "step": 72740 + }, + { + "epoch": 0.6018116391611863, + "grad_norm": 550.0929565429688, + "learning_rate": 4.340815074483199e-06, + "loss": 69.082, + "step": 72750 + }, + { + "epoch": 
0.6018943624105555, + "grad_norm": 576.094482421875, + "learning_rate": 4.339417044168738e-06, + "loss": 81.2501, + "step": 72760 + }, + { + "epoch": 0.6019770856599247, + "grad_norm": 1026.7596435546875, + "learning_rate": 4.338019066413832e-06, + "loss": 96.2626, + "step": 72770 + }, + { + "epoch": 0.602059808909294, + "grad_norm": 739.3609008789062, + "learning_rate": 4.33662114132971e-06, + "loss": 80.703, + "step": 72780 + }, + { + "epoch": 0.6021425321586632, + "grad_norm": 1358.84716796875, + "learning_rate": 4.335223269027599e-06, + "loss": 113.0873, + "step": 72790 + }, + { + "epoch": 0.6022252554080324, + "grad_norm": 932.6506958007812, + "learning_rate": 4.333825449618721e-06, + "loss": 76.9534, + "step": 72800 + }, + { + "epoch": 0.6023079786574017, + "grad_norm": 744.7998046875, + "learning_rate": 4.332427683214295e-06, + "loss": 95.1433, + "step": 72810 + }, + { + "epoch": 0.6023907019067709, + "grad_norm": 856.265380859375, + "learning_rate": 4.331029969925538e-06, + "loss": 69.2679, + "step": 72820 + }, + { + "epoch": 0.6024734251561401, + "grad_norm": 1203.6240234375, + "learning_rate": 4.329632309863652e-06, + "loss": 75.6211, + "step": 72830 + }, + { + "epoch": 0.6025561484055094, + "grad_norm": 814.6486206054688, + "learning_rate": 4.328234703139847e-06, + "loss": 83.6226, + "step": 72840 + }, + { + "epoch": 0.6026388716548786, + "grad_norm": 1125.9552001953125, + "learning_rate": 4.326837149865325e-06, + "loss": 120.5001, + "step": 72850 + }, + { + "epoch": 0.6027215949042478, + "grad_norm": 682.9524536132812, + "learning_rate": 4.325439650151281e-06, + "loss": 59.8708, + "step": 72860 + }, + { + "epoch": 0.6028043181536171, + "grad_norm": 1073.811279296875, + "learning_rate": 4.324042204108908e-06, + "loss": 79.1899, + "step": 72870 + }, + { + "epoch": 0.6028870414029863, + "grad_norm": 764.7388305664062, + "learning_rate": 4.322644811849395e-06, + "loss": 75.46, + "step": 72880 + }, + { + "epoch": 0.6029697646523555, + "grad_norm": 827.508056640625, + "learning_rate": 4.321247473483924e-06, + "loss": 73.4172, + "step": 72890 + }, + { + "epoch": 0.6030524879017248, + "grad_norm": 989.9174194335938, + "learning_rate": 4.319850189123681e-06, + "loss": 79.5693, + "step": 72900 + }, + { + "epoch": 0.603135211151094, + "grad_norm": 1122.249267578125, + "learning_rate": 4.3184529588798335e-06, + "loss": 98.8418, + "step": 72910 + }, + { + "epoch": 0.6032179344004632, + "grad_norm": 922.3504028320312, + "learning_rate": 4.3170557828635565e-06, + "loss": 139.0302, + "step": 72920 + }, + { + "epoch": 0.6033006576498325, + "grad_norm": 818.5303955078125, + "learning_rate": 4.315658661186016e-06, + "loss": 77.4534, + "step": 72930 + }, + { + "epoch": 0.6033833808992017, + "grad_norm": 795.8893432617188, + "learning_rate": 4.314261593958376e-06, + "loss": 88.9662, + "step": 72940 + }, + { + "epoch": 0.6034661041485709, + "grad_norm": 891.454833984375, + "learning_rate": 4.3128645812917935e-06, + "loss": 83.1203, + "step": 72950 + }, + { + "epoch": 0.6035488273979402, + "grad_norm": 707.6808471679688, + "learning_rate": 4.311467623297423e-06, + "loss": 88.5659, + "step": 72960 + }, + { + "epoch": 0.6036315506473094, + "grad_norm": 584.8573608398438, + "learning_rate": 4.310070720086414e-06, + "loss": 80.7194, + "step": 72970 + }, + { + "epoch": 0.6037142738966786, + "grad_norm": 665.1079711914062, + "learning_rate": 4.30867387176991e-06, + "loss": 73.1562, + "step": 72980 + }, + { + "epoch": 0.6037969971460478, + "grad_norm": 1153.0792236328125, + "learning_rate": 
4.307277078459057e-06, + "loss": 89.2579, + "step": 72990 + }, + { + "epoch": 0.6038797203954172, + "grad_norm": 922.5089721679688, + "learning_rate": 4.305880340264985e-06, + "loss": 100.4802, + "step": 73000 + }, + { + "epoch": 0.6039624436447864, + "grad_norm": 940.73681640625, + "learning_rate": 4.3044836572988285e-06, + "loss": 78.375, + "step": 73010 + }, + { + "epoch": 0.6040451668941555, + "grad_norm": 867.5308227539062, + "learning_rate": 4.3030870296717155e-06, + "loss": 87.0764, + "step": 73020 + }, + { + "epoch": 0.6041278901435249, + "grad_norm": 1026.0672607421875, + "learning_rate": 4.301690457494769e-06, + "loss": 99.893, + "step": 73030 + }, + { + "epoch": 0.6042106133928941, + "grad_norm": 835.3468017578125, + "learning_rate": 4.300293940879108e-06, + "loss": 67.1481, + "step": 73040 + }, + { + "epoch": 0.6042933366422633, + "grad_norm": 1170.596923828125, + "learning_rate": 4.298897479935847e-06, + "loss": 110.3714, + "step": 73050 + }, + { + "epoch": 0.6043760598916326, + "grad_norm": 505.8780822753906, + "learning_rate": 4.297501074776097e-06, + "loss": 94.121, + "step": 73060 + }, + { + "epoch": 0.6044587831410018, + "grad_norm": 674.9179077148438, + "learning_rate": 4.296104725510961e-06, + "loss": 79.6617, + "step": 73070 + }, + { + "epoch": 0.604541506390371, + "grad_norm": 982.7257690429688, + "learning_rate": 4.294708432251544e-06, + "loss": 82.3634, + "step": 73080 + }, + { + "epoch": 0.6046242296397403, + "grad_norm": 908.1831665039062, + "learning_rate": 4.293312195108938e-06, + "loss": 96.1006, + "step": 73090 + }, + { + "epoch": 0.6047069528891095, + "grad_norm": 594.184814453125, + "learning_rate": 4.291916014194238e-06, + "loss": 87.1127, + "step": 73100 + }, + { + "epoch": 0.6047896761384787, + "grad_norm": 1231.35107421875, + "learning_rate": 4.290519889618531e-06, + "loss": 89.3334, + "step": 73110 + }, + { + "epoch": 0.604872399387848, + "grad_norm": 1115.8880615234375, + "learning_rate": 4.2891238214928995e-06, + "loss": 65.0904, + "step": 73120 + }, + { + "epoch": 0.6049551226372172, + "grad_norm": 802.1785278320312, + "learning_rate": 4.287727809928423e-06, + "loss": 83.0461, + "step": 73130 + }, + { + "epoch": 0.6050378458865864, + "grad_norm": 779.43896484375, + "learning_rate": 4.286331855036177e-06, + "loss": 94.5422, + "step": 73140 + }, + { + "epoch": 0.6051205691359557, + "grad_norm": 808.46337890625, + "learning_rate": 4.284935956927229e-06, + "loss": 96.0789, + "step": 73150 + }, + { + "epoch": 0.6052032923853249, + "grad_norm": 1208.354736328125, + "learning_rate": 4.283540115712647e-06, + "loss": 92.2509, + "step": 73160 + }, + { + "epoch": 0.6052860156346941, + "grad_norm": 839.9061889648438, + "learning_rate": 4.282144331503488e-06, + "loss": 89.2915, + "step": 73170 + }, + { + "epoch": 0.6053687388840634, + "grad_norm": 876.1185913085938, + "learning_rate": 4.280748604410811e-06, + "loss": 70.9787, + "step": 73180 + }, + { + "epoch": 0.6054514621334326, + "grad_norm": 6405.99951171875, + "learning_rate": 4.279352934545666e-06, + "loss": 128.2961, + "step": 73190 + }, + { + "epoch": 0.6055341853828018, + "grad_norm": 900.7869873046875, + "learning_rate": 4.277957322019101e-06, + "loss": 131.7455, + "step": 73200 + }, + { + "epoch": 0.6056169086321711, + "grad_norm": 611.6780395507812, + "learning_rate": 4.276561766942158e-06, + "loss": 79.8027, + "step": 73210 + }, + { + "epoch": 0.6056996318815403, + "grad_norm": 584.4486083984375, + "learning_rate": 4.275166269425874e-06, + "loss": 94.241, + "step": 73220 + }, + { + "epoch": 
0.6057823551309095, + "grad_norm": 680.3448486328125, + "learning_rate": 4.273770829581285e-06, + "loss": 82.9358, + "step": 73230 + }, + { + "epoch": 0.6058650783802788, + "grad_norm": 1168.8824462890625, + "learning_rate": 4.272375447519418e-06, + "loss": 84.9492, + "step": 73240 + }, + { + "epoch": 0.605947801629648, + "grad_norm": 1853.1300048828125, + "learning_rate": 4.270980123351299e-06, + "loss": 88.8922, + "step": 73250 + }, + { + "epoch": 0.6060305248790172, + "grad_norm": 588.9846801757812, + "learning_rate": 4.269584857187942e-06, + "loss": 86.5293, + "step": 73260 + }, + { + "epoch": 0.6061132481283865, + "grad_norm": 684.2811889648438, + "learning_rate": 4.268189649140369e-06, + "loss": 75.5877, + "step": 73270 + }, + { + "epoch": 0.6061959713777557, + "grad_norm": 751.708251953125, + "learning_rate": 4.266794499319585e-06, + "loss": 94.9124, + "step": 73280 + }, + { + "epoch": 0.6062786946271249, + "grad_norm": 709.6283569335938, + "learning_rate": 4.265399407836598e-06, + "loss": 88.3451, + "step": 73290 + }, + { + "epoch": 0.6063614178764942, + "grad_norm": 558.1085815429688, + "learning_rate": 4.26400437480241e-06, + "loss": 131.0363, + "step": 73300 + }, + { + "epoch": 0.6064441411258634, + "grad_norm": 655.4963989257812, + "learning_rate": 4.262609400328015e-06, + "loss": 95.9021, + "step": 73310 + }, + { + "epoch": 0.6065268643752326, + "grad_norm": 762.81689453125, + "learning_rate": 4.2612144845244044e-06, + "loss": 92.3398, + "step": 73320 + }, + { + "epoch": 0.6066095876246019, + "grad_norm": 1324.61962890625, + "learning_rate": 4.259819627502571e-06, + "loss": 86.3487, + "step": 73330 + }, + { + "epoch": 0.6066923108739711, + "grad_norm": 915.4453125, + "learning_rate": 4.258424829373491e-06, + "loss": 95.3454, + "step": 73340 + }, + { + "epoch": 0.6067750341233403, + "grad_norm": 1026.8973388671875, + "learning_rate": 4.257030090248142e-06, + "loss": 121.2151, + "step": 73350 + }, + { + "epoch": 0.6068577573727096, + "grad_norm": 995.72412109375, + "learning_rate": 4.2556354102374994e-06, + "loss": 93.051, + "step": 73360 + }, + { + "epoch": 0.6069404806220788, + "grad_norm": 1735.9681396484375, + "learning_rate": 4.254240789452532e-06, + "loss": 123.005, + "step": 73370 + }, + { + "epoch": 0.607023203871448, + "grad_norm": 819.5543212890625, + "learning_rate": 4.252846228004203e-06, + "loss": 89.1001, + "step": 73380 + }, + { + "epoch": 0.6071059271208173, + "grad_norm": 981.048828125, + "learning_rate": 4.25145172600347e-06, + "loss": 94.8838, + "step": 73390 + }, + { + "epoch": 0.6071886503701865, + "grad_norm": 996.7430419921875, + "learning_rate": 4.2500572835612876e-06, + "loss": 130.0341, + "step": 73400 + }, + { + "epoch": 0.6072713736195557, + "grad_norm": 890.7987060546875, + "learning_rate": 4.248662900788605e-06, + "loss": 95.0139, + "step": 73410 + }, + { + "epoch": 0.607354096868925, + "grad_norm": 766.2911376953125, + "learning_rate": 4.247268577796368e-06, + "loss": 61.7691, + "step": 73420 + }, + { + "epoch": 0.6074368201182943, + "grad_norm": 953.1488037109375, + "learning_rate": 4.245874314695516e-06, + "loss": 146.5053, + "step": 73430 + }, + { + "epoch": 0.6075195433676635, + "grad_norm": 1011.7604370117188, + "learning_rate": 4.244480111596984e-06, + "loss": 85.3021, + "step": 73440 + }, + { + "epoch": 0.6076022666170328, + "grad_norm": 575.0761108398438, + "learning_rate": 4.2430859686117e-06, + "loss": 95.1132, + "step": 73450 + }, + { + "epoch": 0.607684989866402, + "grad_norm": 753.287353515625, + "learning_rate": 4.241691885850593e-06, 
+ "loss": 75.5522, + "step": 73460 + }, + { + "epoch": 0.6077677131157712, + "grad_norm": 2446.799072265625, + "learning_rate": 4.240297863424582e-06, + "loss": 84.9961, + "step": 73470 + }, + { + "epoch": 0.6078504363651405, + "grad_norm": 1110.462158203125, + "learning_rate": 4.2389039014445846e-06, + "loss": 92.255, + "step": 73480 + }, + { + "epoch": 0.6079331596145097, + "grad_norm": 927.8515014648438, + "learning_rate": 4.23751000002151e-06, + "loss": 101.0412, + "step": 73490 + }, + { + "epoch": 0.6080158828638789, + "grad_norm": 1213.8529052734375, + "learning_rate": 4.2361161592662655e-06, + "loss": 91.7586, + "step": 73500 + }, + { + "epoch": 0.6080986061132482, + "grad_norm": 960.1309814453125, + "learning_rate": 4.234722379289753e-06, + "loss": 129.1659, + "step": 73510 + }, + { + "epoch": 0.6081813293626174, + "grad_norm": 704.3455810546875, + "learning_rate": 4.233328660202869e-06, + "loss": 67.6366, + "step": 73520 + }, + { + "epoch": 0.6082640526119866, + "grad_norm": 843.2509765625, + "learning_rate": 4.231935002116504e-06, + "loss": 78.1124, + "step": 73530 + }, + { + "epoch": 0.6083467758613559, + "grad_norm": 598.6126708984375, + "learning_rate": 4.230541405141546e-06, + "loss": 81.4852, + "step": 73540 + }, + { + "epoch": 0.6084294991107251, + "grad_norm": 1050.9449462890625, + "learning_rate": 4.229147869388875e-06, + "loss": 94.9926, + "step": 73550 + }, + { + "epoch": 0.6085122223600943, + "grad_norm": 839.3172607421875, + "learning_rate": 4.227754394969373e-06, + "loss": 104.2477, + "step": 73560 + }, + { + "epoch": 0.6085949456094636, + "grad_norm": 916.7781372070312, + "learning_rate": 4.226360981993909e-06, + "loss": 104.4644, + "step": 73570 + }, + { + "epoch": 0.6086776688588328, + "grad_norm": 1136.1072998046875, + "learning_rate": 4.224967630573351e-06, + "loss": 97.9894, + "step": 73580 + }, + { + "epoch": 0.608760392108202, + "grad_norm": 1269.821044921875, + "learning_rate": 4.2235743408185635e-06, + "loss": 80.4433, + "step": 73590 + }, + { + "epoch": 0.6088431153575713, + "grad_norm": 922.0454711914062, + "learning_rate": 4.222181112840401e-06, + "loss": 86.2411, + "step": 73600 + }, + { + "epoch": 0.6089258386069405, + "grad_norm": 841.0752563476562, + "learning_rate": 4.220787946749717e-06, + "loss": 89.3184, + "step": 73610 + }, + { + "epoch": 0.6090085618563097, + "grad_norm": 681.7354736328125, + "learning_rate": 4.219394842657361e-06, + "loss": 86.7437, + "step": 73620 + }, + { + "epoch": 0.609091285105679, + "grad_norm": 935.4651489257812, + "learning_rate": 4.218001800674174e-06, + "loss": 88.4934, + "step": 73630 + }, + { + "epoch": 0.6091740083550482, + "grad_norm": 675.4620971679688, + "learning_rate": 4.216608820910995e-06, + "loss": 67.0971, + "step": 73640 + }, + { + "epoch": 0.6092567316044174, + "grad_norm": 710.9379272460938, + "learning_rate": 4.2152159034786554e-06, + "loss": 62.8751, + "step": 73650 + }, + { + "epoch": 0.6093394548537867, + "grad_norm": 829.56103515625, + "learning_rate": 4.213823048487987e-06, + "loss": 108.2473, + "step": 73660 + }, + { + "epoch": 0.6094221781031559, + "grad_norm": 757.4158935546875, + "learning_rate": 4.212430256049809e-06, + "loss": 99.3676, + "step": 73670 + }, + { + "epoch": 0.6095049013525251, + "grad_norm": 564.0368041992188, + "learning_rate": 4.2110375262749435e-06, + "loss": 86.9687, + "step": 73680 + }, + { + "epoch": 0.6095876246018944, + "grad_norm": 513.57470703125, + "learning_rate": 4.209644859274199e-06, + "loss": 70.3236, + "step": 73690 + }, + { + "epoch": 0.6096703478512636, + 
"grad_norm": 1249.1611328125, + "learning_rate": 4.208252255158387e-06, + "loss": 94.686, + "step": 73700 + }, + { + "epoch": 0.6097530711006328, + "grad_norm": 1189.990478515625, + "learning_rate": 4.2068597140383084e-06, + "loss": 114.4235, + "step": 73710 + }, + { + "epoch": 0.609835794350002, + "grad_norm": 561.5841674804688, + "learning_rate": 4.205467236024763e-06, + "loss": 97.9067, + "step": 73720 + }, + { + "epoch": 0.6099185175993713, + "grad_norm": 1291.4609375, + "learning_rate": 4.204074821228542e-06, + "loss": 97.8942, + "step": 73730 + }, + { + "epoch": 0.6100012408487405, + "grad_norm": 960.28076171875, + "learning_rate": 4.202682469760436e-06, + "loss": 82.6604, + "step": 73740 + }, + { + "epoch": 0.6100839640981097, + "grad_norm": 1440.652587890625, + "learning_rate": 4.2012901817312255e-06, + "loss": 116.6677, + "step": 73750 + }, + { + "epoch": 0.610166687347479, + "grad_norm": 665.2211303710938, + "learning_rate": 4.199897957251693e-06, + "loss": 82.3861, + "step": 73760 + }, + { + "epoch": 0.6102494105968482, + "grad_norm": 2500.0634765625, + "learning_rate": 4.198505796432605e-06, + "loss": 163.031, + "step": 73770 + }, + { + "epoch": 0.6103321338462174, + "grad_norm": 365.54327392578125, + "learning_rate": 4.197113699384732e-06, + "loss": 84.3422, + "step": 73780 + }, + { + "epoch": 0.6104148570955867, + "grad_norm": 1687.4744873046875, + "learning_rate": 4.1957216662188385e-06, + "loss": 114.6087, + "step": 73790 + }, + { + "epoch": 0.6104975803449559, + "grad_norm": 1095.3680419921875, + "learning_rate": 4.194329697045681e-06, + "loss": 79.8403, + "step": 73800 + }, + { + "epoch": 0.6105803035943251, + "grad_norm": 939.5096435546875, + "learning_rate": 4.19293779197601e-06, + "loss": 76.4956, + "step": 73810 + }, + { + "epoch": 0.6106630268436944, + "grad_norm": 1039.002197265625, + "learning_rate": 4.191545951120577e-06, + "loss": 127.6832, + "step": 73820 + }, + { + "epoch": 0.6107457500930636, + "grad_norm": 605.078857421875, + "learning_rate": 4.190154174590122e-06, + "loss": 107.0245, + "step": 73830 + }, + { + "epoch": 0.6108284733424328, + "grad_norm": 1389.932373046875, + "learning_rate": 4.188762462495381e-06, + "loss": 127.3608, + "step": 73840 + }, + { + "epoch": 0.6109111965918022, + "grad_norm": 716.9305419921875, + "learning_rate": 4.187370814947091e-06, + "loss": 74.3108, + "step": 73850 + }, + { + "epoch": 0.6109939198411714, + "grad_norm": 835.6632690429688, + "learning_rate": 4.185979232055975e-06, + "loss": 104.9701, + "step": 73860 + }, + { + "epoch": 0.6110766430905405, + "grad_norm": 960.3486938476562, + "learning_rate": 4.184587713932755e-06, + "loss": 118.767, + "step": 73870 + }, + { + "epoch": 0.6111593663399099, + "grad_norm": 811.2726440429688, + "learning_rate": 4.183196260688147e-06, + "loss": 83.4491, + "step": 73880 + }, + { + "epoch": 0.6112420895892791, + "grad_norm": 1344.4681396484375, + "learning_rate": 4.1818048724328646e-06, + "loss": 99.5997, + "step": 73890 + }, + { + "epoch": 0.6113248128386483, + "grad_norm": 1349.948486328125, + "learning_rate": 4.180413549277614e-06, + "loss": 101.2212, + "step": 73900 + }, + { + "epoch": 0.6114075360880176, + "grad_norm": 646.7472534179688, + "learning_rate": 4.1790222913330955e-06, + "loss": 89.3753, + "step": 73910 + }, + { + "epoch": 0.6114902593373868, + "grad_norm": 920.3361206054688, + "learning_rate": 4.1776310987100054e-06, + "loss": 97.7225, + "step": 73920 + }, + { + "epoch": 0.611572982586756, + "grad_norm": 1254.9158935546875, + "learning_rate": 4.1762399715190366e-06, + 
"loss": 104.7424, + "step": 73930 + }, + { + "epoch": 0.6116557058361253, + "grad_norm": 821.2960815429688, + "learning_rate": 4.1748489098708715e-06, + "loss": 106.9926, + "step": 73940 + }, + { + "epoch": 0.6117384290854945, + "grad_norm": 716.6043701171875, + "learning_rate": 4.173457913876191e-06, + "loss": 92.9678, + "step": 73950 + }, + { + "epoch": 0.6118211523348637, + "grad_norm": 1183.022216796875, + "learning_rate": 4.172066983645671e-06, + "loss": 117.3768, + "step": 73960 + }, + { + "epoch": 0.611903875584233, + "grad_norm": 836.3502807617188, + "learning_rate": 4.170676119289982e-06, + "loss": 81.8189, + "step": 73970 + }, + { + "epoch": 0.6119865988336022, + "grad_norm": 491.899658203125, + "learning_rate": 4.1692853209197865e-06, + "loss": 81.1659, + "step": 73980 + }, + { + "epoch": 0.6120693220829714, + "grad_norm": 1108.9029541015625, + "learning_rate": 4.167894588645746e-06, + "loss": 63.8417, + "step": 73990 + }, + { + "epoch": 0.6121520453323407, + "grad_norm": 1243.21533203125, + "learning_rate": 4.166503922578516e-06, + "loss": 71.4258, + "step": 74000 + }, + { + "epoch": 0.6122347685817099, + "grad_norm": 1081.2105712890625, + "learning_rate": 4.165113322828743e-06, + "loss": 110.2544, + "step": 74010 + }, + { + "epoch": 0.6123174918310791, + "grad_norm": 857.2208862304688, + "learning_rate": 4.163722789507071e-06, + "loss": 97.8825, + "step": 74020 + }, + { + "epoch": 0.6124002150804484, + "grad_norm": 1000.3453369140625, + "learning_rate": 4.162332322724139e-06, + "loss": 64.7686, + "step": 74030 + }, + { + "epoch": 0.6124829383298176, + "grad_norm": 536.7930297851562, + "learning_rate": 4.16094192259058e-06, + "loss": 82.5932, + "step": 74040 + }, + { + "epoch": 0.6125656615791868, + "grad_norm": 1039.748779296875, + "learning_rate": 4.15955158921702e-06, + "loss": 94.5069, + "step": 74050 + }, + { + "epoch": 0.6126483848285561, + "grad_norm": 903.4092407226562, + "learning_rate": 4.158161322714085e-06, + "loss": 93.7216, + "step": 74060 + }, + { + "epoch": 0.6127311080779253, + "grad_norm": 1123.583740234375, + "learning_rate": 4.1567711231923876e-06, + "loss": 97.1475, + "step": 74070 + }, + { + "epoch": 0.6128138313272945, + "grad_norm": 614.0092163085938, + "learning_rate": 4.155380990762542e-06, + "loss": 69.82, + "step": 74080 + }, + { + "epoch": 0.6128965545766638, + "grad_norm": 436.3868103027344, + "learning_rate": 4.153990925535157e-06, + "loss": 85.2651, + "step": 74090 + }, + { + "epoch": 0.612979277826033, + "grad_norm": 1019.7743530273438, + "learning_rate": 4.15260092762083e-06, + "loss": 68.9177, + "step": 74100 + }, + { + "epoch": 0.6130620010754022, + "grad_norm": 1233.597412109375, + "learning_rate": 4.151210997130159e-06, + "loss": 126.1689, + "step": 74110 + }, + { + "epoch": 0.6131447243247715, + "grad_norm": 1863.585205078125, + "learning_rate": 4.1498211341737335e-06, + "loss": 109.2528, + "step": 74120 + }, + { + "epoch": 0.6132274475741407, + "grad_norm": 603.75634765625, + "learning_rate": 4.148431338862138e-06, + "loss": 83.3835, + "step": 74130 + }, + { + "epoch": 0.6133101708235099, + "grad_norm": 1063.5419921875, + "learning_rate": 4.147041611305952e-06, + "loss": 81.7966, + "step": 74140 + }, + { + "epoch": 0.6133928940728792, + "grad_norm": 553.776123046875, + "learning_rate": 4.145651951615752e-06, + "loss": 103.0459, + "step": 74150 + }, + { + "epoch": 0.6134756173222484, + "grad_norm": 783.0980834960938, + "learning_rate": 4.144262359902104e-06, + "loss": 88.7107, + "step": 74160 + }, + { + "epoch": 0.6135583405716176, + 
"grad_norm": 1454.119873046875, + "learning_rate": 4.142872836275572e-06, + "loss": 95.9363, + "step": 74170 + }, + { + "epoch": 0.6136410638209869, + "grad_norm": 1249.62548828125, + "learning_rate": 4.141483380846716e-06, + "loss": 85.594, + "step": 74180 + }, + { + "epoch": 0.6137237870703561, + "grad_norm": 1073.4764404296875, + "learning_rate": 4.1400939937260894e-06, + "loss": 78.375, + "step": 74190 + }, + { + "epoch": 0.6138065103197253, + "grad_norm": 762.4814453125, + "learning_rate": 4.138704675024235e-06, + "loss": 87.9466, + "step": 74200 + }, + { + "epoch": 0.6138892335690946, + "grad_norm": 1005.1492919921875, + "learning_rate": 4.137315424851696e-06, + "loss": 99.7527, + "step": 74210 + }, + { + "epoch": 0.6139719568184638, + "grad_norm": 892.3167114257812, + "learning_rate": 4.1359262433190105e-06, + "loss": 88.9903, + "step": 74220 + }, + { + "epoch": 0.614054680067833, + "grad_norm": 736.7813110351562, + "learning_rate": 4.134537130536708e-06, + "loss": 72.6644, + "step": 74230 + }, + { + "epoch": 0.6141374033172023, + "grad_norm": 2169.072998046875, + "learning_rate": 4.133148086615314e-06, + "loss": 91.2334, + "step": 74240 + }, + { + "epoch": 0.6142201265665715, + "grad_norm": 918.8009643554688, + "learning_rate": 4.131759111665349e-06, + "loss": 88.9064, + "step": 74250 + }, + { + "epoch": 0.6143028498159407, + "grad_norm": 991.6587524414062, + "learning_rate": 4.130370205797326e-06, + "loss": 126.6198, + "step": 74260 + }, + { + "epoch": 0.61438557306531, + "grad_norm": 626.6166381835938, + "learning_rate": 4.128981369121754e-06, + "loss": 101.1643, + "step": 74270 + }, + { + "epoch": 0.6144682963146793, + "grad_norm": 1045.540283203125, + "learning_rate": 4.127592601749141e-06, + "loss": 100.4899, + "step": 74280 + }, + { + "epoch": 0.6145510195640485, + "grad_norm": 948.6563720703125, + "learning_rate": 4.1262039037899775e-06, + "loss": 92.1499, + "step": 74290 + }, + { + "epoch": 0.6146337428134178, + "grad_norm": 672.4278564453125, + "learning_rate": 4.12481527535476e-06, + "loss": 90.7191, + "step": 74300 + }, + { + "epoch": 0.614716466062787, + "grad_norm": 1149.4989013671875, + "learning_rate": 4.123426716553972e-06, + "loss": 97.7278, + "step": 74310 + }, + { + "epoch": 0.6147991893121562, + "grad_norm": 1198.019775390625, + "learning_rate": 4.122038227498101e-06, + "loss": 84.5663, + "step": 74320 + }, + { + "epoch": 0.6148819125615255, + "grad_norm": 1210.01611328125, + "learning_rate": 4.120649808297616e-06, + "loss": 116.1529, + "step": 74330 + }, + { + "epoch": 0.6149646358108947, + "grad_norm": 815.8023681640625, + "learning_rate": 4.119261459062992e-06, + "loss": 94.437, + "step": 74340 + }, + { + "epoch": 0.6150473590602639, + "grad_norm": 616.3191528320312, + "learning_rate": 4.1178731799046915e-06, + "loss": 110.6811, + "step": 74350 + }, + { + "epoch": 0.6151300823096332, + "grad_norm": 938.7825927734375, + "learning_rate": 4.116484970933174e-06, + "loss": 85.8687, + "step": 74360 + }, + { + "epoch": 0.6152128055590024, + "grad_norm": 754.9133911132812, + "learning_rate": 4.1150968322588915e-06, + "loss": 85.892, + "step": 74370 + }, + { + "epoch": 0.6152955288083716, + "grad_norm": 982.1900024414062, + "learning_rate": 4.113708763992294e-06, + "loss": 98.6362, + "step": 74380 + }, + { + "epoch": 0.6153782520577409, + "grad_norm": 1620.11767578125, + "learning_rate": 4.1123207662438216e-06, + "loss": 99.9468, + "step": 74390 + }, + { + "epoch": 0.6154609753071101, + "grad_norm": 843.5370483398438, + "learning_rate": 4.110932839123911e-06, + 
"loss": 102.8236, + "step": 74400 + }, + { + "epoch": 0.6155436985564793, + "grad_norm": 1027.692138671875, + "learning_rate": 4.109544982742995e-06, + "loss": 87.6993, + "step": 74410 + }, + { + "epoch": 0.6156264218058485, + "grad_norm": 922.636962890625, + "learning_rate": 4.108157197211499e-06, + "loss": 91.7235, + "step": 74420 + }, + { + "epoch": 0.6157091450552178, + "grad_norm": 592.3994750976562, + "learning_rate": 4.1067694826398405e-06, + "loss": 106.6859, + "step": 74430 + }, + { + "epoch": 0.615791868304587, + "grad_norm": 576.2105102539062, + "learning_rate": 4.105381839138436e-06, + "loss": 67.9388, + "step": 74440 + }, + { + "epoch": 0.6158745915539562, + "grad_norm": 1076.5267333984375, + "learning_rate": 4.103994266817694e-06, + "loss": 81.3077, + "step": 74450 + }, + { + "epoch": 0.6159573148033255, + "grad_norm": 1463.522216796875, + "learning_rate": 4.102606765788014e-06, + "loss": 121.3816, + "step": 74460 + }, + { + "epoch": 0.6160400380526947, + "grad_norm": 523.5887451171875, + "learning_rate": 4.101219336159795e-06, + "loss": 96.6892, + "step": 74470 + }, + { + "epoch": 0.6161227613020639, + "grad_norm": 801.384765625, + "learning_rate": 4.09983197804343e-06, + "loss": 90.4624, + "step": 74480 + }, + { + "epoch": 0.6162054845514332, + "grad_norm": 815.4998168945312, + "learning_rate": 4.098444691549302e-06, + "loss": 85.0159, + "step": 74490 + }, + { + "epoch": 0.6162882078008024, + "grad_norm": 1301.9669189453125, + "learning_rate": 4.097057476787792e-06, + "loss": 101.675, + "step": 74500 + }, + { + "epoch": 0.6163709310501716, + "grad_norm": 2187.890869140625, + "learning_rate": 4.0956703338692755e-06, + "loss": 93.8541, + "step": 74510 + }, + { + "epoch": 0.6164536542995409, + "grad_norm": 2067.32470703125, + "learning_rate": 4.09428326290412e-06, + "loss": 100.0631, + "step": 74520 + }, + { + "epoch": 0.6165363775489101, + "grad_norm": 1015.9877319335938, + "learning_rate": 4.092896264002689e-06, + "loss": 88.7052, + "step": 74530 + }, + { + "epoch": 0.6166191007982793, + "grad_norm": 1361.1781005859375, + "learning_rate": 4.09150933727534e-06, + "loss": 98.1841, + "step": 74540 + }, + { + "epoch": 0.6167018240476486, + "grad_norm": 1028.168701171875, + "learning_rate": 4.0901224828324225e-06, + "loss": 92.4252, + "step": 74550 + }, + { + "epoch": 0.6167845472970178, + "grad_norm": 841.4468383789062, + "learning_rate": 4.088735700784283e-06, + "loss": 96.4391, + "step": 74560 + }, + { + "epoch": 0.616867270546387, + "grad_norm": 941.0628662109375, + "learning_rate": 4.087348991241262e-06, + "loss": 79.4249, + "step": 74570 + }, + { + "epoch": 0.6169499937957563, + "grad_norm": 550.6486206054688, + "learning_rate": 4.0859623543136935e-06, + "loss": 71.2127, + "step": 74580 + }, + { + "epoch": 0.6170327170451255, + "grad_norm": 396.369140625, + "learning_rate": 4.084575790111905e-06, + "loss": 119.8758, + "step": 74590 + }, + { + "epoch": 0.6171154402944947, + "grad_norm": 714.75634765625, + "learning_rate": 4.08318929874622e-06, + "loss": 97.5772, + "step": 74600 + }, + { + "epoch": 0.617198163543864, + "grad_norm": 1025.8707275390625, + "learning_rate": 4.081802880326955e-06, + "loss": 109.0059, + "step": 74610 + }, + { + "epoch": 0.6172808867932332, + "grad_norm": 819.5011596679688, + "learning_rate": 4.080416534964422e-06, + "loss": 106.0014, + "step": 74620 + }, + { + "epoch": 0.6173636100426024, + "grad_norm": 628.18359375, + "learning_rate": 4.079030262768924e-06, + "loss": 99.0313, + "step": 74630 + }, + { + "epoch": 0.6174463332919717, + "grad_norm": 
1026.5445556640625, + "learning_rate": 4.077644063850761e-06, + "loss": 106.1461, + "step": 74640 + }, + { + "epoch": 0.6175290565413409, + "grad_norm": 479.8872985839844, + "learning_rate": 4.076257938320226e-06, + "loss": 77.8964, + "step": 74650 + }, + { + "epoch": 0.6176117797907101, + "grad_norm": 782.7094116210938, + "learning_rate": 4.074871886287609e-06, + "loss": 123.7894, + "step": 74660 + }, + { + "epoch": 0.6176945030400794, + "grad_norm": 708.5579833984375, + "learning_rate": 4.073485907863189e-06, + "loss": 62.82, + "step": 74670 + }, + { + "epoch": 0.6177772262894486, + "grad_norm": 1126.7977294921875, + "learning_rate": 4.0721000031572445e-06, + "loss": 85.1428, + "step": 74680 + }, + { + "epoch": 0.6178599495388178, + "grad_norm": 664.0321655273438, + "learning_rate": 4.070714172280043e-06, + "loss": 87.29, + "step": 74690 + }, + { + "epoch": 0.6179426727881872, + "grad_norm": 716.9480590820312, + "learning_rate": 4.06932841534185e-06, + "loss": 85.6546, + "step": 74700 + }, + { + "epoch": 0.6180253960375564, + "grad_norm": 894.58935546875, + "learning_rate": 4.067942732452926e-06, + "loss": 98.9088, + "step": 74710 + }, + { + "epoch": 0.6181081192869256, + "grad_norm": 844.1010131835938, + "learning_rate": 4.06655712372352e-06, + "loss": 114.6562, + "step": 74720 + }, + { + "epoch": 0.6181908425362949, + "grad_norm": 735.4295654296875, + "learning_rate": 4.065171589263878e-06, + "loss": 87.0721, + "step": 74730 + }, + { + "epoch": 0.6182735657856641, + "grad_norm": 758.739990234375, + "learning_rate": 4.063786129184243e-06, + "loss": 108.7126, + "step": 74740 + }, + { + "epoch": 0.6183562890350333, + "grad_norm": 1705.91064453125, + "learning_rate": 4.06240074359485e-06, + "loss": 130.587, + "step": 74750 + }, + { + "epoch": 0.6184390122844026, + "grad_norm": 498.7382507324219, + "learning_rate": 4.061015432605927e-06, + "loss": 76.9041, + "step": 74760 + }, + { + "epoch": 0.6185217355337718, + "grad_norm": 610.8711547851562, + "learning_rate": 4.059630196327696e-06, + "loss": 93.8699, + "step": 74770 + }, + { + "epoch": 0.618604458783141, + "grad_norm": 850.5593872070312, + "learning_rate": 4.058245034870375e-06, + "loss": 84.9047, + "step": 74780 + }, + { + "epoch": 0.6186871820325103, + "grad_norm": 833.0304565429688, + "learning_rate": 4.056859948344175e-06, + "loss": 71.8329, + "step": 74790 + }, + { + "epoch": 0.6187699052818795, + "grad_norm": 758.42236328125, + "learning_rate": 4.0554749368593e-06, + "loss": 69.1855, + "step": 74800 + }, + { + "epoch": 0.6188526285312487, + "grad_norm": 853.135009765625, + "learning_rate": 4.054090000525949e-06, + "loss": 74.7512, + "step": 74810 + }, + { + "epoch": 0.618935351780618, + "grad_norm": 817.5387573242188, + "learning_rate": 4.052705139454316e-06, + "loss": 116.5073, + "step": 74820 + }, + { + "epoch": 0.6190180750299872, + "grad_norm": 1293.7938232421875, + "learning_rate": 4.051320353754586e-06, + "loss": 102.931, + "step": 74830 + }, + { + "epoch": 0.6191007982793564, + "grad_norm": 931.7042236328125, + "learning_rate": 4.049935643536943e-06, + "loss": 96.7613, + "step": 74840 + }, + { + "epoch": 0.6191835215287257, + "grad_norm": 495.9801940917969, + "learning_rate": 4.048551008911561e-06, + "loss": 89.8051, + "step": 74850 + }, + { + "epoch": 0.6192662447780949, + "grad_norm": 587.1924438476562, + "learning_rate": 4.0471664499886074e-06, + "loss": 116.4916, + "step": 74860 + }, + { + "epoch": 0.6193489680274641, + "grad_norm": 769.1555786132812, + "learning_rate": 4.045781966878247e-06, + "loss": 85.5647, + 
"step": 74870 + }, + { + "epoch": 0.6194316912768334, + "grad_norm": 1216.7843017578125, + "learning_rate": 4.044397559690638e-06, + "loss": 108.6549, + "step": 74880 + }, + { + "epoch": 0.6195144145262026, + "grad_norm": 764.6852416992188, + "learning_rate": 4.043013228535928e-06, + "loss": 113.6466, + "step": 74890 + }, + { + "epoch": 0.6195971377755718, + "grad_norm": 934.8550415039062, + "learning_rate": 4.041628973524264e-06, + "loss": 130.2357, + "step": 74900 + }, + { + "epoch": 0.6196798610249411, + "grad_norm": 377.97088623046875, + "learning_rate": 4.040244794765783e-06, + "loss": 85.8722, + "step": 74910 + }, + { + "epoch": 0.6197625842743103, + "grad_norm": 1394.6834716796875, + "learning_rate": 4.03886069237062e-06, + "loss": 91.4125, + "step": 74920 + }, + { + "epoch": 0.6198453075236795, + "grad_norm": 595.1375122070312, + "learning_rate": 4.037476666448899e-06, + "loss": 71.7473, + "step": 74930 + }, + { + "epoch": 0.6199280307730488, + "grad_norm": 595.4174194335938, + "learning_rate": 4.0360927171107436e-06, + "loss": 93.8769, + "step": 74940 + }, + { + "epoch": 0.620010754022418, + "grad_norm": 718.7639770507812, + "learning_rate": 4.034708844466267e-06, + "loss": 102.3341, + "step": 74950 + }, + { + "epoch": 0.6200934772717872, + "grad_norm": 968.3093872070312, + "learning_rate": 4.033325048625578e-06, + "loss": 90.3649, + "step": 74960 + }, + { + "epoch": 0.6201762005211565, + "grad_norm": 1063.10009765625, + "learning_rate": 4.031941329698778e-06, + "loss": 104.0913, + "step": 74970 + }, + { + "epoch": 0.6202589237705257, + "grad_norm": 1573.7166748046875, + "learning_rate": 4.030557687795965e-06, + "loss": 92.6962, + "step": 74980 + }, + { + "epoch": 0.6203416470198949, + "grad_norm": 1660.5982666015625, + "learning_rate": 4.029174123027226e-06, + "loss": 135.9649, + "step": 74990 + }, + { + "epoch": 0.6204243702692642, + "grad_norm": 1034.1180419921875, + "learning_rate": 4.027790635502646e-06, + "loss": 94.0106, + "step": 75000 + }, + { + "epoch": 0.6205070935186334, + "grad_norm": 742.1674194335938, + "learning_rate": 4.026407225332305e-06, + "loss": 109.5858, + "step": 75010 + }, + { + "epoch": 0.6205898167680026, + "grad_norm": 586.0728759765625, + "learning_rate": 4.025023892626272e-06, + "loss": 76.6323, + "step": 75020 + }, + { + "epoch": 0.6206725400173719, + "grad_norm": 1411.9517822265625, + "learning_rate": 4.023640637494612e-06, + "loss": 135.056, + "step": 75030 + }, + { + "epoch": 0.6207552632667411, + "grad_norm": 1079.8941650390625, + "learning_rate": 4.022257460047387e-06, + "loss": 97.0648, + "step": 75040 + }, + { + "epoch": 0.6208379865161103, + "grad_norm": 791.2523803710938, + "learning_rate": 4.0208743603946505e-06, + "loss": 102.2262, + "step": 75050 + }, + { + "epoch": 0.6209207097654796, + "grad_norm": 1095.387939453125, + "learning_rate": 4.0194913386464445e-06, + "loss": 95.2964, + "step": 75060 + }, + { + "epoch": 0.6210034330148488, + "grad_norm": 601.5514526367188, + "learning_rate": 4.018108394912814e-06, + "loss": 88.666, + "step": 75070 + }, + { + "epoch": 0.621086156264218, + "grad_norm": 747.2603149414062, + "learning_rate": 4.016725529303792e-06, + "loss": 84.6649, + "step": 75080 + }, + { + "epoch": 0.6211688795135873, + "grad_norm": 619.85791015625, + "learning_rate": 4.015342741929407e-06, + "loss": 73.6812, + "step": 75090 + }, + { + "epoch": 0.6212516027629565, + "grad_norm": 1243.3499755859375, + "learning_rate": 4.013960032899681e-06, + "loss": 100.2627, + "step": 75100 + }, + { + "epoch": 0.6213343260123257, + 
"grad_norm": 491.8591003417969, + "learning_rate": 4.012577402324631e-06, + "loss": 99.1271, + "step": 75110 + }, + { + "epoch": 0.621417049261695, + "grad_norm": 955.7748413085938, + "learning_rate": 4.011194850314263e-06, + "loss": 87.4493, + "step": 75120 + }, + { + "epoch": 0.6214997725110643, + "grad_norm": 895.6536254882812, + "learning_rate": 4.009812376978585e-06, + "loss": 111.0081, + "step": 75130 + }, + { + "epoch": 0.6215824957604335, + "grad_norm": 513.0172119140625, + "learning_rate": 4.0084299824275926e-06, + "loss": 62.0584, + "step": 75140 + }, + { + "epoch": 0.6216652190098026, + "grad_norm": 835.1646118164062, + "learning_rate": 4.007047666771274e-06, + "loss": 112.1609, + "step": 75150 + }, + { + "epoch": 0.621747942259172, + "grad_norm": 855.74169921875, + "learning_rate": 4.005665430119615e-06, + "loss": 89.9893, + "step": 75160 + }, + { + "epoch": 0.6218306655085412, + "grad_norm": 1189.672119140625, + "learning_rate": 4.0042832725825954e-06, + "loss": 78.6606, + "step": 75170 + }, + { + "epoch": 0.6219133887579104, + "grad_norm": 690.7192993164062, + "learning_rate": 4.002901194270186e-06, + "loss": 82.508, + "step": 75180 + }, + { + "epoch": 0.6219961120072797, + "grad_norm": 519.3690795898438, + "learning_rate": 4.001519195292352e-06, + "loss": 74.1029, + "step": 75190 + }, + { + "epoch": 0.6220788352566489, + "grad_norm": 664.5087890625, + "learning_rate": 4.000137275759053e-06, + "loss": 99.7511, + "step": 75200 + }, + { + "epoch": 0.6221615585060181, + "grad_norm": 966.7168579101562, + "learning_rate": 3.9987554357802435e-06, + "loss": 102.3384, + "step": 75210 + }, + { + "epoch": 0.6222442817553874, + "grad_norm": 1171.9957275390625, + "learning_rate": 3.997373675465869e-06, + "loss": 74.9776, + "step": 75220 + }, + { + "epoch": 0.6223270050047566, + "grad_norm": 534.0394897460938, + "learning_rate": 3.995991994925869e-06, + "loss": 91.2517, + "step": 75230 + }, + { + "epoch": 0.6224097282541258, + "grad_norm": 833.4016723632812, + "learning_rate": 3.994610394270178e-06, + "loss": 112.876, + "step": 75240 + }, + { + "epoch": 0.6224924515034951, + "grad_norm": 833.5524291992188, + "learning_rate": 3.993228873608724e-06, + "loss": 97.8957, + "step": 75250 + }, + { + "epoch": 0.6225751747528643, + "grad_norm": 1124.5748291015625, + "learning_rate": 3.991847433051427e-06, + "loss": 90.5661, + "step": 75260 + }, + { + "epoch": 0.6226578980022335, + "grad_norm": 701.1575927734375, + "learning_rate": 3.990466072708204e-06, + "loss": 93.4155, + "step": 75270 + }, + { + "epoch": 0.6227406212516028, + "grad_norm": 1118.724365234375, + "learning_rate": 3.989084792688962e-06, + "loss": 114.3592, + "step": 75280 + }, + { + "epoch": 0.622823344500972, + "grad_norm": 1080.4481201171875, + "learning_rate": 3.987703593103604e-06, + "loss": 91.7185, + "step": 75290 + }, + { + "epoch": 0.6229060677503412, + "grad_norm": 1988.3421630859375, + "learning_rate": 3.986322474062025e-06, + "loss": 110.2386, + "step": 75300 + }, + { + "epoch": 0.6229887909997105, + "grad_norm": 434.8983459472656, + "learning_rate": 3.9849414356741165e-06, + "loss": 66.5312, + "step": 75310 + }, + { + "epoch": 0.6230715142490797, + "grad_norm": 682.07421875, + "learning_rate": 3.9835604780497575e-06, + "loss": 85.5432, + "step": 75320 + }, + { + "epoch": 0.6231542374984489, + "grad_norm": 1016.9640502929688, + "learning_rate": 3.982179601298827e-06, + "loss": 109.5171, + "step": 75330 + }, + { + "epoch": 0.6232369607478182, + "grad_norm": 893.9984130859375, + "learning_rate": 3.9807988055311946e-06, + 
"loss": 78.253, + "step": 75340 + }, + { + "epoch": 0.6233196839971874, + "grad_norm": 589.87451171875, + "learning_rate": 3.979418090856723e-06, + "loss": 87.9922, + "step": 75350 + }, + { + "epoch": 0.6234024072465566, + "grad_norm": 899.80126953125, + "learning_rate": 3.978037457385268e-06, + "loss": 109.1129, + "step": 75360 + }, + { + "epoch": 0.6234851304959259, + "grad_norm": 833.7298583984375, + "learning_rate": 3.976656905226686e-06, + "loss": 104.8421, + "step": 75370 + }, + { + "epoch": 0.6235678537452951, + "grad_norm": 1044.365234375, + "learning_rate": 3.9752764344908155e-06, + "loss": 97.9259, + "step": 75380 + }, + { + "epoch": 0.6236505769946643, + "grad_norm": 1515.5921630859375, + "learning_rate": 3.9738960452874975e-06, + "loss": 102.9154, + "step": 75390 + }, + { + "epoch": 0.6237333002440336, + "grad_norm": 1511.9940185546875, + "learning_rate": 3.972515737726563e-06, + "loss": 114.3788, + "step": 75400 + }, + { + "epoch": 0.6238160234934028, + "grad_norm": 1036.8448486328125, + "learning_rate": 3.9711355119178345e-06, + "loss": 144.706, + "step": 75410 + }, + { + "epoch": 0.623898746742772, + "grad_norm": 655.7908325195312, + "learning_rate": 3.969755367971131e-06, + "loss": 90.5163, + "step": 75420 + }, + { + "epoch": 0.6239814699921413, + "grad_norm": 574.1646728515625, + "learning_rate": 3.9683753059962646e-06, + "loss": 77.3598, + "step": 75430 + }, + { + "epoch": 0.6240641932415105, + "grad_norm": 615.3191528320312, + "learning_rate": 3.966995326103041e-06, + "loss": 88.5786, + "step": 75440 + }, + { + "epoch": 0.6241469164908797, + "grad_norm": 1213.435546875, + "learning_rate": 3.965615428401257e-06, + "loss": 95.156, + "step": 75450 + }, + { + "epoch": 0.624229639740249, + "grad_norm": 960.2600708007812, + "learning_rate": 3.964235613000708e-06, + "loss": 85.2145, + "step": 75460 + }, + { + "epoch": 0.6243123629896182, + "grad_norm": 616.06689453125, + "learning_rate": 3.962855880011177e-06, + "loss": 78.2717, + "step": 75470 + }, + { + "epoch": 0.6243950862389874, + "grad_norm": 560.2363891601562, + "learning_rate": 3.961476229542446e-06, + "loss": 102.936, + "step": 75480 + }, + { + "epoch": 0.6244778094883567, + "grad_norm": 473.06793212890625, + "learning_rate": 3.9600966617042825e-06, + "loss": 72.4044, + "step": 75490 + }, + { + "epoch": 0.6245605327377259, + "grad_norm": 770.420166015625, + "learning_rate": 3.958717176606456e-06, + "loss": 99.9721, + "step": 75500 + }, + { + "epoch": 0.6246432559870951, + "grad_norm": 1100.4041748046875, + "learning_rate": 3.957337774358725e-06, + "loss": 123.5163, + "step": 75510 + }, + { + "epoch": 0.6247259792364644, + "grad_norm": 1014.705322265625, + "learning_rate": 3.955958455070842e-06, + "loss": 105.1985, + "step": 75520 + }, + { + "epoch": 0.6248087024858336, + "grad_norm": 1663.8048095703125, + "learning_rate": 3.954579218852553e-06, + "loss": 100.1772, + "step": 75530 + }, + { + "epoch": 0.6248914257352028, + "grad_norm": 701.8104248046875, + "learning_rate": 3.953200065813597e-06, + "loss": 100.0598, + "step": 75540 + }, + { + "epoch": 0.6249741489845722, + "grad_norm": 906.2036743164062, + "learning_rate": 3.951820996063708e-06, + "loss": 95.372, + "step": 75550 + }, + { + "epoch": 0.6250568722339414, + "grad_norm": 719.7368774414062, + "learning_rate": 3.950442009712612e-06, + "loss": 83.6697, + "step": 75560 + }, + { + "epoch": 0.6251395954833106, + "grad_norm": 703.7513427734375, + "learning_rate": 3.949063106870031e-06, + "loss": 85.2299, + "step": 75570 + }, + { + "epoch": 0.6252223187326799, + 
"grad_norm": 571.2645263671875, + "learning_rate": 3.9476842876456735e-06, + "loss": 100.489, + "step": 75580 + }, + { + "epoch": 0.6253050419820491, + "grad_norm": 829.6326293945312, + "learning_rate": 3.946305552149247e-06, + "loss": 92.2218, + "step": 75590 + }, + { + "epoch": 0.6253877652314183, + "grad_norm": 716.100341796875, + "learning_rate": 3.944926900490452e-06, + "loss": 96.999, + "step": 75600 + }, + { + "epoch": 0.6254704884807876, + "grad_norm": 731.5742797851562, + "learning_rate": 3.943548332778982e-06, + "loss": 108.1109, + "step": 75610 + }, + { + "epoch": 0.6255532117301568, + "grad_norm": 586.7816162109375, + "learning_rate": 3.942169849124523e-06, + "loss": 102.1984, + "step": 75620 + }, + { + "epoch": 0.625635934979526, + "grad_norm": 583.6131591796875, + "learning_rate": 3.940791449636753e-06, + "loss": 72.8051, + "step": 75630 + }, + { + "epoch": 0.6257186582288953, + "grad_norm": 655.7570190429688, + "learning_rate": 3.939413134425347e-06, + "loss": 92.8459, + "step": 75640 + }, + { + "epoch": 0.6258013814782645, + "grad_norm": 609.809326171875, + "learning_rate": 3.938034903599972e-06, + "loss": 67.6845, + "step": 75650 + }, + { + "epoch": 0.6258841047276337, + "grad_norm": 989.1069946289062, + "learning_rate": 3.9366567572702845e-06, + "loss": 65.7915, + "step": 75660 + }, + { + "epoch": 0.625966827977003, + "grad_norm": 746.6648559570312, + "learning_rate": 3.935278695545939e-06, + "loss": 90.3173, + "step": 75670 + }, + { + "epoch": 0.6260495512263722, + "grad_norm": 917.3569946289062, + "learning_rate": 3.933900718536579e-06, + "loss": 103.0491, + "step": 75680 + }, + { + "epoch": 0.6261322744757414, + "grad_norm": 753.1608276367188, + "learning_rate": 3.932522826351849e-06, + "loss": 111.5943, + "step": 75690 + }, + { + "epoch": 0.6262149977251107, + "grad_norm": 582.921142578125, + "learning_rate": 3.9311450191013774e-06, + "loss": 113.5137, + "step": 75700 + }, + { + "epoch": 0.6262977209744799, + "grad_norm": 944.9461669921875, + "learning_rate": 3.929767296894792e-06, + "loss": 77.7904, + "step": 75710 + }, + { + "epoch": 0.6263804442238491, + "grad_norm": 705.4637451171875, + "learning_rate": 3.9283896598417104e-06, + "loss": 79.6284, + "step": 75720 + }, + { + "epoch": 0.6264631674732184, + "grad_norm": 835.1659545898438, + "learning_rate": 3.927012108051746e-06, + "loss": 89.4407, + "step": 75730 + }, + { + "epoch": 0.6265458907225876, + "grad_norm": 1340.5850830078125, + "learning_rate": 3.925634641634505e-06, + "loss": 90.6847, + "step": 75740 + }, + { + "epoch": 0.6266286139719568, + "grad_norm": 1067.461181640625, + "learning_rate": 3.924257260699583e-06, + "loss": 90.2306, + "step": 75750 + }, + { + "epoch": 0.6267113372213261, + "grad_norm": 967.4222412109375, + "learning_rate": 3.922879965356574e-06, + "loss": 81.9187, + "step": 75760 + }, + { + "epoch": 0.6267940604706953, + "grad_norm": 1008.221923828125, + "learning_rate": 3.921502755715064e-06, + "loss": 83.478, + "step": 75770 + }, + { + "epoch": 0.6268767837200645, + "grad_norm": 506.6917724609375, + "learning_rate": 3.920125631884628e-06, + "loss": 87.9039, + "step": 75780 + }, + { + "epoch": 0.6269595069694338, + "grad_norm": 881.6430053710938, + "learning_rate": 3.918748593974841e-06, + "loss": 84.5617, + "step": 75790 + }, + { + "epoch": 0.627042230218803, + "grad_norm": 1556.2261962890625, + "learning_rate": 3.917371642095265e-06, + "loss": 108.6617, + "step": 75800 + }, + { + "epoch": 0.6271249534681722, + "grad_norm": 1210.6939697265625, + "learning_rate": 3.91599477635546e-06, + 
"loss": 89.0075, + "step": 75810 + }, + { + "epoch": 0.6272076767175415, + "grad_norm": 792.7057495117188, + "learning_rate": 3.914617996864976e-06, + "loss": 75.4186, + "step": 75820 + }, + { + "epoch": 0.6272903999669107, + "grad_norm": 965.24072265625, + "learning_rate": 3.9132413037333565e-06, + "loss": 85.8507, + "step": 75830 + }, + { + "epoch": 0.6273731232162799, + "grad_norm": 773.074462890625, + "learning_rate": 3.911864697070139e-06, + "loss": 97.3403, + "step": 75840 + }, + { + "epoch": 0.6274558464656492, + "grad_norm": 1299.928955078125, + "learning_rate": 3.910488176984853e-06, + "loss": 127.7312, + "step": 75850 + }, + { + "epoch": 0.6275385697150184, + "grad_norm": 662.94970703125, + "learning_rate": 3.909111743587023e-06, + "loss": 90.648, + "step": 75860 + }, + { + "epoch": 0.6276212929643876, + "grad_norm": 557.4157104492188, + "learning_rate": 3.907735396986166e-06, + "loss": 110.0861, + "step": 75870 + }, + { + "epoch": 0.6277040162137568, + "grad_norm": 1387.1092529296875, + "learning_rate": 3.9063591372917875e-06, + "loss": 83.7509, + "step": 75880 + }, + { + "epoch": 0.6277867394631261, + "grad_norm": 1644.345947265625, + "learning_rate": 3.904982964613395e-06, + "loss": 104.4969, + "step": 75890 + }, + { + "epoch": 0.6278694627124953, + "grad_norm": 663.9248657226562, + "learning_rate": 3.903606879060483e-06, + "loss": 120.2953, + "step": 75900 + }, + { + "epoch": 0.6279521859618645, + "grad_norm": 1548.13232421875, + "learning_rate": 3.902230880742541e-06, + "loss": 109.4352, + "step": 75910 + }, + { + "epoch": 0.6280349092112338, + "grad_norm": 615.7534790039062, + "learning_rate": 3.900854969769049e-06, + "loss": 82.1672, + "step": 75920 + }, + { + "epoch": 0.628117632460603, + "grad_norm": 1001.1287841796875, + "learning_rate": 3.899479146249482e-06, + "loss": 87.0741, + "step": 75930 + }, + { + "epoch": 0.6282003557099722, + "grad_norm": 1314.9921875, + "learning_rate": 3.898103410293309e-06, + "loss": 136.3624, + "step": 75940 + }, + { + "epoch": 0.6282830789593415, + "grad_norm": 1015.78759765625, + "learning_rate": 3.89672776200999e-06, + "loss": 87.5703, + "step": 75950 + }, + { + "epoch": 0.6283658022087107, + "grad_norm": 553.9141235351562, + "learning_rate": 3.895352201508981e-06, + "loss": 66.2459, + "step": 75960 + }, + { + "epoch": 0.6284485254580799, + "grad_norm": 585.1502075195312, + "learning_rate": 3.893976728899726e-06, + "loss": 119.4419, + "step": 75970 + }, + { + "epoch": 0.6285312487074493, + "grad_norm": 414.6874084472656, + "learning_rate": 3.892601344291667e-06, + "loss": 68.8029, + "step": 75980 + }, + { + "epoch": 0.6286139719568185, + "grad_norm": 912.771728515625, + "learning_rate": 3.891226047794237e-06, + "loss": 89.6607, + "step": 75990 + }, + { + "epoch": 0.6286966952061877, + "grad_norm": 777.063232421875, + "learning_rate": 3.8898508395168645e-06, + "loss": 146.2245, + "step": 76000 + }, + { + "epoch": 0.628779418455557, + "grad_norm": 884.0517578125, + "learning_rate": 3.888475719568961e-06, + "loss": 87.4548, + "step": 76010 + }, + { + "epoch": 0.6288621417049262, + "grad_norm": 768.80859375, + "learning_rate": 3.887100688059947e-06, + "loss": 83.4292, + "step": 76020 + }, + { + "epoch": 0.6289448649542954, + "grad_norm": 686.2239379882812, + "learning_rate": 3.885725745099222e-06, + "loss": 100.6639, + "step": 76030 + }, + { + "epoch": 0.6290275882036647, + "grad_norm": 1023.0579833984375, + "learning_rate": 3.8843508907961855e-06, + "loss": 81.8008, + "step": 76040 + }, + { + "epoch": 0.6291103114530339, + "grad_norm": 
867.1831665039062, + "learning_rate": 3.882976125260229e-06, + "loss": 89.1627, + "step": 76050 + }, + { + "epoch": 0.6291930347024031, + "grad_norm": 1243.499755859375, + "learning_rate": 3.881601448600736e-06, + "loss": 112.3818, + "step": 76060 + }, + { + "epoch": 0.6292757579517724, + "grad_norm": 753.3609619140625, + "learning_rate": 3.880226860927082e-06, + "loss": 79.829, + "step": 76070 + }, + { + "epoch": 0.6293584812011416, + "grad_norm": 457.8770446777344, + "learning_rate": 3.8788523623486405e-06, + "loss": 115.6456, + "step": 76080 + }, + { + "epoch": 0.6294412044505108, + "grad_norm": 2279.8681640625, + "learning_rate": 3.877477952974768e-06, + "loss": 100.5304, + "step": 76090 + }, + { + "epoch": 0.6295239276998801, + "grad_norm": 1045.8399658203125, + "learning_rate": 3.876103632914825e-06, + "loss": 84.072, + "step": 76100 + }, + { + "epoch": 0.6296066509492493, + "grad_norm": 731.2930908203125, + "learning_rate": 3.8747294022781555e-06, + "loss": 91.0355, + "step": 76110 + }, + { + "epoch": 0.6296893741986185, + "grad_norm": 1290.4974365234375, + "learning_rate": 3.873355261174105e-06, + "loss": 74.0479, + "step": 76120 + }, + { + "epoch": 0.6297720974479878, + "grad_norm": 842.3202514648438, + "learning_rate": 3.871981209712006e-06, + "loss": 91.0174, + "step": 76130 + }, + { + "epoch": 0.629854820697357, + "grad_norm": 1351.25244140625, + "learning_rate": 3.870607248001184e-06, + "loss": 96.3427, + "step": 76140 + }, + { + "epoch": 0.6299375439467262, + "grad_norm": 1196.4425048828125, + "learning_rate": 3.869233376150961e-06, + "loss": 83.2907, + "step": 76150 + }, + { + "epoch": 0.6300202671960955, + "grad_norm": 630.231689453125, + "learning_rate": 3.867859594270649e-06, + "loss": 93.8257, + "step": 76160 + }, + { + "epoch": 0.6301029904454647, + "grad_norm": 636.0516357421875, + "learning_rate": 3.866485902469554e-06, + "loss": 107.9014, + "step": 76170 + }, + { + "epoch": 0.6301857136948339, + "grad_norm": 867.7330322265625, + "learning_rate": 3.865112300856972e-06, + "loss": 98.2939, + "step": 76180 + }, + { + "epoch": 0.6302684369442032, + "grad_norm": 836.81884765625, + "learning_rate": 3.863738789542196e-06, + "loss": 91.0884, + "step": 76190 + }, + { + "epoch": 0.6303511601935724, + "grad_norm": 700.2529296875, + "learning_rate": 3.86236536863451e-06, + "loss": 95.8659, + "step": 76200 + }, + { + "epoch": 0.6304338834429416, + "grad_norm": 716.2244262695312, + "learning_rate": 3.860992038243189e-06, + "loss": 61.6874, + "step": 76210 + }, + { + "epoch": 0.6305166066923109, + "grad_norm": 410.9248962402344, + "learning_rate": 3.8596187984775064e-06, + "loss": 99.224, + "step": 76220 + }, + { + "epoch": 0.6305993299416801, + "grad_norm": 907.3155517578125, + "learning_rate": 3.8582456494467214e-06, + "loss": 101.9425, + "step": 76230 + }, + { + "epoch": 0.6306820531910493, + "grad_norm": 579.9906616210938, + "learning_rate": 3.8568725912600904e-06, + "loss": 71.3855, + "step": 76240 + }, + { + "epoch": 0.6307647764404186, + "grad_norm": 1267.6279296875, + "learning_rate": 3.855499624026861e-06, + "loss": 123.397, + "step": 76250 + }, + { + "epoch": 0.6308474996897878, + "grad_norm": 720.7117309570312, + "learning_rate": 3.854126747856275e-06, + "loss": 105.1762, + "step": 76260 + }, + { + "epoch": 0.630930222939157, + "grad_norm": 1166.6748046875, + "learning_rate": 3.8527539628575635e-06, + "loss": 78.2158, + "step": 76270 + }, + { + "epoch": 0.6310129461885263, + "grad_norm": 721.6836547851562, + "learning_rate": 3.851381269139955e-06, + "loss": 75.0298, + 
"step": 76280 + }, + { + "epoch": 0.6310956694378955, + "grad_norm": 932.6498413085938, + "learning_rate": 3.8500086668126666e-06, + "loss": 127.4507, + "step": 76290 + }, + { + "epoch": 0.6311783926872647, + "grad_norm": 804.4441528320312, + "learning_rate": 3.848636155984912e-06, + "loss": 88.2771, + "step": 76300 + }, + { + "epoch": 0.631261115936634, + "grad_norm": 662.4979248046875, + "learning_rate": 3.847263736765892e-06, + "loss": 76.6129, + "step": 76310 + }, + { + "epoch": 0.6313438391860032, + "grad_norm": 1362.8896484375, + "learning_rate": 3.8458914092648074e-06, + "loss": 116.3484, + "step": 76320 + }, + { + "epoch": 0.6314265624353724, + "grad_norm": 1995.614013671875, + "learning_rate": 3.844519173590847e-06, + "loss": 85.0176, + "step": 76330 + }, + { + "epoch": 0.6315092856847417, + "grad_norm": 494.42132568359375, + "learning_rate": 3.843147029853194e-06, + "loss": 77.5021, + "step": 76340 + }, + { + "epoch": 0.6315920089341109, + "grad_norm": 1092.7735595703125, + "learning_rate": 3.841774978161022e-06, + "loss": 107.8559, + "step": 76350 + }, + { + "epoch": 0.6316747321834801, + "grad_norm": 746.4695434570312, + "learning_rate": 3.840403018623499e-06, + "loss": 82.9424, + "step": 76360 + }, + { + "epoch": 0.6317574554328494, + "grad_norm": 911.3170166015625, + "learning_rate": 3.839031151349786e-06, + "loss": 87.2492, + "step": 76370 + }, + { + "epoch": 0.6318401786822186, + "grad_norm": 1411.3955078125, + "learning_rate": 3.837659376449036e-06, + "loss": 103.6988, + "step": 76380 + }, + { + "epoch": 0.6319229019315878, + "grad_norm": 1356.0250244140625, + "learning_rate": 3.836287694030395e-06, + "loss": 120.5798, + "step": 76390 + }, + { + "epoch": 0.6320056251809572, + "grad_norm": 498.0589294433594, + "learning_rate": 3.834916104203e-06, + "loss": 95.7689, + "step": 76400 + }, + { + "epoch": 0.6320883484303264, + "grad_norm": 701.58740234375, + "learning_rate": 3.833544607075986e-06, + "loss": 78.422, + "step": 76410 + }, + { + "epoch": 0.6321710716796956, + "grad_norm": 596.4544677734375, + "learning_rate": 3.8321732027584734e-06, + "loss": 94.8889, + "step": 76420 + }, + { + "epoch": 0.6322537949290649, + "grad_norm": 910.3405151367188, + "learning_rate": 3.830801891359582e-06, + "loss": 81.3986, + "step": 76430 + }, + { + "epoch": 0.6323365181784341, + "grad_norm": 494.3802490234375, + "learning_rate": 3.829430672988414e-06, + "loss": 75.0982, + "step": 76440 + }, + { + "epoch": 0.6324192414278033, + "grad_norm": 773.7373046875, + "learning_rate": 3.828059547754078e-06, + "loss": 73.4452, + "step": 76450 + }, + { + "epoch": 0.6325019646771726, + "grad_norm": 597.4763793945312, + "learning_rate": 3.826688515765664e-06, + "loss": 94.815, + "step": 76460 + }, + { + "epoch": 0.6325846879265418, + "grad_norm": 1558.9375, + "learning_rate": 3.82531757713226e-06, + "loss": 89.0715, + "step": 76470 + }, + { + "epoch": 0.632667411175911, + "grad_norm": 912.0201416015625, + "learning_rate": 3.823946731962945e-06, + "loss": 89.6122, + "step": 76480 + }, + { + "epoch": 0.6327501344252803, + "grad_norm": 803.7633666992188, + "learning_rate": 3.8225759803667925e-06, + "loss": 90.7924, + "step": 76490 + }, + { + "epoch": 0.6328328576746495, + "grad_norm": 1232.05712890625, + "learning_rate": 3.821205322452863e-06, + "loss": 89.2706, + "step": 76500 + }, + { + "epoch": 0.6329155809240187, + "grad_norm": 831.2234497070312, + "learning_rate": 3.81983475833022e-06, + "loss": 75.2085, + "step": 76510 + }, + { + "epoch": 0.632998304173388, + "grad_norm": 560.6333618164062, + 
"learning_rate": 3.818464288107908e-06, + "loss": 80.2901, + "step": 76520 + }, + { + "epoch": 0.6330810274227572, + "grad_norm": 1093.8529052734375, + "learning_rate": 3.817093911894968e-06, + "loss": 81.6339, + "step": 76530 + }, + { + "epoch": 0.6331637506721264, + "grad_norm": 787.9254150390625, + "learning_rate": 3.8157236298004375e-06, + "loss": 98.8886, + "step": 76540 + }, + { + "epoch": 0.6332464739214957, + "grad_norm": 613.9071655273438, + "learning_rate": 3.814353441933343e-06, + "loss": 88.6119, + "step": 76550 + }, + { + "epoch": 0.6333291971708649, + "grad_norm": 1168.52685546875, + "learning_rate": 3.812983348402703e-06, + "loss": 86.0251, + "step": 76560 + }, + { + "epoch": 0.6334119204202341, + "grad_norm": 684.206298828125, + "learning_rate": 3.811613349317531e-06, + "loss": 68.7577, + "step": 76570 + }, + { + "epoch": 0.6334946436696034, + "grad_norm": 543.0419311523438, + "learning_rate": 3.810243444786831e-06, + "loss": 85.61, + "step": 76580 + }, + { + "epoch": 0.6335773669189726, + "grad_norm": 1784.14794921875, + "learning_rate": 3.8088736349195995e-06, + "loss": 119.1676, + "step": 76590 + }, + { + "epoch": 0.6336600901683418, + "grad_norm": 1234.8670654296875, + "learning_rate": 3.8075039198248274e-06, + "loss": 82.1384, + "step": 76600 + }, + { + "epoch": 0.633742813417711, + "grad_norm": 454.25372314453125, + "learning_rate": 3.8061342996114946e-06, + "loss": 81.6356, + "step": 76610 + }, + { + "epoch": 0.6338255366670803, + "grad_norm": 1700.140380859375, + "learning_rate": 3.8047647743885762e-06, + "loss": 88.7648, + "step": 76620 + }, + { + "epoch": 0.6339082599164495, + "grad_norm": 1148.6446533203125, + "learning_rate": 3.8033953442650382e-06, + "loss": 85.314, + "step": 76630 + }, + { + "epoch": 0.6339909831658187, + "grad_norm": 730.97998046875, + "learning_rate": 3.802026009349843e-06, + "loss": 98.301, + "step": 76640 + }, + { + "epoch": 0.634073706415188, + "grad_norm": 672.2242431640625, + "learning_rate": 3.800656769751939e-06, + "loss": 86.9433, + "step": 76650 + }, + { + "epoch": 0.6341564296645572, + "grad_norm": 769.8524780273438, + "learning_rate": 3.799287625580273e-06, + "loss": 93.8155, + "step": 76660 + }, + { + "epoch": 0.6342391529139264, + "grad_norm": 539.6921997070312, + "learning_rate": 3.7979185769437795e-06, + "loss": 71.8179, + "step": 76670 + }, + { + "epoch": 0.6343218761632957, + "grad_norm": 1174.0040283203125, + "learning_rate": 3.7965496239513875e-06, + "loss": 100.7191, + "step": 76680 + }, + { + "epoch": 0.6344045994126649, + "grad_norm": 654.21240234375, + "learning_rate": 3.79518076671202e-06, + "loss": 101.6481, + "step": 76690 + }, + { + "epoch": 0.6344873226620341, + "grad_norm": 1032.4493408203125, + "learning_rate": 3.793812005334589e-06, + "loss": 73.4023, + "step": 76700 + }, + { + "epoch": 0.6345700459114034, + "grad_norm": 1226.67333984375, + "learning_rate": 3.792443339928001e-06, + "loss": 94.1326, + "step": 76710 + }, + { + "epoch": 0.6346527691607726, + "grad_norm": 693.3799438476562, + "learning_rate": 3.7910747706011543e-06, + "loss": 103.0554, + "step": 76720 + }, + { + "epoch": 0.6347354924101418, + "grad_norm": 427.7120361328125, + "learning_rate": 3.7897062974629384e-06, + "loss": 101.5577, + "step": 76730 + }, + { + "epoch": 0.6348182156595111, + "grad_norm": 1844.3143310546875, + "learning_rate": 3.78833792062224e-06, + "loss": 119.7666, + "step": 76740 + }, + { + "epoch": 0.6349009389088803, + "grad_norm": 381.83380126953125, + "learning_rate": 3.786969640187932e-06, + "loss": 89.6403, + "step": 
76750 + }, + { + "epoch": 0.6349836621582495, + "grad_norm": 1052.9390869140625, + "learning_rate": 3.785601456268882e-06, + "loss": 81.7052, + "step": 76760 + }, + { + "epoch": 0.6350663854076188, + "grad_norm": 1047.6627197265625, + "learning_rate": 3.7842333689739524e-06, + "loss": 87.8519, + "step": 76770 + }, + { + "epoch": 0.635149108656988, + "grad_norm": 623.3661499023438, + "learning_rate": 3.782865378411993e-06, + "loss": 104.4676, + "step": 76780 + }, + { + "epoch": 0.6352318319063572, + "grad_norm": 482.55059814453125, + "learning_rate": 3.7814974846918496e-06, + "loss": 87.8084, + "step": 76790 + }, + { + "epoch": 0.6353145551557265, + "grad_norm": 731.9932250976562, + "learning_rate": 3.7801296879223594e-06, + "loss": 64.0985, + "step": 76800 + }, + { + "epoch": 0.6353972784050957, + "grad_norm": 421.17864990234375, + "learning_rate": 3.7787619882123506e-06, + "loss": 95.0226, + "step": 76810 + }, + { + "epoch": 0.6354800016544649, + "grad_norm": 945.0263671875, + "learning_rate": 3.7773943856706463e-06, + "loss": 96.3978, + "step": 76820 + }, + { + "epoch": 0.6355627249038343, + "grad_norm": 705.6464233398438, + "learning_rate": 3.7760268804060583e-06, + "loss": 92.4526, + "step": 76830 + }, + { + "epoch": 0.6356454481532035, + "grad_norm": 624.5090942382812, + "learning_rate": 3.774659472527396e-06, + "loss": 82.1249, + "step": 76840 + }, + { + "epoch": 0.6357281714025727, + "grad_norm": 788.3277587890625, + "learning_rate": 3.7732921621434553e-06, + "loss": 66.9649, + "step": 76850 + }, + { + "epoch": 0.635810894651942, + "grad_norm": 451.6109619140625, + "learning_rate": 3.77192494936303e-06, + "loss": 113.1434, + "step": 76860 + }, + { + "epoch": 0.6358936179013112, + "grad_norm": 711.1947021484375, + "learning_rate": 3.7705578342948967e-06, + "loss": 81.1459, + "step": 76870 + }, + { + "epoch": 0.6359763411506804, + "grad_norm": 987.3251953125, + "learning_rate": 3.7691908170478352e-06, + "loss": 82.2781, + "step": 76880 + }, + { + "epoch": 0.6360590644000497, + "grad_norm": 791.8734130859375, + "learning_rate": 3.767823897730612e-06, + "loss": 87.6097, + "step": 76890 + }, + { + "epoch": 0.6361417876494189, + "grad_norm": 657.6787109375, + "learning_rate": 3.7664570764519865e-06, + "loss": 88.5876, + "step": 76900 + }, + { + "epoch": 0.6362245108987881, + "grad_norm": 958.947265625, + "learning_rate": 3.76509035332071e-06, + "loss": 94.1349, + "step": 76910 + }, + { + "epoch": 0.6363072341481574, + "grad_norm": 1417.035400390625, + "learning_rate": 3.7637237284455264e-06, + "loss": 97.1612, + "step": 76920 + }, + { + "epoch": 0.6363899573975266, + "grad_norm": 766.021240234375, + "learning_rate": 3.762357201935171e-06, + "loss": 82.1091, + "step": 76930 + }, + { + "epoch": 0.6364726806468958, + "grad_norm": 1105.8828125, + "learning_rate": 3.7609907738983762e-06, + "loss": 88.7781, + "step": 76940 + }, + { + "epoch": 0.6365554038962651, + "grad_norm": 916.3728637695312, + "learning_rate": 3.7596244444438577e-06, + "loss": 99.8711, + "step": 76950 + }, + { + "epoch": 0.6366381271456343, + "grad_norm": 1511.9302978515625, + "learning_rate": 3.758258213680328e-06, + "loss": 76.2348, + "step": 76960 + }, + { + "epoch": 0.6367208503950035, + "grad_norm": 966.1973266601562, + "learning_rate": 3.7568920817164945e-06, + "loss": 102.2466, + "step": 76970 + }, + { + "epoch": 0.6368035736443728, + "grad_norm": 1336.3597412109375, + "learning_rate": 3.755526048661053e-06, + "loss": 102.7599, + "step": 76980 + }, + { + "epoch": 0.636886296893742, + "grad_norm": 845.8065185546875, + 
"learning_rate": 3.7541601146226924e-06, + "loss": 92.4951, + "step": 76990 + }, + { + "epoch": 0.6369690201431112, + "grad_norm": 876.119873046875, + "learning_rate": 3.752794279710094e-06, + "loss": 90.3612, + "step": 77000 + }, + { + "epoch": 0.6370517433924805, + "grad_norm": 1019.0983276367188, + "learning_rate": 3.751428544031931e-06, + "loss": 69.2927, + "step": 77010 + }, + { + "epoch": 0.6371344666418497, + "grad_norm": 591.1396484375, + "learning_rate": 3.750062907696868e-06, + "loss": 59.3948, + "step": 77020 + }, + { + "epoch": 0.6372171898912189, + "grad_norm": 627.9861450195312, + "learning_rate": 3.7486973708135643e-06, + "loss": 89.2826, + "step": 77030 + }, + { + "epoch": 0.6372999131405882, + "grad_norm": 823.3641967773438, + "learning_rate": 3.7473319334906678e-06, + "loss": 90.4643, + "step": 77040 + }, + { + "epoch": 0.6373826363899574, + "grad_norm": 1226.17333984375, + "learning_rate": 3.7459665958368197e-06, + "loss": 88.7953, + "step": 77050 + }, + { + "epoch": 0.6374653596393266, + "grad_norm": 1117.8607177734375, + "learning_rate": 3.7446013579606534e-06, + "loss": 101.2203, + "step": 77060 + }, + { + "epoch": 0.6375480828886959, + "grad_norm": 1034.3206787109375, + "learning_rate": 3.743236219970796e-06, + "loss": 102.8942, + "step": 77070 + }, + { + "epoch": 0.6376308061380651, + "grad_norm": 929.0318603515625, + "learning_rate": 3.741871181975866e-06, + "loss": 102.5503, + "step": 77080 + }, + { + "epoch": 0.6377135293874343, + "grad_norm": 815.303466796875, + "learning_rate": 3.740506244084471e-06, + "loss": 88.2377, + "step": 77090 + }, + { + "epoch": 0.6377962526368036, + "grad_norm": 1119.909423828125, + "learning_rate": 3.7391414064052138e-06, + "loss": 85.4328, + "step": 77100 + }, + { + "epoch": 0.6378789758861728, + "grad_norm": 477.2499084472656, + "learning_rate": 3.737776669046689e-06, + "loss": 85.3801, + "step": 77110 + }, + { + "epoch": 0.637961699135542, + "grad_norm": 1207.775634765625, + "learning_rate": 3.7364120321174826e-06, + "loss": 105.7464, + "step": 77120 + }, + { + "epoch": 0.6380444223849113, + "grad_norm": 664.5465698242188, + "learning_rate": 3.7350474957261705e-06, + "loss": 119.5242, + "step": 77130 + }, + { + "epoch": 0.6381271456342805, + "grad_norm": 686.4176025390625, + "learning_rate": 3.7336830599813245e-06, + "loss": 96.0158, + "step": 77140 + }, + { + "epoch": 0.6382098688836497, + "grad_norm": 537.5717163085938, + "learning_rate": 3.732318724991505e-06, + "loss": 75.7251, + "step": 77150 + }, + { + "epoch": 0.638292592133019, + "grad_norm": 1141.5447998046875, + "learning_rate": 3.730954490865266e-06, + "loss": 88.8078, + "step": 77160 + }, + { + "epoch": 0.6383753153823882, + "grad_norm": 990.2410888671875, + "learning_rate": 3.7295903577111548e-06, + "loss": 86.1368, + "step": 77170 + }, + { + "epoch": 0.6384580386317574, + "grad_norm": 613.973876953125, + "learning_rate": 3.728226325637709e-06, + "loss": 57.9084, + "step": 77180 + }, + { + "epoch": 0.6385407618811267, + "grad_norm": 959.3004150390625, + "learning_rate": 3.726862394753457e-06, + "loss": 88.2764, + "step": 77190 + }, + { + "epoch": 0.6386234851304959, + "grad_norm": 1009.2028198242188, + "learning_rate": 3.725498565166923e-06, + "loss": 60.0482, + "step": 77200 + }, + { + "epoch": 0.6387062083798651, + "grad_norm": 731.6983032226562, + "learning_rate": 3.7241348369866183e-06, + "loss": 90.616, + "step": 77210 + }, + { + "epoch": 0.6387889316292344, + "grad_norm": 1137.9534912109375, + "learning_rate": 3.7227712103210485e-06, + "loss": 77.4084, + "step": 
77220 + }, + { + "epoch": 0.6388716548786036, + "grad_norm": 740.2508544921875, + "learning_rate": 3.721407685278712e-06, + "loss": 99.5695, + "step": 77230 + }, + { + "epoch": 0.6389543781279728, + "grad_norm": 921.7864379882812, + "learning_rate": 3.7200442619680976e-06, + "loss": 62.2452, + "step": 77240 + }, + { + "epoch": 0.6390371013773422, + "grad_norm": 1076.6083984375, + "learning_rate": 3.7186809404976877e-06, + "loss": 95.747, + "step": 77250 + }, + { + "epoch": 0.6391198246267114, + "grad_norm": 971.0541381835938, + "learning_rate": 3.7173177209759538e-06, + "loss": 99.368, + "step": 77260 + }, + { + "epoch": 0.6392025478760806, + "grad_norm": 1410.000732421875, + "learning_rate": 3.715954603511363e-06, + "loss": 106.5322, + "step": 77270 + }, + { + "epoch": 0.6392852711254499, + "grad_norm": 587.7864990234375, + "learning_rate": 3.714591588212372e-06, + "loss": 74.5204, + "step": 77280 + }, + { + "epoch": 0.6393679943748191, + "grad_norm": 632.3125, + "learning_rate": 3.713228675187429e-06, + "loss": 105.5088, + "step": 77290 + }, + { + "epoch": 0.6394507176241883, + "grad_norm": 768.724609375, + "learning_rate": 3.7118658645449745e-06, + "loss": 87.9743, + "step": 77300 + }, + { + "epoch": 0.6395334408735576, + "grad_norm": 721.2608642578125, + "learning_rate": 3.710503156393441e-06, + "loss": 79.1286, + "step": 77310 + }, + { + "epoch": 0.6396161641229268, + "grad_norm": 1327.6044921875, + "learning_rate": 3.7091405508412538e-06, + "loss": 95.4713, + "step": 77320 + }, + { + "epoch": 0.639698887372296, + "grad_norm": 1236.894287109375, + "learning_rate": 3.7077780479968286e-06, + "loss": 91.4254, + "step": 77330 + }, + { + "epoch": 0.6397816106216652, + "grad_norm": 883.6204833984375, + "learning_rate": 3.7064156479685736e-06, + "loss": 119.5111, + "step": 77340 + }, + { + "epoch": 0.6398643338710345, + "grad_norm": 589.4072265625, + "learning_rate": 3.705053350864888e-06, + "loss": 71.212, + "step": 77350 + }, + { + "epoch": 0.6399470571204037, + "grad_norm": 1030.01904296875, + "learning_rate": 3.703691156794165e-06, + "loss": 68.7815, + "step": 77360 + }, + { + "epoch": 0.6400297803697729, + "grad_norm": 1243.1724853515625, + "learning_rate": 3.7023290658647893e-06, + "loss": 81.1806, + "step": 77370 + }, + { + "epoch": 0.6401125036191422, + "grad_norm": 1076.9923095703125, + "learning_rate": 3.7009670781851326e-06, + "loss": 92.6175, + "step": 77380 + }, + { + "epoch": 0.6401952268685114, + "grad_norm": 811.0065307617188, + "learning_rate": 3.6996051938635626e-06, + "loss": 94.767, + "step": 77390 + }, + { + "epoch": 0.6402779501178806, + "grad_norm": 707.3673095703125, + "learning_rate": 3.69824341300844e-06, + "loss": 82.464, + "step": 77400 + }, + { + "epoch": 0.6403606733672499, + "grad_norm": 801.9874267578125, + "learning_rate": 3.696881735728115e-06, + "loss": 89.9301, + "step": 77410 + }, + { + "epoch": 0.6404433966166191, + "grad_norm": 782.5747680664062, + "learning_rate": 3.6955201621309302e-06, + "loss": 91.4034, + "step": 77420 + }, + { + "epoch": 0.6405261198659883, + "grad_norm": 1794.2557373046875, + "learning_rate": 3.6941586923252194e-06, + "loss": 89.6232, + "step": 77430 + }, + { + "epoch": 0.6406088431153576, + "grad_norm": 1282.8133544921875, + "learning_rate": 3.6927973264193074e-06, + "loss": 84.1603, + "step": 77440 + }, + { + "epoch": 0.6406915663647268, + "grad_norm": 615.4580078125, + "learning_rate": 3.691436064521513e-06, + "loss": 106.5725, + "step": 77450 + }, + { + "epoch": 0.640774289614096, + "grad_norm": 885.3602905273438, + 
"learning_rate": 3.6900749067401474e-06, + "loss": 100.404, + "step": 77460 + }, + { + "epoch": 0.6408570128634653, + "grad_norm": 883.349853515625, + "learning_rate": 3.6887138531835085e-06, + "loss": 120.5579, + "step": 77470 + }, + { + "epoch": 0.6409397361128345, + "grad_norm": 510.637939453125, + "learning_rate": 3.6873529039598903e-06, + "loss": 72.0067, + "step": 77480 + }, + { + "epoch": 0.6410224593622037, + "grad_norm": 1126.9063720703125, + "learning_rate": 3.6859920591775763e-06, + "loss": 89.0631, + "step": 77490 + }, + { + "epoch": 0.641105182611573, + "grad_norm": 897.1529541015625, + "learning_rate": 3.6846313189448447e-06, + "loss": 95.6343, + "step": 77500 + }, + { + "epoch": 0.6411879058609422, + "grad_norm": 1228.466796875, + "learning_rate": 3.6832706833699616e-06, + "loss": 107.3611, + "step": 77510 + }, + { + "epoch": 0.6412706291103114, + "grad_norm": 386.9453125, + "learning_rate": 3.681910152561188e-06, + "loss": 56.1387, + "step": 77520 + }, + { + "epoch": 0.6413533523596807, + "grad_norm": 651.796142578125, + "learning_rate": 3.6805497266267742e-06, + "loss": 80.0673, + "step": 77530 + }, + { + "epoch": 0.6414360756090499, + "grad_norm": 970.6996459960938, + "learning_rate": 3.679189405674963e-06, + "loss": 86.8163, + "step": 77540 + }, + { + "epoch": 0.6415187988584191, + "grad_norm": 921.419189453125, + "learning_rate": 3.6778291898139907e-06, + "loss": 99.753, + "step": 77550 + }, + { + "epoch": 0.6416015221077884, + "grad_norm": 940.6666259765625, + "learning_rate": 3.6764690791520797e-06, + "loss": 72.5782, + "step": 77560 + }, + { + "epoch": 0.6416842453571576, + "grad_norm": 927.505126953125, + "learning_rate": 3.6751090737974506e-06, + "loss": 80.6144, + "step": 77570 + }, + { + "epoch": 0.6417669686065268, + "grad_norm": 1062.0577392578125, + "learning_rate": 3.673749173858312e-06, + "loss": 80.1158, + "step": 77580 + }, + { + "epoch": 0.6418496918558961, + "grad_norm": 1165.107177734375, + "learning_rate": 3.672389379442864e-06, + "loss": 109.7003, + "step": 77590 + }, + { + "epoch": 0.6419324151052653, + "grad_norm": 929.5150756835938, + "learning_rate": 3.6710296906593012e-06, + "loss": 76.4433, + "step": 77600 + }, + { + "epoch": 0.6420151383546345, + "grad_norm": 709.593017578125, + "learning_rate": 3.6696701076158064e-06, + "loss": 78.3428, + "step": 77610 + }, + { + "epoch": 0.6420978616040038, + "grad_norm": 995.3925170898438, + "learning_rate": 3.6683106304205564e-06, + "loss": 92.2917, + "step": 77620 + }, + { + "epoch": 0.642180584853373, + "grad_norm": 426.57952880859375, + "learning_rate": 3.666951259181718e-06, + "loss": 80.3045, + "step": 77630 + }, + { + "epoch": 0.6422633081027422, + "grad_norm": 1803.5283203125, + "learning_rate": 3.6655919940074497e-06, + "loss": 85.4238, + "step": 77640 + }, + { + "epoch": 0.6423460313521115, + "grad_norm": 863.35302734375, + "learning_rate": 3.664232835005902e-06, + "loss": 112.9134, + "step": 77650 + }, + { + "epoch": 0.6424287546014807, + "grad_norm": 444.5658264160156, + "learning_rate": 3.6628737822852177e-06, + "loss": 73.9999, + "step": 77660 + }, + { + "epoch": 0.6425114778508499, + "grad_norm": 895.299560546875, + "learning_rate": 3.66151483595353e-06, + "loss": 98.808, + "step": 77670 + }, + { + "epoch": 0.6425942011002193, + "grad_norm": 622.2603149414062, + "learning_rate": 3.6601559961189626e-06, + "loss": 72.3235, + "step": 77680 + }, + { + "epoch": 0.6426769243495885, + "grad_norm": 751.9857788085938, + "learning_rate": 3.6587972628896345e-06, + "loss": 65.3022, + "step": 77690 + }, + 
{ + "epoch": 0.6427596475989577, + "grad_norm": 696.5802001953125, + "learning_rate": 3.6574386363736532e-06, + "loss": 111.5915, + "step": 77700 + }, + { + "epoch": 0.642842370848327, + "grad_norm": 1348.688232421875, + "learning_rate": 3.6560801166791183e-06, + "loss": 111.6634, + "step": 77710 + }, + { + "epoch": 0.6429250940976962, + "grad_norm": 933.01953125, + "learning_rate": 3.654721703914121e-06, + "loss": 101.2537, + "step": 77720 + }, + { + "epoch": 0.6430078173470654, + "grad_norm": 555.1231689453125, + "learning_rate": 3.6533633981867433e-06, + "loss": 71.2294, + "step": 77730 + }, + { + "epoch": 0.6430905405964347, + "grad_norm": 681.1109008789062, + "learning_rate": 3.652005199605059e-06, + "loss": 114.1313, + "step": 77740 + }, + { + "epoch": 0.6431732638458039, + "grad_norm": 1005.3911743164062, + "learning_rate": 3.6506471082771357e-06, + "loss": 98.2529, + "step": 77750 + }, + { + "epoch": 0.6432559870951731, + "grad_norm": 1053.3704833984375, + "learning_rate": 3.6492891243110283e-06, + "loss": 72.2338, + "step": 77760 + }, + { + "epoch": 0.6433387103445424, + "grad_norm": 664.69482421875, + "learning_rate": 3.6479312478147866e-06, + "loss": 93.1901, + "step": 77770 + }, + { + "epoch": 0.6434214335939116, + "grad_norm": 517.3734741210938, + "learning_rate": 3.64657347889645e-06, + "loss": 81.5787, + "step": 77780 + }, + { + "epoch": 0.6435041568432808, + "grad_norm": 778.450927734375, + "learning_rate": 3.6452158176640505e-06, + "loss": 111.1286, + "step": 77790 + }, + { + "epoch": 0.6435868800926501, + "grad_norm": 795.8098754882812, + "learning_rate": 3.6438582642256138e-06, + "loss": 80.7202, + "step": 77800 + }, + { + "epoch": 0.6436696033420193, + "grad_norm": 1202.6724853515625, + "learning_rate": 3.642500818689148e-06, + "loss": 89.1335, + "step": 77810 + }, + { + "epoch": 0.6437523265913885, + "grad_norm": 1074.335205078125, + "learning_rate": 3.641143481162661e-06, + "loss": 71.3191, + "step": 77820 + }, + { + "epoch": 0.6438350498407578, + "grad_norm": 1161.094482421875, + "learning_rate": 3.639786251754153e-06, + "loss": 85.943, + "step": 77830 + }, + { + "epoch": 0.643917773090127, + "grad_norm": 3801.1142578125, + "learning_rate": 3.638429130571609e-06, + "loss": 89.4513, + "step": 77840 + }, + { + "epoch": 0.6440004963394962, + "grad_norm": 818.4242553710938, + "learning_rate": 3.637072117723012e-06, + "loss": 108.8363, + "step": 77850 + }, + { + "epoch": 0.6440832195888655, + "grad_norm": 541.69921875, + "learning_rate": 3.6357152133163297e-06, + "loss": 83.608, + "step": 77860 + }, + { + "epoch": 0.6441659428382347, + "grad_norm": 540.3868408203125, + "learning_rate": 3.634358417459528e-06, + "loss": 109.2081, + "step": 77870 + }, + { + "epoch": 0.6442486660876039, + "grad_norm": 2112.091796875, + "learning_rate": 3.633001730260558e-06, + "loss": 145.9334, + "step": 77880 + }, + { + "epoch": 0.6443313893369732, + "grad_norm": 1037.028076171875, + "learning_rate": 3.63164515182737e-06, + "loss": 128.6462, + "step": 77890 + }, + { + "epoch": 0.6444141125863424, + "grad_norm": 710.5433959960938, + "learning_rate": 3.630288682267895e-06, + "loss": 95.2309, + "step": 77900 + }, + { + "epoch": 0.6444968358357116, + "grad_norm": 542.4974365234375, + "learning_rate": 3.628932321690063e-06, + "loss": 89.335, + "step": 77910 + }, + { + "epoch": 0.6445795590850809, + "grad_norm": 910.8280639648438, + "learning_rate": 3.6275760702017938e-06, + "loss": 123.32, + "step": 77920 + }, + { + "epoch": 0.6446622823344501, + "grad_norm": 474.7346496582031, + "learning_rate": 
3.626219927910999e-06, + "loss": 144.9698, + "step": 77930 + }, + { + "epoch": 0.6447450055838193, + "grad_norm": 729.6742553710938, + "learning_rate": 3.6248638949255795e-06, + "loss": 97.5584, + "step": 77940 + }, + { + "epoch": 0.6448277288331886, + "grad_norm": 692.0260009765625, + "learning_rate": 3.6235079713534287e-06, + "loss": 96.659, + "step": 77950 + }, + { + "epoch": 0.6449104520825578, + "grad_norm": 1216.1015625, + "learning_rate": 3.6221521573024316e-06, + "loss": 119.2402, + "step": 77960 + }, + { + "epoch": 0.644993175331927, + "grad_norm": 1217.374267578125, + "learning_rate": 3.620796452880464e-06, + "loss": 110.5306, + "step": 77970 + }, + { + "epoch": 0.6450758985812963, + "grad_norm": 606.9527587890625, + "learning_rate": 3.6194408581953934e-06, + "loss": 99.9628, + "step": 77980 + }, + { + "epoch": 0.6451586218306655, + "grad_norm": 649.1746826171875, + "learning_rate": 3.618085373355077e-06, + "loss": 95.9513, + "step": 77990 + }, + { + "epoch": 0.6452413450800347, + "grad_norm": 3910.442626953125, + "learning_rate": 3.6167299984673655e-06, + "loss": 100.3708, + "step": 78000 + }, + { + "epoch": 0.645324068329404, + "grad_norm": 991.4779663085938, + "learning_rate": 3.615374733640099e-06, + "loss": 89.7993, + "step": 78010 + }, + { + "epoch": 0.6454067915787732, + "grad_norm": 814.968017578125, + "learning_rate": 3.6140195789811108e-06, + "loss": 95.6553, + "step": 78020 + }, + { + "epoch": 0.6454895148281424, + "grad_norm": 974.3428344726562, + "learning_rate": 3.6126645345982243e-06, + "loss": 88.6417, + "step": 78030 + }, + { + "epoch": 0.6455722380775117, + "grad_norm": 700.50390625, + "learning_rate": 3.611309600599253e-06, + "loss": 97.1519, + "step": 78040 + }, + { + "epoch": 0.6456549613268809, + "grad_norm": 1007.422607421875, + "learning_rate": 3.6099547770920046e-06, + "loss": 96.4095, + "step": 78050 + }, + { + "epoch": 0.6457376845762501, + "grad_norm": 833.5045776367188, + "learning_rate": 3.6086000641842757e-06, + "loss": 90.0102, + "step": 78060 + }, + { + "epoch": 0.6458204078256193, + "grad_norm": 593.3013916015625, + "learning_rate": 3.6072454619838525e-06, + "loss": 89.5628, + "step": 78070 + }, + { + "epoch": 0.6459031310749886, + "grad_norm": 937.5786743164062, + "learning_rate": 3.6058909705985166e-06, + "loss": 93.1092, + "step": 78080 + }, + { + "epoch": 0.6459858543243578, + "grad_norm": 1777.1658935546875, + "learning_rate": 3.6045365901360385e-06, + "loss": 75.8696, + "step": 78090 + }, + { + "epoch": 0.646068577573727, + "grad_norm": 819.875732421875, + "learning_rate": 3.603182320704179e-06, + "loss": 77.7278, + "step": 78100 + }, + { + "epoch": 0.6461513008230964, + "grad_norm": 661.6198120117188, + "learning_rate": 3.601828162410691e-06, + "loss": 108.2391, + "step": 78110 + }, + { + "epoch": 0.6462340240724656, + "grad_norm": 1499.3050537109375, + "learning_rate": 3.6004741153633194e-06, + "loss": 92.8364, + "step": 78120 + }, + { + "epoch": 0.6463167473218348, + "grad_norm": 854.0673828125, + "learning_rate": 3.5991201796698006e-06, + "loss": 108.2219, + "step": 78130 + }, + { + "epoch": 0.6463994705712041, + "grad_norm": 1086.9384765625, + "learning_rate": 3.5977663554378594e-06, + "loss": 84.7707, + "step": 78140 + }, + { + "epoch": 0.6464821938205733, + "grad_norm": 1125.5399169921875, + "learning_rate": 3.5964126427752155e-06, + "loss": 109.9038, + "step": 78150 + }, + { + "epoch": 0.6465649170699425, + "grad_norm": 1124.92919921875, + "learning_rate": 3.595059041789575e-06, + "loss": 101.9598, + "step": 78160 + }, + { + 
"epoch": 0.6466476403193118, + "grad_norm": 868.62255859375, + "learning_rate": 3.5937055525886377e-06, + "loss": 80.6028, + "step": 78170 + }, + { + "epoch": 0.646730363568681, + "grad_norm": 605.7478637695312, + "learning_rate": 3.592352175280096e-06, + "loss": 91.8966, + "step": 78180 + }, + { + "epoch": 0.6468130868180502, + "grad_norm": 630.140380859375, + "learning_rate": 3.5909989099716325e-06, + "loss": 91.8721, + "step": 78190 + }, + { + "epoch": 0.6468958100674195, + "grad_norm": 599.5414428710938, + "learning_rate": 3.589645756770918e-06, + "loss": 73.2456, + "step": 78200 + }, + { + "epoch": 0.6469785333167887, + "grad_norm": 549.9411010742188, + "learning_rate": 3.5882927157856175e-06, + "loss": 76.2016, + "step": 78210 + }, + { + "epoch": 0.6470612565661579, + "grad_norm": 965.70947265625, + "learning_rate": 3.586939787123388e-06, + "loss": 89.8131, + "step": 78220 + }, + { + "epoch": 0.6471439798155272, + "grad_norm": 705.2805786132812, + "learning_rate": 3.585586970891876e-06, + "loss": 85.9309, + "step": 78230 + }, + { + "epoch": 0.6472267030648964, + "grad_norm": 539.7587890625, + "learning_rate": 3.584234267198715e-06, + "loss": 83.7423, + "step": 78240 + }, + { + "epoch": 0.6473094263142656, + "grad_norm": 904.0587768554688, + "learning_rate": 3.582881676151536e-06, + "loss": 102.2972, + "step": 78250 + }, + { + "epoch": 0.6473921495636349, + "grad_norm": 1084.20263671875, + "learning_rate": 3.581529197857959e-06, + "loss": 72.5396, + "step": 78260 + }, + { + "epoch": 0.6474748728130041, + "grad_norm": 1199.1031494140625, + "learning_rate": 3.580176832425594e-06, + "loss": 96.1683, + "step": 78270 + }, + { + "epoch": 0.6475575960623733, + "grad_norm": 1210.6788330078125, + "learning_rate": 3.5788245799620425e-06, + "loss": 103.4515, + "step": 78280 + }, + { + "epoch": 0.6476403193117426, + "grad_norm": 670.8250122070312, + "learning_rate": 3.577472440574896e-06, + "loss": 80.9642, + "step": 78290 + }, + { + "epoch": 0.6477230425611118, + "grad_norm": 965.8837890625, + "learning_rate": 3.5761204143717387e-06, + "loss": 61.6829, + "step": 78300 + }, + { + "epoch": 0.647805765810481, + "grad_norm": 566.632568359375, + "learning_rate": 3.5747685014601456e-06, + "loss": 104.7201, + "step": 78310 + }, + { + "epoch": 0.6478884890598503, + "grad_norm": 464.7363586425781, + "learning_rate": 3.5734167019476845e-06, + "loss": 90.921, + "step": 78320 + }, + { + "epoch": 0.6479712123092195, + "grad_norm": 498.57763671875, + "learning_rate": 3.572065015941907e-06, + "loss": 85.318, + "step": 78330 + }, + { + "epoch": 0.6480539355585887, + "grad_norm": 826.5978393554688, + "learning_rate": 3.570713443550362e-06, + "loss": 94.5103, + "step": 78340 + }, + { + "epoch": 0.648136658807958, + "grad_norm": 807.204345703125, + "learning_rate": 3.5693619848805892e-06, + "loss": 90.0785, + "step": 78350 + }, + { + "epoch": 0.6482193820573272, + "grad_norm": 774.7725219726562, + "learning_rate": 3.568010640040118e-06, + "loss": 70.4705, + "step": 78360 + }, + { + "epoch": 0.6483021053066964, + "grad_norm": 504.8932189941406, + "learning_rate": 3.566659409136468e-06, + "loss": 90.6975, + "step": 78370 + }, + { + "epoch": 0.6483848285560657, + "grad_norm": 746.927978515625, + "learning_rate": 3.565308292277151e-06, + "loss": 116.9276, + "step": 78380 + }, + { + "epoch": 0.6484675518054349, + "grad_norm": 912.6074829101562, + "learning_rate": 3.563957289569669e-06, + "loss": 78.5412, + "step": 78390 + }, + { + "epoch": 0.6485502750548041, + "grad_norm": 737.116455078125, + "learning_rate": 
3.5626064011215135e-06, + "loss": 76.1263, + "step": 78400 + }, + { + "epoch": 0.6486329983041734, + "grad_norm": 932.8018188476562, + "learning_rate": 3.5612556270401733e-06, + "loss": 84.2955, + "step": 78410 + }, + { + "epoch": 0.6487157215535426, + "grad_norm": 862.2860717773438, + "learning_rate": 3.5599049674331175e-06, + "loss": 102.538, + "step": 78420 + }, + { + "epoch": 0.6487984448029118, + "grad_norm": 1257.6517333984375, + "learning_rate": 3.5585544224078143e-06, + "loss": 101.6207, + "step": 78430 + }, + { + "epoch": 0.6488811680522811, + "grad_norm": 948.5725708007812, + "learning_rate": 3.5572039920717192e-06, + "loss": 79.6746, + "step": 78440 + }, + { + "epoch": 0.6489638913016503, + "grad_norm": 1046.1217041015625, + "learning_rate": 3.5558536765322825e-06, + "loss": 113.3602, + "step": 78450 + }, + { + "epoch": 0.6490466145510195, + "grad_norm": 915.197509765625, + "learning_rate": 3.554503475896941e-06, + "loss": 97.8924, + "step": 78460 + }, + { + "epoch": 0.6491293378003888, + "grad_norm": 1135.54248046875, + "learning_rate": 3.553153390273124e-06, + "loss": 69.3663, + "step": 78470 + }, + { + "epoch": 0.649212061049758, + "grad_norm": 998.7444458007812, + "learning_rate": 3.551803419768251e-06, + "loss": 73.8026, + "step": 78480 + }, + { + "epoch": 0.6492947842991272, + "grad_norm": 949.7462158203125, + "learning_rate": 3.5504535644897352e-06, + "loss": 105.2546, + "step": 78490 + }, + { + "epoch": 0.6493775075484965, + "grad_norm": 992.8348388671875, + "learning_rate": 3.549103824544975e-06, + "loss": 106.6997, + "step": 78500 + }, + { + "epoch": 0.6494602307978657, + "grad_norm": 748.7544555664062, + "learning_rate": 3.5477542000413657e-06, + "loss": 85.0411, + "step": 78510 + }, + { + "epoch": 0.6495429540472349, + "grad_norm": 1108.8211669921875, + "learning_rate": 3.546404691086289e-06, + "loss": 102.6315, + "step": 78520 + }, + { + "epoch": 0.6496256772966043, + "grad_norm": 552.9169311523438, + "learning_rate": 3.5450552977871207e-06, + "loss": 83.2827, + "step": 78530 + }, + { + "epoch": 0.6497084005459735, + "grad_norm": 615.8543090820312, + "learning_rate": 3.543706020251223e-06, + "loss": 93.9779, + "step": 78540 + }, + { + "epoch": 0.6497911237953427, + "grad_norm": 740.286376953125, + "learning_rate": 3.542356858585956e-06, + "loss": 85.2819, + "step": 78550 + }, + { + "epoch": 0.649873847044712, + "grad_norm": 898.7952270507812, + "learning_rate": 3.541007812898663e-06, + "loss": 105.5582, + "step": 78560 + }, + { + "epoch": 0.6499565702940812, + "grad_norm": 719.4003295898438, + "learning_rate": 3.539658883296683e-06, + "loss": 73.1953, + "step": 78570 + }, + { + "epoch": 0.6500392935434504, + "grad_norm": 1007.62109375, + "learning_rate": 3.5383100698873446e-06, + "loss": 103.7223, + "step": 78580 + }, + { + "epoch": 0.6501220167928197, + "grad_norm": 1619.2178955078125, + "learning_rate": 3.536961372777965e-06, + "loss": 125.7312, + "step": 78590 + }, + { + "epoch": 0.6502047400421889, + "grad_norm": 796.3026123046875, + "learning_rate": 3.535612792075854e-06, + "loss": 86.965, + "step": 78600 + }, + { + "epoch": 0.6502874632915581, + "grad_norm": 686.6871337890625, + "learning_rate": 3.5342643278883127e-06, + "loss": 67.3191, + "step": 78610 + }, + { + "epoch": 0.6503701865409274, + "grad_norm": 950.4487915039062, + "learning_rate": 3.532915980322632e-06, + "loss": 76.5455, + "step": 78620 + }, + { + "epoch": 0.6504529097902966, + "grad_norm": 750.7484741210938, + "learning_rate": 3.5315677494860923e-06, + "loss": 94.5387, + "step": 78630 + }, + { 
+ "epoch": 0.6505356330396658, + "grad_norm": 599.2245483398438, + "learning_rate": 3.5302196354859693e-06, + "loss": 97.6871, + "step": 78640 + }, + { + "epoch": 0.6506183562890351, + "grad_norm": 915.01123046875, + "learning_rate": 3.528871638429524e-06, + "loss": 84.8775, + "step": 78650 + }, + { + "epoch": 0.6507010795384043, + "grad_norm": 1431.2159423828125, + "learning_rate": 3.527523758424013e-06, + "loss": 101.8159, + "step": 78660 + }, + { + "epoch": 0.6507838027877735, + "grad_norm": 907.7205810546875, + "learning_rate": 3.526175995576676e-06, + "loss": 92.8583, + "step": 78670 + }, + { + "epoch": 0.6508665260371428, + "grad_norm": 685.8528442382812, + "learning_rate": 3.524828349994752e-06, + "loss": 108.4734, + "step": 78680 + }, + { + "epoch": 0.650949249286512, + "grad_norm": 1018.92919921875, + "learning_rate": 3.523480821785466e-06, + "loss": 116.4635, + "step": 78690 + }, + { + "epoch": 0.6510319725358812, + "grad_norm": 616.6637573242188, + "learning_rate": 3.5221334110560345e-06, + "loss": 70.0087, + "step": 78700 + }, + { + "epoch": 0.6511146957852505, + "grad_norm": 1268.9815673828125, + "learning_rate": 3.5207861179136654e-06, + "loss": 87.3583, + "step": 78710 + }, + { + "epoch": 0.6511974190346197, + "grad_norm": 787.7352905273438, + "learning_rate": 3.519438942465556e-06, + "loss": 79.1112, + "step": 78720 + }, + { + "epoch": 0.6512801422839889, + "grad_norm": 756.2986450195312, + "learning_rate": 3.5180918848188937e-06, + "loss": 89.4581, + "step": 78730 + }, + { + "epoch": 0.6513628655333582, + "grad_norm": 439.9861145019531, + "learning_rate": 3.516744945080861e-06, + "loss": 82.7982, + "step": 78740 + }, + { + "epoch": 0.6514455887827274, + "grad_norm": 702.6160278320312, + "learning_rate": 3.5153981233586277e-06, + "loss": 87.4703, + "step": 78750 + }, + { + "epoch": 0.6515283120320966, + "grad_norm": 973.2195434570312, + "learning_rate": 3.5140514197593494e-06, + "loss": 105.3042, + "step": 78760 + }, + { + "epoch": 0.6516110352814659, + "grad_norm": 547.9530639648438, + "learning_rate": 3.512704834390179e-06, + "loss": 90.783, + "step": 78770 + }, + { + "epoch": 0.6516937585308351, + "grad_norm": 734.762939453125, + "learning_rate": 3.5113583673582613e-06, + "loss": 93.9857, + "step": 78780 + }, + { + "epoch": 0.6517764817802043, + "grad_norm": 730.4273681640625, + "learning_rate": 3.510012018770726e-06, + "loss": 96.3544, + "step": 78790 + }, + { + "epoch": 0.6518592050295735, + "grad_norm": 1371.4984130859375, + "learning_rate": 3.508665788734696e-06, + "loss": 97.4626, + "step": 78800 + }, + { + "epoch": 0.6519419282789428, + "grad_norm": 974.1882934570312, + "learning_rate": 3.507319677357285e-06, + "loss": 106.0504, + "step": 78810 + }, + { + "epoch": 0.652024651528312, + "grad_norm": 973.0641479492188, + "learning_rate": 3.5059736847455967e-06, + "loss": 85.558, + "step": 78820 + }, + { + "epoch": 0.6521073747776812, + "grad_norm": 1314.4976806640625, + "learning_rate": 3.5046278110067242e-06, + "loss": 122.3348, + "step": 78830 + }, + { + "epoch": 0.6521900980270505, + "grad_norm": 687.9398803710938, + "learning_rate": 3.5032820562477577e-06, + "loss": 150.5666, + "step": 78840 + }, + { + "epoch": 0.6522728212764197, + "grad_norm": 1185.294189453125, + "learning_rate": 3.5019364205757667e-06, + "loss": 89.5953, + "step": 78850 + }, + { + "epoch": 0.6523555445257889, + "grad_norm": 736.0501708984375, + "learning_rate": 3.5005909040978188e-06, + "loss": 88.9674, + "step": 78860 + }, + { + "epoch": 0.6524382677751582, + "grad_norm": 644.609619140625, + 
"learning_rate": 3.4992455069209717e-06, + "loss": 84.699, + "step": 78870 + }, + { + "epoch": 0.6525209910245274, + "grad_norm": 954.2164306640625, + "learning_rate": 3.4979002291522723e-06, + "loss": 128.6724, + "step": 78880 + }, + { + "epoch": 0.6526037142738966, + "grad_norm": 750.8978881835938, + "learning_rate": 3.4965550708987583e-06, + "loss": 71.4564, + "step": 78890 + }, + { + "epoch": 0.6526864375232659, + "grad_norm": 927.7744140625, + "learning_rate": 3.4952100322674574e-06, + "loss": 93.2375, + "step": 78900 + }, + { + "epoch": 0.6527691607726351, + "grad_norm": 882.0057983398438, + "learning_rate": 3.4938651133653877e-06, + "loss": 89.3872, + "step": 78910 + }, + { + "epoch": 0.6528518840220043, + "grad_norm": 909.5263671875, + "learning_rate": 3.49252031429956e-06, + "loss": 90.456, + "step": 78920 + }, + { + "epoch": 0.6529346072713736, + "grad_norm": 810.2213134765625, + "learning_rate": 3.4911756351769722e-06, + "loss": 93.9017, + "step": 78930 + }, + { + "epoch": 0.6530173305207428, + "grad_norm": 510.4373474121094, + "learning_rate": 3.4898310761046133e-06, + "loss": 75.6976, + "step": 78940 + }, + { + "epoch": 0.653100053770112, + "grad_norm": 876.5424194335938, + "learning_rate": 3.4884866371894654e-06, + "loss": 82.5301, + "step": 78950 + }, + { + "epoch": 0.6531827770194814, + "grad_norm": 496.5128173828125, + "learning_rate": 3.487142318538498e-06, + "loss": 124.7649, + "step": 78960 + }, + { + "epoch": 0.6532655002688506, + "grad_norm": 574.9053344726562, + "learning_rate": 3.4857981202586742e-06, + "loss": 117.4653, + "step": 78970 + }, + { + "epoch": 0.6533482235182198, + "grad_norm": 619.2501831054688, + "learning_rate": 3.4844540424569453e-06, + "loss": 75.0704, + "step": 78980 + }, + { + "epoch": 0.6534309467675891, + "grad_norm": 502.5335693359375, + "learning_rate": 3.483110085240252e-06, + "loss": 117.5442, + "step": 78990 + }, + { + "epoch": 0.6535136700169583, + "grad_norm": 802.0072631835938, + "learning_rate": 3.481766248715528e-06, + "loss": 100.3333, + "step": 79000 + }, + { + "epoch": 0.6535963932663275, + "grad_norm": 762.8905639648438, + "learning_rate": 3.4804225329896963e-06, + "loss": 100.5814, + "step": 79010 + }, + { + "epoch": 0.6536791165156968, + "grad_norm": 732.5484619140625, + "learning_rate": 3.4790789381696686e-06, + "loss": 110.0594, + "step": 79020 + }, + { + "epoch": 0.653761839765066, + "grad_norm": 865.77734375, + "learning_rate": 3.4777354643623506e-06, + "loss": 80.9146, + "step": 79030 + }, + { + "epoch": 0.6538445630144352, + "grad_norm": 957.7134399414062, + "learning_rate": 3.4763921116746352e-06, + "loss": 93.2645, + "step": 79040 + }, + { + "epoch": 0.6539272862638045, + "grad_norm": 900.92041015625, + "learning_rate": 3.475048880213407e-06, + "loss": 62.1217, + "step": 79050 + }, + { + "epoch": 0.6540100095131737, + "grad_norm": 544.1870727539062, + "learning_rate": 3.473705770085539e-06, + "loss": 93.6251, + "step": 79060 + }, + { + "epoch": 0.6540927327625429, + "grad_norm": 1076.5374755859375, + "learning_rate": 3.4723627813979005e-06, + "loss": 113.8579, + "step": 79070 + }, + { + "epoch": 0.6541754560119122, + "grad_norm": 862.5486450195312, + "learning_rate": 3.471019914257344e-06, + "loss": 94.458, + "step": 79080 + }, + { + "epoch": 0.6542581792612814, + "grad_norm": 637.738037109375, + "learning_rate": 3.4696771687707176e-06, + "loss": 86.9744, + "step": 79090 + }, + { + "epoch": 0.6543409025106506, + "grad_norm": 979.8980712890625, + "learning_rate": 3.468334545044853e-06, + "loss": 89.8826, + "step": 79100 + 
}, + { + "epoch": 0.6544236257600199, + "grad_norm": 1138.6566162109375, + "learning_rate": 3.46699204318658e-06, + "loss": 93.8205, + "step": 79110 + }, + { + "epoch": 0.6545063490093891, + "grad_norm": 680.276611328125, + "learning_rate": 3.465649663302715e-06, + "loss": 87.8101, + "step": 79120 + }, + { + "epoch": 0.6545890722587583, + "grad_norm": 803.8458251953125, + "learning_rate": 3.464307405500064e-06, + "loss": 99.0789, + "step": 79130 + }, + { + "epoch": 0.6546717955081276, + "grad_norm": 1364.3984375, + "learning_rate": 3.4629652698854254e-06, + "loss": 92.8128, + "step": 79140 + }, + { + "epoch": 0.6547545187574968, + "grad_norm": 751.8421630859375, + "learning_rate": 3.461623256565586e-06, + "loss": 78.3212, + "step": 79150 + }, + { + "epoch": 0.654837242006866, + "grad_norm": 862.8961181640625, + "learning_rate": 3.4602813656473223e-06, + "loss": 94.2935, + "step": 79160 + }, + { + "epoch": 0.6549199652562353, + "grad_norm": 1315.6492919921875, + "learning_rate": 3.4589395972374055e-06, + "loss": 94.4292, + "step": 79170 + }, + { + "epoch": 0.6550026885056045, + "grad_norm": 714.10546875, + "learning_rate": 3.457597951442595e-06, + "loss": 61.3309, + "step": 79180 + }, + { + "epoch": 0.6550854117549737, + "grad_norm": 2307.056640625, + "learning_rate": 3.456256428369633e-06, + "loss": 95.6213, + "step": 79190 + }, + { + "epoch": 0.655168135004343, + "grad_norm": 1832.7984619140625, + "learning_rate": 3.4549150281252635e-06, + "loss": 94.4903, + "step": 79200 + }, + { + "epoch": 0.6552508582537122, + "grad_norm": 0.0, + "learning_rate": 3.453573750816214e-06, + "loss": 69.4073, + "step": 79210 + }, + { + "epoch": 0.6553335815030814, + "grad_norm": 550.095947265625, + "learning_rate": 3.452232596549204e-06, + "loss": 83.0661, + "step": 79220 + }, + { + "epoch": 0.6554163047524507, + "grad_norm": 833.4959716796875, + "learning_rate": 3.4508915654309438e-06, + "loss": 93.9101, + "step": 79230 + }, + { + "epoch": 0.6554990280018199, + "grad_norm": 584.6357421875, + "learning_rate": 3.4495506575681313e-06, + "loss": 89.3119, + "step": 79240 + }, + { + "epoch": 0.6555817512511891, + "grad_norm": 557.8964233398438, + "learning_rate": 3.4482098730674577e-06, + "loss": 78.8267, + "step": 79250 + }, + { + "epoch": 0.6556644745005584, + "grad_norm": 1147.6644287109375, + "learning_rate": 3.4468692120356017e-06, + "loss": 115.0388, + "step": 79260 + }, + { + "epoch": 0.6557471977499276, + "grad_norm": 1800.31494140625, + "learning_rate": 3.4455286745792383e-06, + "loss": 90.1449, + "step": 79270 + }, + { + "epoch": 0.6558299209992968, + "grad_norm": 1146.2457275390625, + "learning_rate": 3.4441882608050216e-06, + "loss": 99.0781, + "step": 79280 + }, + { + "epoch": 0.6559126442486661, + "grad_norm": 784.1929931640625, + "learning_rate": 3.442847970819604e-06, + "loss": 86.3861, + "step": 79290 + }, + { + "epoch": 0.6559953674980353, + "grad_norm": 1415.5955810546875, + "learning_rate": 3.441507804729627e-06, + "loss": 112.1447, + "step": 79300 + }, + { + "epoch": 0.6560780907474045, + "grad_norm": 643.2744140625, + "learning_rate": 3.440167762641722e-06, + "loss": 97.26, + "step": 79310 + }, + { + "epoch": 0.6561608139967738, + "grad_norm": 990.8764038085938, + "learning_rate": 3.43882784466251e-06, + "loss": 70.3671, + "step": 79320 + }, + { + "epoch": 0.656243537246143, + "grad_norm": 1008.8643188476562, + "learning_rate": 3.4374880508986013e-06, + "loss": 106.8238, + "step": 79330 + }, + { + "epoch": 0.6563262604955122, + "grad_norm": 616.8389892578125, + "learning_rate": 
3.436148381456598e-06, + "loss": 88.8686, + "step": 79340 + }, + { + "epoch": 0.6564089837448815, + "grad_norm": 1004.9009399414062, + "learning_rate": 3.434808836443091e-06, + "loss": 79.8238, + "step": 79350 + }, + { + "epoch": 0.6564917069942507, + "grad_norm": 1268.065673828125, + "learning_rate": 3.4334694159646608e-06, + "loss": 86.7182, + "step": 79360 + }, + { + "epoch": 0.6565744302436199, + "grad_norm": 1004.0042724609375, + "learning_rate": 3.43213012012788e-06, + "loss": 108.0468, + "step": 79370 + }, + { + "epoch": 0.6566571534929893, + "grad_norm": 563.8744506835938, + "learning_rate": 3.43079094903931e-06, + "loss": 74.4405, + "step": 79380 + }, + { + "epoch": 0.6567398767423585, + "grad_norm": 410.0007019042969, + "learning_rate": 3.4294519028055014e-06, + "loss": 89.0153, + "step": 79390 + }, + { + "epoch": 0.6568225999917277, + "grad_norm": 816.875, + "learning_rate": 3.428112981532998e-06, + "loss": 66.4603, + "step": 79400 + }, + { + "epoch": 0.656905323241097, + "grad_norm": 665.1447143554688, + "learning_rate": 3.4267741853283305e-06, + "loss": 123.0643, + "step": 79410 + }, + { + "epoch": 0.6569880464904662, + "grad_norm": 1130.827392578125, + "learning_rate": 3.425435514298021e-06, + "loss": 113.6049, + "step": 79420 + }, + { + "epoch": 0.6570707697398354, + "grad_norm": 933.1907348632812, + "learning_rate": 3.4240969685485813e-06, + "loss": 85.3566, + "step": 79430 + }, + { + "epoch": 0.6571534929892047, + "grad_norm": 930.4938354492188, + "learning_rate": 3.422758548186515e-06, + "loss": 110.3764, + "step": 79440 + }, + { + "epoch": 0.6572362162385739, + "grad_norm": 698.0180053710938, + "learning_rate": 3.4214202533183104e-06, + "loss": 89.8429, + "step": 79450 + }, + { + "epoch": 0.6573189394879431, + "grad_norm": 949.0037841796875, + "learning_rate": 3.420082084050453e-06, + "loss": 125.2701, + "step": 79460 + }, + { + "epoch": 0.6574016627373124, + "grad_norm": 1515.08349609375, + "learning_rate": 3.4187440404894123e-06, + "loss": 111.6007, + "step": 79470 + }, + { + "epoch": 0.6574843859866816, + "grad_norm": 531.5831298828125, + "learning_rate": 3.417406122741651e-06, + "loss": 115.491, + "step": 79480 + }, + { + "epoch": 0.6575671092360508, + "grad_norm": 951.4327392578125, + "learning_rate": 3.416068330913621e-06, + "loss": 77.1653, + "step": 79490 + }, + { + "epoch": 0.6576498324854201, + "grad_norm": 1479.0516357421875, + "learning_rate": 3.4147306651117663e-06, + "loss": 102.2539, + "step": 79500 + }, + { + "epoch": 0.6577325557347893, + "grad_norm": 1768.2347412109375, + "learning_rate": 3.4133931254425156e-06, + "loss": 139.0158, + "step": 79510 + }, + { + "epoch": 0.6578152789841585, + "grad_norm": 519.06103515625, + "learning_rate": 3.4120557120122944e-06, + "loss": 103.6259, + "step": 79520 + }, + { + "epoch": 0.6578980022335277, + "grad_norm": 923.902587890625, + "learning_rate": 3.4107184249275114e-06, + "loss": 69.9052, + "step": 79530 + }, + { + "epoch": 0.657980725482897, + "grad_norm": 812.4449462890625, + "learning_rate": 3.4093812642945694e-06, + "loss": 82.5353, + "step": 79540 + }, + { + "epoch": 0.6580634487322662, + "grad_norm": 669.1686401367188, + "learning_rate": 3.40804423021986e-06, + "loss": 75.8008, + "step": 79550 + }, + { + "epoch": 0.6581461719816354, + "grad_norm": 556.7330322265625, + "learning_rate": 3.4067073228097656e-06, + "loss": 79.9305, + "step": 79560 + }, + { + "epoch": 0.6582288952310047, + "grad_norm": 828.5440673828125, + "learning_rate": 3.4053705421706574e-06, + "loss": 119.8367, + "step": 79570 + }, + { + 
"epoch": 0.6583116184803739, + "grad_norm": 982.3250732421875, + "learning_rate": 3.4040338884088955e-06, + "loss": 103.1818, + "step": 79580 + }, + { + "epoch": 0.6583943417297431, + "grad_norm": 556.98193359375, + "learning_rate": 3.4026973616308334e-06, + "loss": 127.7505, + "step": 79590 + }, + { + "epoch": 0.6584770649791124, + "grad_norm": 930.4917602539062, + "learning_rate": 3.401360961942812e-06, + "loss": 85.2934, + "step": 79600 + }, + { + "epoch": 0.6585597882284816, + "grad_norm": 1134.147705078125, + "learning_rate": 3.4000246894511634e-06, + "loss": 70.3682, + "step": 79610 + }, + { + "epoch": 0.6586425114778508, + "grad_norm": 524.334228515625, + "learning_rate": 3.398688544262205e-06, + "loss": 74.7115, + "step": 79620 + }, + { + "epoch": 0.6587252347272201, + "grad_norm": 1209.0157470703125, + "learning_rate": 3.397352526482251e-06, + "loss": 86.3904, + "step": 79630 + }, + { + "epoch": 0.6588079579765893, + "grad_norm": 833.2462768554688, + "learning_rate": 3.396016636217601e-06, + "loss": 103.8662, + "step": 79640 + }, + { + "epoch": 0.6588906812259585, + "grad_norm": 1902.74951171875, + "learning_rate": 3.394680873574546e-06, + "loss": 89.4633, + "step": 79650 + }, + { + "epoch": 0.6589734044753278, + "grad_norm": 1193.58740234375, + "learning_rate": 3.3933452386593666e-06, + "loss": 83.5736, + "step": 79660 + }, + { + "epoch": 0.659056127724697, + "grad_norm": 602.8118896484375, + "learning_rate": 3.392009731578334e-06, + "loss": 115.8359, + "step": 79670 + }, + { + "epoch": 0.6591388509740662, + "grad_norm": 744.4619140625, + "learning_rate": 3.3906743524377053e-06, + "loss": 78.6802, + "step": 79680 + }, + { + "epoch": 0.6592215742234355, + "grad_norm": 1028.5284423828125, + "learning_rate": 3.3893391013437338e-06, + "loss": 82.8485, + "step": 79690 + }, + { + "epoch": 0.6593042974728047, + "grad_norm": 1039.501708984375, + "learning_rate": 3.38800397840266e-06, + "loss": 97.3438, + "step": 79700 + }, + { + "epoch": 0.6593870207221739, + "grad_norm": 1091.6658935546875, + "learning_rate": 3.3866689837207094e-06, + "loss": 103.66, + "step": 79710 + }, + { + "epoch": 0.6594697439715432, + "grad_norm": 1788.04345703125, + "learning_rate": 3.3853341174041025e-06, + "loss": 97.2377, + "step": 79720 + }, + { + "epoch": 0.6595524672209124, + "grad_norm": 842.3888549804688, + "learning_rate": 3.3839993795590507e-06, + "loss": 101.914, + "step": 79730 + }, + { + "epoch": 0.6596351904702816, + "grad_norm": 1032.7957763671875, + "learning_rate": 3.3826647702917526e-06, + "loss": 83.7671, + "step": 79740 + }, + { + "epoch": 0.6597179137196509, + "grad_norm": 910.4108276367188, + "learning_rate": 3.3813302897083955e-06, + "loss": 114.1942, + "step": 79750 + }, + { + "epoch": 0.6598006369690201, + "grad_norm": 710.4639282226562, + "learning_rate": 3.379995937915158e-06, + "loss": 84.6564, + "step": 79760 + }, + { + "epoch": 0.6598833602183893, + "grad_norm": 667.979248046875, + "learning_rate": 3.37866171501821e-06, + "loss": 77.0747, + "step": 79770 + }, + { + "epoch": 0.6599660834677586, + "grad_norm": 1670.4046630859375, + "learning_rate": 3.3773276211237087e-06, + "loss": 120.1049, + "step": 79780 + }, + { + "epoch": 0.6600488067171278, + "grad_norm": 613.4122314453125, + "learning_rate": 3.3759936563378004e-06, + "loss": 83.0683, + "step": 79790 + }, + { + "epoch": 0.660131529966497, + "grad_norm": 902.4213256835938, + "learning_rate": 3.374659820766625e-06, + "loss": 99.4036, + "step": 79800 + }, + { + "epoch": 0.6602142532158664, + "grad_norm": 841.2720336914062, + 
"learning_rate": 3.3733261145163064e-06, + "loss": 112.4562, + "step": 79810 + }, + { + "epoch": 0.6602969764652356, + "grad_norm": 609.0618286132812, + "learning_rate": 3.371992537692964e-06, + "loss": 95.7773, + "step": 79820 + }, + { + "epoch": 0.6603796997146048, + "grad_norm": 1067.176025390625, + "learning_rate": 3.370659090402704e-06, + "loss": 115.8151, + "step": 79830 + }, + { + "epoch": 0.6604624229639741, + "grad_norm": 1055.511474609375, + "learning_rate": 3.3693257727516227e-06, + "loss": 96.7919, + "step": 79840 + }, + { + "epoch": 0.6605451462133433, + "grad_norm": 510.99395751953125, + "learning_rate": 3.367992584845806e-06, + "loss": 76.2172, + "step": 79850 + }, + { + "epoch": 0.6606278694627125, + "grad_norm": 407.9087219238281, + "learning_rate": 3.3666595267913293e-06, + "loss": 86.5948, + "step": 79860 + }, + { + "epoch": 0.6607105927120818, + "grad_norm": 1010.9825439453125, + "learning_rate": 3.365326598694259e-06, + "loss": 80.669, + "step": 79870 + }, + { + "epoch": 0.660793315961451, + "grad_norm": 707.4743041992188, + "learning_rate": 3.3639938006606483e-06, + "loss": 89.7645, + "step": 79880 + }, + { + "epoch": 0.6608760392108202, + "grad_norm": 1092.1805419921875, + "learning_rate": 3.3626611327965418e-06, + "loss": 103.0097, + "step": 79890 + }, + { + "epoch": 0.6609587624601895, + "grad_norm": 530.4946899414062, + "learning_rate": 3.3613285952079754e-06, + "loss": 87.2853, + "step": 79900 + }, + { + "epoch": 0.6610414857095587, + "grad_norm": 750.7198486328125, + "learning_rate": 3.3599961880009713e-06, + "loss": 80.3749, + "step": 79910 + }, + { + "epoch": 0.6611242089589279, + "grad_norm": 728.5155029296875, + "learning_rate": 3.3586639112815446e-06, + "loss": 104.2869, + "step": 79920 + }, + { + "epoch": 0.6612069322082972, + "grad_norm": 1172.182373046875, + "learning_rate": 3.357331765155698e-06, + "loss": 76.1472, + "step": 79930 + }, + { + "epoch": 0.6612896554576664, + "grad_norm": 1009.7491455078125, + "learning_rate": 3.355999749729424e-06, + "loss": 83.3518, + "step": 79940 + }, + { + "epoch": 0.6613723787070356, + "grad_norm": 865.0538940429688, + "learning_rate": 3.354667865108706e-06, + "loss": 121.849, + "step": 79950 + }, + { + "epoch": 0.6614551019564049, + "grad_norm": 635.9669189453125, + "learning_rate": 3.353336111399513e-06, + "loss": 103.0901, + "step": 79960 + }, + { + "epoch": 0.6615378252057741, + "grad_norm": 3667.30615234375, + "learning_rate": 3.3520044887078096e-06, + "loss": 148.1079, + "step": 79970 + }, + { + "epoch": 0.6616205484551433, + "grad_norm": 1349.1805419921875, + "learning_rate": 3.350672997139546e-06, + "loss": 102.0127, + "step": 79980 + }, + { + "epoch": 0.6617032717045126, + "grad_norm": 795.7191162109375, + "learning_rate": 3.3493416368006614e-06, + "loss": 75.6753, + "step": 79990 + }, + { + "epoch": 0.6617859949538818, + "grad_norm": 692.658935546875, + "learning_rate": 3.348010407797088e-06, + "loss": 82.7135, + "step": 80000 + }, + { + "epoch": 0.661868718203251, + "grad_norm": 1096.899169921875, + "learning_rate": 3.346679310234744e-06, + "loss": 79.7514, + "step": 80010 + }, + { + "epoch": 0.6619514414526203, + "grad_norm": 1015.4619140625, + "learning_rate": 3.34534834421954e-06, + "loss": 59.5193, + "step": 80020 + }, + { + "epoch": 0.6620341647019895, + "grad_norm": 537.7772216796875, + "learning_rate": 3.3440175098573748e-06, + "loss": 87.1953, + "step": 80030 + }, + { + "epoch": 0.6621168879513587, + "grad_norm": 721.2218627929688, + "learning_rate": 3.3426868072541386e-06, + "loss": 81.0099, + 
"step": 80040 + }, + { + "epoch": 0.662199611200728, + "grad_norm": 862.6383666992188, + "learning_rate": 3.3413562365157037e-06, + "loss": 82.9833, + "step": 80050 + }, + { + "epoch": 0.6622823344500972, + "grad_norm": 879.6834106445312, + "learning_rate": 3.340025797747942e-06, + "loss": 72.3241, + "step": 80060 + }, + { + "epoch": 0.6623650576994664, + "grad_norm": 831.2039184570312, + "learning_rate": 3.3386954910567094e-06, + "loss": 66.0481, + "step": 80070 + }, + { + "epoch": 0.6624477809488357, + "grad_norm": 699.9284057617188, + "learning_rate": 3.337365316547852e-06, + "loss": 98.672, + "step": 80080 + }, + { + "epoch": 0.6625305041982049, + "grad_norm": 831.2557983398438, + "learning_rate": 3.336035274327206e-06, + "loss": 99.8643, + "step": 80090 + }, + { + "epoch": 0.6626132274475741, + "grad_norm": 1425.111328125, + "learning_rate": 3.3347053645005965e-06, + "loss": 82.7985, + "step": 80100 + }, + { + "epoch": 0.6626959506969434, + "grad_norm": 794.6343994140625, + "learning_rate": 3.333375587173838e-06, + "loss": 107.7234, + "step": 80110 + }, + { + "epoch": 0.6627786739463126, + "grad_norm": 481.92596435546875, + "learning_rate": 3.332045942452738e-06, + "loss": 62.8261, + "step": 80120 + }, + { + "epoch": 0.6628613971956818, + "grad_norm": 651.9033813476562, + "learning_rate": 3.330716430443085e-06, + "loss": 76.272, + "step": 80130 + }, + { + "epoch": 0.6629441204450511, + "grad_norm": 692.5248413085938, + "learning_rate": 3.329387051250664e-06, + "loss": 92.6327, + "step": 80140 + }, + { + "epoch": 0.6630268436944203, + "grad_norm": 537.0725708007812, + "learning_rate": 3.3280578049812493e-06, + "loss": 73.6244, + "step": 80150 + }, + { + "epoch": 0.6631095669437895, + "grad_norm": 891.6332397460938, + "learning_rate": 3.3267286917406027e-06, + "loss": 67.5265, + "step": 80160 + }, + { + "epoch": 0.6631922901931588, + "grad_norm": 662.6693115234375, + "learning_rate": 3.3253997116344737e-06, + "loss": 102.1911, + "step": 80170 + }, + { + "epoch": 0.663275013442528, + "grad_norm": 1005.4918823242188, + "learning_rate": 3.3240708647686047e-06, + "loss": 76.6256, + "step": 80180 + }, + { + "epoch": 0.6633577366918972, + "grad_norm": 629.626953125, + "learning_rate": 3.322742151248726e-06, + "loss": 87.3736, + "step": 80190 + }, + { + "epoch": 0.6634404599412665, + "grad_norm": 760.7479858398438, + "learning_rate": 3.3214135711805555e-06, + "loss": 96.766, + "step": 80200 + }, + { + "epoch": 0.6635231831906357, + "grad_norm": 556.74365234375, + "learning_rate": 3.3200851246698053e-06, + "loss": 77.8871, + "step": 80210 + }, + { + "epoch": 0.663605906440005, + "grad_norm": 645.0292358398438, + "learning_rate": 3.318756811822171e-06, + "loss": 74.2956, + "step": 80220 + }, + { + "epoch": 0.6636886296893741, + "grad_norm": 1107.0675048828125, + "learning_rate": 3.3174286327433408e-06, + "loss": 87.9465, + "step": 80230 + }, + { + "epoch": 0.6637713529387435, + "grad_norm": 770.3927001953125, + "learning_rate": 3.3161005875389916e-06, + "loss": 91.9657, + "step": 80240 + }, + { + "epoch": 0.6638540761881127, + "grad_norm": 1054.7138671875, + "learning_rate": 3.3147726763147913e-06, + "loss": 84.3892, + "step": 80250 + }, + { + "epoch": 0.6639367994374819, + "grad_norm": 1000.5108032226562, + "learning_rate": 3.3134448991763957e-06, + "loss": 101.0391, + "step": 80260 + }, + { + "epoch": 0.6640195226868512, + "grad_norm": 588.8154296875, + "learning_rate": 3.312117256229449e-06, + "loss": 91.1146, + "step": 80270 + }, + { + "epoch": 0.6641022459362204, + "grad_norm": 
722.9697875976562, + "learning_rate": 3.310789747579586e-06, + "loss": 84.0725, + "step": 80280 + }, + { + "epoch": 0.6641849691855896, + "grad_norm": 645.8081665039062, + "learning_rate": 3.30946237333243e-06, + "loss": 92.8005, + "step": 80290 + }, + { + "epoch": 0.6642676924349589, + "grad_norm": 1031.3046875, + "learning_rate": 3.308135133593595e-06, + "loss": 108.4536, + "step": 80300 + }, + { + "epoch": 0.6643504156843281, + "grad_norm": 596.3705444335938, + "learning_rate": 3.3068080284686825e-06, + "loss": 104.7899, + "step": 80310 + }, + { + "epoch": 0.6644331389336973, + "grad_norm": 792.6683349609375, + "learning_rate": 3.3054810580632844e-06, + "loss": 100.9892, + "step": 80320 + }, + { + "epoch": 0.6645158621830666, + "grad_norm": 803.8338623046875, + "learning_rate": 3.304154222482982e-06, + "loss": 72.7554, + "step": 80330 + }, + { + "epoch": 0.6645985854324358, + "grad_norm": 1118.839111328125, + "learning_rate": 3.3028275218333438e-06, + "loss": 67.1966, + "step": 80340 + }, + { + "epoch": 0.664681308681805, + "grad_norm": 929.019287109375, + "learning_rate": 3.301500956219932e-06, + "loss": 107.5598, + "step": 80350 + }, + { + "epoch": 0.6647640319311743, + "grad_norm": 1357.971923828125, + "learning_rate": 3.3001745257482935e-06, + "loss": 130.7297, + "step": 80360 + }, + { + "epoch": 0.6648467551805435, + "grad_norm": 299.3155212402344, + "learning_rate": 3.2988482305239673e-06, + "loss": 80.1713, + "step": 80370 + }, + { + "epoch": 0.6649294784299127, + "grad_norm": 658.03125, + "learning_rate": 3.2975220706524813e-06, + "loss": 120.3528, + "step": 80380 + }, + { + "epoch": 0.665012201679282, + "grad_norm": 1064.147216796875, + "learning_rate": 3.2961960462393492e-06, + "loss": 102.2007, + "step": 80390 + }, + { + "epoch": 0.6650949249286512, + "grad_norm": 783.42431640625, + "learning_rate": 3.2948701573900786e-06, + "loss": 104.9912, + "step": 80400 + }, + { + "epoch": 0.6651776481780204, + "grad_norm": 729.3176879882812, + "learning_rate": 3.2935444042101646e-06, + "loss": 95.9438, + "step": 80410 + }, + { + "epoch": 0.6652603714273897, + "grad_norm": 819.0823974609375, + "learning_rate": 3.29221878680509e-06, + "loss": 118.4774, + "step": 80420 + }, + { + "epoch": 0.6653430946767589, + "grad_norm": 698.5296020507812, + "learning_rate": 3.2908933052803292e-06, + "loss": 79.8682, + "step": 80430 + }, + { + "epoch": 0.6654258179261281, + "grad_norm": 1220.9241943359375, + "learning_rate": 3.2895679597413433e-06, + "loss": 97.124, + "step": 80440 + }, + { + "epoch": 0.6655085411754974, + "grad_norm": 1185.635009765625, + "learning_rate": 3.2882427502935867e-06, + "loss": 80.9898, + "step": 80450 + }, + { + "epoch": 0.6655912644248666, + "grad_norm": 1041.6197509765625, + "learning_rate": 3.2869176770424976e-06, + "loss": 91.9751, + "step": 80460 + }, + { + "epoch": 0.6656739876742358, + "grad_norm": 909.6505737304688, + "learning_rate": 3.2855927400935085e-06, + "loss": 83.0227, + "step": 80470 + }, + { + "epoch": 0.6657567109236051, + "grad_norm": 978.8333740234375, + "learning_rate": 3.2842679395520363e-06, + "loss": 87.613, + "step": 80480 + }, + { + "epoch": 0.6658394341729743, + "grad_norm": 1578.8958740234375, + "learning_rate": 3.282943275523489e-06, + "loss": 86.7915, + "step": 80490 + }, + { + "epoch": 0.6659221574223435, + "grad_norm": 1016.648681640625, + "learning_rate": 3.2816187481132655e-06, + "loss": 89.614, + "step": 80500 + }, + { + "epoch": 0.6660048806717128, + "grad_norm": 795.9924926757812, + "learning_rate": 3.280294357426752e-06, + "loss": 
92.8654, + "step": 80510 + }, + { + "epoch": 0.666087603921082, + "grad_norm": 486.5448913574219, + "learning_rate": 3.2789701035693242e-06, + "loss": 68.977, + "step": 80520 + }, + { + "epoch": 0.6661703271704512, + "grad_norm": 1071.9464111328125, + "learning_rate": 3.277645986646346e-06, + "loss": 97.0568, + "step": 80530 + }, + { + "epoch": 0.6662530504198205, + "grad_norm": 1014.2429809570312, + "learning_rate": 3.276322006763172e-06, + "loss": 111.9005, + "step": 80540 + }, + { + "epoch": 0.6663357736691897, + "grad_norm": 571.8171997070312, + "learning_rate": 3.274998164025148e-06, + "loss": 73.2075, + "step": 80550 + }, + { + "epoch": 0.6664184969185589, + "grad_norm": 1098.88720703125, + "learning_rate": 3.2736744585376016e-06, + "loss": 73.6912, + "step": 80560 + }, + { + "epoch": 0.6665012201679282, + "grad_norm": 1065.5684814453125, + "learning_rate": 3.2723508904058547e-06, + "loss": 86.9936, + "step": 80570 + }, + { + "epoch": 0.6665839434172974, + "grad_norm": 1688.9266357421875, + "learning_rate": 3.27102745973522e-06, + "loss": 95.265, + "step": 80580 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1830.2410888671875, + "learning_rate": 3.269704166630995e-06, + "loss": 107.0393, + "step": 80590 + }, + { + "epoch": 0.6667493899160359, + "grad_norm": 662.8126831054688, + "learning_rate": 3.268381011198468e-06, + "loss": 81.1234, + "step": 80600 + }, + { + "epoch": 0.6668321131654051, + "grad_norm": 780.1619873046875, + "learning_rate": 3.2670579935429176e-06, + "loss": 72.5676, + "step": 80610 + }, + { + "epoch": 0.6669148364147743, + "grad_norm": 900.4564819335938, + "learning_rate": 3.265735113769609e-06, + "loss": 72.4489, + "step": 80620 + }, + { + "epoch": 0.6669975596641436, + "grad_norm": 737.3795166015625, + "learning_rate": 3.264412371983797e-06, + "loss": 104.431, + "step": 80630 + }, + { + "epoch": 0.6670802829135128, + "grad_norm": 705.3508911132812, + "learning_rate": 3.2630897682907312e-06, + "loss": 82.3283, + "step": 80640 + }, + { + "epoch": 0.667163006162882, + "grad_norm": 1338.135498046875, + "learning_rate": 3.261767302795639e-06, + "loss": 102.0957, + "step": 80650 + }, + { + "epoch": 0.6672457294122514, + "grad_norm": 1417.84375, + "learning_rate": 3.2604449756037447e-06, + "loss": 140.2835, + "step": 80660 + }, + { + "epoch": 0.6673284526616206, + "grad_norm": 899.5838012695312, + "learning_rate": 3.2591227868202592e-06, + "loss": 106.4462, + "step": 80670 + }, + { + "epoch": 0.6674111759109898, + "grad_norm": 1201.1510009765625, + "learning_rate": 3.257800736550385e-06, + "loss": 88.8766, + "step": 80680 + }, + { + "epoch": 0.6674938991603591, + "grad_norm": 1101.5433349609375, + "learning_rate": 3.2564788248993105e-06, + "loss": 91.0437, + "step": 80690 + }, + { + "epoch": 0.6675766224097283, + "grad_norm": 609.85400390625, + "learning_rate": 3.2551570519722155e-06, + "loss": 104.3016, + "step": 80700 + }, + { + "epoch": 0.6676593456590975, + "grad_norm": 1198.7747802734375, + "learning_rate": 3.2538354178742648e-06, + "loss": 102.0451, + "step": 80710 + }, + { + "epoch": 0.6677420689084668, + "grad_norm": 494.413818359375, + "learning_rate": 3.2525139227106163e-06, + "loss": 70.2176, + "step": 80720 + }, + { + "epoch": 0.667824792157836, + "grad_norm": 614.208251953125, + "learning_rate": 3.2511925665864164e-06, + "loss": 67.1571, + "step": 80730 + }, + { + "epoch": 0.6679075154072052, + "grad_norm": 730.7908935546875, + "learning_rate": 3.2498713496067963e-06, + "loss": 73.7568, + "step": 80740 + }, + { + "epoch": 0.6679902386565745, + 
"grad_norm": 1220.75341796875, + "learning_rate": 3.2485502718768814e-06, + "loss": 103.8804, + "step": 80750 + }, + { + "epoch": 0.6680729619059437, + "grad_norm": 909.8517456054688, + "learning_rate": 3.2472293335017836e-06, + "loss": 90.6027, + "step": 80760 + }, + { + "epoch": 0.6681556851553129, + "grad_norm": 1850.9075927734375, + "learning_rate": 3.245908534586602e-06, + "loss": 87.6312, + "step": 80770 + }, + { + "epoch": 0.6682384084046822, + "grad_norm": 984.0964965820312, + "learning_rate": 3.2445878752364298e-06, + "loss": 94.8259, + "step": 80780 + }, + { + "epoch": 0.6683211316540514, + "grad_norm": 605.5479736328125, + "learning_rate": 3.2432673555563433e-06, + "loss": 113.5487, + "step": 80790 + }, + { + "epoch": 0.6684038549034206, + "grad_norm": 800.7566528320312, + "learning_rate": 3.2419469756514116e-06, + "loss": 99.107, + "step": 80800 + }, + { + "epoch": 0.6684865781527899, + "grad_norm": 1075.5169677734375, + "learning_rate": 3.2406267356266918e-06, + "loss": 82.8865, + "step": 80810 + }, + { + "epoch": 0.6685693014021591, + "grad_norm": 1107.46240234375, + "learning_rate": 3.2393066355872264e-06, + "loss": 75.885, + "step": 80820 + }, + { + "epoch": 0.6686520246515283, + "grad_norm": 1359.361083984375, + "learning_rate": 3.237986675638052e-06, + "loss": 115.8543, + "step": 80830 + }, + { + "epoch": 0.6687347479008976, + "grad_norm": 1057.6058349609375, + "learning_rate": 3.236666855884192e-06, + "loss": 75.9444, + "step": 80840 + }, + { + "epoch": 0.6688174711502668, + "grad_norm": 1190.32421875, + "learning_rate": 3.2353471764306567e-06, + "loss": 97.1294, + "step": 80850 + }, + { + "epoch": 0.668900194399636, + "grad_norm": 431.67431640625, + "learning_rate": 3.234027637382447e-06, + "loss": 78.7077, + "step": 80860 + }, + { + "epoch": 0.6689829176490053, + "grad_norm": 1030.784912109375, + "learning_rate": 3.2327082388445545e-06, + "loss": 123.1514, + "step": 80870 + }, + { + "epoch": 0.6690656408983745, + "grad_norm": 712.6047973632812, + "learning_rate": 3.2313889809219568e-06, + "loss": 93.2329, + "step": 80880 + }, + { + "epoch": 0.6691483641477437, + "grad_norm": 793.4395141601562, + "learning_rate": 3.2300698637196217e-06, + "loss": 88.246, + "step": 80890 + }, + { + "epoch": 0.669231087397113, + "grad_norm": 594.5629272460938, + "learning_rate": 3.2287508873425043e-06, + "loss": 66.3094, + "step": 80900 + }, + { + "epoch": 0.6693138106464822, + "grad_norm": 589.426513671875, + "learning_rate": 3.22743205189555e-06, + "loss": 72.8444, + "step": 80910 + }, + { + "epoch": 0.6693965338958514, + "grad_norm": 545.673095703125, + "learning_rate": 3.2261133574836918e-06, + "loss": 89.7926, + "step": 80920 + }, + { + "epoch": 0.6694792571452207, + "grad_norm": 809.666748046875, + "learning_rate": 3.2247948042118525e-06, + "loss": 81.8926, + "step": 80930 + }, + { + "epoch": 0.6695619803945899, + "grad_norm": 1300.5181884765625, + "learning_rate": 3.223476392184944e-06, + "loss": 93.8708, + "step": 80940 + }, + { + "epoch": 0.6696447036439591, + "grad_norm": 767.7609252929688, + "learning_rate": 3.2221581215078656e-06, + "loss": 86.6903, + "step": 80950 + }, + { + "epoch": 0.6697274268933283, + "grad_norm": 568.2315063476562, + "learning_rate": 3.2208399922855055e-06, + "loss": 88.5472, + "step": 80960 + }, + { + "epoch": 0.6698101501426976, + "grad_norm": 971.0084228515625, + "learning_rate": 3.2195220046227425e-06, + "loss": 104.4163, + "step": 80970 + }, + { + "epoch": 0.6698928733920668, + "grad_norm": 955.1143188476562, + "learning_rate": 
3.218204158624445e-06, + "loss": 71.1244, + "step": 80980 + }, + { + "epoch": 0.669975596641436, + "grad_norm": 953.3546142578125, + "learning_rate": 3.216886454395463e-06, + "loss": 89.6237, + "step": 80990 + }, + { + "epoch": 0.6700583198908053, + "grad_norm": 684.8885498046875, + "learning_rate": 3.2155688920406415e-06, + "loss": 99.5519, + "step": 81000 + }, + { + "epoch": 0.6701410431401745, + "grad_norm": 1077.44677734375, + "learning_rate": 3.2142514716648143e-06, + "loss": 82.6027, + "step": 81010 + }, + { + "epoch": 0.6702237663895437, + "grad_norm": 455.7202453613281, + "learning_rate": 3.212934193372803e-06, + "loss": 94.583, + "step": 81020 + }, + { + "epoch": 0.670306489638913, + "grad_norm": 445.9930419921875, + "learning_rate": 3.2116170572694156e-06, + "loss": 94.4956, + "step": 81030 + }, + { + "epoch": 0.6703892128882822, + "grad_norm": 668.4183349609375, + "learning_rate": 3.2103000634594518e-06, + "loss": 96.0746, + "step": 81040 + }, + { + "epoch": 0.6704719361376514, + "grad_norm": 1390.123291015625, + "learning_rate": 3.2089832120476983e-06, + "loss": 89.5325, + "step": 81050 + }, + { + "epoch": 0.6705546593870207, + "grad_norm": 847.5530395507812, + "learning_rate": 3.2076665031389294e-06, + "loss": 92.6629, + "step": 81060 + }, + { + "epoch": 0.67063738263639, + "grad_norm": 1006.8488159179688, + "learning_rate": 3.2063499368379146e-06, + "loss": 84.7229, + "step": 81070 + }, + { + "epoch": 0.6707201058857591, + "grad_norm": 802.8297119140625, + "learning_rate": 3.2050335132494014e-06, + "loss": 91.7173, + "step": 81080 + }, + { + "epoch": 0.6708028291351285, + "grad_norm": 806.7569580078125, + "learning_rate": 3.203717232478133e-06, + "loss": 95.0556, + "step": 81090 + }, + { + "epoch": 0.6708855523844977, + "grad_norm": 1519.580810546875, + "learning_rate": 3.2024010946288415e-06, + "loss": 93.1881, + "step": 81100 + }, + { + "epoch": 0.6709682756338669, + "grad_norm": 539.17919921875, + "learning_rate": 3.201085099806245e-06, + "loss": 98.1286, + "step": 81110 + }, + { + "epoch": 0.6710509988832362, + "grad_norm": 1128.193359375, + "learning_rate": 3.199769248115051e-06, + "loss": 89.5094, + "step": 81120 + }, + { + "epoch": 0.6711337221326054, + "grad_norm": 1057.2535400390625, + "learning_rate": 3.1984535396599565e-06, + "loss": 103.7013, + "step": 81130 + }, + { + "epoch": 0.6712164453819746, + "grad_norm": 699.2178955078125, + "learning_rate": 3.1971379745456452e-06, + "loss": 88.7541, + "step": 81140 + }, + { + "epoch": 0.6712991686313439, + "grad_norm": 515.211669921875, + "learning_rate": 3.1958225528767918e-06, + "loss": 85.0595, + "step": 81150 + }, + { + "epoch": 0.6713818918807131, + "grad_norm": 1344.10595703125, + "learning_rate": 3.1945072747580585e-06, + "loss": 89.7821, + "step": 81160 + }, + { + "epoch": 0.6714646151300823, + "grad_norm": 1213.0770263671875, + "learning_rate": 3.1931921402940946e-06, + "loss": 73.3116, + "step": 81170 + }, + { + "epoch": 0.6715473383794516, + "grad_norm": 654.7890625, + "learning_rate": 3.1918771495895395e-06, + "loss": 78.7428, + "step": 81180 + }, + { + "epoch": 0.6716300616288208, + "grad_norm": 1164.929443359375, + "learning_rate": 3.1905623027490205e-06, + "loss": 84.9769, + "step": 81190 + }, + { + "epoch": 0.67171278487819, + "grad_norm": 1309.23779296875, + "learning_rate": 3.1892475998771567e-06, + "loss": 81.4625, + "step": 81200 + }, + { + "epoch": 0.6717955081275593, + "grad_norm": 654.0279541015625, + "learning_rate": 3.1879330410785503e-06, + "loss": 71.7905, + "step": 81210 + }, + { + "epoch": 
0.6718782313769285, + "grad_norm": 503.28863525390625, + "learning_rate": 3.186618626457796e-06, + "loss": 82.309, + "step": 81220 + }, + { + "epoch": 0.6719609546262977, + "grad_norm": 951.95654296875, + "learning_rate": 3.1853043561194748e-06, + "loss": 88.6264, + "step": 81230 + }, + { + "epoch": 0.672043677875667, + "grad_norm": 778.96484375, + "learning_rate": 3.183990230168159e-06, + "loss": 87.3521, + "step": 81240 + }, + { + "epoch": 0.6721264011250362, + "grad_norm": 1273.863525390625, + "learning_rate": 3.1826762487084053e-06, + "loss": 96.637, + "step": 81250 + }, + { + "epoch": 0.6722091243744054, + "grad_norm": 951.4760131835938, + "learning_rate": 3.1813624118447615e-06, + "loss": 77.3764, + "step": 81260 + }, + { + "epoch": 0.6722918476237747, + "grad_norm": 613.3677978515625, + "learning_rate": 3.180048719681765e-06, + "loss": 81.409, + "step": 81270 + }, + { + "epoch": 0.6723745708731439, + "grad_norm": 1063.3297119140625, + "learning_rate": 3.178735172323939e-06, + "loss": 68.0863, + "step": 81280 + }, + { + "epoch": 0.6724572941225131, + "grad_norm": 799.1066284179688, + "learning_rate": 3.177421769875796e-06, + "loss": 90.396, + "step": 81290 + }, + { + "epoch": 0.6725400173718824, + "grad_norm": 1305.4615478515625, + "learning_rate": 3.176108512441839e-06, + "loss": 110.8276, + "step": 81300 + }, + { + "epoch": 0.6726227406212516, + "grad_norm": 630.9846801757812, + "learning_rate": 3.174795400126557e-06, + "loss": 84.7067, + "step": 81310 + }, + { + "epoch": 0.6727054638706208, + "grad_norm": 2631.676025390625, + "learning_rate": 3.173482433034429e-06, + "loss": 118.5525, + "step": 81320 + }, + { + "epoch": 0.6727881871199901, + "grad_norm": 1203.627197265625, + "learning_rate": 3.1721696112699217e-06, + "loss": 102.9443, + "step": 81330 + }, + { + "epoch": 0.6728709103693593, + "grad_norm": 1666.17236328125, + "learning_rate": 3.1708569349374896e-06, + "loss": 100.6521, + "step": 81340 + }, + { + "epoch": 0.6729536336187285, + "grad_norm": 969.8807983398438, + "learning_rate": 3.1695444041415757e-06, + "loss": 88.7701, + "step": 81350 + }, + { + "epoch": 0.6730363568680978, + "grad_norm": 1491.0135498046875, + "learning_rate": 3.1682320189866133e-06, + "loss": 87.3877, + "step": 81360 + }, + { + "epoch": 0.673119080117467, + "grad_norm": 506.7419128417969, + "learning_rate": 3.1669197795770225e-06, + "loss": 107.3529, + "step": 81370 + }, + { + "epoch": 0.6732018033668362, + "grad_norm": 839.7028198242188, + "learning_rate": 3.165607686017212e-06, + "loss": 62.6055, + "step": 81380 + }, + { + "epoch": 0.6732845266162055, + "grad_norm": 372.7325134277344, + "learning_rate": 3.164295738411578e-06, + "loss": 79.6807, + "step": 81390 + }, + { + "epoch": 0.6733672498655747, + "grad_norm": 940.877685546875, + "learning_rate": 3.1629839368645087e-06, + "loss": 109.5253, + "step": 81400 + }, + { + "epoch": 0.6734499731149439, + "grad_norm": 571.5814208984375, + "learning_rate": 3.161672281480379e-06, + "loss": 101.1825, + "step": 81410 + }, + { + "epoch": 0.6735326963643132, + "grad_norm": 1666.25634765625, + "learning_rate": 3.1603607723635455e-06, + "loss": 92.3682, + "step": 81420 + }, + { + "epoch": 0.6736154196136824, + "grad_norm": 394.33026123046875, + "learning_rate": 3.1590494096183643e-06, + "loss": 68.1128, + "step": 81430 + }, + { + "epoch": 0.6736981428630516, + "grad_norm": 943.1664428710938, + "learning_rate": 3.1577381933491718e-06, + "loss": 92.4026, + "step": 81440 + }, + { + "epoch": 0.6737808661124209, + "grad_norm": 479.8966064453125, + "learning_rate": 
3.156427123660297e-06, + "loss": 101.9956, + "step": 81450 + }, + { + "epoch": 0.6738635893617901, + "grad_norm": 1042.3021240234375, + "learning_rate": 3.1551162006560554e-06, + "loss": 92.2319, + "step": 81460 + }, + { + "epoch": 0.6739463126111593, + "grad_norm": 640.0420532226562, + "learning_rate": 3.15380542444075e-06, + "loss": 115.9655, + "step": 81470 + }, + { + "epoch": 0.6740290358605286, + "grad_norm": 636.8284912109375, + "learning_rate": 3.1524947951186746e-06, + "loss": 80.7586, + "step": 81480 + }, + { + "epoch": 0.6741117591098978, + "grad_norm": 886.6581420898438, + "learning_rate": 3.1511843127941085e-06, + "loss": 116.3692, + "step": 81490 + }, + { + "epoch": 0.674194482359267, + "grad_norm": 881.8652954101562, + "learning_rate": 3.149873977571324e-06, + "loss": 81.8739, + "step": 81500 + }, + { + "epoch": 0.6742772056086364, + "grad_norm": 663.4722900390625, + "learning_rate": 3.148563789554575e-06, + "loss": 74.8076, + "step": 81510 + }, + { + "epoch": 0.6743599288580056, + "grad_norm": 1380.9359130859375, + "learning_rate": 3.147253748848107e-06, + "loss": 138.0078, + "step": 81520 + }, + { + "epoch": 0.6744426521073748, + "grad_norm": 402.91241455078125, + "learning_rate": 3.1459438555561565e-06, + "loss": 111.0916, + "step": 81530 + }, + { + "epoch": 0.6745253753567441, + "grad_norm": 256.97442626953125, + "learning_rate": 3.1446341097829446e-06, + "loss": 81.1193, + "step": 81540 + }, + { + "epoch": 0.6746080986061133, + "grad_norm": 847.0762329101562, + "learning_rate": 3.1433245116326812e-06, + "loss": 80.5571, + "step": 81550 + }, + { + "epoch": 0.6746908218554825, + "grad_norm": 387.09881591796875, + "learning_rate": 3.1420150612095653e-06, + "loss": 85.5534, + "step": 81560 + }, + { + "epoch": 0.6747735451048518, + "grad_norm": 1107.9180908203125, + "learning_rate": 3.140705758617784e-06, + "loss": 92.0838, + "step": 81570 + }, + { + "epoch": 0.674856268354221, + "grad_norm": 940.799560546875, + "learning_rate": 3.139396603961512e-06, + "loss": 95.1766, + "step": 81580 + }, + { + "epoch": 0.6749389916035902, + "grad_norm": 753.7284545898438, + "learning_rate": 3.1380875973449155e-06, + "loss": 79.7191, + "step": 81590 + }, + { + "epoch": 0.6750217148529595, + "grad_norm": 1136.5980224609375, + "learning_rate": 3.1367787388721427e-06, + "loss": 69.0357, + "step": 81600 + }, + { + "epoch": 0.6751044381023287, + "grad_norm": 724.4802856445312, + "learning_rate": 3.135470028647334e-06, + "loss": 87.2878, + "step": 81610 + }, + { + "epoch": 0.6751871613516979, + "grad_norm": 742.6774291992188, + "learning_rate": 3.134161466774617e-06, + "loss": 72.0161, + "step": 81620 + }, + { + "epoch": 0.6752698846010672, + "grad_norm": 855.7781372070312, + "learning_rate": 3.1328530533581102e-06, + "loss": 76.9828, + "step": 81630 + }, + { + "epoch": 0.6753526078504364, + "grad_norm": 848.9146118164062, + "learning_rate": 3.131544788501917e-06, + "loss": 57.4658, + "step": 81640 + }, + { + "epoch": 0.6754353310998056, + "grad_norm": 800.960205078125, + "learning_rate": 3.1302366723101294e-06, + "loss": 94.8087, + "step": 81650 + }, + { + "epoch": 0.6755180543491749, + "grad_norm": 1037.375, + "learning_rate": 3.12892870488683e-06, + "loss": 102.3649, + "step": 81660 + }, + { + "epoch": 0.6756007775985441, + "grad_norm": 530.1427001953125, + "learning_rate": 3.1276208863360862e-06, + "loss": 79.7667, + "step": 81670 + }, + { + "epoch": 0.6756835008479133, + "grad_norm": 1380.489501953125, + "learning_rate": 3.126313216761955e-06, + "loss": 76.0104, + "step": 81680 + }, + { + 
"epoch": 0.6757662240972825, + "grad_norm": 503.63250732421875, + "learning_rate": 3.125005696268482e-06, + "loss": 117.2083, + "step": 81690 + }, + { + "epoch": 0.6758489473466518, + "grad_norm": 867.2805786132812, + "learning_rate": 3.1236983249597007e-06, + "loss": 107.8847, + "step": 81700 + }, + { + "epoch": 0.675931670596021, + "grad_norm": 753.2324829101562, + "learning_rate": 3.1223911029396324e-06, + "loss": 78.1021, + "step": 81710 + }, + { + "epoch": 0.6760143938453902, + "grad_norm": 1229.90673828125, + "learning_rate": 3.121084030312286e-06, + "loss": 83.4004, + "step": 81720 + }, + { + "epoch": 0.6760971170947595, + "grad_norm": 641.0833740234375, + "learning_rate": 3.1197771071816617e-06, + "loss": 97.8247, + "step": 81730 + }, + { + "epoch": 0.6761798403441287, + "grad_norm": 761.6908569335938, + "learning_rate": 3.118470333651744e-06, + "loss": 86.3788, + "step": 81740 + }, + { + "epoch": 0.6762625635934979, + "grad_norm": 763.9179077148438, + "learning_rate": 3.1171637098265063e-06, + "loss": 107.4264, + "step": 81750 + }, + { + "epoch": 0.6763452868428672, + "grad_norm": 1150.12890625, + "learning_rate": 3.1158572358099127e-06, + "loss": 118.2742, + "step": 81760 + }, + { + "epoch": 0.6764280100922364, + "grad_norm": 1080.7073974609375, + "learning_rate": 3.11455091170591e-06, + "loss": 106.7814, + "step": 81770 + }, + { + "epoch": 0.6765107333416056, + "grad_norm": 642.6464233398438, + "learning_rate": 3.1132447376184383e-06, + "loss": 99.3273, + "step": 81780 + }, + { + "epoch": 0.6765934565909749, + "grad_norm": 808.6451416015625, + "learning_rate": 3.1119387136514246e-06, + "loss": 75.5224, + "step": 81790 + }, + { + "epoch": 0.6766761798403441, + "grad_norm": 1050.21533203125, + "learning_rate": 3.1106328399087814e-06, + "loss": 79.97, + "step": 81800 + }, + { + "epoch": 0.6767589030897133, + "grad_norm": 1466.090087890625, + "learning_rate": 3.1093271164944116e-06, + "loss": 89.5285, + "step": 81810 + }, + { + "epoch": 0.6768416263390826, + "grad_norm": 1522.2515869140625, + "learning_rate": 3.1080215435122072e-06, + "loss": 116.0093, + "step": 81820 + }, + { + "epoch": 0.6769243495884518, + "grad_norm": 1189.734375, + "learning_rate": 3.106716121066046e-06, + "loss": 100.7569, + "step": 81830 + }, + { + "epoch": 0.677007072837821, + "grad_norm": 1023.820556640625, + "learning_rate": 3.105410849259796e-06, + "loss": 89.4581, + "step": 81840 + }, + { + "epoch": 0.6770897960871903, + "grad_norm": 938.9853515625, + "learning_rate": 3.104105728197306e-06, + "loss": 85.5331, + "step": 81850 + }, + { + "epoch": 0.6771725193365595, + "grad_norm": 752.8026123046875, + "learning_rate": 3.1028007579824234e-06, + "loss": 93.6751, + "step": 81860 + }, + { + "epoch": 0.6772552425859287, + "grad_norm": 913.8092041015625, + "learning_rate": 3.1014959387189774e-06, + "loss": 68.4734, + "step": 81870 + }, + { + "epoch": 0.677337965835298, + "grad_norm": 593.3901977539062, + "learning_rate": 3.1001912705107874e-06, + "loss": 82.684, + "step": 81880 + }, + { + "epoch": 0.6774206890846672, + "grad_norm": 824.601318359375, + "learning_rate": 3.0988867534616586e-06, + "loss": 70.1524, + "step": 81890 + }, + { + "epoch": 0.6775034123340364, + "grad_norm": 495.2556457519531, + "learning_rate": 3.097582387675385e-06, + "loss": 101.8257, + "step": 81900 + }, + { + "epoch": 0.6775861355834057, + "grad_norm": 515.7427978515625, + "learning_rate": 3.09627817325575e-06, + "loss": 109.3507, + "step": 81910 + }, + { + "epoch": 0.677668858832775, + "grad_norm": 457.17315673828125, + 
"learning_rate": 3.0949741103065246e-06, + "loss": 102.0512, + "step": 81920 + }, + { + "epoch": 0.6777515820821441, + "grad_norm": 652.50537109375, + "learning_rate": 3.093670198931469e-06, + "loss": 70.5868, + "step": 81930 + }, + { + "epoch": 0.6778343053315135, + "grad_norm": 1835.66748046875, + "learning_rate": 3.0923664392343233e-06, + "loss": 87.3482, + "step": 81940 + }, + { + "epoch": 0.6779170285808827, + "grad_norm": 632.263671875, + "learning_rate": 3.091062831318825e-06, + "loss": 104.5366, + "step": 81950 + }, + { + "epoch": 0.6779997518302519, + "grad_norm": 766.2409057617188, + "learning_rate": 3.089759375288698e-06, + "loss": 81.9699, + "step": 81960 + }, + { + "epoch": 0.6780824750796212, + "grad_norm": 743.1511840820312, + "learning_rate": 3.0884560712476497e-06, + "loss": 110.5213, + "step": 81970 + }, + { + "epoch": 0.6781651983289904, + "grad_norm": 979.716064453125, + "learning_rate": 3.0871529192993794e-06, + "loss": 85.1489, + "step": 81980 + }, + { + "epoch": 0.6782479215783596, + "grad_norm": 713.2177124023438, + "learning_rate": 3.085849919547572e-06, + "loss": 92.1346, + "step": 81990 + }, + { + "epoch": 0.6783306448277289, + "grad_norm": 1064.33837890625, + "learning_rate": 3.0845470720959027e-06, + "loss": 92.5992, + "step": 82000 + }, + { + "epoch": 0.6784133680770981, + "grad_norm": 541.3108520507812, + "learning_rate": 3.08324437704803e-06, + "loss": 81.4435, + "step": 82010 + }, + { + "epoch": 0.6784960913264673, + "grad_norm": 867.9805908203125, + "learning_rate": 3.0819418345076095e-06, + "loss": 86.1453, + "step": 82020 + }, + { + "epoch": 0.6785788145758366, + "grad_norm": 1205.291259765625, + "learning_rate": 3.080639444578272e-06, + "loss": 77.0935, + "step": 82030 + }, + { + "epoch": 0.6786615378252058, + "grad_norm": 821.0764770507812, + "learning_rate": 3.0793372073636455e-06, + "loss": 76.2802, + "step": 82040 + }, + { + "epoch": 0.678744261074575, + "grad_norm": 1098.4530029296875, + "learning_rate": 3.0780351229673423e-06, + "loss": 99.049, + "step": 82050 + }, + { + "epoch": 0.6788269843239443, + "grad_norm": 1023.1441650390625, + "learning_rate": 3.0767331914929638e-06, + "loss": 86.1433, + "step": 82060 + }, + { + "epoch": 0.6789097075733135, + "grad_norm": 648.2615966796875, + "learning_rate": 3.075431413044099e-06, + "loss": 67.8278, + "step": 82070 + }, + { + "epoch": 0.6789924308226827, + "grad_norm": 800.4606323242188, + "learning_rate": 3.074129787724324e-06, + "loss": 87.548, + "step": 82080 + }, + { + "epoch": 0.679075154072052, + "grad_norm": 1863.6453857421875, + "learning_rate": 3.072828315637203e-06, + "loss": 109.8302, + "step": 82090 + }, + { + "epoch": 0.6791578773214212, + "grad_norm": 1212.3822021484375, + "learning_rate": 3.0715269968862898e-06, + "loss": 122.7618, + "step": 82100 + }, + { + "epoch": 0.6792406005707904, + "grad_norm": 599.944580078125, + "learning_rate": 3.0702258315751223e-06, + "loss": 69.2954, + "step": 82110 + }, + { + "epoch": 0.6793233238201597, + "grad_norm": 1033.164794921875, + "learning_rate": 3.0689248198072282e-06, + "loss": 95.2678, + "step": 82120 + }, + { + "epoch": 0.6794060470695289, + "grad_norm": 803.4859008789062, + "learning_rate": 3.0676239616861234e-06, + "loss": 93.7857, + "step": 82130 + }, + { + "epoch": 0.6794887703188981, + "grad_norm": 790.937744140625, + "learning_rate": 3.066323257315311e-06, + "loss": 79.9522, + "step": 82140 + }, + { + "epoch": 0.6795714935682674, + "grad_norm": 756.3161010742188, + "learning_rate": 3.065022706798284e-06, + "loss": 62.7251, + "step": 82150 
+ }, + { + "epoch": 0.6796542168176366, + "grad_norm": 976.355224609375, + "learning_rate": 3.06372231023852e-06, + "loss": 115.6686, + "step": 82160 + }, + { + "epoch": 0.6797369400670058, + "grad_norm": 841.8123168945312, + "learning_rate": 3.0624220677394854e-06, + "loss": 87.1789, + "step": 82170 + }, + { + "epoch": 0.6798196633163751, + "grad_norm": 945.3234252929688, + "learning_rate": 3.0611219794046344e-06, + "loss": 93.9989, + "step": 82180 + }, + { + "epoch": 0.6799023865657443, + "grad_norm": 1082.9246826171875, + "learning_rate": 3.05982204533741e-06, + "loss": 66.153, + "step": 82190 + }, + { + "epoch": 0.6799851098151135, + "grad_norm": 707.8014526367188, + "learning_rate": 3.0585222656412406e-06, + "loss": 114.1069, + "step": 82200 + }, + { + "epoch": 0.6800678330644828, + "grad_norm": 942.4901123046875, + "learning_rate": 3.0572226404195436e-06, + "loss": 63.9763, + "step": 82210 + }, + { + "epoch": 0.680150556313852, + "grad_norm": 668.8705444335938, + "learning_rate": 3.055923169775726e-06, + "loss": 71.8056, + "step": 82220 + }, + { + "epoch": 0.6802332795632212, + "grad_norm": 1010.2730712890625, + "learning_rate": 3.054623853813179e-06, + "loss": 116.1978, + "step": 82230 + }, + { + "epoch": 0.6803160028125905, + "grad_norm": 636.508544921875, + "learning_rate": 3.0533246926352834e-06, + "loss": 88.7034, + "step": 82240 + }, + { + "epoch": 0.6803987260619597, + "grad_norm": 1168.9595947265625, + "learning_rate": 3.0520256863454077e-06, + "loss": 94.9172, + "step": 82250 + }, + { + "epoch": 0.6804814493113289, + "grad_norm": 727.0571899414062, + "learning_rate": 3.05072683504691e-06, + "loss": 85.0885, + "step": 82260 + }, + { + "epoch": 0.6805641725606982, + "grad_norm": 1059.238037109375, + "learning_rate": 3.049428138843133e-06, + "loss": 87.404, + "step": 82270 + }, + { + "epoch": 0.6806468958100674, + "grad_norm": 720.4179077148438, + "learning_rate": 3.0481295978374037e-06, + "loss": 84.2605, + "step": 82280 + }, + { + "epoch": 0.6807296190594366, + "grad_norm": 1156.1129150390625, + "learning_rate": 3.0468312121330464e-06, + "loss": 99.4175, + "step": 82290 + }, + { + "epoch": 0.6808123423088059, + "grad_norm": 987.896240234375, + "learning_rate": 3.0455329818333652e-06, + "loss": 66.1081, + "step": 82300 + }, + { + "epoch": 0.6808950655581751, + "grad_norm": 878.2027587890625, + "learning_rate": 3.044234907041655e-06, + "loss": 98.8819, + "step": 82310 + }, + { + "epoch": 0.6809777888075443, + "grad_norm": 1048.861083984375, + "learning_rate": 3.0429369878611968e-06, + "loss": 67.5868, + "step": 82320 + }, + { + "epoch": 0.6810605120569136, + "grad_norm": 1312.079833984375, + "learning_rate": 3.041639224395262e-06, + "loss": 120.5818, + "step": 82330 + }, + { + "epoch": 0.6811432353062828, + "grad_norm": 1217.2681884765625, + "learning_rate": 3.0403416167471044e-06, + "loss": 84.3008, + "step": 82340 + }, + { + "epoch": 0.681225958555652, + "grad_norm": 1105.7882080078125, + "learning_rate": 3.0390441650199727e-06, + "loss": 80.4502, + "step": 82350 + }, + { + "epoch": 0.6813086818050214, + "grad_norm": 722.1676635742188, + "learning_rate": 3.0377468693170985e-06, + "loss": 109.3831, + "step": 82360 + }, + { + "epoch": 0.6813914050543906, + "grad_norm": 734.0096435546875, + "learning_rate": 3.0364497297416973e-06, + "loss": 80.3656, + "step": 82370 + }, + { + "epoch": 0.6814741283037598, + "grad_norm": 392.9991760253906, + "learning_rate": 3.035152746396981e-06, + "loss": 93.2643, + "step": 82380 + }, + { + "epoch": 0.6815568515531291, + "grad_norm": 
724.57177734375, + "learning_rate": 3.0338559193861434e-06, + "loss": 106.8783, + "step": 82390 + }, + { + "epoch": 0.6816395748024983, + "grad_norm": 511.555419921875, + "learning_rate": 3.032559248812367e-06, + "loss": 118.9703, + "step": 82400 + }, + { + "epoch": 0.6817222980518675, + "grad_norm": 378.6773376464844, + "learning_rate": 3.0312627347788208e-06, + "loss": 82.5903, + "step": 82410 + }, + { + "epoch": 0.6818050213012367, + "grad_norm": 438.7552490234375, + "learning_rate": 3.0299663773886646e-06, + "loss": 73.4946, + "step": 82420 + }, + { + "epoch": 0.681887744550606, + "grad_norm": 1227.794921875, + "learning_rate": 3.0286701767450423e-06, + "loss": 114.8965, + "step": 82430 + }, + { + "epoch": 0.6819704677999752, + "grad_norm": 591.5338745117188, + "learning_rate": 3.0273741329510852e-06, + "loss": 116.4364, + "step": 82440 + }, + { + "epoch": 0.6820531910493444, + "grad_norm": 852.6351928710938, + "learning_rate": 3.0260782461099192e-06, + "loss": 71.9436, + "step": 82450 + }, + { + "epoch": 0.6821359142987137, + "grad_norm": 758.0744018554688, + "learning_rate": 3.024782516324645e-06, + "loss": 82.7659, + "step": 82460 + }, + { + "epoch": 0.6822186375480829, + "grad_norm": 1753.00048828125, + "learning_rate": 3.0234869436983606e-06, + "loss": 109.4363, + "step": 82470 + }, + { + "epoch": 0.6823013607974521, + "grad_norm": 1189.2587890625, + "learning_rate": 3.02219152833415e-06, + "loss": 96.6973, + "step": 82480 + }, + { + "epoch": 0.6823840840468214, + "grad_norm": 894.1843872070312, + "learning_rate": 3.0208962703350832e-06, + "loss": 86.2049, + "step": 82490 + }, + { + "epoch": 0.6824668072961906, + "grad_norm": 675.1846313476562, + "learning_rate": 3.019601169804216e-06, + "loss": 73.5452, + "step": 82500 + }, + { + "epoch": 0.6825495305455598, + "grad_norm": 733.6863403320312, + "learning_rate": 3.0183062268445964e-06, + "loss": 96.7175, + "step": 82510 + }, + { + "epoch": 0.6826322537949291, + "grad_norm": 953.9824829101562, + "learning_rate": 3.0170114415592543e-06, + "loss": 95.8179, + "step": 82520 + }, + { + "epoch": 0.6827149770442983, + "grad_norm": 776.6597290039062, + "learning_rate": 3.015716814051213e-06, + "loss": 75.2235, + "step": 82530 + }, + { + "epoch": 0.6827977002936675, + "grad_norm": 741.5625610351562, + "learning_rate": 3.0144223444234767e-06, + "loss": 90.4091, + "step": 82540 + }, + { + "epoch": 0.6828804235430368, + "grad_norm": 1144.602783203125, + "learning_rate": 3.0131280327790412e-06, + "loss": 74.8416, + "step": 82550 + }, + { + "epoch": 0.682963146792406, + "grad_norm": 1054.5020751953125, + "learning_rate": 3.0118338792208912e-06, + "loss": 93.5738, + "step": 82560 + }, + { + "epoch": 0.6830458700417752, + "grad_norm": 1366.202392578125, + "learning_rate": 3.010539883851993e-06, + "loss": 99.257, + "step": 82570 + }, + { + "epoch": 0.6831285932911445, + "grad_norm": 303.204345703125, + "learning_rate": 3.009246046775307e-06, + "loss": 79.5888, + "step": 82580 + }, + { + "epoch": 0.6832113165405137, + "grad_norm": 1691.6275634765625, + "learning_rate": 3.0079523680937766e-06, + "loss": 130.3326, + "step": 82590 + }, + { + "epoch": 0.6832940397898829, + "grad_norm": 1158.856689453125, + "learning_rate": 3.006658847910334e-06, + "loss": 113.3093, + "step": 82600 + }, + { + "epoch": 0.6833767630392522, + "grad_norm": 804.7373046875, + "learning_rate": 3.005365486327899e-06, + "loss": 85.9386, + "step": 82610 + }, + { + "epoch": 0.6834594862886214, + "grad_norm": 961.7406005859375, + "learning_rate": 3.004072283449379e-06, + "loss": 
88.6196, + "step": 82620 + }, + { + "epoch": 0.6835422095379906, + "grad_norm": 764.2142333984375, + "learning_rate": 3.0027792393776666e-06, + "loss": 89.8795, + "step": 82630 + }, + { + "epoch": 0.6836249327873599, + "grad_norm": 751.17822265625, + "learning_rate": 3.001486354215644e-06, + "loss": 70.7893, + "step": 82640 + }, + { + "epoch": 0.6837076560367291, + "grad_norm": 1221.4046630859375, + "learning_rate": 3.0001936280661794e-06, + "loss": 80.4374, + "step": 82650 + }, + { + "epoch": 0.6837903792860983, + "grad_norm": 748.9243774414062, + "learning_rate": 2.998901061032131e-06, + "loss": 115.65, + "step": 82660 + }, + { + "epoch": 0.6838731025354676, + "grad_norm": 847.3457641601562, + "learning_rate": 2.9976086532163397e-06, + "loss": 88.8179, + "step": 82670 + }, + { + "epoch": 0.6839558257848368, + "grad_norm": 363.82830810546875, + "learning_rate": 2.9963164047216397e-06, + "loss": 113.4569, + "step": 82680 + }, + { + "epoch": 0.684038549034206, + "grad_norm": 855.93212890625, + "learning_rate": 2.9950243156508473e-06, + "loss": 99.6168, + "step": 82690 + }, + { + "epoch": 0.6841212722835753, + "grad_norm": 877.383544921875, + "learning_rate": 2.9937323861067695e-06, + "loss": 107.2685, + "step": 82700 + }, + { + "epoch": 0.6842039955329445, + "grad_norm": 737.8973388671875, + "learning_rate": 2.992440616192197e-06, + "loss": 73.6583, + "step": 82710 + }, + { + "epoch": 0.6842867187823137, + "grad_norm": 878.8938598632812, + "learning_rate": 2.9911490060099117e-06, + "loss": 110.4591, + "step": 82720 + }, + { + "epoch": 0.684369442031683, + "grad_norm": 1008.2698364257812, + "learning_rate": 2.9898575556626807e-06, + "loss": 84.5192, + "step": 82730 + }, + { + "epoch": 0.6844521652810522, + "grad_norm": 942.9556884765625, + "learning_rate": 2.9885662652532586e-06, + "loss": 101.3532, + "step": 82740 + }, + { + "epoch": 0.6845348885304214, + "grad_norm": 903.5498657226562, + "learning_rate": 2.9872751348843875e-06, + "loss": 87.6757, + "step": 82750 + }, + { + "epoch": 0.6846176117797907, + "grad_norm": 1304.6390380859375, + "learning_rate": 2.985984164658796e-06, + "loss": 78.2163, + "step": 82760 + }, + { + "epoch": 0.68470033502916, + "grad_norm": 691.3413696289062, + "learning_rate": 2.9846933546792012e-06, + "loss": 79.0234, + "step": 82770 + }, + { + "epoch": 0.6847830582785291, + "grad_norm": 488.5516662597656, + "learning_rate": 2.9834027050483085e-06, + "loss": 106.5184, + "step": 82780 + }, + { + "epoch": 0.6848657815278985, + "grad_norm": 1274.3031005859375, + "learning_rate": 2.9821122158688086e-06, + "loss": 75.8053, + "step": 82790 + }, + { + "epoch": 0.6849485047772677, + "grad_norm": 719.878173828125, + "learning_rate": 2.980821887243377e-06, + "loss": 88.5654, + "step": 82800 + }, + { + "epoch": 0.6850312280266369, + "grad_norm": 974.0200805664062, + "learning_rate": 2.979531719274681e-06, + "loss": 79.3588, + "step": 82810 + }, + { + "epoch": 0.6851139512760062, + "grad_norm": 875.3156127929688, + "learning_rate": 2.978241712065374e-06, + "loss": 97.1635, + "step": 82820 + }, + { + "epoch": 0.6851966745253754, + "grad_norm": 1160.015869140625, + "learning_rate": 2.9769518657180953e-06, + "loss": 93.5529, + "step": 82830 + }, + { + "epoch": 0.6852793977747446, + "grad_norm": 419.8360290527344, + "learning_rate": 2.9756621803354722e-06, + "loss": 74.9933, + "step": 82840 + }, + { + "epoch": 0.6853621210241139, + "grad_norm": 822.18505859375, + "learning_rate": 2.9743726560201185e-06, + "loss": 147.5206, + "step": 82850 + }, + { + "epoch": 0.6854448442734831, + 
"grad_norm": 1076.56787109375, + "learning_rate": 2.9730832928746355e-06, + "loss": 89.3311, + "step": 82860 + }, + { + "epoch": 0.6855275675228523, + "grad_norm": 563.077392578125, + "learning_rate": 2.9717940910016135e-06, + "loss": 94.1015, + "step": 82870 + }, + { + "epoch": 0.6856102907722216, + "grad_norm": 1070.950927734375, + "learning_rate": 2.9705050505036294e-06, + "loss": 75.7543, + "step": 82880 + }, + { + "epoch": 0.6856930140215908, + "grad_norm": 563.2598876953125, + "learning_rate": 2.9692161714832422e-06, + "loss": 81.8349, + "step": 82890 + }, + { + "epoch": 0.68577573727096, + "grad_norm": 779.0994262695312, + "learning_rate": 2.9679274540430037e-06, + "loss": 77.1619, + "step": 82900 + }, + { + "epoch": 0.6858584605203293, + "grad_norm": 1350.3271484375, + "learning_rate": 2.966638898285452e-06, + "loss": 76.3261, + "step": 82910 + }, + { + "epoch": 0.6859411837696985, + "grad_norm": 790.4927368164062, + "learning_rate": 2.9653505043131125e-06, + "loss": 89.7235, + "step": 82920 + }, + { + "epoch": 0.6860239070190677, + "grad_norm": 549.5680541992188, + "learning_rate": 2.9640622722284944e-06, + "loss": 84.1619, + "step": 82930 + }, + { + "epoch": 0.686106630268437, + "grad_norm": 602.5223388671875, + "learning_rate": 2.962774202134098e-06, + "loss": 115.692, + "step": 82940 + }, + { + "epoch": 0.6861893535178062, + "grad_norm": 574.7587890625, + "learning_rate": 2.961486294132409e-06, + "loss": 103.2164, + "step": 82950 + }, + { + "epoch": 0.6862720767671754, + "grad_norm": 1748.5323486328125, + "learning_rate": 2.960198548325901e-06, + "loss": 89.1274, + "step": 82960 + }, + { + "epoch": 0.6863548000165447, + "grad_norm": 626.42041015625, + "learning_rate": 2.958910964817032e-06, + "loss": 85.8427, + "step": 82970 + }, + { + "epoch": 0.6864375232659139, + "grad_norm": 692.4397583007812, + "learning_rate": 2.9576235437082502e-06, + "loss": 100.6521, + "step": 82980 + }, + { + "epoch": 0.6865202465152831, + "grad_norm": 587.5997314453125, + "learning_rate": 2.9563362851019893e-06, + "loss": 65.4427, + "step": 82990 + }, + { + "epoch": 0.6866029697646524, + "grad_norm": 582.0680541992188, + "learning_rate": 2.9550491891006704e-06, + "loss": 82.0156, + "step": 83000 + }, + { + "epoch": 0.6866856930140216, + "grad_norm": 1114.3927001953125, + "learning_rate": 2.9537622558067036e-06, + "loss": 87.1522, + "step": 83010 + }, + { + "epoch": 0.6867684162633908, + "grad_norm": 487.21307373046875, + "learning_rate": 2.9524754853224837e-06, + "loss": 94.6792, + "step": 83020 + }, + { + "epoch": 0.6868511395127601, + "grad_norm": 713.6537475585938, + "learning_rate": 2.9511888777503916e-06, + "loss": 67.1539, + "step": 83030 + }, + { + "epoch": 0.6869338627621293, + "grad_norm": 840.3438110351562, + "learning_rate": 2.949902433192798e-06, + "loss": 84.9945, + "step": 83040 + }, + { + "epoch": 0.6870165860114985, + "grad_norm": 1528.6619873046875, + "learning_rate": 2.94861615175206e-06, + "loss": 77.7083, + "step": 83050 + }, + { + "epoch": 0.6870993092608678, + "grad_norm": 1220.947998046875, + "learning_rate": 2.9473300335305193e-06, + "loss": 89.6249, + "step": 83060 + }, + { + "epoch": 0.687182032510237, + "grad_norm": 588.5947875976562, + "learning_rate": 2.946044078630508e-06, + "loss": 85.9502, + "step": 83070 + }, + { + "epoch": 0.6872647557596062, + "grad_norm": 1005.1710815429688, + "learning_rate": 2.9447582871543423e-06, + "loss": 85.558, + "step": 83080 + }, + { + "epoch": 0.6873474790089755, + "grad_norm": 658.07177734375, + "learning_rate": 2.9434726592043263e-06, + 
"loss": 92.5939, + "step": 83090 + }, + { + "epoch": 0.6874302022583447, + "grad_norm": 996.118896484375, + "learning_rate": 2.942187194882754e-06, + "loss": 109.5573, + "step": 83100 + }, + { + "epoch": 0.6875129255077139, + "grad_norm": 852.5332641601562, + "learning_rate": 2.940901894291902e-06, + "loss": 113.4957, + "step": 83110 + }, + { + "epoch": 0.6875956487570832, + "grad_norm": 6915.46484375, + "learning_rate": 2.939616757534037e-06, + "loss": 119.719, + "step": 83120 + }, + { + "epoch": 0.6876783720064524, + "grad_norm": 852.2628173828125, + "learning_rate": 2.938331784711411e-06, + "loss": 89.6527, + "step": 83130 + }, + { + "epoch": 0.6877610952558216, + "grad_norm": 790.9977416992188, + "learning_rate": 2.937046975926262e-06, + "loss": 83.5397, + "step": 83140 + }, + { + "epoch": 0.6878438185051908, + "grad_norm": 721.8024291992188, + "learning_rate": 2.9357623312808183e-06, + "loss": 67.4693, + "step": 83150 + }, + { + "epoch": 0.6879265417545601, + "grad_norm": 525.3212280273438, + "learning_rate": 2.934477850877292e-06, + "loss": 100.1755, + "step": 83160 + }, + { + "epoch": 0.6880092650039293, + "grad_norm": 536.9306640625, + "learning_rate": 2.9331935348178838e-06, + "loss": 91.7662, + "step": 83170 + }, + { + "epoch": 0.6880919882532985, + "grad_norm": 924.3001098632812, + "learning_rate": 2.931909383204781e-06, + "loss": 77.1973, + "step": 83180 + }, + { + "epoch": 0.6881747115026678, + "grad_norm": 688.9435424804688, + "learning_rate": 2.9306253961401553e-06, + "loss": 79.3377, + "step": 83190 + }, + { + "epoch": 0.688257434752037, + "grad_norm": 1393.37451171875, + "learning_rate": 2.929341573726171e-06, + "loss": 111.1658, + "step": 83200 + }, + { + "epoch": 0.6883401580014062, + "grad_norm": 524.9993896484375, + "learning_rate": 2.928057916064975e-06, + "loss": 68.5311, + "step": 83210 + }, + { + "epoch": 0.6884228812507756, + "grad_norm": 1237.0621337890625, + "learning_rate": 2.9267744232587035e-06, + "loss": 102.3083, + "step": 83220 + }, + { + "epoch": 0.6885056045001448, + "grad_norm": 620.7877807617188, + "learning_rate": 2.925491095409473e-06, + "loss": 107.3997, + "step": 83230 + }, + { + "epoch": 0.688588327749514, + "grad_norm": 1383.7750244140625, + "learning_rate": 2.924207932619397e-06, + "loss": 91.8689, + "step": 83240 + }, + { + "epoch": 0.6886710509988833, + "grad_norm": 775.3110961914062, + "learning_rate": 2.9229249349905686e-06, + "loss": 87.8382, + "step": 83250 + }, + { + "epoch": 0.6887537742482525, + "grad_norm": 681.6326904296875, + "learning_rate": 2.9216421026250707e-06, + "loss": 70.0503, + "step": 83260 + }, + { + "epoch": 0.6888364974976217, + "grad_norm": 786.73046875, + "learning_rate": 2.9203594356249726e-06, + "loss": 117.5731, + "step": 83270 + }, + { + "epoch": 0.688919220746991, + "grad_norm": 575.7755737304688, + "learning_rate": 2.919076934092329e-06, + "loss": 64.0, + "step": 83280 + }, + { + "epoch": 0.6890019439963602, + "grad_norm": 677.3495483398438, + "learning_rate": 2.9177945981291843e-06, + "loss": 69.7188, + "step": 83290 + }, + { + "epoch": 0.6890846672457294, + "grad_norm": 704.7107543945312, + "learning_rate": 2.916512427837568e-06, + "loss": 112.342, + "step": 83300 + }, + { + "epoch": 0.6891673904950987, + "grad_norm": 861.6221923828125, + "learning_rate": 2.9152304233194974e-06, + "loss": 63.6381, + "step": 83310 + }, + { + "epoch": 0.6892501137444679, + "grad_norm": 722.7597045898438, + "learning_rate": 2.9139485846769723e-06, + "loss": 98.6105, + "step": 83320 + }, + { + "epoch": 0.6893328369938371, + 
"grad_norm": 586.8096313476562, + "learning_rate": 2.9126669120119846e-06, + "loss": 112.3304, + "step": 83330 + }, + { + "epoch": 0.6894155602432064, + "grad_norm": 1111.9354248046875, + "learning_rate": 2.9113854054265112e-06, + "loss": 104.5657, + "step": 83340 + }, + { + "epoch": 0.6894982834925756, + "grad_norm": 1186.3795166015625, + "learning_rate": 2.9101040650225155e-06, + "loss": 112.6777, + "step": 83350 + }, + { + "epoch": 0.6895810067419448, + "grad_norm": 627.06298828125, + "learning_rate": 2.9088228909019455e-06, + "loss": 91.1352, + "step": 83360 + }, + { + "epoch": 0.6896637299913141, + "grad_norm": 530.3230590820312, + "learning_rate": 2.9075418831667436e-06, + "loss": 74.27, + "step": 83370 + }, + { + "epoch": 0.6897464532406833, + "grad_norm": 1148.4014892578125, + "learning_rate": 2.906261041918831e-06, + "loss": 91.6355, + "step": 83380 + }, + { + "epoch": 0.6898291764900525, + "grad_norm": 896.2343139648438, + "learning_rate": 2.90498036726012e-06, + "loss": 101.3476, + "step": 83390 + }, + { + "epoch": 0.6899118997394218, + "grad_norm": 748.8731689453125, + "learning_rate": 2.903699859292505e-06, + "loss": 81.6133, + "step": 83400 + }, + { + "epoch": 0.689994622988791, + "grad_norm": 1056.1656494140625, + "learning_rate": 2.9024195181178704e-06, + "loss": 90.9416, + "step": 83410 + }, + { + "epoch": 0.6900773462381602, + "grad_norm": 1155.9085693359375, + "learning_rate": 2.9011393438380884e-06, + "loss": 80.1188, + "step": 83420 + }, + { + "epoch": 0.6901600694875295, + "grad_norm": 807.7344970703125, + "learning_rate": 2.8998593365550178e-06, + "loss": 121.4385, + "step": 83430 + }, + { + "epoch": 0.6902427927368987, + "grad_norm": 552.521484375, + "learning_rate": 2.8985794963704992e-06, + "loss": 92.7039, + "step": 83440 + }, + { + "epoch": 0.6903255159862679, + "grad_norm": 1697.0496826171875, + "learning_rate": 2.8972998233863657e-06, + "loss": 101.2329, + "step": 83450 + }, + { + "epoch": 0.6904082392356372, + "grad_norm": 824.8051147460938, + "learning_rate": 2.8960203177044364e-06, + "loss": 94.2846, + "step": 83460 + }, + { + "epoch": 0.6904909624850064, + "grad_norm": 1690.2552490234375, + "learning_rate": 2.8947409794265146e-06, + "loss": 85.7355, + "step": 83470 + }, + { + "epoch": 0.6905736857343756, + "grad_norm": 961.0975341796875, + "learning_rate": 2.893461808654393e-06, + "loss": 99.6032, + "step": 83480 + }, + { + "epoch": 0.6906564089837449, + "grad_norm": 622.6866455078125, + "learning_rate": 2.892182805489846e-06, + "loss": 78.2167, + "step": 83490 + }, + { + "epoch": 0.6907391322331141, + "grad_norm": 1131.5731201171875, + "learning_rate": 2.8909039700346385e-06, + "loss": 95.0546, + "step": 83500 + }, + { + "epoch": 0.6908218554824833, + "grad_norm": 324.06011962890625, + "learning_rate": 2.889625302390524e-06, + "loss": 72.3154, + "step": 83510 + }, + { + "epoch": 0.6909045787318526, + "grad_norm": 1070.0745849609375, + "learning_rate": 2.8883468026592382e-06, + "loss": 91.6812, + "step": 83520 + }, + { + "epoch": 0.6909873019812218, + "grad_norm": 876.22607421875, + "learning_rate": 2.8870684709425063e-06, + "loss": 86.1727, + "step": 83530 + }, + { + "epoch": 0.691070025230591, + "grad_norm": 1324.11376953125, + "learning_rate": 2.885790307342039e-06, + "loss": 97.0591, + "step": 83540 + }, + { + "epoch": 0.6911527484799603, + "grad_norm": 1690.0306396484375, + "learning_rate": 2.884512311959532e-06, + "loss": 101.5719, + "step": 83550 + }, + { + "epoch": 0.6912354717293295, + "grad_norm": 436.6270446777344, + "learning_rate": 
2.8832344848966758e-06, + "loss": 64.3056, + "step": 83560 + }, + { + "epoch": 0.6913181949786987, + "grad_norm": 1113.216552734375, + "learning_rate": 2.8819568262551344e-06, + "loss": 134.5069, + "step": 83570 + }, + { + "epoch": 0.691400918228068, + "grad_norm": 676.7778930664062, + "learning_rate": 2.8806793361365686e-06, + "loss": 128.0765, + "step": 83580 + }, + { + "epoch": 0.6914836414774372, + "grad_norm": 1021.91259765625, + "learning_rate": 2.8794020146426217e-06, + "loss": 95.5648, + "step": 83590 + }, + { + "epoch": 0.6915663647268064, + "grad_norm": 963.874755859375, + "learning_rate": 2.8781248618749235e-06, + "loss": 85.2975, + "step": 83600 + }, + { + "epoch": 0.6916490879761757, + "grad_norm": 819.314453125, + "learning_rate": 2.8768478779350927e-06, + "loss": 76.2316, + "step": 83610 + }, + { + "epoch": 0.691731811225545, + "grad_norm": 1420.544921875, + "learning_rate": 2.875571062924732e-06, + "loss": 102.6076, + "step": 83620 + }, + { + "epoch": 0.6918145344749141, + "grad_norm": 1988.164794921875, + "learning_rate": 2.874294416945432e-06, + "loss": 109.0461, + "step": 83630 + }, + { + "epoch": 0.6918972577242835, + "grad_norm": 808.8279418945312, + "learning_rate": 2.8730179400987697e-06, + "loss": 102.7774, + "step": 83640 + }, + { + "epoch": 0.6919799809736527, + "grad_norm": 1196.42626953125, + "learning_rate": 2.871741632486308e-06, + "loss": 86.4877, + "step": 83650 + }, + { + "epoch": 0.6920627042230219, + "grad_norm": 940.0408935546875, + "learning_rate": 2.8704654942095977e-06, + "loss": 103.2302, + "step": 83660 + }, + { + "epoch": 0.6921454274723912, + "grad_norm": 955.0111083984375, + "learning_rate": 2.869189525370174e-06, + "loss": 86.1693, + "step": 83670 + }, + { + "epoch": 0.6922281507217604, + "grad_norm": 411.0567932128906, + "learning_rate": 2.8679137260695614e-06, + "loss": 82.702, + "step": 83680 + }, + { + "epoch": 0.6923108739711296, + "grad_norm": 586.8140258789062, + "learning_rate": 2.866638096409269e-06, + "loss": 97.6693, + "step": 83690 + }, + { + "epoch": 0.6923935972204989, + "grad_norm": 781.5844116210938, + "learning_rate": 2.8653626364907918e-06, + "loss": 52.9038, + "step": 83700 + }, + { + "epoch": 0.6924763204698681, + "grad_norm": 1218.3243408203125, + "learning_rate": 2.8640873464156127e-06, + "loss": 77.371, + "step": 83710 + }, + { + "epoch": 0.6925590437192373, + "grad_norm": 860.3048706054688, + "learning_rate": 2.8628122262852015e-06, + "loss": 78.9779, + "step": 83720 + }, + { + "epoch": 0.6926417669686066, + "grad_norm": 655.4887084960938, + "learning_rate": 2.861537276201013e-06, + "loss": 95.3996, + "step": 83730 + }, + { + "epoch": 0.6927244902179758, + "grad_norm": 611.69384765625, + "learning_rate": 2.860262496264489e-06, + "loss": 93.9206, + "step": 83740 + }, + { + "epoch": 0.692807213467345, + "grad_norm": 1187.9385986328125, + "learning_rate": 2.858987886577058e-06, + "loss": 85.537, + "step": 83750 + }, + { + "epoch": 0.6928899367167143, + "grad_norm": 594.444580078125, + "learning_rate": 2.857713447240135e-06, + "loss": 96.7337, + "step": 83760 + }, + { + "epoch": 0.6929726599660835, + "grad_norm": 702.6067504882812, + "learning_rate": 2.8564391783551214e-06, + "loss": 96.4278, + "step": 83770 + }, + { + "epoch": 0.6930553832154527, + "grad_norm": 799.9725341796875, + "learning_rate": 2.855165080023405e-06, + "loss": 85.3117, + "step": 83780 + }, + { + "epoch": 0.693138106464822, + "grad_norm": 865.3021240234375, + "learning_rate": 2.85389115234636e-06, + "loss": 97.0047, + "step": 83790 + }, + { + "epoch": 
0.6932208297141912, + "grad_norm": 595.7553100585938, + "learning_rate": 2.8526173954253458e-06, + "loss": 87.2789, + "step": 83800 + }, + { + "epoch": 0.6933035529635604, + "grad_norm": 1058.0035400390625, + "learning_rate": 2.8513438093617107e-06, + "loss": 119.3666, + "step": 83810 + }, + { + "epoch": 0.6933862762129297, + "grad_norm": 1175.2041015625, + "learning_rate": 2.8500703942567874e-06, + "loss": 82.3834, + "step": 83820 + }, + { + "epoch": 0.6934689994622989, + "grad_norm": 966.0508422851562, + "learning_rate": 2.848797150211896e-06, + "loss": 89.0491, + "step": 83830 + }, + { + "epoch": 0.6935517227116681, + "grad_norm": 732.8645629882812, + "learning_rate": 2.847524077328343e-06, + "loss": 90.359, + "step": 83840 + }, + { + "epoch": 0.6936344459610374, + "grad_norm": 575.0491943359375, + "learning_rate": 2.8462511757074205e-06, + "loss": 73.8018, + "step": 83850 + }, + { + "epoch": 0.6937171692104066, + "grad_norm": 1305.1068115234375, + "learning_rate": 2.844978445450408e-06, + "loss": 90.5077, + "step": 83860 + }, + { + "epoch": 0.6937998924597758, + "grad_norm": 1045.33935546875, + "learning_rate": 2.8437058866585698e-06, + "loss": 114.761, + "step": 83870 + }, + { + "epoch": 0.693882615709145, + "grad_norm": 688.1589965820312, + "learning_rate": 2.842433499433158e-06, + "loss": 91.853, + "step": 83880 + }, + { + "epoch": 0.6939653389585143, + "grad_norm": 436.8230285644531, + "learning_rate": 2.841161283875411e-06, + "loss": 106.294, + "step": 83890 + }, + { + "epoch": 0.6940480622078835, + "grad_norm": 906.2509765625, + "learning_rate": 2.8398892400865537e-06, + "loss": 74.6637, + "step": 83900 + }, + { + "epoch": 0.6941307854572527, + "grad_norm": 450.2344665527344, + "learning_rate": 2.838617368167797e-06, + "loss": 143.2664, + "step": 83910 + }, + { + "epoch": 0.694213508706622, + "grad_norm": 952.6104125976562, + "learning_rate": 2.837345668220333e-06, + "loss": 69.9202, + "step": 83920 + }, + { + "epoch": 0.6942962319559912, + "grad_norm": 755.5713500976562, + "learning_rate": 2.836074140345352e-06, + "loss": 82.3544, + "step": 83930 + }, + { + "epoch": 0.6943789552053604, + "grad_norm": 659.2360229492188, + "learning_rate": 2.834802784644019e-06, + "loss": 92.3773, + "step": 83940 + }, + { + "epoch": 0.6944616784547297, + "grad_norm": 567.8734130859375, + "learning_rate": 2.8335316012174925e-06, + "loss": 102.9247, + "step": 83950 + }, + { + "epoch": 0.6945444017040989, + "grad_norm": 434.0081787109375, + "learning_rate": 2.8322605901669133e-06, + "loss": 73.0931, + "step": 83960 + }, + { + "epoch": 0.6946271249534681, + "grad_norm": 453.88153076171875, + "learning_rate": 2.8309897515934104e-06, + "loss": 98.706, + "step": 83970 + }, + { + "epoch": 0.6947098482028374, + "grad_norm": 726.1422119140625, + "learning_rate": 2.8297190855980987e-06, + "loss": 87.1649, + "step": 83980 + }, + { + "epoch": 0.6947925714522066, + "grad_norm": 784.1115112304688, + "learning_rate": 2.8284485922820814e-06, + "loss": 74.6825, + "step": 83990 + }, + { + "epoch": 0.6948752947015758, + "grad_norm": 674.6708984375, + "learning_rate": 2.8271782717464413e-06, + "loss": 59.7278, + "step": 84000 + }, + { + "epoch": 0.6949580179509451, + "grad_norm": 1085.0732421875, + "learning_rate": 2.8259081240922522e-06, + "loss": 110.7847, + "step": 84010 + }, + { + "epoch": 0.6950407412003143, + "grad_norm": 902.2394409179688, + "learning_rate": 2.8246381494205775e-06, + "loss": 72.2629, + "step": 84020 + }, + { + "epoch": 0.6951234644496835, + "grad_norm": 1027.4735107421875, + "learning_rate": 
2.8233683478324627e-06, + "loss": 99.9426, + "step": 84030 + }, + { + "epoch": 0.6952061876990528, + "grad_norm": 709.7374267578125, + "learning_rate": 2.822098719428938e-06, + "loss": 100.229, + "step": 84040 + }, + { + "epoch": 0.695288910948422, + "grad_norm": 1114.0150146484375, + "learning_rate": 2.8208292643110237e-06, + "loss": 94.4212, + "step": 84050 + }, + { + "epoch": 0.6953716341977912, + "grad_norm": 679.0440063476562, + "learning_rate": 2.8195599825797233e-06, + "loss": 76.9554, + "step": 84060 + }, + { + "epoch": 0.6954543574471606, + "grad_norm": 947.027099609375, + "learning_rate": 2.818290874336028e-06, + "loss": 100.9757, + "step": 84070 + }, + { + "epoch": 0.6955370806965298, + "grad_norm": 2001.5252685546875, + "learning_rate": 2.817021939680918e-06, + "loss": 95.2409, + "step": 84080 + }, + { + "epoch": 0.695619803945899, + "grad_norm": 871.5374145507812, + "learning_rate": 2.8157531787153515e-06, + "loss": 94.9407, + "step": 84090 + }, + { + "epoch": 0.6957025271952683, + "grad_norm": 1146.988037109375, + "learning_rate": 2.8144845915402796e-06, + "loss": 102.4395, + "step": 84100 + }, + { + "epoch": 0.6957852504446375, + "grad_norm": 855.0643920898438, + "learning_rate": 2.813216178256637e-06, + "loss": 105.7368, + "step": 84110 + }, + { + "epoch": 0.6958679736940067, + "grad_norm": 662.9407958984375, + "learning_rate": 2.8119479389653492e-06, + "loss": 76.1168, + "step": 84120 + }, + { + "epoch": 0.695950696943376, + "grad_norm": 1097.759765625, + "learning_rate": 2.8106798737673223e-06, + "loss": 86.9396, + "step": 84130 + }, + { + "epoch": 0.6960334201927452, + "grad_norm": 740.43896484375, + "learning_rate": 2.8094119827634496e-06, + "loss": 84.4411, + "step": 84140 + }, + { + "epoch": 0.6961161434421144, + "grad_norm": 1305.01171875, + "learning_rate": 2.8081442660546126e-06, + "loss": 100.1945, + "step": 84150 + }, + { + "epoch": 0.6961988666914837, + "grad_norm": 1327.839599609375, + "learning_rate": 2.806876723741677e-06, + "loss": 115.4466, + "step": 84160 + }, + { + "epoch": 0.6962815899408529, + "grad_norm": 795.6221923828125, + "learning_rate": 2.805609355925497e-06, + "loss": 75.2351, + "step": 84170 + }, + { + "epoch": 0.6963643131902221, + "grad_norm": 712.4464721679688, + "learning_rate": 2.8043421627069077e-06, + "loss": 70.8538, + "step": 84180 + }, + { + "epoch": 0.6964470364395914, + "grad_norm": 838.209716796875, + "learning_rate": 2.8030751441867364e-06, + "loss": 86.4729, + "step": 84190 + }, + { + "epoch": 0.6965297596889606, + "grad_norm": 584.37646484375, + "learning_rate": 2.8018083004657924e-06, + "loss": 112.4415, + "step": 84200 + }, + { + "epoch": 0.6966124829383298, + "grad_norm": 881.0332641601562, + "learning_rate": 2.800541631644873e-06, + "loss": 145.887, + "step": 84210 + }, + { + "epoch": 0.6966952061876991, + "grad_norm": 664.2578735351562, + "learning_rate": 2.7992751378247627e-06, + "loss": 124.8047, + "step": 84220 + }, + { + "epoch": 0.6967779294370683, + "grad_norm": 929.6114501953125, + "learning_rate": 2.79800881910623e-06, + "loss": 95.0422, + "step": 84230 + }, + { + "epoch": 0.6968606526864375, + "grad_norm": 777.854248046875, + "learning_rate": 2.7967426755900293e-06, + "loss": 84.8647, + "step": 84240 + }, + { + "epoch": 0.6969433759358068, + "grad_norm": 749.425048828125, + "learning_rate": 2.795476707376905e-06, + "loss": 89.1832, + "step": 84250 + }, + { + "epoch": 0.697026099185176, + "grad_norm": 844.8867797851562, + "learning_rate": 2.79421091456758e-06, + "loss": 75.1808, + "step": 84260 + }, + { + "epoch": 
0.6971088224345452, + "grad_norm": 1129.9508056640625, + "learning_rate": 2.7929452972627685e-06, + "loss": 108.7002, + "step": 84270 + }, + { + "epoch": 0.6971915456839145, + "grad_norm": 868.1597900390625, + "learning_rate": 2.791679855563171e-06, + "loss": 70.2446, + "step": 84280 + }, + { + "epoch": 0.6972742689332837, + "grad_norm": 1031.17578125, + "learning_rate": 2.790414589569473e-06, + "loss": 93.3332, + "step": 84290 + }, + { + "epoch": 0.6973569921826529, + "grad_norm": 544.4410400390625, + "learning_rate": 2.789149499382345e-06, + "loss": 78.5159, + "step": 84300 + }, + { + "epoch": 0.6974397154320222, + "grad_norm": 861.5923461914062, + "learning_rate": 2.7878845851024426e-06, + "loss": 84.6842, + "step": 84310 + }, + { + "epoch": 0.6975224386813914, + "grad_norm": 761.0271606445312, + "learning_rate": 2.786619846830414e-06, + "loss": 91.5659, + "step": 84320 + }, + { + "epoch": 0.6976051619307606, + "grad_norm": 1823.0885009765625, + "learning_rate": 2.7853552846668865e-06, + "loss": 96.6269, + "step": 84330 + }, + { + "epoch": 0.6976878851801299, + "grad_norm": 1062.7515869140625, + "learning_rate": 2.784090898712476e-06, + "loss": 87.3296, + "step": 84340 + }, + { + "epoch": 0.6977706084294991, + "grad_norm": 582.9891967773438, + "learning_rate": 2.7828266890677825e-06, + "loss": 89.4428, + "step": 84350 + }, + { + "epoch": 0.6978533316788683, + "grad_norm": 1207.8365478515625, + "learning_rate": 2.781562655833393e-06, + "loss": 109.0874, + "step": 84360 + }, + { + "epoch": 0.6979360549282376, + "grad_norm": 583.7366943359375, + "learning_rate": 2.7802987991098816e-06, + "loss": 77.5733, + "step": 84370 + }, + { + "epoch": 0.6980187781776068, + "grad_norm": 479.65802001953125, + "learning_rate": 2.7790351189978083e-06, + "loss": 83.5992, + "step": 84380 + }, + { + "epoch": 0.698101501426976, + "grad_norm": 1373.9141845703125, + "learning_rate": 2.777771615597717e-06, + "loss": 124.8873, + "step": 84390 + }, + { + "epoch": 0.6981842246763453, + "grad_norm": 1068.3160400390625, + "learning_rate": 2.776508289010138e-06, + "loss": 102.6386, + "step": 84400 + }, + { + "epoch": 0.6982669479257145, + "grad_norm": 761.4998779296875, + "learning_rate": 2.7752451393355916e-06, + "loss": 103.4375, + "step": 84410 + }, + { + "epoch": 0.6983496711750837, + "grad_norm": 594.727294921875, + "learning_rate": 2.773982166674582e-06, + "loss": 93.2319, + "step": 84420 + }, + { + "epoch": 0.698432394424453, + "grad_norm": 516.1226806640625, + "learning_rate": 2.772719371127593e-06, + "loss": 117.761, + "step": 84430 + }, + { + "epoch": 0.6985151176738222, + "grad_norm": 1146.329833984375, + "learning_rate": 2.771456752795102e-06, + "loss": 98.0758, + "step": 84440 + }, + { + "epoch": 0.6985978409231914, + "grad_norm": 1272.3282470703125, + "learning_rate": 2.7701943117775686e-06, + "loss": 71.8762, + "step": 84450 + }, + { + "epoch": 0.6986805641725607, + "grad_norm": 938.8295288085938, + "learning_rate": 2.7689320481754414e-06, + "loss": 78.6577, + "step": 84460 + }, + { + "epoch": 0.69876328742193, + "grad_norm": 1171.89306640625, + "learning_rate": 2.7676699620891514e-06, + "loss": 65.5108, + "step": 84470 + }, + { + "epoch": 0.6988460106712991, + "grad_norm": 1358.8653564453125, + "learning_rate": 2.7664080536191178e-06, + "loss": 85.8439, + "step": 84480 + }, + { + "epoch": 0.6989287339206685, + "grad_norm": 1054.154541015625, + "learning_rate": 2.7651463228657444e-06, + "loss": 93.1995, + "step": 84490 + }, + { + "epoch": 0.6990114571700377, + "grad_norm": 1459.2327880859375, + 
"learning_rate": 2.7638847699294196e-06, + "loss": 96.8761, + "step": 84500 + }, + { + "epoch": 0.6990941804194069, + "grad_norm": 607.018310546875, + "learning_rate": 2.7626233949105252e-06, + "loss": 69.8457, + "step": 84510 + }, + { + "epoch": 0.6991769036687762, + "grad_norm": 713.6427612304688, + "learning_rate": 2.7613621979094173e-06, + "loss": 61.2357, + "step": 84520 + }, + { + "epoch": 0.6992596269181454, + "grad_norm": 1142.56201171875, + "learning_rate": 2.7601011790264454e-06, + "loss": 81.4103, + "step": 84530 + }, + { + "epoch": 0.6993423501675146, + "grad_norm": 354.2678527832031, + "learning_rate": 2.758840338361942e-06, + "loss": 79.8373, + "step": 84540 + }, + { + "epoch": 0.6994250734168839, + "grad_norm": 730.341552734375, + "learning_rate": 2.7575796760162288e-06, + "loss": 84.2706, + "step": 84550 + }, + { + "epoch": 0.6995077966662531, + "grad_norm": 687.3909301757812, + "learning_rate": 2.7563191920896084e-06, + "loss": 79.1268, + "step": 84560 + }, + { + "epoch": 0.6995905199156223, + "grad_norm": 771.8450927734375, + "learning_rate": 2.755058886682373e-06, + "loss": 86.2774, + "step": 84570 + }, + { + "epoch": 0.6996732431649916, + "grad_norm": 826.2738037109375, + "learning_rate": 2.753798759894799e-06, + "loss": 96.8274, + "step": 84580 + }, + { + "epoch": 0.6997559664143608, + "grad_norm": 755.7567749023438, + "learning_rate": 2.7525388118271495e-06, + "loss": 90.1103, + "step": 84590 + }, + { + "epoch": 0.69983868966373, + "grad_norm": 1258.190185546875, + "learning_rate": 2.751279042579672e-06, + "loss": 95.2349, + "step": 84600 + }, + { + "epoch": 0.6999214129130992, + "grad_norm": 915.1702270507812, + "learning_rate": 2.7500194522526007e-06, + "loss": 97.5897, + "step": 84610 + }, + { + "epoch": 0.7000041361624685, + "grad_norm": 395.96044921875, + "learning_rate": 2.748760040946156e-06, + "loss": 71.4906, + "step": 84620 + }, + { + "epoch": 0.7000868594118377, + "grad_norm": 1328.4857177734375, + "learning_rate": 2.7475008087605428e-06, + "loss": 88.9978, + "step": 84630 + }, + { + "epoch": 0.7001695826612069, + "grad_norm": 1446.4951171875, + "learning_rate": 2.746241755795952e-06, + "loss": 129.5656, + "step": 84640 + }, + { + "epoch": 0.7002523059105762, + "grad_norm": 316.505615234375, + "learning_rate": 2.7449828821525624e-06, + "loss": 87.3932, + "step": 84650 + }, + { + "epoch": 0.7003350291599454, + "grad_norm": 738.2881469726562, + "learning_rate": 2.7437241879305354e-06, + "loss": 81.33, + "step": 84660 + }, + { + "epoch": 0.7004177524093146, + "grad_norm": 449.9028625488281, + "learning_rate": 2.7424656732300193e-06, + "loss": 71.7711, + "step": 84670 + }, + { + "epoch": 0.7005004756586839, + "grad_norm": 856.5056762695312, + "learning_rate": 2.7412073381511495e-06, + "loss": 66.1047, + "step": 84680 + }, + { + "epoch": 0.7005831989080531, + "grad_norm": 1258.2718505859375, + "learning_rate": 2.739949182794045e-06, + "loss": 119.7207, + "step": 84690 + }, + { + "epoch": 0.7006659221574223, + "grad_norm": 1023.8765869140625, + "learning_rate": 2.7386912072588123e-06, + "loss": 84.5958, + "step": 84700 + }, + { + "epoch": 0.7007486454067916, + "grad_norm": 1277.8900146484375, + "learning_rate": 2.737433411645542e-06, + "loss": 85.9086, + "step": 84710 + }, + { + "epoch": 0.7008313686561608, + "grad_norm": 739.3309326171875, + "learning_rate": 2.7361757960543114e-06, + "loss": 81.2444, + "step": 84720 + }, + { + "epoch": 0.70091409190553, + "grad_norm": 902.0711669921875, + "learning_rate": 2.7349183605851824e-06, + "loss": 72.4432, + "step": 
84730 + }, + { + "epoch": 0.7009968151548993, + "grad_norm": 1022.815185546875, + "learning_rate": 2.733661105338205e-06, + "loss": 131.5625, + "step": 84740 + }, + { + "epoch": 0.7010795384042685, + "grad_norm": 1463.1163330078125, + "learning_rate": 2.7324040304134125e-06, + "loss": 156.1384, + "step": 84750 + }, + { + "epoch": 0.7011622616536377, + "grad_norm": 762.2933959960938, + "learning_rate": 2.731147135910824e-06, + "loss": 100.2397, + "step": 84760 + }, + { + "epoch": 0.701244984903007, + "grad_norm": 1155.1771240234375, + "learning_rate": 2.729890421930445e-06, + "loss": 89.7689, + "step": 84770 + }, + { + "epoch": 0.7013277081523762, + "grad_norm": 649.2015991210938, + "learning_rate": 2.7286338885722674e-06, + "loss": 105.2359, + "step": 84780 + }, + { + "epoch": 0.7014104314017454, + "grad_norm": 776.761962890625, + "learning_rate": 2.7273775359362665e-06, + "loss": 78.5909, + "step": 84790 + }, + { + "epoch": 0.7014931546511147, + "grad_norm": 1017.0020141601562, + "learning_rate": 2.7261213641224056e-06, + "loss": 87.9357, + "step": 84800 + }, + { + "epoch": 0.7015758779004839, + "grad_norm": 754.7698974609375, + "learning_rate": 2.724865373230632e-06, + "loss": 105.561, + "step": 84810 + }, + { + "epoch": 0.7016586011498531, + "grad_norm": 519.2734375, + "learning_rate": 2.723609563360879e-06, + "loss": 87.3953, + "step": 84820 + }, + { + "epoch": 0.7017413243992224, + "grad_norm": 740.0153198242188, + "learning_rate": 2.7223539346130655e-06, + "loss": 105.0598, + "step": 84830 + }, + { + "epoch": 0.7018240476485916, + "grad_norm": 761.6763305664062, + "learning_rate": 2.7210984870870972e-06, + "loss": 110.6421, + "step": 84840 + }, + { + "epoch": 0.7019067708979608, + "grad_norm": 1044.949951171875, + "learning_rate": 2.7198432208828653e-06, + "loss": 116.031, + "step": 84850 + }, + { + "epoch": 0.7019894941473301, + "grad_norm": 934.532958984375, + "learning_rate": 2.7185881361002415e-06, + "loss": 75.7842, + "step": 84860 + }, + { + "epoch": 0.7020722173966993, + "grad_norm": 714.5556030273438, + "learning_rate": 2.717333232839088e-06, + "loss": 105.3236, + "step": 84870 + }, + { + "epoch": 0.7021549406460685, + "grad_norm": 589.3107299804688, + "learning_rate": 2.7160785111992546e-06, + "loss": 82.1901, + "step": 84880 + }, + { + "epoch": 0.7022376638954378, + "grad_norm": 1139.3077392578125, + "learning_rate": 2.7148239712805725e-06, + "loss": 90.0878, + "step": 84890 + }, + { + "epoch": 0.702320387144807, + "grad_norm": 951.9703979492188, + "learning_rate": 2.713569613182859e-06, + "loss": 78.3111, + "step": 84900 + }, + { + "epoch": 0.7024031103941762, + "grad_norm": 1005.8824462890625, + "learning_rate": 2.7123154370059185e-06, + "loss": 107.2209, + "step": 84910 + }, + { + "epoch": 0.7024858336435456, + "grad_norm": 1006.9483642578125, + "learning_rate": 2.7110614428495396e-06, + "loss": 116.84, + "step": 84920 + }, + { + "epoch": 0.7025685568929148, + "grad_norm": 607.9546508789062, + "learning_rate": 2.709807630813497e-06, + "loss": 106.0352, + "step": 84930 + }, + { + "epoch": 0.702651280142284, + "grad_norm": 458.4266662597656, + "learning_rate": 2.7085540009975526e-06, + "loss": 78.4597, + "step": 84940 + }, + { + "epoch": 0.7027340033916533, + "grad_norm": 1103.226318359375, + "learning_rate": 2.707300553501448e-06, + "loss": 70.445, + "step": 84950 + }, + { + "epoch": 0.7028167266410225, + "grad_norm": 1002.1087036132812, + "learning_rate": 2.7060472884249145e-06, + "loss": 87.4876, + "step": 84960 + }, + { + "epoch": 0.7028994498903917, + "grad_norm": 
993.1524047851562, + "learning_rate": 2.7047942058676717e-06, + "loss": 82.637, + "step": 84970 + }, + { + "epoch": 0.702982173139761, + "grad_norm": 1153.7645263671875, + "learning_rate": 2.703541305929421e-06, + "loss": 105.1839, + "step": 84980 + }, + { + "epoch": 0.7030648963891302, + "grad_norm": 765.0949096679688, + "learning_rate": 2.7022885887098492e-06, + "loss": 69.9202, + "step": 84990 + }, + { + "epoch": 0.7031476196384994, + "grad_norm": 930.6118774414062, + "learning_rate": 2.701036054308629e-06, + "loss": 78.9953, + "step": 85000 + }, + { + "epoch": 0.7032303428878687, + "grad_norm": 728.7538452148438, + "learning_rate": 2.699783702825419e-06, + "loss": 86.7408, + "step": 85010 + }, + { + "epoch": 0.7033130661372379, + "grad_norm": 731.0704345703125, + "learning_rate": 2.698531534359864e-06, + "loss": 94.9619, + "step": 85020 + }, + { + "epoch": 0.7033957893866071, + "grad_norm": 795.151611328125, + "learning_rate": 2.6972795490115944e-06, + "loss": 78.0553, + "step": 85030 + }, + { + "epoch": 0.7034785126359764, + "grad_norm": 806.3612060546875, + "learning_rate": 2.6960277468802203e-06, + "loss": 75.0848, + "step": 85040 + }, + { + "epoch": 0.7035612358853456, + "grad_norm": 1016.3757934570312, + "learning_rate": 2.694776128065345e-06, + "loss": 70.555, + "step": 85050 + }, + { + "epoch": 0.7036439591347148, + "grad_norm": 759.8416137695312, + "learning_rate": 2.6935246926665513e-06, + "loss": 75.158, + "step": 85060 + }, + { + "epoch": 0.7037266823840841, + "grad_norm": 671.8663940429688, + "learning_rate": 2.692273440783415e-06, + "loss": 107.0158, + "step": 85070 + }, + { + "epoch": 0.7038094056334533, + "grad_norm": 978.9486694335938, + "learning_rate": 2.6910223725154903e-06, + "loss": 105.8965, + "step": 85080 + }, + { + "epoch": 0.7038921288828225, + "grad_norm": 1325.02294921875, + "learning_rate": 2.6897714879623184e-06, + "loss": 97.7983, + "step": 85090 + }, + { + "epoch": 0.7039748521321918, + "grad_norm": 592.0951538085938, + "learning_rate": 2.688520787223426e-06, + "loss": 86.1239, + "step": 85100 + }, + { + "epoch": 0.704057575381561, + "grad_norm": 648.145263671875, + "learning_rate": 2.6872702703983287e-06, + "loss": 81.3647, + "step": 85110 + }, + { + "epoch": 0.7041402986309302, + "grad_norm": 702.9576416015625, + "learning_rate": 2.6860199375865203e-06, + "loss": 125.3935, + "step": 85120 + }, + { + "epoch": 0.7042230218802995, + "grad_norm": 605.6353759765625, + "learning_rate": 2.6847697888874853e-06, + "loss": 98.5384, + "step": 85130 + }, + { + "epoch": 0.7043057451296687, + "grad_norm": 843.0704956054688, + "learning_rate": 2.683519824400693e-06, + "loss": 100.6389, + "step": 85140 + }, + { + "epoch": 0.7043884683790379, + "grad_norm": 1235.5345458984375, + "learning_rate": 2.6822700442255965e-06, + "loss": 93.4948, + "step": 85150 + }, + { + "epoch": 0.7044711916284072, + "grad_norm": 1300.0670166015625, + "learning_rate": 2.681020448461634e-06, + "loss": 77.3637, + "step": 85160 + }, + { + "epoch": 0.7045539148777764, + "grad_norm": 2526.052734375, + "learning_rate": 2.679771037208234e-06, + "loss": 179.5842, + "step": 85170 + }, + { + "epoch": 0.7046366381271456, + "grad_norm": 994.899169921875, + "learning_rate": 2.678521810564804e-06, + "loss": 68.4581, + "step": 85180 + }, + { + "epoch": 0.7047193613765149, + "grad_norm": 894.9552612304688, + "learning_rate": 2.6772727686307398e-06, + "loss": 98.303, + "step": 85190 + }, + { + "epoch": 0.7048020846258841, + "grad_norm": 1177.982421875, + "learning_rate": 2.676023911505423e-06, + "loss": 
82.6399, + "step": 85200 + }, + { + "epoch": 0.7048848078752533, + "grad_norm": 1218.446533203125, + "learning_rate": 2.674775239288216e-06, + "loss": 101.0908, + "step": 85210 + }, + { + "epoch": 0.7049675311246226, + "grad_norm": 1381.9801025390625, + "learning_rate": 2.673526752078472e-06, + "loss": 102.3884, + "step": 85220 + }, + { + "epoch": 0.7050502543739918, + "grad_norm": 900.25634765625, + "learning_rate": 2.6722784499755273e-06, + "loss": 70.0581, + "step": 85230 + }, + { + "epoch": 0.705132977623361, + "grad_norm": 407.5909729003906, + "learning_rate": 2.6710303330787035e-06, + "loss": 83.3081, + "step": 85240 + }, + { + "epoch": 0.7052157008727303, + "grad_norm": 914.4041137695312, + "learning_rate": 2.6697824014873076e-06, + "loss": 82.6285, + "step": 85250 + }, + { + "epoch": 0.7052984241220995, + "grad_norm": 731.021240234375, + "learning_rate": 2.6685346553006293e-06, + "loss": 78.5455, + "step": 85260 + }, + { + "epoch": 0.7053811473714687, + "grad_norm": 1125.2713623046875, + "learning_rate": 2.6672870946179506e-06, + "loss": 105.0443, + "step": 85270 + }, + { + "epoch": 0.705463870620838, + "grad_norm": 688.19140625, + "learning_rate": 2.6660397195385344e-06, + "loss": 114.3066, + "step": 85280 + }, + { + "epoch": 0.7055465938702072, + "grad_norm": 1272.36083984375, + "learning_rate": 2.664792530161624e-06, + "loss": 86.4369, + "step": 85290 + }, + { + "epoch": 0.7056293171195764, + "grad_norm": 1196.80224609375, + "learning_rate": 2.6635455265864553e-06, + "loss": 107.9678, + "step": 85300 + }, + { + "epoch": 0.7057120403689456, + "grad_norm": 773.6207275390625, + "learning_rate": 2.662298708912246e-06, + "loss": 97.1165, + "step": 85310 + }, + { + "epoch": 0.705794763618315, + "grad_norm": 636.7437744140625, + "learning_rate": 2.6610520772382e-06, + "loss": 81.5361, + "step": 85320 + }, + { + "epoch": 0.7058774868676841, + "grad_norm": 875.7274780273438, + "learning_rate": 2.659805631663505e-06, + "loss": 78.7864, + "step": 85330 + }, + { + "epoch": 0.7059602101170533, + "grad_norm": 1031.9066162109375, + "learning_rate": 2.658559372287337e-06, + "loss": 92.2199, + "step": 85340 + }, + { + "epoch": 0.7060429333664227, + "grad_norm": 583.7274780273438, + "learning_rate": 2.6573132992088534e-06, + "loss": 63.5923, + "step": 85350 + }, + { + "epoch": 0.7061256566157919, + "grad_norm": 890.5264282226562, + "learning_rate": 2.656067412527197e-06, + "loss": 84.5146, + "step": 85360 + }, + { + "epoch": 0.706208379865161, + "grad_norm": 744.9577026367188, + "learning_rate": 2.6548217123415033e-06, + "loss": 90.214, + "step": 85370 + }, + { + "epoch": 0.7062911031145304, + "grad_norm": 792.3532104492188, + "learning_rate": 2.6535761987508813e-06, + "loss": 79.4676, + "step": 85380 + }, + { + "epoch": 0.7063738263638996, + "grad_norm": 739.8575439453125, + "learning_rate": 2.652330871854433e-06, + "loss": 90.5693, + "step": 85390 + }, + { + "epoch": 0.7064565496132688, + "grad_norm": 450.8844909667969, + "learning_rate": 2.651085731751242e-06, + "loss": 90.0703, + "step": 85400 + }, + { + "epoch": 0.7065392728626381, + "grad_norm": 1677.9515380859375, + "learning_rate": 2.6498407785403794e-06, + "loss": 93.2693, + "step": 85410 + }, + { + "epoch": 0.7066219961120073, + "grad_norm": 899.0578002929688, + "learning_rate": 2.648596012320901e-06, + "loss": 79.1233, + "step": 85420 + }, + { + "epoch": 0.7067047193613765, + "grad_norm": 735.9163208007812, + "learning_rate": 2.647351433191846e-06, + "loss": 90.7922, + "step": 85430 + }, + { + "epoch": 0.7067874426107458, + 
"grad_norm": 951.7572631835938, + "learning_rate": 2.64610704125224e-06, + "loss": 88.4487, + "step": 85440 + }, + { + "epoch": 0.706870165860115, + "grad_norm": 1408.339111328125, + "learning_rate": 2.644862836601092e-06, + "loss": 95.58, + "step": 85450 + }, + { + "epoch": 0.7069528891094842, + "grad_norm": 850.1331787109375, + "learning_rate": 2.6436188193374035e-06, + "loss": 96.218, + "step": 85460 + }, + { + "epoch": 0.7070356123588535, + "grad_norm": 864.1105346679688, + "learning_rate": 2.6423749895601494e-06, + "loss": 113.5922, + "step": 85470 + }, + { + "epoch": 0.7071183356082227, + "grad_norm": 969.8984985351562, + "learning_rate": 2.6411313473682966e-06, + "loss": 98.6914, + "step": 85480 + }, + { + "epoch": 0.7072010588575919, + "grad_norm": 965.0258178710938, + "learning_rate": 2.6398878928607973e-06, + "loss": 111.6938, + "step": 85490 + }, + { + "epoch": 0.7072837821069612, + "grad_norm": 2024.0552978515625, + "learning_rate": 2.6386446261365874e-06, + "loss": 122.5149, + "step": 85500 + }, + { + "epoch": 0.7073665053563304, + "grad_norm": 342.0011291503906, + "learning_rate": 2.6374015472945868e-06, + "loss": 109.7086, + "step": 85510 + }, + { + "epoch": 0.7074492286056996, + "grad_norm": 848.0896606445312, + "learning_rate": 2.6361586564337023e-06, + "loss": 91.5193, + "step": 85520 + }, + { + "epoch": 0.7075319518550689, + "grad_norm": 559.4988403320312, + "learning_rate": 2.6349159536528245e-06, + "loss": 93.1284, + "step": 85530 + }, + { + "epoch": 0.7076146751044381, + "grad_norm": 870.3743896484375, + "learning_rate": 2.633673439050831e-06, + "loss": 80.7735, + "step": 85540 + }, + { + "epoch": 0.7076973983538073, + "grad_norm": 547.6781616210938, + "learning_rate": 2.6324311127265812e-06, + "loss": 106.9985, + "step": 85550 + }, + { + "epoch": 0.7077801216031766, + "grad_norm": 691.0943603515625, + "learning_rate": 2.6311889747789225e-06, + "loss": 102.5531, + "step": 85560 + }, + { + "epoch": 0.7078628448525458, + "grad_norm": 845.0540771484375, + "learning_rate": 2.6299470253066863e-06, + "loss": 86.6591, + "step": 85570 + }, + { + "epoch": 0.707945568101915, + "grad_norm": 847.3442993164062, + "learning_rate": 2.628705264408687e-06, + "loss": 106.6626, + "step": 85580 + }, + { + "epoch": 0.7080282913512843, + "grad_norm": 1003.5369262695312, + "learning_rate": 2.6274636921837272e-06, + "loss": 106.0019, + "step": 85590 + }, + { + "epoch": 0.7081110146006535, + "grad_norm": 774.9839477539062, + "learning_rate": 2.626222308730594e-06, + "loss": 88.3581, + "step": 85600 + }, + { + "epoch": 0.7081937378500227, + "grad_norm": 854.4312133789062, + "learning_rate": 2.6249811141480564e-06, + "loss": 89.8101, + "step": 85610 + }, + { + "epoch": 0.708276461099392, + "grad_norm": 445.865234375, + "learning_rate": 2.6237401085348723e-06, + "loss": 62.2433, + "step": 85620 + }, + { + "epoch": 0.7083591843487612, + "grad_norm": 720.7432250976562, + "learning_rate": 2.6224992919897817e-06, + "loss": 99.8638, + "step": 85630 + }, + { + "epoch": 0.7084419075981304, + "grad_norm": 997.8077392578125, + "learning_rate": 2.6212586646115114e-06, + "loss": 77.9413, + "step": 85640 + }, + { + "epoch": 0.7085246308474997, + "grad_norm": 1052.6624755859375, + "learning_rate": 2.620018226498772e-06, + "loss": 104.5083, + "step": 85650 + }, + { + "epoch": 0.7086073540968689, + "grad_norm": 999.076171875, + "learning_rate": 2.61877797775026e-06, + "loss": 89.7667, + "step": 85660 + }, + { + "epoch": 0.7086900773462381, + "grad_norm": 1541.536376953125, + "learning_rate": 
2.6175379184646565e-06, + "loss": 92.7317, + "step": 85670 + }, + { + "epoch": 0.7087728005956074, + "grad_norm": 876.532470703125, + "learning_rate": 2.616298048740626e-06, + "loss": 85.8813, + "step": 85680 + }, + { + "epoch": 0.7088555238449766, + "grad_norm": 896.5637817382812, + "learning_rate": 2.6150583686768203e-06, + "loss": 120.6416, + "step": 85690 + }, + { + "epoch": 0.7089382470943458, + "grad_norm": 913.8203125, + "learning_rate": 2.6138188783718745e-06, + "loss": 100.0601, + "step": 85700 + }, + { + "epoch": 0.7090209703437151, + "grad_norm": 662.0992431640625, + "learning_rate": 2.6125795779244125e-06, + "loss": 90.1497, + "step": 85710 + }, + { + "epoch": 0.7091036935930843, + "grad_norm": 738.2545776367188, + "learning_rate": 2.611340467433031e-06, + "loss": 94.7059, + "step": 85720 + }, + { + "epoch": 0.7091864168424535, + "grad_norm": 925.9259643554688, + "learning_rate": 2.61010154699633e-06, + "loss": 64.9561, + "step": 85730 + }, + { + "epoch": 0.7092691400918228, + "grad_norm": 657.1807861328125, + "learning_rate": 2.6088628167128794e-06, + "loss": 81.276, + "step": 85740 + }, + { + "epoch": 0.709351863341192, + "grad_norm": 703.9519653320312, + "learning_rate": 2.607624276681241e-06, + "loss": 73.3136, + "step": 85750 + }, + { + "epoch": 0.7094345865905612, + "grad_norm": 596.41064453125, + "learning_rate": 2.6063859269999594e-06, + "loss": 77.5182, + "step": 85760 + }, + { + "epoch": 0.7095173098399306, + "grad_norm": 327.9614562988281, + "learning_rate": 2.605147767767564e-06, + "loss": 84.4486, + "step": 85770 + }, + { + "epoch": 0.7096000330892998, + "grad_norm": 914.2791137695312, + "learning_rate": 2.6039097990825703e-06, + "loss": 99.9321, + "step": 85780 + }, + { + "epoch": 0.709682756338669, + "grad_norm": 758.622802734375, + "learning_rate": 2.602672021043477e-06, + "loss": 83.2458, + "step": 85790 + }, + { + "epoch": 0.7097654795880383, + "grad_norm": 801.3850708007812, + "learning_rate": 2.601434433748771e-06, + "loss": 99.5949, + "step": 85800 + }, + { + "epoch": 0.7098482028374075, + "grad_norm": 531.544189453125, + "learning_rate": 2.600197037296917e-06, + "loss": 85.0115, + "step": 85810 + }, + { + "epoch": 0.7099309260867767, + "grad_norm": 977.3123779296875, + "learning_rate": 2.5989598317863694e-06, + "loss": 93.289, + "step": 85820 + }, + { + "epoch": 0.710013649336146, + "grad_norm": 613.4664916992188, + "learning_rate": 2.59772281731557e-06, + "loss": 80.9545, + "step": 85830 + }, + { + "epoch": 0.7100963725855152, + "grad_norm": 646.8933715820312, + "learning_rate": 2.5964859939829423e-06, + "loss": 107.6766, + "step": 85840 + }, + { + "epoch": 0.7101790958348844, + "grad_norm": 650.1854858398438, + "learning_rate": 2.595249361886892e-06, + "loss": 75.5614, + "step": 85850 + }, + { + "epoch": 0.7102618190842537, + "grad_norm": 816.5127563476562, + "learning_rate": 2.5940129211258147e-06, + "loss": 94.7183, + "step": 85860 + }, + { + "epoch": 0.7103445423336229, + "grad_norm": 719.228271484375, + "learning_rate": 2.5927766717980873e-06, + "loss": 90.4481, + "step": 85870 + }, + { + "epoch": 0.7104272655829921, + "grad_norm": 1630.4447021484375, + "learning_rate": 2.5915406140020738e-06, + "loss": 99.5369, + "step": 85880 + }, + { + "epoch": 0.7105099888323614, + "grad_norm": 830.96044921875, + "learning_rate": 2.590304747836119e-06, + "loss": 106.8879, + "step": 85890 + }, + { + "epoch": 0.7105927120817306, + "grad_norm": 1144.8453369140625, + "learning_rate": 2.5890690733985555e-06, + "loss": 93.4202, + "step": 85900 + }, + { + "epoch": 
0.7106754353310998, + "grad_norm": 1315.0010986328125, + "learning_rate": 2.5878335907876997e-06, + "loss": 75.4638, + "step": 85910 + }, + { + "epoch": 0.7107581585804691, + "grad_norm": 748.9456176757812, + "learning_rate": 2.5865983001018567e-06, + "loss": 79.2242, + "step": 85920 + }, + { + "epoch": 0.7108408818298383, + "grad_norm": 456.5758972167969, + "learning_rate": 2.5853632014393108e-06, + "loss": 61.4048, + "step": 85930 + }, + { + "epoch": 0.7109236050792075, + "grad_norm": 565.6151733398438, + "learning_rate": 2.584128294898334e-06, + "loss": 79.6808, + "step": 85940 + }, + { + "epoch": 0.7110063283285768, + "grad_norm": 922.67529296875, + "learning_rate": 2.5828935805771804e-06, + "loss": 141.0256, + "step": 85950 + }, + { + "epoch": 0.711089051577946, + "grad_norm": 889.4005737304688, + "learning_rate": 2.581659058574092e-06, + "loss": 67.4175, + "step": 85960 + }, + { + "epoch": 0.7111717748273152, + "grad_norm": 669.8568725585938, + "learning_rate": 2.580424728987296e-06, + "loss": 88.6796, + "step": 85970 + }, + { + "epoch": 0.7112544980766845, + "grad_norm": 1054.4835205078125, + "learning_rate": 2.5791905919149973e-06, + "loss": 108.1136, + "step": 85980 + }, + { + "epoch": 0.7113372213260537, + "grad_norm": 1388.4984130859375, + "learning_rate": 2.5779566474553934e-06, + "loss": 109.7086, + "step": 85990 + }, + { + "epoch": 0.7114199445754229, + "grad_norm": 624.65673828125, + "learning_rate": 2.5767228957066635e-06, + "loss": 94.2603, + "step": 86000 + }, + { + "epoch": 0.7115026678247922, + "grad_norm": 607.4688110351562, + "learning_rate": 2.5754893367669697e-06, + "loss": 75.7593, + "step": 86010 + }, + { + "epoch": 0.7115853910741614, + "grad_norm": 1318.2784423828125, + "learning_rate": 2.5742559707344638e-06, + "loss": 98.0989, + "step": 86020 + }, + { + "epoch": 0.7116681143235306, + "grad_norm": 647.1195068359375, + "learning_rate": 2.573022797707278e-06, + "loss": 79.2859, + "step": 86030 + }, + { + "epoch": 0.7117508375728998, + "grad_norm": 461.97894287109375, + "learning_rate": 2.57178981778353e-06, + "loss": 101.9, + "step": 86040 + }, + { + "epoch": 0.7118335608222691, + "grad_norm": 616.1342163085938, + "learning_rate": 2.5705570310613215e-06, + "loss": 101.6059, + "step": 86050 + }, + { + "epoch": 0.7119162840716383, + "grad_norm": 997.8457641601562, + "learning_rate": 2.5693244376387435e-06, + "loss": 83.1273, + "step": 86060 + }, + { + "epoch": 0.7119990073210075, + "grad_norm": 954.9032592773438, + "learning_rate": 2.568092037613862e-06, + "loss": 109.8029, + "step": 86070 + }, + { + "epoch": 0.7120817305703768, + "grad_norm": 704.2922973632812, + "learning_rate": 2.566859831084736e-06, + "loss": 93.9186, + "step": 86080 + }, + { + "epoch": 0.712164453819746, + "grad_norm": 826.8761596679688, + "learning_rate": 2.5656278181494072e-06, + "loss": 85.7292, + "step": 86090 + }, + { + "epoch": 0.7122471770691152, + "grad_norm": 1042.4456787109375, + "learning_rate": 2.5643959989058997e-06, + "loss": 103.6534, + "step": 86100 + }, + { + "epoch": 0.7123299003184845, + "grad_norm": 1412.099365234375, + "learning_rate": 2.563164373452224e-06, + "loss": 96.1712, + "step": 86110 + }, + { + "epoch": 0.7124126235678537, + "grad_norm": 846.20947265625, + "learning_rate": 2.561932941886377e-06, + "loss": 93.9482, + "step": 86120 + }, + { + "epoch": 0.7124953468172229, + "grad_norm": 650.3298950195312, + "learning_rate": 2.560701704306336e-06, + "loss": 94.7339, + "step": 86130 + }, + { + "epoch": 0.7125780700665922, + "grad_norm": 903.107666015625, + 
"learning_rate": 2.5594706608100677e-06, + "loss": 68.6888, + "step": 86140 + }, + { + "epoch": 0.7126607933159614, + "grad_norm": 2324.131591796875, + "learning_rate": 2.5582398114955164e-06, + "loss": 88.7436, + "step": 86150 + }, + { + "epoch": 0.7127435165653306, + "grad_norm": 676.3530883789062, + "learning_rate": 2.5570091564606182e-06, + "loss": 95.5154, + "step": 86160 + }, + { + "epoch": 0.7128262398147, + "grad_norm": 713.0216064453125, + "learning_rate": 2.555778695803288e-06, + "loss": 48.4695, + "step": 86170 + }, + { + "epoch": 0.7129089630640691, + "grad_norm": 877.8251342773438, + "learning_rate": 2.554548429621431e-06, + "loss": 99.8856, + "step": 86180 + }, + { + "epoch": 0.7129916863134383, + "grad_norm": 679.6746215820312, + "learning_rate": 2.5533183580129317e-06, + "loss": 109.55, + "step": 86190 + }, + { + "epoch": 0.7130744095628077, + "grad_norm": 1079.2734375, + "learning_rate": 2.5520884810756614e-06, + "loss": 92.1818, + "step": 86200 + }, + { + "epoch": 0.7131571328121769, + "grad_norm": 1415.2052001953125, + "learning_rate": 2.550858798907475e-06, + "loss": 98.666, + "step": 86210 + }, + { + "epoch": 0.713239856061546, + "grad_norm": 498.3135986328125, + "learning_rate": 2.5496293116062154e-06, + "loss": 100.0309, + "step": 86220 + }, + { + "epoch": 0.7133225793109154, + "grad_norm": 556.9967651367188, + "learning_rate": 2.5484000192697078e-06, + "loss": 128.9959, + "step": 86230 + }, + { + "epoch": 0.7134053025602846, + "grad_norm": 1304.005126953125, + "learning_rate": 2.5471709219957573e-06, + "loss": 92.9146, + "step": 86240 + }, + { + "epoch": 0.7134880258096538, + "grad_norm": 667.9055786132812, + "learning_rate": 2.5459420198821604e-06, + "loss": 96.6319, + "step": 86250 + }, + { + "epoch": 0.7135707490590231, + "grad_norm": 969.6152954101562, + "learning_rate": 2.5447133130266937e-06, + "loss": 112.7888, + "step": 86260 + }, + { + "epoch": 0.7136534723083923, + "grad_norm": 1060.4730224609375, + "learning_rate": 2.5434848015271206e-06, + "loss": 96.5464, + "step": 86270 + }, + { + "epoch": 0.7137361955577615, + "grad_norm": 1293.969970703125, + "learning_rate": 2.542256485481188e-06, + "loss": 81.4303, + "step": 86280 + }, + { + "epoch": 0.7138189188071308, + "grad_norm": 737.7543334960938, + "learning_rate": 2.5410283649866272e-06, + "loss": 86.3164, + "step": 86290 + }, + { + "epoch": 0.7139016420565, + "grad_norm": 494.8547058105469, + "learning_rate": 2.539800440141154e-06, + "loss": 97.7299, + "step": 86300 + }, + { + "epoch": 0.7139843653058692, + "grad_norm": 751.9924926757812, + "learning_rate": 2.5385727110424697e-06, + "loss": 82.0479, + "step": 86310 + }, + { + "epoch": 0.7140670885552385, + "grad_norm": 854.956298828125, + "learning_rate": 2.5373451777882575e-06, + "loss": 98.6645, + "step": 86320 + }, + { + "epoch": 0.7141498118046077, + "grad_norm": 744.7928466796875, + "learning_rate": 2.5361178404761876e-06, + "loss": 81.0549, + "step": 86330 + }, + { + "epoch": 0.7142325350539769, + "grad_norm": 546.837646484375, + "learning_rate": 2.534890699203914e-06, + "loss": 85.5998, + "step": 86340 + }, + { + "epoch": 0.7143152583033462, + "grad_norm": 846.6171875, + "learning_rate": 2.533663754069074e-06, + "loss": 82.133, + "step": 86350 + }, + { + "epoch": 0.7143979815527154, + "grad_norm": 787.9846801757812, + "learning_rate": 2.5324370051692905e-06, + "loss": 96.9177, + "step": 86360 + }, + { + "epoch": 0.7144807048020846, + "grad_norm": 494.6238708496094, + "learning_rate": 2.5312104526021687e-06, + "loss": 90.6376, + "step": 86370 + }, + 
{ + "epoch": 0.7145634280514539, + "grad_norm": 868.8283081054688, + "learning_rate": 2.529984096465302e-06, + "loss": 93.8985, + "step": 86380 + }, + { + "epoch": 0.7146461513008231, + "grad_norm": 971.2010498046875, + "learning_rate": 2.528757936856264e-06, + "loss": 89.9199, + "step": 86390 + }, + { + "epoch": 0.7147288745501923, + "grad_norm": 757.8761596679688, + "learning_rate": 2.527531973872617e-06, + "loss": 74.3093, + "step": 86400 + }, + { + "epoch": 0.7148115977995616, + "grad_norm": 610.7327270507812, + "learning_rate": 2.5263062076119026e-06, + "loss": 65.9494, + "step": 86410 + }, + { + "epoch": 0.7148943210489308, + "grad_norm": 3729.9736328125, + "learning_rate": 2.525080638171651e-06, + "loss": 91.3989, + "step": 86420 + }, + { + "epoch": 0.7149770442983, + "grad_norm": 1108.1942138671875, + "learning_rate": 2.5238552656493743e-06, + "loss": 89.1207, + "step": 86430 + }, + { + "epoch": 0.7150597675476693, + "grad_norm": 811.8157958984375, + "learning_rate": 2.52263009014257e-06, + "loss": 111.9879, + "step": 86440 + }, + { + "epoch": 0.7151424907970385, + "grad_norm": 570.6813354492188, + "learning_rate": 2.5214051117487205e-06, + "loss": 85.2475, + "step": 86450 + }, + { + "epoch": 0.7152252140464077, + "grad_norm": 888.626953125, + "learning_rate": 2.52018033056529e-06, + "loss": 82.2838, + "step": 86460 + }, + { + "epoch": 0.715307937295777, + "grad_norm": 272.4425964355469, + "learning_rate": 2.5189557466897306e-06, + "loss": 62.3443, + "step": 86470 + }, + { + "epoch": 0.7153906605451462, + "grad_norm": 869.8495483398438, + "learning_rate": 2.517731360219476e-06, + "loss": 92.5027, + "step": 86480 + }, + { + "epoch": 0.7154733837945154, + "grad_norm": 892.153564453125, + "learning_rate": 2.5165071712519447e-06, + "loss": 101.1932, + "step": 86490 + }, + { + "epoch": 0.7155561070438847, + "grad_norm": 1358.9947509765625, + "learning_rate": 2.51528317988454e-06, + "loss": 110.4798, + "step": 86500 + }, + { + "epoch": 0.7156388302932539, + "grad_norm": 894.7091674804688, + "learning_rate": 2.5140593862146496e-06, + "loss": 90.021, + "step": 86510 + }, + { + "epoch": 0.7157215535426231, + "grad_norm": 682.6785888671875, + "learning_rate": 2.512835790339645e-06, + "loss": 95.0613, + "step": 86520 + }, + { + "epoch": 0.7158042767919924, + "grad_norm": 1361.6385498046875, + "learning_rate": 2.5116123923568815e-06, + "loss": 111.4644, + "step": 86530 + }, + { + "epoch": 0.7158870000413616, + "grad_norm": 625.406982421875, + "learning_rate": 2.5103891923637e-06, + "loss": 98.9246, + "step": 86540 + }, + { + "epoch": 0.7159697232907308, + "grad_norm": 509.01702880859375, + "learning_rate": 2.509166190457425e-06, + "loss": 85.337, + "step": 86550 + }, + { + "epoch": 0.7160524465401001, + "grad_norm": 746.6972045898438, + "learning_rate": 2.5079433867353646e-06, + "loss": 92.2448, + "step": 86560 + }, + { + "epoch": 0.7161351697894693, + "grad_norm": 1138.3980712890625, + "learning_rate": 2.5067207812948123e-06, + "loss": 92.1766, + "step": 86570 + }, + { + "epoch": 0.7162178930388385, + "grad_norm": 1579.0123291015625, + "learning_rate": 2.505498374233044e-06, + "loss": 92.1637, + "step": 86580 + }, + { + "epoch": 0.7163006162882078, + "grad_norm": 728.6637573242188, + "learning_rate": 2.5042761656473226e-06, + "loss": 103.8948, + "step": 86590 + }, + { + "epoch": 0.716383339537577, + "grad_norm": 946.8383178710938, + "learning_rate": 2.503054155634893e-06, + "loss": 106.1606, + "step": 86600 + }, + { + "epoch": 0.7164660627869462, + "grad_norm": 1170.6138916015625, + 
"learning_rate": 2.5018323442929844e-06, + "loss": 66.9248, + "step": 86610 + }, + { + "epoch": 0.7165487860363156, + "grad_norm": 403.2840881347656, + "learning_rate": 2.500610731718811e-06, + "loss": 96.3434, + "step": 86620 + }, + { + "epoch": 0.7166315092856848, + "grad_norm": 940.978271484375, + "learning_rate": 2.499389318009571e-06, + "loss": 102.5182, + "step": 86630 + }, + { + "epoch": 0.716714232535054, + "grad_norm": 513.7085571289062, + "learning_rate": 2.4981681032624473e-06, + "loss": 78.1403, + "step": 86640 + }, + { + "epoch": 0.7167969557844233, + "grad_norm": 1063.4349365234375, + "learning_rate": 2.4969470875746055e-06, + "loss": 110.9906, + "step": 86650 + }, + { + "epoch": 0.7168796790337925, + "grad_norm": 577.010986328125, + "learning_rate": 2.495726271043198e-06, + "loss": 77.6551, + "step": 86660 + }, + { + "epoch": 0.7169624022831617, + "grad_norm": 457.9773864746094, + "learning_rate": 2.4945056537653545e-06, + "loss": 65.822, + "step": 86670 + }, + { + "epoch": 0.717045125532531, + "grad_norm": 985.6693115234375, + "learning_rate": 2.493285235838199e-06, + "loss": 76.2427, + "step": 86680 + }, + { + "epoch": 0.7171278487819002, + "grad_norm": 1075.8038330078125, + "learning_rate": 2.492065017358834e-06, + "loss": 108.8681, + "step": 86690 + }, + { + "epoch": 0.7172105720312694, + "grad_norm": 618.4483642578125, + "learning_rate": 2.4908449984243448e-06, + "loss": 81.0635, + "step": 86700 + }, + { + "epoch": 0.7172932952806387, + "grad_norm": 1000.34521484375, + "learning_rate": 2.4896251791318036e-06, + "loss": 88.2856, + "step": 86710 + }, + { + "epoch": 0.7173760185300079, + "grad_norm": 840.3250732421875, + "learning_rate": 2.4884055595782666e-06, + "loss": 76.9793, + "step": 86720 + }, + { + "epoch": 0.7174587417793771, + "grad_norm": 1622.5933837890625, + "learning_rate": 2.487186139860772e-06, + "loss": 99.8122, + "step": 86730 + }, + { + "epoch": 0.7175414650287464, + "grad_norm": 805.3701171875, + "learning_rate": 2.485966920076346e-06, + "loss": 88.3674, + "step": 86740 + }, + { + "epoch": 0.7176241882781156, + "grad_norm": 995.889892578125, + "learning_rate": 2.4847479003219926e-06, + "loss": 91.9693, + "step": 86750 + }, + { + "epoch": 0.7177069115274848, + "grad_norm": 657.5465087890625, + "learning_rate": 2.4835290806947047e-06, + "loss": 78.4104, + "step": 86760 + }, + { + "epoch": 0.717789634776854, + "grad_norm": 632.3702392578125, + "learning_rate": 2.4823104612914578e-06, + "loss": 109.4438, + "step": 86770 + }, + { + "epoch": 0.7178723580262233, + "grad_norm": 484.9309997558594, + "learning_rate": 2.4810920422092137e-06, + "loss": 92.3368, + "step": 86780 + }, + { + "epoch": 0.7179550812755925, + "grad_norm": 791.4384155273438, + "learning_rate": 2.4798738235449164e-06, + "loss": 90.1838, + "step": 86790 + }, + { + "epoch": 0.7180378045249617, + "grad_norm": 768.782470703125, + "learning_rate": 2.478655805395493e-06, + "loss": 93.8179, + "step": 86800 + }, + { + "epoch": 0.718120527774331, + "grad_norm": 603.9031982421875, + "learning_rate": 2.477437987857856e-06, + "loss": 66.2067, + "step": 86810 + }, + { + "epoch": 0.7182032510237002, + "grad_norm": 695.658447265625, + "learning_rate": 2.4762203710289008e-06, + "loss": 90.285, + "step": 86820 + }, + { + "epoch": 0.7182859742730694, + "grad_norm": 688.7564697265625, + "learning_rate": 2.4750029550055098e-06, + "loss": 104.6844, + "step": 86830 + }, + { + "epoch": 0.7183686975224387, + "grad_norm": 594.9547729492188, + "learning_rate": 2.473785739884544e-06, + "loss": 79.156, + "step": 86840 
+ }, + { + "epoch": 0.7184514207718079, + "grad_norm": 776.5545043945312, + "learning_rate": 2.4725687257628533e-06, + "loss": 110.7488, + "step": 86850 + }, + { + "epoch": 0.7185341440211771, + "grad_norm": 1479.708740234375, + "learning_rate": 2.47135191273727e-06, + "loss": 157.2341, + "step": 86860 + }, + { + "epoch": 0.7186168672705464, + "grad_norm": 855.310302734375, + "learning_rate": 2.4701353009046075e-06, + "loss": 98.2323, + "step": 86870 + }, + { + "epoch": 0.7186995905199156, + "grad_norm": 1696.113525390625, + "learning_rate": 2.4689188903616707e-06, + "loss": 87.6517, + "step": 86880 + }, + { + "epoch": 0.7187823137692848, + "grad_norm": 622.81787109375, + "learning_rate": 2.467702681205241e-06, + "loss": 83.5084, + "step": 86890 + }, + { + "epoch": 0.7188650370186541, + "grad_norm": 815.8912353515625, + "learning_rate": 2.4664866735320886e-06, + "loss": 93.1341, + "step": 86900 + }, + { + "epoch": 0.7189477602680233, + "grad_norm": 599.5869140625, + "learning_rate": 2.4652708674389636e-06, + "loss": 106.3732, + "step": 86910 + }, + { + "epoch": 0.7190304835173925, + "grad_norm": 917.40380859375, + "learning_rate": 2.464055263022605e-06, + "loss": 90.2012, + "step": 86920 + }, + { + "epoch": 0.7191132067667618, + "grad_norm": 765.8475952148438, + "learning_rate": 2.462839860379729e-06, + "loss": 76.7954, + "step": 86930 + }, + { + "epoch": 0.719195930016131, + "grad_norm": 1118.771240234375, + "learning_rate": 2.46162465960704e-06, + "loss": 88.3979, + "step": 86940 + }, + { + "epoch": 0.7192786532655002, + "grad_norm": 841.2666625976562, + "learning_rate": 2.460409660801229e-06, + "loss": 77.0938, + "step": 86950 + }, + { + "epoch": 0.7193613765148695, + "grad_norm": 579.8666381835938, + "learning_rate": 2.459194864058963e-06, + "loss": 81.2424, + "step": 86960 + }, + { + "epoch": 0.7194440997642387, + "grad_norm": 2329.31884765625, + "learning_rate": 2.457980269476903e-06, + "loss": 117.6933, + "step": 86970 + }, + { + "epoch": 0.7195268230136079, + "grad_norm": 662.1182250976562, + "learning_rate": 2.4567658771516876e-06, + "loss": 80.9285, + "step": 86980 + }, + { + "epoch": 0.7196095462629772, + "grad_norm": 798.1659545898438, + "learning_rate": 2.455551687179939e-06, + "loss": 75.5553, + "step": 86990 + }, + { + "epoch": 0.7196922695123464, + "grad_norm": 685.1201171875, + "learning_rate": 2.454337699658267e-06, + "loss": 100.3396, + "step": 87000 + }, + { + "epoch": 0.7197749927617156, + "grad_norm": 951.0033569335938, + "learning_rate": 2.453123914683259e-06, + "loss": 96.5214, + "step": 87010 + }, + { + "epoch": 0.719857716011085, + "grad_norm": 571.2054443359375, + "learning_rate": 2.4519103323514932e-06, + "loss": 74.3453, + "step": 87020 + }, + { + "epoch": 0.7199404392604541, + "grad_norm": 1552.3475341796875, + "learning_rate": 2.4506969527595277e-06, + "loss": 86.0868, + "step": 87030 + }, + { + "epoch": 0.7200231625098233, + "grad_norm": 716.04931640625, + "learning_rate": 2.4494837760039057e-06, + "loss": 91.2232, + "step": 87040 + }, + { + "epoch": 0.7201058857591927, + "grad_norm": 1257.593994140625, + "learning_rate": 2.4482708021811546e-06, + "loss": 86.8641, + "step": 87050 + }, + { + "epoch": 0.7201886090085619, + "grad_norm": 950.4786376953125, + "learning_rate": 2.4470580313877833e-06, + "loss": 97.9864, + "step": 87060 + }, + { + "epoch": 0.720271332257931, + "grad_norm": 1004.8903198242188, + "learning_rate": 2.44584546372029e-06, + "loss": 88.7277, + "step": 87070 + }, + { + "epoch": 0.7203540555073004, + "grad_norm": 937.1966552734375, + 
"learning_rate": 2.4446330992751504e-06, + "loss": 93.3525, + "step": 87080 + }, + { + "epoch": 0.7204367787566696, + "grad_norm": 994.6897583007812, + "learning_rate": 2.44342093814883e-06, + "loss": 79.5034, + "step": 87090 + }, + { + "epoch": 0.7205195020060388, + "grad_norm": 663.3712768554688, + "learning_rate": 2.442208980437771e-06, + "loss": 119.1683, + "step": 87100 + }, + { + "epoch": 0.7206022252554081, + "grad_norm": 734.4451904296875, + "learning_rate": 2.4409972262384037e-06, + "loss": 82.272, + "step": 87110 + }, + { + "epoch": 0.7206849485047773, + "grad_norm": 511.676513671875, + "learning_rate": 2.4397856756471435e-06, + "loss": 75.2757, + "step": 87120 + }, + { + "epoch": 0.7207676717541465, + "grad_norm": 1072.48046875, + "learning_rate": 2.438574328760387e-06, + "loss": 118.7081, + "step": 87130 + }, + { + "epoch": 0.7208503950035158, + "grad_norm": 1304.2197265625, + "learning_rate": 2.437363185674516e-06, + "loss": 65.1885, + "step": 87140 + }, + { + "epoch": 0.720933118252885, + "grad_norm": 734.7349243164062, + "learning_rate": 2.4361522464858956e-06, + "loss": 75.8047, + "step": 87150 + }, + { + "epoch": 0.7210158415022542, + "grad_norm": 775.7440795898438, + "learning_rate": 2.434941511290872e-06, + "loss": 84.4206, + "step": 87160 + }, + { + "epoch": 0.7210985647516235, + "grad_norm": 846.2425537109375, + "learning_rate": 2.4337309801857846e-06, + "loss": 75.5298, + "step": 87170 + }, + { + "epoch": 0.7211812880009927, + "grad_norm": 950.1725463867188, + "learning_rate": 2.432520653266943e-06, + "loss": 137.4889, + "step": 87180 + }, + { + "epoch": 0.7212640112503619, + "grad_norm": 1078.6646728515625, + "learning_rate": 2.4313105306306505e-06, + "loss": 104.9158, + "step": 87190 + }, + { + "epoch": 0.7213467344997312, + "grad_norm": 807.844970703125, + "learning_rate": 2.4301006123731908e-06, + "loss": 86.7134, + "step": 87200 + }, + { + "epoch": 0.7214294577491004, + "grad_norm": 1981.1280517578125, + "learning_rate": 2.4288908985908304e-06, + "loss": 90.906, + "step": 87210 + }, + { + "epoch": 0.7215121809984696, + "grad_norm": 573.0137939453125, + "learning_rate": 2.4276813893798212e-06, + "loss": 86.2283, + "step": 87220 + }, + { + "epoch": 0.7215949042478389, + "grad_norm": 1042.208984375, + "learning_rate": 2.4264720848363992e-06, + "loss": 79.0382, + "step": 87230 + }, + { + "epoch": 0.7216776274972081, + "grad_norm": 667.932861328125, + "learning_rate": 2.4252629850567823e-06, + "loss": 80.4661, + "step": 87240 + }, + { + "epoch": 0.7217603507465773, + "grad_norm": 869.255615234375, + "learning_rate": 2.4240540901371727e-06, + "loss": 93.8904, + "step": 87250 + }, + { + "epoch": 0.7218430739959466, + "grad_norm": 1087.067626953125, + "learning_rate": 2.4228454001737576e-06, + "loss": 70.2305, + "step": 87260 + }, + { + "epoch": 0.7219257972453158, + "grad_norm": 853.7138061523438, + "learning_rate": 2.421636915262707e-06, + "loss": 82.3333, + "step": 87270 + }, + { + "epoch": 0.722008520494685, + "grad_norm": 575.6585083007812, + "learning_rate": 2.420428635500173e-06, + "loss": 71.9151, + "step": 87280 + }, + { + "epoch": 0.7220912437440543, + "grad_norm": 846.3626708984375, + "learning_rate": 2.419220560982294e-06, + "loss": 111.3039, + "step": 87290 + }, + { + "epoch": 0.7221739669934235, + "grad_norm": 1180.4281005859375, + "learning_rate": 2.418012691805191e-06, + "loss": 96.9584, + "step": 87300 + }, + { + "epoch": 0.7222566902427927, + "grad_norm": 992.0172119140625, + "learning_rate": 2.4168050280649686e-06, + "loss": 136.7477, + "step": 87310 + 
}, + { + "epoch": 0.722339413492162, + "grad_norm": 1166.8099365234375, + "learning_rate": 2.4155975698577146e-06, + "loss": 88.4807, + "step": 87320 + }, + { + "epoch": 0.7224221367415312, + "grad_norm": 758.3084106445312, + "learning_rate": 2.4143903172795014e-06, + "loss": 73.7974, + "step": 87330 + }, + { + "epoch": 0.7225048599909004, + "grad_norm": 805.8015747070312, + "learning_rate": 2.4131832704263842e-06, + "loss": 91.9121, + "step": 87340 + }, + { + "epoch": 0.7225875832402697, + "grad_norm": 889.5006103515625, + "learning_rate": 2.411976429394402e-06, + "loss": 82.7861, + "step": 87350 + }, + { + "epoch": 0.7226703064896389, + "grad_norm": 859.7445068359375, + "learning_rate": 2.4107697942795782e-06, + "loss": 86.8242, + "step": 87360 + }, + { + "epoch": 0.7227530297390081, + "grad_norm": 1024.7366943359375, + "learning_rate": 2.4095633651779186e-06, + "loss": 75.2, + "step": 87370 + }, + { + "epoch": 0.7228357529883774, + "grad_norm": 732.5393676757812, + "learning_rate": 2.4083571421854137e-06, + "loss": 83.1405, + "step": 87380 + }, + { + "epoch": 0.7229184762377466, + "grad_norm": 754.288818359375, + "learning_rate": 2.407151125398037e-06, + "loss": 87.5245, + "step": 87390 + }, + { + "epoch": 0.7230011994871158, + "grad_norm": 859.1074829101562, + "learning_rate": 2.405945314911746e-06, + "loss": 90.8254, + "step": 87400 + }, + { + "epoch": 0.7230839227364851, + "grad_norm": 841.62744140625, + "learning_rate": 2.4047397108224807e-06, + "loss": 84.0488, + "step": 87410 + }, + { + "epoch": 0.7231666459858543, + "grad_norm": 1061.734375, + "learning_rate": 2.403534313226166e-06, + "loss": 95.578, + "step": 87420 + }, + { + "epoch": 0.7232493692352235, + "grad_norm": 678.4471435546875, + "learning_rate": 2.40232912221871e-06, + "loss": 119.1306, + "step": 87430 + }, + { + "epoch": 0.7233320924845928, + "grad_norm": 1203.9510498046875, + "learning_rate": 2.4011241378960037e-06, + "loss": 87.4557, + "step": 87440 + }, + { + "epoch": 0.723414815733962, + "grad_norm": 475.9383239746094, + "learning_rate": 2.3999193603539234e-06, + "loss": 83.3428, + "step": 87450 + }, + { + "epoch": 0.7234975389833312, + "grad_norm": 1285.626708984375, + "learning_rate": 2.3987147896883263e-06, + "loss": 116.1732, + "step": 87460 + }, + { + "epoch": 0.7235802622327006, + "grad_norm": 1077.05078125, + "learning_rate": 2.397510425995055e-06, + "loss": 82.0933, + "step": 87470 + }, + { + "epoch": 0.7236629854820698, + "grad_norm": 661.8814697265625, + "learning_rate": 2.3963062693699353e-06, + "loss": 78.9931, + "step": 87480 + }, + { + "epoch": 0.723745708731439, + "grad_norm": 866.9628295898438, + "learning_rate": 2.3951023199087763e-06, + "loss": 92.3042, + "step": 87490 + }, + { + "epoch": 0.7238284319808082, + "grad_norm": 463.5540771484375, + "learning_rate": 2.393898577707371e-06, + "loss": 90.7406, + "step": 87500 + }, + { + "epoch": 0.7239111552301775, + "grad_norm": 667.8403930664062, + "learning_rate": 2.392695042861495e-06, + "loss": 99.6054, + "step": 87510 + }, + { + "epoch": 0.7239938784795467, + "grad_norm": 685.7771606445312, + "learning_rate": 2.391491715466909e-06, + "loss": 75.8269, + "step": 87520 + }, + { + "epoch": 0.7240766017289159, + "grad_norm": 666.2769775390625, + "learning_rate": 2.390288595619356e-06, + "loss": 74.8925, + "step": 87530 + }, + { + "epoch": 0.7241593249782852, + "grad_norm": 1205.9483642578125, + "learning_rate": 2.3890856834145625e-06, + "loss": 104.4413, + "step": 87540 + }, + { + "epoch": 0.7242420482276544, + "grad_norm": 823.16748046875, + 
"learning_rate": 2.3878829789482385e-06, + "loss": 112.2172, + "step": 87550 + }, + { + "epoch": 0.7243247714770236, + "grad_norm": 675.4729614257812, + "learning_rate": 2.3866804823160776e-06, + "loss": 71.6798, + "step": 87560 + }, + { + "epoch": 0.7244074947263929, + "grad_norm": 931.8217163085938, + "learning_rate": 2.385478193613758e-06, + "loss": 102.3784, + "step": 87570 + }, + { + "epoch": 0.7244902179757621, + "grad_norm": 906.8097534179688, + "learning_rate": 2.3842761129369387e-06, + "loss": 97.9349, + "step": 87580 + }, + { + "epoch": 0.7245729412251313, + "grad_norm": 899.681884765625, + "learning_rate": 2.3830742403812646e-06, + "loss": 72.5381, + "step": 87590 + }, + { + "epoch": 0.7246556644745006, + "grad_norm": 828.892822265625, + "learning_rate": 2.381872576042365e-06, + "loss": 87.6623, + "step": 87600 + }, + { + "epoch": 0.7247383877238698, + "grad_norm": 1114.9024658203125, + "learning_rate": 2.3806711200158473e-06, + "loss": 94.0342, + "step": 87610 + }, + { + "epoch": 0.724821110973239, + "grad_norm": 899.71630859375, + "learning_rate": 2.3794698723973057e-06, + "loss": 63.5924, + "step": 87620 + }, + { + "epoch": 0.7249038342226083, + "grad_norm": 1092.0023193359375, + "learning_rate": 2.3782688332823212e-06, + "loss": 83.4462, + "step": 87630 + }, + { + "epoch": 0.7249865574719775, + "grad_norm": 982.8212280273438, + "learning_rate": 2.3770680027664537e-06, + "loss": 82.2134, + "step": 87640 + }, + { + "epoch": 0.7250692807213467, + "grad_norm": 1915.584228515625, + "learning_rate": 2.3758673809452484e-06, + "loss": 76.1468, + "step": 87650 + }, + { + "epoch": 0.725152003970716, + "grad_norm": 647.2635498046875, + "learning_rate": 2.3746669679142315e-06, + "loss": 87.5242, + "step": 87660 + }, + { + "epoch": 0.7252347272200852, + "grad_norm": 834.3574829101562, + "learning_rate": 2.373466763768915e-06, + "loss": 96.0043, + "step": 87670 + }, + { + "epoch": 0.7253174504694544, + "grad_norm": 553.946044921875, + "learning_rate": 2.3722667686047945e-06, + "loss": 73.195, + "step": 87680 + }, + { + "epoch": 0.7254001737188237, + "grad_norm": 719.322998046875, + "learning_rate": 2.37106698251735e-06, + "loss": 87.8059, + "step": 87690 + }, + { + "epoch": 0.7254828969681929, + "grad_norm": 1009.9353637695312, + "learning_rate": 2.3698674056020378e-06, + "loss": 86.6256, + "step": 87700 + }, + { + "epoch": 0.7255656202175621, + "grad_norm": 732.2600708007812, + "learning_rate": 2.3686680379543057e-06, + "loss": 105.3399, + "step": 87710 + }, + { + "epoch": 0.7256483434669314, + "grad_norm": 1122.77783203125, + "learning_rate": 2.36746887966958e-06, + "loss": 75.9752, + "step": 87720 + }, + { + "epoch": 0.7257310667163006, + "grad_norm": 834.73388671875, + "learning_rate": 2.366269930843275e-06, + "loss": 101.7426, + "step": 87730 + }, + { + "epoch": 0.7258137899656698, + "grad_norm": 1176.048583984375, + "learning_rate": 2.3650711915707852e-06, + "loss": 89.6517, + "step": 87740 + }, + { + "epoch": 0.7258965132150391, + "grad_norm": 902.6798706054688, + "learning_rate": 2.363872661947488e-06, + "loss": 94.8531, + "step": 87750 + }, + { + "epoch": 0.7259792364644083, + "grad_norm": 875.5723876953125, + "learning_rate": 2.362674342068744e-06, + "loss": 98.4001, + "step": 87760 + }, + { + "epoch": 0.7260619597137775, + "grad_norm": 1041.4979248046875, + "learning_rate": 2.3614762320299e-06, + "loss": 86.5132, + "step": 87770 + }, + { + "epoch": 0.7261446829631468, + "grad_norm": 634.3117065429688, + "learning_rate": 2.3602783319262847e-06, + "loss": 88.5851, + "step": 87780 
+ }, + { + "epoch": 0.726227406212516, + "grad_norm": 468.2691955566406, + "learning_rate": 2.3590806418532052e-06, + "loss": 107.173, + "step": 87790 + }, + { + "epoch": 0.7263101294618852, + "grad_norm": 743.6190185546875, + "learning_rate": 2.3578831619059595e-06, + "loss": 75.1913, + "step": 87800 + }, + { + "epoch": 0.7263928527112545, + "grad_norm": 485.16510009765625, + "learning_rate": 2.3566858921798246e-06, + "loss": 69.3648, + "step": 87810 + }, + { + "epoch": 0.7264755759606237, + "grad_norm": 559.470703125, + "learning_rate": 2.3554888327700604e-06, + "loss": 79.9045, + "step": 87820 + }, + { + "epoch": 0.7265582992099929, + "grad_norm": 720.0167236328125, + "learning_rate": 2.3542919837719154e-06, + "loss": 82.7708, + "step": 87830 + }, + { + "epoch": 0.7266410224593622, + "grad_norm": 1213.031005859375, + "learning_rate": 2.3530953452806143e-06, + "loss": 86.5757, + "step": 87840 + }, + { + "epoch": 0.7267237457087314, + "grad_norm": 729.4437866210938, + "learning_rate": 2.351898917391369e-06, + "loss": 78.6642, + "step": 87850 + }, + { + "epoch": 0.7268064689581006, + "grad_norm": 938.8148193359375, + "learning_rate": 2.350702700199376e-06, + "loss": 78.4311, + "step": 87860 + }, + { + "epoch": 0.72688919220747, + "grad_norm": 770.9730834960938, + "learning_rate": 2.3495066937998085e-06, + "loss": 83.714, + "step": 87870 + }, + { + "epoch": 0.7269719154568391, + "grad_norm": 1002.52490234375, + "learning_rate": 2.3483108982878294e-06, + "loss": 80.0576, + "step": 87880 + }, + { + "epoch": 0.7270546387062083, + "grad_norm": 594.104248046875, + "learning_rate": 2.3471153137585823e-06, + "loss": 66.6908, + "step": 87890 + }, + { + "epoch": 0.7271373619555777, + "grad_norm": 819.463134765625, + "learning_rate": 2.345919940307195e-06, + "loss": 78.4313, + "step": 87900 + }, + { + "epoch": 0.7272200852049469, + "grad_norm": 1145.914306640625, + "learning_rate": 2.3447247780287746e-06, + "loss": 127.2008, + "step": 87910 + }, + { + "epoch": 0.727302808454316, + "grad_norm": 698.8175048828125, + "learning_rate": 2.3435298270184204e-06, + "loss": 99.664, + "step": 87920 + }, + { + "epoch": 0.7273855317036854, + "grad_norm": 763.3477172851562, + "learning_rate": 2.3423350873712057e-06, + "loss": 84.8625, + "step": 87930 + }, + { + "epoch": 0.7274682549530546, + "grad_norm": 1032.08349609375, + "learning_rate": 2.341140559182192e-06, + "loss": 83.9295, + "step": 87940 + }, + { + "epoch": 0.7275509782024238, + "grad_norm": 821.6600341796875, + "learning_rate": 2.339946242546422e-06, + "loss": 84.5949, + "step": 87950 + }, + { + "epoch": 0.7276337014517931, + "grad_norm": 1196.482421875, + "learning_rate": 2.3387521375589205e-06, + "loss": 103.7103, + "step": 87960 + }, + { + "epoch": 0.7277164247011623, + "grad_norm": 939.4242553710938, + "learning_rate": 2.3375582443146977e-06, + "loss": 97.3099, + "step": 87970 + }, + { + "epoch": 0.7277991479505315, + "grad_norm": 656.9828491210938, + "learning_rate": 2.3363645629087467e-06, + "loss": 77.6787, + "step": 87980 + }, + { + "epoch": 0.7278818711999008, + "grad_norm": 467.5138854980469, + "learning_rate": 2.3351710934360426e-06, + "loss": 96.5642, + "step": 87990 + }, + { + "epoch": 0.72796459444927, + "grad_norm": 705.623046875, + "learning_rate": 2.333977835991545e-06, + "loss": 90.1867, + "step": 88000 + }, + { + "epoch": 0.7280473176986392, + "grad_norm": 1090.185302734375, + "learning_rate": 2.3327847906701932e-06, + "loss": 85.2803, + "step": 88010 + }, + { + "epoch": 0.7281300409480085, + "grad_norm": 793.4896240234375, + 
"learning_rate": 2.3315919575669172e-06, + "loss": 94.3953, + "step": 88020 + }, + { + "epoch": 0.7282127641973777, + "grad_norm": 584.8360595703125, + "learning_rate": 2.330399336776625e-06, + "loss": 111.1713, + "step": 88030 + }, + { + "epoch": 0.7282954874467469, + "grad_norm": 953.3636474609375, + "learning_rate": 2.329206928394203e-06, + "loss": 98.6218, + "step": 88040 + }, + { + "epoch": 0.7283782106961162, + "grad_norm": 915.4071655273438, + "learning_rate": 2.3280147325145285e-06, + "loss": 115.2945, + "step": 88050 + }, + { + "epoch": 0.7284609339454854, + "grad_norm": 500.0581970214844, + "learning_rate": 2.3268227492324594e-06, + "loss": 102.7827, + "step": 88060 + }, + { + "epoch": 0.7285436571948546, + "grad_norm": 864.3734130859375, + "learning_rate": 2.325630978642836e-06, + "loss": 87.891, + "step": 88070 + }, + { + "epoch": 0.7286263804442239, + "grad_norm": 819.2616577148438, + "learning_rate": 2.3244394208404816e-06, + "loss": 80.8149, + "step": 88080 + }, + { + "epoch": 0.7287091036935931, + "grad_norm": 1025.54443359375, + "learning_rate": 2.3232480759202035e-06, + "loss": 112.5396, + "step": 88090 + }, + { + "epoch": 0.7287918269429623, + "grad_norm": 786.0123291015625, + "learning_rate": 2.3220569439767907e-06, + "loss": 95.3773, + "step": 88100 + }, + { + "epoch": 0.7288745501923316, + "grad_norm": 1449.097900390625, + "learning_rate": 2.320866025105016e-06, + "loss": 100.4124, + "step": 88110 + }, + { + "epoch": 0.7289572734417008, + "grad_norm": 668.387939453125, + "learning_rate": 2.319675319399639e-06, + "loss": 90.4953, + "step": 88120 + }, + { + "epoch": 0.72903999669107, + "grad_norm": 753.0546264648438, + "learning_rate": 2.3184848269553944e-06, + "loss": 94.4148, + "step": 88130 + }, + { + "epoch": 0.7291227199404393, + "grad_norm": 1106.7774658203125, + "learning_rate": 2.3172945478670056e-06, + "loss": 91.0953, + "step": 88140 + }, + { + "epoch": 0.7292054431898085, + "grad_norm": 582.337646484375, + "learning_rate": 2.316104482229178e-06, + "loss": 84.3528, + "step": 88150 + }, + { + "epoch": 0.7292881664391777, + "grad_norm": 1405.619873046875, + "learning_rate": 2.314914630136599e-06, + "loss": 76.6839, + "step": 88160 + }, + { + "epoch": 0.729370889688547, + "grad_norm": 870.0715942382812, + "learning_rate": 2.3137249916839394e-06, + "loss": 72.3327, + "step": 88170 + }, + { + "epoch": 0.7294536129379162, + "grad_norm": 719.5054931640625, + "learning_rate": 2.3125355669658547e-06, + "loss": 90.0703, + "step": 88180 + }, + { + "epoch": 0.7295363361872854, + "grad_norm": 780.942138671875, + "learning_rate": 2.3113463560769807e-06, + "loss": 109.0758, + "step": 88190 + }, + { + "epoch": 0.7296190594366547, + "grad_norm": 927.2631225585938, + "learning_rate": 2.310157359111938e-06, + "loss": 97.629, + "step": 88200 + }, + { + "epoch": 0.7297017826860239, + "grad_norm": 558.8131103515625, + "learning_rate": 2.3089685761653296e-06, + "loss": 102.3254, + "step": 88210 + }, + { + "epoch": 0.7297845059353931, + "grad_norm": 982.3890380859375, + "learning_rate": 2.3077800073317415e-06, + "loss": 102.417, + "step": 88220 + }, + { + "epoch": 0.7298672291847623, + "grad_norm": 754.8167724609375, + "learning_rate": 2.3065916527057426e-06, + "loss": 88.5576, + "step": 88230 + }, + { + "epoch": 0.7299499524341316, + "grad_norm": 659.7408447265625, + "learning_rate": 2.305403512381884e-06, + "loss": 85.5432, + "step": 88240 + }, + { + "epoch": 0.7300326756835008, + "grad_norm": 838.64697265625, + "learning_rate": 2.3042155864547024e-06, + "loss": 78.8474, + "step": 
88250 + }, + { + "epoch": 0.73011539893287, + "grad_norm": 750.2058715820312, + "learning_rate": 2.303027875018714e-06, + "loss": 88.2092, + "step": 88260 + }, + { + "epoch": 0.7301981221822393, + "grad_norm": 926.1990966796875, + "learning_rate": 2.3018403781684205e-06, + "loss": 79.4068, + "step": 88270 + }, + { + "epoch": 0.7302808454316085, + "grad_norm": 645.7316284179688, + "learning_rate": 2.3006530959983055e-06, + "loss": 67.1499, + "step": 88280 + }, + { + "epoch": 0.7303635686809777, + "grad_norm": 733.4593505859375, + "learning_rate": 2.299466028602835e-06, + "loss": 58.1151, + "step": 88290 + }, + { + "epoch": 0.730446291930347, + "grad_norm": 1105.638671875, + "learning_rate": 2.298279176076459e-06, + "loss": 87.0662, + "step": 88300 + }, + { + "epoch": 0.7305290151797162, + "grad_norm": 1261.8275146484375, + "learning_rate": 2.2970925385136093e-06, + "loss": 91.2717, + "step": 88310 + }, + { + "epoch": 0.7306117384290854, + "grad_norm": 857.1454467773438, + "learning_rate": 2.295906116008702e-06, + "loss": 77.8175, + "step": 88320 + }, + { + "epoch": 0.7306944616784548, + "grad_norm": 852.4544067382812, + "learning_rate": 2.2947199086561346e-06, + "loss": 92.5309, + "step": 88330 + }, + { + "epoch": 0.730777184927824, + "grad_norm": 1291.9940185546875, + "learning_rate": 2.293533916550289e-06, + "loss": 93.7867, + "step": 88340 + }, + { + "epoch": 0.7308599081771932, + "grad_norm": 805.106201171875, + "learning_rate": 2.292348139785528e-06, + "loss": 91.1033, + "step": 88350 + }, + { + "epoch": 0.7309426314265625, + "grad_norm": 1165.837890625, + "learning_rate": 2.2911625784562e-06, + "loss": 102.6606, + "step": 88360 + }, + { + "epoch": 0.7310253546759317, + "grad_norm": 1081.798095703125, + "learning_rate": 2.2899772326566327e-06, + "loss": 115.492, + "step": 88370 + }, + { + "epoch": 0.7311080779253009, + "grad_norm": 2020.0845947265625, + "learning_rate": 2.2887921024811405e-06, + "loss": 91.6865, + "step": 88380 + }, + { + "epoch": 0.7311908011746702, + "grad_norm": 739.7776489257812, + "learning_rate": 2.2876071880240174e-06, + "loss": 87.2011, + "step": 88390 + }, + { + "epoch": 0.7312735244240394, + "grad_norm": 1269.20751953125, + "learning_rate": 2.2864224893795423e-06, + "loss": 90.7345, + "step": 88400 + }, + { + "epoch": 0.7313562476734086, + "grad_norm": 859.2783203125, + "learning_rate": 2.285238006641976e-06, + "loss": 100.769, + "step": 88410 + }, + { + "epoch": 0.7314389709227779, + "grad_norm": 957.8426513671875, + "learning_rate": 2.284053739905563e-06, + "loss": 106.6209, + "step": 88420 + }, + { + "epoch": 0.7315216941721471, + "grad_norm": 1027.73095703125, + "learning_rate": 2.282869689264529e-06, + "loss": 90.6714, + "step": 88430 + }, + { + "epoch": 0.7316044174215163, + "grad_norm": 711.40576171875, + "learning_rate": 2.2816858548130837e-06, + "loss": 69.7044, + "step": 88440 + }, + { + "epoch": 0.7316871406708856, + "grad_norm": 721.7193603515625, + "learning_rate": 2.28050223664542e-06, + "loss": 75.2567, + "step": 88450 + }, + { + "epoch": 0.7317698639202548, + "grad_norm": 1569.3946533203125, + "learning_rate": 2.2793188348557136e-06, + "loss": 88.7201, + "step": 88460 + }, + { + "epoch": 0.731852587169624, + "grad_norm": 997.7343139648438, + "learning_rate": 2.2781356495381186e-06, + "loss": 142.2485, + "step": 88470 + }, + { + "epoch": 0.7319353104189933, + "grad_norm": 973.0025634765625, + "learning_rate": 2.276952680786779e-06, + "loss": 96.4867, + "step": 88480 + }, + { + "epoch": 0.7320180336683625, + "grad_norm": 626.2188110351562, + 
"learning_rate": 2.2757699286958186e-06, + "loss": 77.2272, + "step": 88490 + }, + { + "epoch": 0.7321007569177317, + "grad_norm": 411.275146484375, + "learning_rate": 2.274587393359342e-06, + "loss": 80.0432, + "step": 88500 + }, + { + "epoch": 0.732183480167101, + "grad_norm": 534.1046142578125, + "learning_rate": 2.273405074871438e-06, + "loss": 84.1023, + "step": 88510 + }, + { + "epoch": 0.7322662034164702, + "grad_norm": 793.4088745117188, + "learning_rate": 2.2722229733261795e-06, + "loss": 70.7006, + "step": 88520 + }, + { + "epoch": 0.7323489266658394, + "grad_norm": 710.7265014648438, + "learning_rate": 2.2710410888176205e-06, + "loss": 67.0532, + "step": 88530 + }, + { + "epoch": 0.7324316499152087, + "grad_norm": 667.3509521484375, + "learning_rate": 2.2698594214397966e-06, + "loss": 90.6114, + "step": 88540 + }, + { + "epoch": 0.7325143731645779, + "grad_norm": 981.4866943359375, + "learning_rate": 2.268677971286732e-06, + "loss": 102.2464, + "step": 88550 + }, + { + "epoch": 0.7325970964139471, + "grad_norm": 807.9349975585938, + "learning_rate": 2.2674967384524237e-06, + "loss": 76.0214, + "step": 88560 + }, + { + "epoch": 0.7326798196633164, + "grad_norm": 629.3372192382812, + "learning_rate": 2.2663157230308576e-06, + "loss": 127.0412, + "step": 88570 + }, + { + "epoch": 0.7327625429126856, + "grad_norm": 1088.6207275390625, + "learning_rate": 2.2651349251160055e-06, + "loss": 87.4096, + "step": 88580 + }, + { + "epoch": 0.7328452661620548, + "grad_norm": 692.8621215820312, + "learning_rate": 2.263954344801816e-06, + "loss": 73.1671, + "step": 88590 + }, + { + "epoch": 0.7329279894114241, + "grad_norm": 654.3468017578125, + "learning_rate": 2.2627739821822226e-06, + "loss": 78.6952, + "step": 88600 + }, + { + "epoch": 0.7330107126607933, + "grad_norm": 467.1300964355469, + "learning_rate": 2.261593837351141e-06, + "loss": 64.8719, + "step": 88610 + }, + { + "epoch": 0.7330934359101625, + "grad_norm": 724.4471435546875, + "learning_rate": 2.26041391040247e-06, + "loss": 114.528, + "step": 88620 + }, + { + "epoch": 0.7331761591595318, + "grad_norm": 960.2774658203125, + "learning_rate": 2.259234201430092e-06, + "loss": 106.1695, + "step": 88630 + }, + { + "epoch": 0.733258882408901, + "grad_norm": 810.5166625976562, + "learning_rate": 2.2580547105278716e-06, + "loss": 80.9764, + "step": 88640 + }, + { + "epoch": 0.7333416056582702, + "grad_norm": 919.3231811523438, + "learning_rate": 2.2568754377896516e-06, + "loss": 100.5872, + "step": 88650 + }, + { + "epoch": 0.7334243289076395, + "grad_norm": 684.9686889648438, + "learning_rate": 2.255696383309265e-06, + "loss": 85.3732, + "step": 88660 + }, + { + "epoch": 0.7335070521570087, + "grad_norm": 1018.3485107421875, + "learning_rate": 2.2545175471805197e-06, + "loss": 82.5631, + "step": 88670 + }, + { + "epoch": 0.7335897754063779, + "grad_norm": 658.5642700195312, + "learning_rate": 2.2533389294972153e-06, + "loss": 127.2706, + "step": 88680 + }, + { + "epoch": 0.7336724986557472, + "grad_norm": 582.38134765625, + "learning_rate": 2.2521605303531267e-06, + "loss": 81.0251, + "step": 88690 + }, + { + "epoch": 0.7337552219051164, + "grad_norm": 940.820556640625, + "learning_rate": 2.2509823498420142e-06, + "loss": 96.1019, + "step": 88700 + }, + { + "epoch": 0.7338379451544856, + "grad_norm": 1125.545654296875, + "learning_rate": 2.2498043880576193e-06, + "loss": 93.1604, + "step": 88710 + }, + { + "epoch": 0.733920668403855, + "grad_norm": 742.275634765625, + "learning_rate": 2.2486266450936695e-06, + "loss": 69.7256, + 
"step": 88720 + }, + { + "epoch": 0.7340033916532241, + "grad_norm": 985.7758178710938, + "learning_rate": 2.2474491210438687e-06, + "loss": 80.2849, + "step": 88730 + }, + { + "epoch": 0.7340861149025933, + "grad_norm": 607.6522216796875, + "learning_rate": 2.2462718160019086e-06, + "loss": 90.2372, + "step": 88740 + }, + { + "epoch": 0.7341688381519627, + "grad_norm": 690.2867431640625, + "learning_rate": 2.245094730061463e-06, + "loss": 67.3423, + "step": 88750 + }, + { + "epoch": 0.7342515614013319, + "grad_norm": 643.0440063476562, + "learning_rate": 2.2439178633161855e-06, + "loss": 68.1398, + "step": 88760 + }, + { + "epoch": 0.734334284650701, + "grad_norm": 737.9159545898438, + "learning_rate": 2.2427412158597133e-06, + "loss": 111.8004, + "step": 88770 + }, + { + "epoch": 0.7344170079000704, + "grad_norm": 791.5084228515625, + "learning_rate": 2.2415647877856706e-06, + "loss": 97.5052, + "step": 88780 + }, + { + "epoch": 0.7344997311494396, + "grad_norm": 746.7666015625, + "learning_rate": 2.240388579187658e-06, + "loss": 96.2792, + "step": 88790 + }, + { + "epoch": 0.7345824543988088, + "grad_norm": 920.7720947265625, + "learning_rate": 2.2392125901592615e-06, + "loss": 74.4708, + "step": 88800 + }, + { + "epoch": 0.7346651776481781, + "grad_norm": 1364.5985107421875, + "learning_rate": 2.23803682079405e-06, + "loss": 94.9622, + "step": 88810 + }, + { + "epoch": 0.7347479008975473, + "grad_norm": 917.6015625, + "learning_rate": 2.236861271185572e-06, + "loss": 80.0564, + "step": 88820 + }, + { + "epoch": 0.7348306241469165, + "grad_norm": 1057.025634765625, + "learning_rate": 2.2356859414273613e-06, + "loss": 73.4881, + "step": 88830 + }, + { + "epoch": 0.7349133473962858, + "grad_norm": 886.4762573242188, + "learning_rate": 2.2345108316129333e-06, + "loss": 103.8498, + "step": 88840 + }, + { + "epoch": 0.734996070645655, + "grad_norm": 528.6309814453125, + "learning_rate": 2.233335941835787e-06, + "loss": 77.8374, + "step": 88850 + }, + { + "epoch": 0.7350787938950242, + "grad_norm": 901.2240600585938, + "learning_rate": 2.232161272189401e-06, + "loss": 81.7305, + "step": 88860 + }, + { + "epoch": 0.7351615171443935, + "grad_norm": 1238.568359375, + "learning_rate": 2.230986822767241e-06, + "loss": 78.068, + "step": 88870 + }, + { + "epoch": 0.7352442403937627, + "grad_norm": 1001.0896606445312, + "learning_rate": 2.2298125936627517e-06, + "loss": 70.9309, + "step": 88880 + }, + { + "epoch": 0.7353269636431319, + "grad_norm": 1090.4779052734375, + "learning_rate": 2.228638584969363e-06, + "loss": 85.4851, + "step": 88890 + }, + { + "epoch": 0.7354096868925012, + "grad_norm": 472.36602783203125, + "learning_rate": 2.227464796780481e-06, + "loss": 83.7774, + "step": 88900 + }, + { + "epoch": 0.7354924101418704, + "grad_norm": 890.2085571289062, + "learning_rate": 2.226291229189501e-06, + "loss": 103.158, + "step": 88910 + }, + { + "epoch": 0.7355751333912396, + "grad_norm": 735.7196044921875, + "learning_rate": 2.225117882289799e-06, + "loss": 105.7497, + "step": 88920 + }, + { + "epoch": 0.7356578566406089, + "grad_norm": 647.6386108398438, + "learning_rate": 2.223944756174731e-06, + "loss": 79.5156, + "step": 88930 + }, + { + "epoch": 0.7357405798899781, + "grad_norm": 1861.40869140625, + "learning_rate": 2.2227718509376395e-06, + "loss": 133.0161, + "step": 88940 + }, + { + "epoch": 0.7358233031393473, + "grad_norm": 916.1025390625, + "learning_rate": 2.221599166671845e-06, + "loss": 104.4202, + "step": 88950 + }, + { + "epoch": 0.7359060263887165, + "grad_norm": 
1034.66015625, + "learning_rate": 2.220426703470653e-06, + "loss": 90.1667, + "step": 88960 + }, + { + "epoch": 0.7359887496380858, + "grad_norm": 748.0284423828125, + "learning_rate": 2.2192544614273526e-06, + "loss": 58.7081, + "step": 88970 + }, + { + "epoch": 0.736071472887455, + "grad_norm": 1415.480224609375, + "learning_rate": 2.218082440635215e-06, + "loss": 77.1677, + "step": 88980 + }, + { + "epoch": 0.7361541961368242, + "grad_norm": 431.0531921386719, + "learning_rate": 2.216910641187488e-06, + "loss": 132.108, + "step": 88990 + }, + { + "epoch": 0.7362369193861935, + "grad_norm": 609.1949462890625, + "learning_rate": 2.215739063177409e-06, + "loss": 85.791, + "step": 89000 + }, + { + "epoch": 0.7363196426355627, + "grad_norm": 3827.787841796875, + "learning_rate": 2.2145677066981948e-06, + "loss": 111.6699, + "step": 89010 + }, + { + "epoch": 0.7364023658849319, + "grad_norm": 393.4620361328125, + "learning_rate": 2.213396571843045e-06, + "loss": 80.0387, + "step": 89020 + }, + { + "epoch": 0.7364850891343012, + "grad_norm": 636.8660278320312, + "learning_rate": 2.2122256587051404e-06, + "loss": 74.4118, + "step": 89030 + }, + { + "epoch": 0.7365678123836704, + "grad_norm": 783.8397216796875, + "learning_rate": 2.211054967377647e-06, + "loss": 88.0103, + "step": 89040 + }, + { + "epoch": 0.7366505356330396, + "grad_norm": 449.6473388671875, + "learning_rate": 2.2098844979537093e-06, + "loss": 105.3095, + "step": 89050 + }, + { + "epoch": 0.7367332588824089, + "grad_norm": 1187.8668212890625, + "learning_rate": 2.208714250526456e-06, + "loss": 82.4545, + "step": 89060 + }, + { + "epoch": 0.7368159821317781, + "grad_norm": 714.7520751953125, + "learning_rate": 2.207544225189003e-06, + "loss": 105.3385, + "step": 89070 + }, + { + "epoch": 0.7368987053811473, + "grad_norm": 1098.642578125, + "learning_rate": 2.2063744220344386e-06, + "loss": 87.2493, + "step": 89080 + }, + { + "epoch": 0.7369814286305166, + "grad_norm": 833.3667602539062, + "learning_rate": 2.2052048411558403e-06, + "loss": 89.0781, + "step": 89090 + }, + { + "epoch": 0.7370641518798858, + "grad_norm": 1110.5281982421875, + "learning_rate": 2.204035482646267e-06, + "loss": 79.2814, + "step": 89100 + }, + { + "epoch": 0.737146875129255, + "grad_norm": 981.9432983398438, + "learning_rate": 2.2028663465987576e-06, + "loss": 73.8698, + "step": 89110 + }, + { + "epoch": 0.7372295983786243, + "grad_norm": 894.4478149414062, + "learning_rate": 2.201697433106336e-06, + "loss": 101.7881, + "step": 89120 + }, + { + "epoch": 0.7373123216279935, + "grad_norm": 516.5750732421875, + "learning_rate": 2.2005287422620083e-06, + "loss": 82.2681, + "step": 89130 + }, + { + "epoch": 0.7373950448773627, + "grad_norm": 549.2446899414062, + "learning_rate": 2.19936027415876e-06, + "loss": 72.0674, + "step": 89140 + }, + { + "epoch": 0.737477768126732, + "grad_norm": 639.7827758789062, + "learning_rate": 2.1981920288895615e-06, + "loss": 86.3662, + "step": 89150 + }, + { + "epoch": 0.7375604913761012, + "grad_norm": 978.5205078125, + "learning_rate": 2.197024006547364e-06, + "loss": 94.2316, + "step": 89160 + }, + { + "epoch": 0.7376432146254704, + "grad_norm": 625.5220947265625, + "learning_rate": 2.1958562072251033e-06, + "loss": 97.0904, + "step": 89170 + }, + { + "epoch": 0.7377259378748398, + "grad_norm": 1031.7874755859375, + "learning_rate": 2.1946886310156945e-06, + "loss": 97.9485, + "step": 89180 + }, + { + "epoch": 0.737808661124209, + "grad_norm": 531.7047729492188, + "learning_rate": 2.193521278012037e-06, + "loss": 99.0671, 
+ "step": 89190 + }, + { + "epoch": 0.7378913843735782, + "grad_norm": 391.3578796386719, + "learning_rate": 2.1923541483070114e-06, + "loss": 91.6598, + "step": 89200 + }, + { + "epoch": 0.7379741076229475, + "grad_norm": 606.8892211914062, + "learning_rate": 2.1911872419934804e-06, + "loss": 65.9362, + "step": 89210 + }, + { + "epoch": 0.7380568308723167, + "grad_norm": 890.9434814453125, + "learning_rate": 2.1900205591642904e-06, + "loss": 82.1377, + "step": 89220 + }, + { + "epoch": 0.7381395541216859, + "grad_norm": 1102.2276611328125, + "learning_rate": 2.188854099912268e-06, + "loss": 81.4802, + "step": 89230 + }, + { + "epoch": 0.7382222773710552, + "grad_norm": 614.1528930664062, + "learning_rate": 2.187687864330224e-06, + "loss": 88.8945, + "step": 89240 + }, + { + "epoch": 0.7383050006204244, + "grad_norm": 611.352294921875, + "learning_rate": 2.1865218525109496e-06, + "loss": 78.2255, + "step": 89250 + }, + { + "epoch": 0.7383877238697936, + "grad_norm": 934.4541015625, + "learning_rate": 2.185356064547219e-06, + "loss": 87.7939, + "step": 89260 + }, + { + "epoch": 0.7384704471191629, + "grad_norm": 1047.531494140625, + "learning_rate": 2.1841905005317893e-06, + "loss": 76.6948, + "step": 89270 + }, + { + "epoch": 0.7385531703685321, + "grad_norm": 455.1351623535156, + "learning_rate": 2.1830251605573984e-06, + "loss": 57.0161, + "step": 89280 + }, + { + "epoch": 0.7386358936179013, + "grad_norm": 902.6798095703125, + "learning_rate": 2.181860044716767e-06, + "loss": 77.3232, + "step": 89290 + }, + { + "epoch": 0.7387186168672706, + "grad_norm": 1158.1624755859375, + "learning_rate": 2.180695153102599e-06, + "loss": 94.8786, + "step": 89300 + }, + { + "epoch": 0.7388013401166398, + "grad_norm": 644.2076416015625, + "learning_rate": 2.179530485807578e-06, + "loss": 104.4438, + "step": 89310 + }, + { + "epoch": 0.738884063366009, + "grad_norm": 919.8838500976562, + "learning_rate": 2.1783660429243747e-06, + "loss": 127.025, + "step": 89320 + }, + { + "epoch": 0.7389667866153783, + "grad_norm": 1036.4383544921875, + "learning_rate": 2.177201824545632e-06, + "loss": 100.3359, + "step": 89330 + }, + { + "epoch": 0.7390495098647475, + "grad_norm": 775.053955078125, + "learning_rate": 2.1760378307639867e-06, + "loss": 101.0328, + "step": 89340 + }, + { + "epoch": 0.7391322331141167, + "grad_norm": 891.0201416015625, + "learning_rate": 2.174874061672051e-06, + "loss": 80.9924, + "step": 89350 + }, + { + "epoch": 0.739214956363486, + "grad_norm": 919.8565063476562, + "learning_rate": 2.173710517362421e-06, + "loss": 103.8065, + "step": 89360 + }, + { + "epoch": 0.7392976796128552, + "grad_norm": 565.3506469726562, + "learning_rate": 2.1725471979276734e-06, + "loss": 72.2678, + "step": 89370 + }, + { + "epoch": 0.7393804028622244, + "grad_norm": 666.4319458007812, + "learning_rate": 2.17138410346037e-06, + "loss": 75.9782, + "step": 89380 + }, + { + "epoch": 0.7394631261115937, + "grad_norm": 745.9927368164062, + "learning_rate": 2.1702212340530515e-06, + "loss": 111.4429, + "step": 89390 + }, + { + "epoch": 0.7395458493609629, + "grad_norm": 968.5650024414062, + "learning_rate": 2.1690585897982423e-06, + "loss": 100.4713, + "step": 89400 + }, + { + "epoch": 0.7396285726103321, + "grad_norm": 831.1185302734375, + "learning_rate": 2.167896170788451e-06, + "loss": 62.0955, + "step": 89410 + }, + { + "epoch": 0.7397112958597014, + "grad_norm": 1192.0394287109375, + "learning_rate": 2.16673397711616e-06, + "loss": 110.7184, + "step": 89420 + }, + { + "epoch": 0.7397940191090706, + 
"grad_norm": 985.3240966796875, + "learning_rate": 2.165572008873845e-06, + "loss": 95.5798, + "step": 89430 + }, + { + "epoch": 0.7398767423584398, + "grad_norm": 285.2931213378906, + "learning_rate": 2.1644102661539573e-06, + "loss": 76.2906, + "step": 89440 + }, + { + "epoch": 0.7399594656078091, + "grad_norm": 1738.4462890625, + "learning_rate": 2.1632487490489314e-06, + "loss": 90.5874, + "step": 89450 + }, + { + "epoch": 0.7400421888571783, + "grad_norm": 746.9173583984375, + "learning_rate": 2.162087457651183e-06, + "loss": 92.9749, + "step": 89460 + }, + { + "epoch": 0.7401249121065475, + "grad_norm": 530.8255004882812, + "learning_rate": 2.1609263920531115e-06, + "loss": 77.7234, + "step": 89470 + }, + { + "epoch": 0.7402076353559168, + "grad_norm": 0.0, + "learning_rate": 2.159765552347098e-06, + "loss": 63.3263, + "step": 89480 + }, + { + "epoch": 0.740290358605286, + "grad_norm": 911.8055419921875, + "learning_rate": 2.1586049386255036e-06, + "loss": 127.8942, + "step": 89490 + }, + { + "epoch": 0.7403730818546552, + "grad_norm": 723.2269897460938, + "learning_rate": 2.1574445509806764e-06, + "loss": 89.7021, + "step": 89500 + }, + { + "epoch": 0.7404558051040245, + "grad_norm": 584.854736328125, + "learning_rate": 2.156284389504939e-06, + "loss": 96.6398, + "step": 89510 + }, + { + "epoch": 0.7405385283533937, + "grad_norm": 399.6296691894531, + "learning_rate": 2.1551244542905995e-06, + "loss": 63.739, + "step": 89520 + }, + { + "epoch": 0.7406212516027629, + "grad_norm": 729.1290893554688, + "learning_rate": 2.1539647454299535e-06, + "loss": 64.4943, + "step": 89530 + }, + { + "epoch": 0.7407039748521322, + "grad_norm": 1142.3839111328125, + "learning_rate": 2.152805263015271e-06, + "loss": 98.2541, + "step": 89540 + }, + { + "epoch": 0.7407866981015014, + "grad_norm": 761.3294067382812, + "learning_rate": 2.1516460071388062e-06, + "loss": 78.2914, + "step": 89550 + }, + { + "epoch": 0.7408694213508706, + "grad_norm": 777.5975341796875, + "learning_rate": 2.1504869778927965e-06, + "loss": 86.9927, + "step": 89560 + }, + { + "epoch": 0.74095214460024, + "grad_norm": 767.6286010742188, + "learning_rate": 2.149328175369461e-06, + "loss": 74.6266, + "step": 89570 + }, + { + "epoch": 0.7410348678496091, + "grad_norm": 1057.6436767578125, + "learning_rate": 2.148169599661001e-06, + "loss": 79.5687, + "step": 89580 + }, + { + "epoch": 0.7411175910989783, + "grad_norm": 512.66015625, + "learning_rate": 2.147011250859597e-06, + "loss": 92.7999, + "step": 89590 + }, + { + "epoch": 0.7412003143483477, + "grad_norm": 878.7671508789062, + "learning_rate": 2.1458531290574138e-06, + "loss": 114.662, + "step": 89600 + }, + { + "epoch": 0.7412830375977169, + "grad_norm": 781.1104125976562, + "learning_rate": 2.144695234346598e-06, + "loss": 89.1826, + "step": 89610 + }, + { + "epoch": 0.741365760847086, + "grad_norm": 868.6851196289062, + "learning_rate": 2.143537566819277e-06, + "loss": 94.632, + "step": 89620 + }, + { + "epoch": 0.7414484840964554, + "grad_norm": 1113.6846923828125, + "learning_rate": 2.1423801265675643e-06, + "loss": 114.5532, + "step": 89630 + }, + { + "epoch": 0.7415312073458246, + "grad_norm": 987.2115478515625, + "learning_rate": 2.14122291368355e-06, + "loss": 101.5493, + "step": 89640 + }, + { + "epoch": 0.7416139305951938, + "grad_norm": 961.3772583007812, + "learning_rate": 2.1400659282593083e-06, + "loss": 84.6896, + "step": 89650 + }, + { + "epoch": 0.7416966538445631, + "grad_norm": 910.09033203125, + "learning_rate": 2.1389091703868954e-06, + "loss": 99.8653, 
+ "step": 89660 + }, + { + "epoch": 0.7417793770939323, + "grad_norm": 1119.726318359375, + "learning_rate": 2.137752640158351e-06, + "loss": 99.0549, + "step": 89670 + }, + { + "epoch": 0.7418621003433015, + "grad_norm": 716.283447265625, + "learning_rate": 2.136596337665691e-06, + "loss": 94.7824, + "step": 89680 + }, + { + "epoch": 0.7419448235926707, + "grad_norm": 1223.6080322265625, + "learning_rate": 2.13544026300092e-06, + "loss": 125.8443, + "step": 89690 + }, + { + "epoch": 0.74202754684204, + "grad_norm": 1124.6939697265625, + "learning_rate": 2.13428441625602e-06, + "loss": 81.6936, + "step": 89700 + }, + { + "epoch": 0.7421102700914092, + "grad_norm": 617.048095703125, + "learning_rate": 2.1331287975229574e-06, + "loss": 88.8294, + "step": 89710 + }, + { + "epoch": 0.7421929933407784, + "grad_norm": 373.8196105957031, + "learning_rate": 2.131973406893677e-06, + "loss": 72.712, + "step": 89720 + }, + { + "epoch": 0.7422757165901477, + "grad_norm": 483.8746643066406, + "learning_rate": 2.1308182444601126e-06, + "loss": 92.3466, + "step": 89730 + }, + { + "epoch": 0.7423584398395169, + "grad_norm": 708.9064331054688, + "learning_rate": 2.1296633103141724e-06, + "loss": 105.1006, + "step": 89740 + }, + { + "epoch": 0.7424411630888861, + "grad_norm": 1248.171142578125, + "learning_rate": 2.1285086045477515e-06, + "loss": 82.8471, + "step": 89750 + }, + { + "epoch": 0.7425238863382554, + "grad_norm": 773.7487182617188, + "learning_rate": 2.12735412725272e-06, + "loss": 86.4886, + "step": 89760 + }, + { + "epoch": 0.7426066095876246, + "grad_norm": 810.4151000976562, + "learning_rate": 2.1261998785209382e-06, + "loss": 83.5308, + "step": 89770 + }, + { + "epoch": 0.7426893328369938, + "grad_norm": 606.5125732421875, + "learning_rate": 2.125045858444242e-06, + "loss": 60.5102, + "step": 89780 + }, + { + "epoch": 0.7427720560863631, + "grad_norm": 420.3428955078125, + "learning_rate": 2.1238920671144534e-06, + "loss": 87.7662, + "step": 89790 + }, + { + "epoch": 0.7428547793357323, + "grad_norm": 703.5516357421875, + "learning_rate": 2.122738504623373e-06, + "loss": 106.8324, + "step": 89800 + }, + { + "epoch": 0.7429375025851015, + "grad_norm": 1249.0703125, + "learning_rate": 2.121585171062785e-06, + "loss": 86.0322, + "step": 89810 + }, + { + "epoch": 0.7430202258344708, + "grad_norm": 1007.9696655273438, + "learning_rate": 2.1204320665244533e-06, + "loss": 83.4045, + "step": 89820 + }, + { + "epoch": 0.74310294908384, + "grad_norm": 926.9172973632812, + "learning_rate": 2.119279191100128e-06, + "loss": 96.8335, + "step": 89830 + }, + { + "epoch": 0.7431856723332092, + "grad_norm": 743.4456176757812, + "learning_rate": 2.1181265448815388e-06, + "loss": 71.0544, + "step": 89840 + }, + { + "epoch": 0.7432683955825785, + "grad_norm": 1535.4232177734375, + "learning_rate": 2.1169741279603927e-06, + "loss": 123.924, + "step": 89850 + }, + { + "epoch": 0.7433511188319477, + "grad_norm": 1149.1324462890625, + "learning_rate": 2.1158219404283836e-06, + "loss": 84.1763, + "step": 89860 + }, + { + "epoch": 0.7434338420813169, + "grad_norm": 632.4330444335938, + "learning_rate": 2.1146699823771867e-06, + "loss": 66.9591, + "step": 89870 + }, + { + "epoch": 0.7435165653306862, + "grad_norm": 1532.528076171875, + "learning_rate": 2.1135182538984565e-06, + "loss": 107.8088, + "step": 89880 + }, + { + "epoch": 0.7435992885800554, + "grad_norm": 1111.4703369140625, + "learning_rate": 2.1123667550838322e-06, + "loss": 78.3602, + "step": 89890 + }, + { + "epoch": 0.7436820118294246, + "grad_norm": 
775.93994140625, + "learning_rate": 2.1112154860249327e-06, + "loss": 82.3048, + "step": 89900 + }, + { + "epoch": 0.7437647350787939, + "grad_norm": 840.6805419921875, + "learning_rate": 2.1100644468133574e-06, + "loss": 63.061, + "step": 89910 + }, + { + "epoch": 0.7438474583281631, + "grad_norm": 707.5786743164062, + "learning_rate": 2.1089136375406934e-06, + "loss": 85.1319, + "step": 89920 + }, + { + "epoch": 0.7439301815775323, + "grad_norm": 895.3738403320312, + "learning_rate": 2.107763058298504e-06, + "loss": 109.1324, + "step": 89930 + }, + { + "epoch": 0.7440129048269016, + "grad_norm": 349.6246643066406, + "learning_rate": 2.106612709178333e-06, + "loss": 71.1513, + "step": 89940 + }, + { + "epoch": 0.7440956280762708, + "grad_norm": 1578.1759033203125, + "learning_rate": 2.10546259027171e-06, + "loss": 89.1613, + "step": 89950 + }, + { + "epoch": 0.74417835132564, + "grad_norm": 891.1797485351562, + "learning_rate": 2.1043127016701442e-06, + "loss": 84.265, + "step": 89960 + }, + { + "epoch": 0.7442610745750093, + "grad_norm": 481.6813659667969, + "learning_rate": 2.1031630434651277e-06, + "loss": 81.8503, + "step": 89970 + }, + { + "epoch": 0.7443437978243785, + "grad_norm": 1180.74169921875, + "learning_rate": 2.102013615748133e-06, + "loss": 103.7244, + "step": 89980 + }, + { + "epoch": 0.7444265210737477, + "grad_norm": 867.1542358398438, + "learning_rate": 2.1008644186106146e-06, + "loss": 77.129, + "step": 89990 + }, + { + "epoch": 0.744509244323117, + "grad_norm": 0.0, + "learning_rate": 2.09971545214401e-06, + "loss": 64.4092, + "step": 90000 + }, + { + "epoch": 0.7445919675724862, + "grad_norm": 453.9908142089844, + "learning_rate": 2.0985667164397355e-06, + "loss": 56.7564, + "step": 90010 + }, + { + "epoch": 0.7446746908218554, + "grad_norm": 363.618896484375, + "learning_rate": 2.0974182115891924e-06, + "loss": 91.9298, + "step": 90020 + }, + { + "epoch": 0.7447574140712248, + "grad_norm": 873.480224609375, + "learning_rate": 2.0962699376837604e-06, + "loss": 74.911, + "step": 90030 + }, + { + "epoch": 0.744840137320594, + "grad_norm": 507.69085693359375, + "learning_rate": 2.0951218948148034e-06, + "loss": 99.115, + "step": 90040 + }, + { + "epoch": 0.7449228605699632, + "grad_norm": 1094.7142333984375, + "learning_rate": 2.093974083073666e-06, + "loss": 78.3589, + "step": 90050 + }, + { + "epoch": 0.7450055838193325, + "grad_norm": 914.5409545898438, + "learning_rate": 2.0928265025516737e-06, + "loss": 77.5211, + "step": 90060 + }, + { + "epoch": 0.7450883070687017, + "grad_norm": 433.937744140625, + "learning_rate": 2.0916791533401344e-06, + "loss": 117.6988, + "step": 90070 + }, + { + "epoch": 0.7451710303180709, + "grad_norm": 891.2142944335938, + "learning_rate": 2.090532035530337e-06, + "loss": 70.6285, + "step": 90080 + }, + { + "epoch": 0.7452537535674402, + "grad_norm": 684.631103515625, + "learning_rate": 2.0893851492135536e-06, + "loss": 66.5324, + "step": 90090 + }, + { + "epoch": 0.7453364768168094, + "grad_norm": 881.5846557617188, + "learning_rate": 2.0882384944810358e-06, + "loss": 112.9255, + "step": 90100 + }, + { + "epoch": 0.7454192000661786, + "grad_norm": 773.873291015625, + "learning_rate": 2.087092071424017e-06, + "loss": 105.3122, + "step": 90110 + }, + { + "epoch": 0.7455019233155479, + "grad_norm": 1046.751708984375, + "learning_rate": 2.085945880133715e-06, + "loss": 73.7329, + "step": 90120 + }, + { + "epoch": 0.7455846465649171, + "grad_norm": 880.5443725585938, + "learning_rate": 2.0847999207013247e-06, + "loss": 77.2783, + "step": 
90130 + }, + { + "epoch": 0.7456673698142863, + "grad_norm": 625.60107421875, + "learning_rate": 2.083654193218026e-06, + "loss": 91.1625, + "step": 90140 + }, + { + "epoch": 0.7457500930636556, + "grad_norm": 1652.64697265625, + "learning_rate": 2.0825086977749793e-06, + "loss": 76.7295, + "step": 90150 + }, + { + "epoch": 0.7458328163130248, + "grad_norm": 902.3091430664062, + "learning_rate": 2.0813634344633256e-06, + "loss": 83.2874, + "step": 90160 + }, + { + "epoch": 0.745915539562394, + "grad_norm": 697.8639526367188, + "learning_rate": 2.0802184033741886e-06, + "loss": 94.8818, + "step": 90170 + }, + { + "epoch": 0.7459982628117633, + "grad_norm": 1140.19287109375, + "learning_rate": 2.0790736045986737e-06, + "loss": 84.9298, + "step": 90180 + }, + { + "epoch": 0.7460809860611325, + "grad_norm": 509.9717712402344, + "learning_rate": 2.077929038227867e-06, + "loss": 78.4466, + "step": 90190 + }, + { + "epoch": 0.7461637093105017, + "grad_norm": 620.2896118164062, + "learning_rate": 2.076784704352835e-06, + "loss": 67.6305, + "step": 90200 + }, + { + "epoch": 0.746246432559871, + "grad_norm": 776.4671020507812, + "learning_rate": 2.075640603064629e-06, + "loss": 123.8405, + "step": 90210 + }, + { + "epoch": 0.7463291558092402, + "grad_norm": 863.0435180664062, + "learning_rate": 2.07449673445428e-06, + "loss": 90.1876, + "step": 90220 + }, + { + "epoch": 0.7464118790586094, + "grad_norm": 1178.6463623046875, + "learning_rate": 2.0733530986127985e-06, + "loss": 101.5113, + "step": 90230 + }, + { + "epoch": 0.7464946023079787, + "grad_norm": 1176.33203125, + "learning_rate": 2.07220969563118e-06, + "loss": 90.9558, + "step": 90240 + }, + { + "epoch": 0.7465773255573479, + "grad_norm": 1168.3426513671875, + "learning_rate": 2.0710665256003994e-06, + "loss": 117.3465, + "step": 90250 + }, + { + "epoch": 0.7466600488067171, + "grad_norm": 598.4938354492188, + "learning_rate": 2.069923588611413e-06, + "loss": 88.4196, + "step": 90260 + }, + { + "epoch": 0.7467427720560864, + "grad_norm": 908.4249877929688, + "learning_rate": 2.068780884755161e-06, + "loss": 71.9552, + "step": 90270 + }, + { + "epoch": 0.7468254953054556, + "grad_norm": 610.5745849609375, + "learning_rate": 2.0676384141225586e-06, + "loss": 112.2541, + "step": 90280 + }, + { + "epoch": 0.7469082185548248, + "grad_norm": 722.8043212890625, + "learning_rate": 2.066496176804511e-06, + "loss": 77.0322, + "step": 90290 + }, + { + "epoch": 0.7469909418041941, + "grad_norm": 637.941650390625, + "learning_rate": 2.0653541728919002e-06, + "loss": 97.5822, + "step": 90300 + }, + { + "epoch": 0.7470736650535633, + "grad_norm": 1051.8499755859375, + "learning_rate": 2.0642124024755895e-06, + "loss": 78.8491, + "step": 90310 + }, + { + "epoch": 0.7471563883029325, + "grad_norm": 1052.2393798828125, + "learning_rate": 2.0630708656464245e-06, + "loss": 98.3979, + "step": 90320 + }, + { + "epoch": 0.7472391115523018, + "grad_norm": 490.3679504394531, + "learning_rate": 2.0619295624952318e-06, + "loss": 66.2834, + "step": 90330 + }, + { + "epoch": 0.747321834801671, + "grad_norm": 892.2313232421875, + "learning_rate": 2.0607884931128205e-06, + "loss": 76.3967, + "step": 90340 + }, + { + "epoch": 0.7474045580510402, + "grad_norm": 1203.5242919921875, + "learning_rate": 2.059647657589979e-06, + "loss": 84.8439, + "step": 90350 + }, + { + "epoch": 0.7474872813004095, + "grad_norm": 890.0416870117188, + "learning_rate": 2.0585070560174807e-06, + "loss": 74.8649, + "step": 90360 + }, + { + "epoch": 0.7475700045497787, + "grad_norm": 
819.0018310546875, + "learning_rate": 2.057366688486073e-06, + "loss": 88.5144, + "step": 90370 + }, + { + "epoch": 0.7476527277991479, + "grad_norm": 1071.549072265625, + "learning_rate": 2.056226555086495e-06, + "loss": 79.6231, + "step": 90380 + }, + { + "epoch": 0.7477354510485172, + "grad_norm": 949.044677734375, + "learning_rate": 2.0550866559094597e-06, + "loss": 107.752, + "step": 90390 + }, + { + "epoch": 0.7478181742978864, + "grad_norm": 852.3134155273438, + "learning_rate": 2.053946991045664e-06, + "loss": 70.428, + "step": 90400 + }, + { + "epoch": 0.7479008975472556, + "grad_norm": 280.0954284667969, + "learning_rate": 2.0528075605857855e-06, + "loss": 89.7523, + "step": 90410 + }, + { + "epoch": 0.7479836207966248, + "grad_norm": 1142.2628173828125, + "learning_rate": 2.0516683646204836e-06, + "loss": 93.6377, + "step": 90420 + }, + { + "epoch": 0.7480663440459941, + "grad_norm": 561.314453125, + "learning_rate": 2.0505294032403987e-06, + "loss": 98.4227, + "step": 90430 + }, + { + "epoch": 0.7481490672953633, + "grad_norm": 538.2353515625, + "learning_rate": 2.0493906765361556e-06, + "loss": 81.2393, + "step": 90440 + }, + { + "epoch": 0.7482317905447325, + "grad_norm": 1096.204345703125, + "learning_rate": 2.0482521845983522e-06, + "loss": 92.642, + "step": 90450 + }, + { + "epoch": 0.7483145137941019, + "grad_norm": 855.7373046875, + "learning_rate": 2.047113927517576e-06, + "loss": 88.2958, + "step": 90460 + }, + { + "epoch": 0.748397237043471, + "grad_norm": 829.8861083984375, + "learning_rate": 2.0459759053843913e-06, + "loss": 84.8314, + "step": 90470 + }, + { + "epoch": 0.7484799602928403, + "grad_norm": 499.6122741699219, + "learning_rate": 2.0448381182893485e-06, + "loss": 87.0093, + "step": 90480 + }, + { + "epoch": 0.7485626835422096, + "grad_norm": 1019.7319946289062, + "learning_rate": 2.043700566322974e-06, + "loss": 91.0243, + "step": 90490 + }, + { + "epoch": 0.7486454067915788, + "grad_norm": 1175.784912109375, + "learning_rate": 2.0425632495757776e-06, + "loss": 85.5139, + "step": 90500 + }, + { + "epoch": 0.748728130040948, + "grad_norm": 1392.78759765625, + "learning_rate": 2.0414261681382507e-06, + "loss": 93.0289, + "step": 90510 + }, + { + "epoch": 0.7488108532903173, + "grad_norm": 986.0319213867188, + "learning_rate": 2.0402893221008657e-06, + "loss": 91.0319, + "step": 90520 + }, + { + "epoch": 0.7488935765396865, + "grad_norm": 1051.900146484375, + "learning_rate": 2.0391527115540777e-06, + "loss": 103.051, + "step": 90530 + }, + { + "epoch": 0.7489762997890557, + "grad_norm": 724.2633056640625, + "learning_rate": 2.0380163365883188e-06, + "loss": 93.0482, + "step": 90540 + }, + { + "epoch": 0.749059023038425, + "grad_norm": 1194.2574462890625, + "learning_rate": 2.0368801972940055e-06, + "loss": 113.0047, + "step": 90550 + }, + { + "epoch": 0.7491417462877942, + "grad_norm": 970.729736328125, + "learning_rate": 2.0357442937615367e-06, + "loss": 93.03, + "step": 90560 + }, + { + "epoch": 0.7492244695371634, + "grad_norm": 687.0914916992188, + "learning_rate": 2.034608626081288e-06, + "loss": 87.3318, + "step": 90570 + }, + { + "epoch": 0.7493071927865327, + "grad_norm": 732.1859130859375, + "learning_rate": 2.0334731943436235e-06, + "loss": 84.7605, + "step": 90580 + }, + { + "epoch": 0.7493899160359019, + "grad_norm": 1996.309814453125, + "learning_rate": 2.032337998638883e-06, + "loss": 117.3526, + "step": 90590 + }, + { + "epoch": 0.7494726392852711, + "grad_norm": 1726.9215087890625, + "learning_rate": 2.031203039057388e-06, + "loss": 91.3886, 
+ "step": 90600 + }, + { + "epoch": 0.7495553625346404, + "grad_norm": 1001.2156372070312, + "learning_rate": 2.0300683156894435e-06, + "loss": 94.4224, + "step": 90610 + }, + { + "epoch": 0.7496380857840096, + "grad_norm": 1375.769775390625, + "learning_rate": 2.028933828625332e-06, + "loss": 69.2318, + "step": 90620 + }, + { + "epoch": 0.7497208090333788, + "grad_norm": 1265.17919921875, + "learning_rate": 2.0277995779553193e-06, + "loss": 77.7545, + "step": 90630 + }, + { + "epoch": 0.7498035322827481, + "grad_norm": 631.8235473632812, + "learning_rate": 2.026665563769655e-06, + "loss": 100.6061, + "step": 90640 + }, + { + "epoch": 0.7498862555321173, + "grad_norm": 845.4620971679688, + "learning_rate": 2.025531786158565e-06, + "loss": 123.6531, + "step": 90650 + }, + { + "epoch": 0.7499689787814865, + "grad_norm": 950.5982055664062, + "learning_rate": 2.02439824521226e-06, + "loss": 89.5225, + "step": 90660 + }, + { + "epoch": 0.7500517020308558, + "grad_norm": 1280.9688720703125, + "learning_rate": 2.023264941020929e-06, + "loss": 139.323, + "step": 90670 + }, + { + "epoch": 0.750134425280225, + "grad_norm": 989.0846557617188, + "learning_rate": 2.022131873674747e-06, + "loss": 88.5975, + "step": 90680 + }, + { + "epoch": 0.7502171485295942, + "grad_norm": 905.4840087890625, + "learning_rate": 2.020999043263865e-06, + "loss": 87.5264, + "step": 90690 + }, + { + "epoch": 0.7502998717789635, + "grad_norm": 810.5401611328125, + "learning_rate": 2.0198664498784194e-06, + "loss": 93.4054, + "step": 90700 + }, + { + "epoch": 0.7503825950283327, + "grad_norm": 836.0730590820312, + "learning_rate": 2.018734093608521e-06, + "loss": 88.2675, + "step": 90710 + }, + { + "epoch": 0.7504653182777019, + "grad_norm": 629.2024536132812, + "learning_rate": 2.017601974544269e-06, + "loss": 64.2829, + "step": 90720 + }, + { + "epoch": 0.7505480415270712, + "grad_norm": 597.7532348632812, + "learning_rate": 2.0164700927757407e-06, + "loss": 80.799, + "step": 90730 + }, + { + "epoch": 0.7506307647764404, + "grad_norm": 818.9828491210938, + "learning_rate": 2.0153384483929946e-06, + "loss": 89.5458, + "step": 90740 + }, + { + "epoch": 0.7507134880258096, + "grad_norm": 1135.253662109375, + "learning_rate": 2.0142070414860704e-06, + "loss": 113.3995, + "step": 90750 + }, + { + "epoch": 0.7507962112751789, + "grad_norm": 1244.027587890625, + "learning_rate": 2.0130758721449887e-06, + "loss": 104.453, + "step": 90760 + }, + { + "epoch": 0.7508789345245481, + "grad_norm": 731.18505859375, + "learning_rate": 2.01194494045975e-06, + "loss": 71.3096, + "step": 90770 + }, + { + "epoch": 0.7509616577739173, + "grad_norm": 613.3550415039062, + "learning_rate": 2.0108142465203413e-06, + "loss": 84.1263, + "step": 90780 + }, + { + "epoch": 0.7510443810232866, + "grad_norm": 1097.283935546875, + "learning_rate": 2.0096837904167252e-06, + "loss": 87.5751, + "step": 90790 + }, + { + "epoch": 0.7511271042726558, + "grad_norm": 1566.380615234375, + "learning_rate": 2.0085535722388454e-06, + "loss": 91.1146, + "step": 90800 + }, + { + "epoch": 0.751209827522025, + "grad_norm": 623.9690551757812, + "learning_rate": 2.007423592076629e-06, + "loss": 108.3401, + "step": 90810 + }, + { + "epoch": 0.7512925507713943, + "grad_norm": 744.1317749023438, + "learning_rate": 2.006293850019983e-06, + "loss": 98.0858, + "step": 90820 + }, + { + "epoch": 0.7513752740207635, + "grad_norm": 626.7324829101562, + "learning_rate": 2.005164346158796e-06, + "loss": 86.4027, + "step": 90830 + }, + { + "epoch": 0.7514579972701327, + "grad_norm": 
633.4637451171875, + "learning_rate": 2.004035080582938e-06, + "loss": 84.082, + "step": 90840 + }, + { + "epoch": 0.751540720519502, + "grad_norm": 663.2689819335938, + "learning_rate": 2.002906053382258e-06, + "loss": 68.8682, + "step": 90850 + }, + { + "epoch": 0.7516234437688712, + "grad_norm": 998.2990112304688, + "learning_rate": 2.001777264646588e-06, + "loss": 89.2042, + "step": 90860 + }, + { + "epoch": 0.7517061670182404, + "grad_norm": 1075.44384765625, + "learning_rate": 2.000648714465744e-06, + "loss": 72.4316, + "step": 90870 + }, + { + "epoch": 0.7517888902676098, + "grad_norm": 646.1034545898438, + "learning_rate": 1.9995204029295147e-06, + "loss": 84.8476, + "step": 90880 + }, + { + "epoch": 0.751871613516979, + "grad_norm": 634.7570190429688, + "learning_rate": 1.9983923301276764e-06, + "loss": 76.6496, + "step": 90890 + }, + { + "epoch": 0.7519543367663482, + "grad_norm": 586.5011596679688, + "learning_rate": 1.9972644961499853e-06, + "loss": 68.339, + "step": 90900 + }, + { + "epoch": 0.7520370600157175, + "grad_norm": 1039.88037109375, + "learning_rate": 1.9961369010861777e-06, + "loss": 81.439, + "step": 90910 + }, + { + "epoch": 0.7521197832650867, + "grad_norm": 638.9922485351562, + "learning_rate": 1.995009545025971e-06, + "loss": 94.2341, + "step": 90920 + }, + { + "epoch": 0.7522025065144559, + "grad_norm": 710.9326171875, + "learning_rate": 1.9938824280590635e-06, + "loss": 81.596, + "step": 90930 + }, + { + "epoch": 0.7522852297638252, + "grad_norm": 1036.0745849609375, + "learning_rate": 1.992755550275135e-06, + "loss": 91.3788, + "step": 90940 + }, + { + "epoch": 0.7523679530131944, + "grad_norm": 930.1054077148438, + "learning_rate": 1.991628911763846e-06, + "loss": 102.8395, + "step": 90950 + }, + { + "epoch": 0.7524506762625636, + "grad_norm": 1212.5616455078125, + "learning_rate": 1.990502512614838e-06, + "loss": 120.0019, + "step": 90960 + }, + { + "epoch": 0.7525333995119329, + "grad_norm": 708.0130004882812, + "learning_rate": 1.989376352917733e-06, + "loss": 83.0712, + "step": 90970 + }, + { + "epoch": 0.7526161227613021, + "grad_norm": 1019.6633911132812, + "learning_rate": 1.988250432762135e-06, + "loss": 73.8178, + "step": 90980 + }, + { + "epoch": 0.7526988460106713, + "grad_norm": 602.7006225585938, + "learning_rate": 1.987124752237628e-06, + "loss": 84.8277, + "step": 90990 + }, + { + "epoch": 0.7527815692600406, + "grad_norm": 908.8017578125, + "learning_rate": 1.9859993114337773e-06, + "loss": 100.6673, + "step": 91000 + }, + { + "epoch": 0.7528642925094098, + "grad_norm": 3168.759765625, + "learning_rate": 1.984874110440129e-06, + "loss": 106.8103, + "step": 91010 + }, + { + "epoch": 0.752947015758779, + "grad_norm": 741.0905151367188, + "learning_rate": 1.9837491493462104e-06, + "loss": 112.0769, + "step": 91020 + }, + { + "epoch": 0.7530297390081483, + "grad_norm": 875.8446044921875, + "learning_rate": 1.9826244282415285e-06, + "loss": 77.1232, + "step": 91030 + }, + { + "epoch": 0.7531124622575175, + "grad_norm": 884.2897338867188, + "learning_rate": 1.9814999472155736e-06, + "loss": 76.4609, + "step": 91040 + }, + { + "epoch": 0.7531951855068867, + "grad_norm": 476.9480895996094, + "learning_rate": 1.9803757063578146e-06, + "loss": 84.1212, + "step": 91050 + }, + { + "epoch": 0.753277908756256, + "grad_norm": 548.7337646484375, + "learning_rate": 1.9792517057577026e-06, + "loss": 67.7463, + "step": 91060 + }, + { + "epoch": 0.7533606320056252, + "grad_norm": 1195.18505859375, + "learning_rate": 1.978127945504669e-06, + "loss": 96.1024, + 
"step": 91070 + }, + { + "epoch": 0.7534433552549944, + "grad_norm": 1155.169677734375, + "learning_rate": 1.977004425688126e-06, + "loss": 95.7761, + "step": 91080 + }, + { + "epoch": 0.7535260785043637, + "grad_norm": 591.861572265625, + "learning_rate": 1.9758811463974677e-06, + "loss": 64.2986, + "step": 91090 + }, + { + "epoch": 0.7536088017537329, + "grad_norm": 794.584228515625, + "learning_rate": 1.9747581077220675e-06, + "loss": 79.2889, + "step": 91100 + }, + { + "epoch": 0.7536915250031021, + "grad_norm": 891.2854614257812, + "learning_rate": 1.9736353097512802e-06, + "loss": 80.2451, + "step": 91110 + }, + { + "epoch": 0.7537742482524713, + "grad_norm": 474.7909851074219, + "learning_rate": 1.9725127525744423e-06, + "loss": 114.1455, + "step": 91120 + }, + { + "epoch": 0.7538569715018406, + "grad_norm": 843.3524169921875, + "learning_rate": 1.971390436280871e-06, + "loss": 97.2716, + "step": 91130 + }, + { + "epoch": 0.7539396947512098, + "grad_norm": 1710.1583251953125, + "learning_rate": 1.970268360959863e-06, + "loss": 77.38, + "step": 91140 + }, + { + "epoch": 0.754022418000579, + "grad_norm": 711.1730346679688, + "learning_rate": 1.9691465267006965e-06, + "loss": 80.5333, + "step": 91150 + }, + { + "epoch": 0.7541051412499483, + "grad_norm": 941.439697265625, + "learning_rate": 1.9680249335926314e-06, + "loss": 66.0788, + "step": 91160 + }, + { + "epoch": 0.7541878644993175, + "grad_norm": 922.4462890625, + "learning_rate": 1.9669035817249077e-06, + "loss": 88.3642, + "step": 91170 + }, + { + "epoch": 0.7542705877486867, + "grad_norm": 713.2926025390625, + "learning_rate": 1.9657824711867457e-06, + "loss": 68.5765, + "step": 91180 + }, + { + "epoch": 0.754353310998056, + "grad_norm": 617.7564697265625, + "learning_rate": 1.9646616020673474e-06, + "loss": 73.7422, + "step": 91190 + }, + { + "epoch": 0.7544360342474252, + "grad_norm": 1247.7928466796875, + "learning_rate": 1.9635409744558953e-06, + "loss": 80.8417, + "step": 91200 + }, + { + "epoch": 0.7545187574967944, + "grad_norm": 888.484375, + "learning_rate": 1.962420588441552e-06, + "loss": 80.5122, + "step": 91210 + }, + { + "epoch": 0.7546014807461637, + "grad_norm": 944.42626953125, + "learning_rate": 1.9613004441134635e-06, + "loss": 69.0574, + "step": 91220 + }, + { + "epoch": 0.7546842039955329, + "grad_norm": 731.5653076171875, + "learning_rate": 1.96018054156075e-06, + "loss": 108.5219, + "step": 91230 + }, + { + "epoch": 0.7547669272449021, + "grad_norm": 0.0, + "learning_rate": 1.9590608808725214e-06, + "loss": 79.5218, + "step": 91240 + }, + { + "epoch": 0.7548496504942714, + "grad_norm": 578.6743774414062, + "learning_rate": 1.9579414621378624e-06, + "loss": 90.3885, + "step": 91250 + }, + { + "epoch": 0.7549323737436406, + "grad_norm": 1231.1759033203125, + "learning_rate": 1.9568222854458403e-06, + "loss": 74.3492, + "step": 91260 + }, + { + "epoch": 0.7550150969930098, + "grad_norm": 1043.411376953125, + "learning_rate": 1.955703350885502e-06, + "loss": 93.9092, + "step": 91270 + }, + { + "epoch": 0.7550978202423791, + "grad_norm": 570.8914794921875, + "learning_rate": 1.954584658545877e-06, + "loss": 78.577, + "step": 91280 + }, + { + "epoch": 0.7551805434917483, + "grad_norm": 1726.863525390625, + "learning_rate": 1.9534662085159746e-06, + "loss": 119.8196, + "step": 91290 + }, + { + "epoch": 0.7552632667411175, + "grad_norm": 722.1109619140625, + "learning_rate": 1.9523480008847856e-06, + "loss": 83.1329, + "step": 91300 + }, + { + "epoch": 0.7553459899904869, + "grad_norm": 1024.208740234375, + 
"learning_rate": 1.9512300357412778e-06, + "loss": 82.1837, + "step": 91310 + }, + { + "epoch": 0.755428713239856, + "grad_norm": 743.8002319335938, + "learning_rate": 1.950112313174404e-06, + "loss": 95.0024, + "step": 91320 + }, + { + "epoch": 0.7555114364892253, + "grad_norm": 1056.7789306640625, + "learning_rate": 1.9489948332730945e-06, + "loss": 111.2676, + "step": 91330 + }, + { + "epoch": 0.7555941597385946, + "grad_norm": 948.68994140625, + "learning_rate": 1.947877596126266e-06, + "loss": 74.7832, + "step": 91340 + }, + { + "epoch": 0.7556768829879638, + "grad_norm": 746.3170166015625, + "learning_rate": 1.946760601822809e-06, + "loss": 77.3798, + "step": 91350 + }, + { + "epoch": 0.755759606237333, + "grad_norm": 783.4119873046875, + "learning_rate": 1.945643850451599e-06, + "loss": 91.6843, + "step": 91360 + }, + { + "epoch": 0.7558423294867023, + "grad_norm": 692.2002563476562, + "learning_rate": 1.9445273421014903e-06, + "loss": 82.1802, + "step": 91370 + }, + { + "epoch": 0.7559250527360715, + "grad_norm": 1240.4085693359375, + "learning_rate": 1.9434110768613184e-06, + "loss": 102.5016, + "step": 91380 + }, + { + "epoch": 0.7560077759854407, + "grad_norm": 1118.1903076171875, + "learning_rate": 1.9422950548199004e-06, + "loss": 82.6145, + "step": 91390 + }, + { + "epoch": 0.75609049923481, + "grad_norm": 552.0459594726562, + "learning_rate": 1.941179276066031e-06, + "loss": 90.4393, + "step": 91400 + }, + { + "epoch": 0.7561732224841792, + "grad_norm": 917.4848022460938, + "learning_rate": 1.9400637406884875e-06, + "loss": 90.3351, + "step": 91410 + }, + { + "epoch": 0.7562559457335484, + "grad_norm": 276.276611328125, + "learning_rate": 1.938948448776028e-06, + "loss": 111.6006, + "step": 91420 + }, + { + "epoch": 0.7563386689829177, + "grad_norm": 597.5177001953125, + "learning_rate": 1.9378334004173936e-06, + "loss": 78.6461, + "step": 91430 + }, + { + "epoch": 0.7564213922322869, + "grad_norm": 742.5587768554688, + "learning_rate": 1.9367185957013024e-06, + "loss": 89.7729, + "step": 91440 + }, + { + "epoch": 0.7565041154816561, + "grad_norm": 1106.80712890625, + "learning_rate": 1.9356040347164533e-06, + "loss": 76.2414, + "step": 91450 + }, + { + "epoch": 0.7565868387310254, + "grad_norm": 1360.2635498046875, + "learning_rate": 1.9344897175515283e-06, + "loss": 100.4726, + "step": 91460 + }, + { + "epoch": 0.7566695619803946, + "grad_norm": 615.8472290039062, + "learning_rate": 1.9333756442951886e-06, + "loss": 62.704, + "step": 91470 + }, + { + "epoch": 0.7567522852297638, + "grad_norm": 864.1071166992188, + "learning_rate": 1.9322618150360732e-06, + "loss": 108.7585, + "step": 91480 + }, + { + "epoch": 0.7568350084791331, + "grad_norm": 797.296142578125, + "learning_rate": 1.931148229862807e-06, + "loss": 66.7689, + "step": 91490 + }, + { + "epoch": 0.7569177317285023, + "grad_norm": 944.4542846679688, + "learning_rate": 1.9300348888639915e-06, + "loss": 90.2655, + "step": 91500 + }, + { + "epoch": 0.7570004549778715, + "grad_norm": 973.5896606445312, + "learning_rate": 1.9289217921282104e-06, + "loss": 81.9597, + "step": 91510 + }, + { + "epoch": 0.7570831782272408, + "grad_norm": 1060.939208984375, + "learning_rate": 1.927808939744027e-06, + "loss": 129.8845, + "step": 91520 + }, + { + "epoch": 0.75716590147661, + "grad_norm": 847.8220825195312, + "learning_rate": 1.9266963317999884e-06, + "loss": 77.1575, + "step": 91530 + }, + { + "epoch": 0.7572486247259792, + "grad_norm": 810.7078857421875, + "learning_rate": 1.9255839683846174e-06, + "loss": 90.8755, + 
"step": 91540 + }, + { + "epoch": 0.7573313479753485, + "grad_norm": 339.5457763671875, + "learning_rate": 1.9244718495864206e-06, + "loss": 58.7832, + "step": 91550 + }, + { + "epoch": 0.7574140712247177, + "grad_norm": 1120.6597900390625, + "learning_rate": 1.9233599754938857e-06, + "loss": 107.0604, + "step": 91560 + }, + { + "epoch": 0.7574967944740869, + "grad_norm": 1043.0989990234375, + "learning_rate": 1.922248346195477e-06, + "loss": 109.1071, + "step": 91570 + }, + { + "epoch": 0.7575795177234562, + "grad_norm": 790.0960693359375, + "learning_rate": 1.921136961779641e-06, + "loss": 90.9298, + "step": 91580 + }, + { + "epoch": 0.7576622409728254, + "grad_norm": 814.7366333007812, + "learning_rate": 1.9200258223348072e-06, + "loss": 87.8823, + "step": 91590 + }, + { + "epoch": 0.7577449642221946, + "grad_norm": 446.8509826660156, + "learning_rate": 1.918914927949384e-06, + "loss": 77.4138, + "step": 91600 + }, + { + "epoch": 0.7578276874715639, + "grad_norm": 1098.19091796875, + "learning_rate": 1.9178042787117594e-06, + "loss": 95.8741, + "step": 91610 + }, + { + "epoch": 0.7579104107209331, + "grad_norm": 479.6676940917969, + "learning_rate": 1.9166938747103013e-06, + "loss": 90.7424, + "step": 91620 + }, + { + "epoch": 0.7579931339703023, + "grad_norm": 925.6350708007812, + "learning_rate": 1.915583716033363e-06, + "loss": 101.3752, + "step": 91630 + }, + { + "epoch": 0.7580758572196716, + "grad_norm": 1013.79541015625, + "learning_rate": 1.9144738027692746e-06, + "loss": 101.7984, + "step": 91640 + }, + { + "epoch": 0.7581585804690408, + "grad_norm": 697.2052001953125, + "learning_rate": 1.913364135006343e-06, + "loss": 97.0362, + "step": 91650 + }, + { + "epoch": 0.75824130371841, + "grad_norm": 703.0621337890625, + "learning_rate": 1.9122547128328616e-06, + "loss": 85.1144, + "step": 91660 + }, + { + "epoch": 0.7583240269677793, + "grad_norm": 562.5040893554688, + "learning_rate": 1.9111455363371016e-06, + "loss": 90.2374, + "step": 91670 + }, + { + "epoch": 0.7584067502171485, + "grad_norm": 906.6265869140625, + "learning_rate": 1.910036605607316e-06, + "loss": 72.353, + "step": 91680 + }, + { + "epoch": 0.7584894734665177, + "grad_norm": 1488.5152587890625, + "learning_rate": 1.908927920731736e-06, + "loss": 77.0585, + "step": 91690 + }, + { + "epoch": 0.758572196715887, + "grad_norm": 573.968505859375, + "learning_rate": 1.9078194817985755e-06, + "loss": 68.6668, + "step": 91700 + }, + { + "epoch": 0.7586549199652562, + "grad_norm": 791.8029174804688, + "learning_rate": 1.9067112888960283e-06, + "loss": 73.1811, + "step": 91710 + }, + { + "epoch": 0.7587376432146254, + "grad_norm": 743.7737426757812, + "learning_rate": 1.905603342112265e-06, + "loss": 89.2683, + "step": 91720 + }, + { + "epoch": 0.7588203664639948, + "grad_norm": 863.6655883789062, + "learning_rate": 1.904495641535446e-06, + "loss": 93.7095, + "step": 91730 + }, + { + "epoch": 0.758903089713364, + "grad_norm": 533.572509765625, + "learning_rate": 1.9033881872537009e-06, + "loss": 94.9646, + "step": 91740 + }, + { + "epoch": 0.7589858129627332, + "grad_norm": 859.637939453125, + "learning_rate": 1.902280979355146e-06, + "loss": 75.9259, + "step": 91750 + }, + { + "epoch": 0.7590685362121025, + "grad_norm": 591.394287109375, + "learning_rate": 1.901174017927877e-06, + "loss": 85.4813, + "step": 91760 + }, + { + "epoch": 0.7591512594614717, + "grad_norm": 832.2922973632812, + "learning_rate": 1.9000673030599698e-06, + "loss": 91.3511, + "step": 91770 + }, + { + "epoch": 0.7592339827108409, + "grad_norm": 
355.1169128417969, + "learning_rate": 1.89896083483948e-06, + "loss": 132.036, + "step": 91780 + }, + { + "epoch": 0.7593167059602102, + "grad_norm": 925.5943603515625, + "learning_rate": 1.897854613354445e-06, + "loss": 80.6945, + "step": 91790 + }, + { + "epoch": 0.7593994292095794, + "grad_norm": 1204.190185546875, + "learning_rate": 1.8967486386928819e-06, + "loss": 118.005, + "step": 91800 + }, + { + "epoch": 0.7594821524589486, + "grad_norm": 320.41064453125, + "learning_rate": 1.8956429109427855e-06, + "loss": 93.1453, + "step": 91810 + }, + { + "epoch": 0.7595648757083179, + "grad_norm": 1259.0718994140625, + "learning_rate": 1.8945374301921393e-06, + "loss": 104.1626, + "step": 91820 + }, + { + "epoch": 0.7596475989576871, + "grad_norm": 1038.4957275390625, + "learning_rate": 1.893432196528896e-06, + "loss": 71.0405, + "step": 91830 + }, + { + "epoch": 0.7597303222070563, + "grad_norm": 818.2511596679688, + "learning_rate": 1.892327210040995e-06, + "loss": 86.6824, + "step": 91840 + }, + { + "epoch": 0.7598130454564255, + "grad_norm": 662.8837280273438, + "learning_rate": 1.8912224708163561e-06, + "loss": 79.6246, + "step": 91850 + }, + { + "epoch": 0.7598957687057948, + "grad_norm": 484.0934143066406, + "learning_rate": 1.890117978942878e-06, + "loss": 66.859, + "step": 91860 + }, + { + "epoch": 0.759978491955164, + "grad_norm": 1255.5438232421875, + "learning_rate": 1.8890137345084392e-06, + "loss": 99.873, + "step": 91870 + }, + { + "epoch": 0.7600612152045332, + "grad_norm": 563.4220581054688, + "learning_rate": 1.8879097376009009e-06, + "loss": 68.9941, + "step": 91880 + }, + { + "epoch": 0.7601439384539025, + "grad_norm": 736.5455932617188, + "learning_rate": 1.8868059883081015e-06, + "loss": 69.8224, + "step": 91890 + }, + { + "epoch": 0.7602266617032717, + "grad_norm": 1109.2657470703125, + "learning_rate": 1.8857024867178625e-06, + "loss": 69.6023, + "step": 91900 + }, + { + "epoch": 0.7603093849526409, + "grad_norm": 710.873291015625, + "learning_rate": 1.8845992329179835e-06, + "loss": 75.4405, + "step": 91910 + }, + { + "epoch": 0.7603921082020102, + "grad_norm": 739.780517578125, + "learning_rate": 1.883496226996246e-06, + "loss": 71.1245, + "step": 91920 + }, + { + "epoch": 0.7604748314513794, + "grad_norm": 533.0693359375, + "learning_rate": 1.8823934690404106e-06, + "loss": 86.3577, + "step": 91930 + }, + { + "epoch": 0.7605575547007486, + "grad_norm": 1472.7764892578125, + "learning_rate": 1.8812909591382195e-06, + "loss": 94.3954, + "step": 91940 + }, + { + "epoch": 0.7606402779501179, + "grad_norm": 326.43096923828125, + "learning_rate": 1.8801886973773936e-06, + "loss": 101.5206, + "step": 91950 + }, + { + "epoch": 0.7607230011994871, + "grad_norm": 762.7881469726562, + "learning_rate": 1.8790866838456351e-06, + "loss": 72.6627, + "step": 91960 + }, + { + "epoch": 0.7608057244488563, + "grad_norm": 612.1148071289062, + "learning_rate": 1.877984918630626e-06, + "loss": 104.0263, + "step": 91970 + }, + { + "epoch": 0.7608884476982256, + "grad_norm": 1866.867919921875, + "learning_rate": 1.876883401820029e-06, + "loss": 111.1947, + "step": 91980 + }, + { + "epoch": 0.7609711709475948, + "grad_norm": 1281.453369140625, + "learning_rate": 1.8757821335014858e-06, + "loss": 104.3547, + "step": 91990 + }, + { + "epoch": 0.761053894196964, + "grad_norm": 890.40869140625, + "learning_rate": 1.8746811137626208e-06, + "loss": 82.3355, + "step": 92000 + }, + { + "epoch": 0.7611366174463333, + "grad_norm": 807.6094970703125, + "learning_rate": 1.8735803426910366e-06, + 
"loss": 93.4271, + "step": 92010 + }, + { + "epoch": 0.7612193406957025, + "grad_norm": 970.5708618164062, + "learning_rate": 1.8724798203743154e-06, + "loss": 91.6451, + "step": 92020 + }, + { + "epoch": 0.7613020639450717, + "grad_norm": 688.900146484375, + "learning_rate": 1.8713795469000218e-06, + "loss": 100.542, + "step": 92030 + }, + { + "epoch": 0.761384787194441, + "grad_norm": 870.4006958007812, + "learning_rate": 1.8702795223556992e-06, + "loss": 88.1513, + "step": 92040 + }, + { + "epoch": 0.7614675104438102, + "grad_norm": 558.2523803710938, + "learning_rate": 1.8691797468288713e-06, + "loss": 84.1693, + "step": 92050 + }, + { + "epoch": 0.7615502336931794, + "grad_norm": 627.4422607421875, + "learning_rate": 1.8680802204070432e-06, + "loss": 82.1566, + "step": 92060 + }, + { + "epoch": 0.7616329569425487, + "grad_norm": 792.007568359375, + "learning_rate": 1.8669809431776991e-06, + "loss": 103.5347, + "step": 92070 + }, + { + "epoch": 0.7617156801919179, + "grad_norm": 1743.6444091796875, + "learning_rate": 1.8658819152283003e-06, + "loss": 108.218, + "step": 92080 + }, + { + "epoch": 0.7617984034412871, + "grad_norm": 701.8472900390625, + "learning_rate": 1.8647831366462948e-06, + "loss": 75.0169, + "step": 92090 + }, + { + "epoch": 0.7618811266906564, + "grad_norm": 555.6596069335938, + "learning_rate": 1.8636846075191067e-06, + "loss": 91.5314, + "step": 92100 + }, + { + "epoch": 0.7619638499400256, + "grad_norm": 1169.95263671875, + "learning_rate": 1.8625863279341406e-06, + "loss": 90.8289, + "step": 92110 + }, + { + "epoch": 0.7620465731893948, + "grad_norm": 1040.67724609375, + "learning_rate": 1.8614882979787818e-06, + "loss": 75.7882, + "step": 92120 + }, + { + "epoch": 0.7621292964387641, + "grad_norm": 737.9378051757812, + "learning_rate": 1.8603905177403953e-06, + "loss": 109.6155, + "step": 92130 + }, + { + "epoch": 0.7622120196881333, + "grad_norm": 1175.499267578125, + "learning_rate": 1.8592929873063259e-06, + "loss": 77.9201, + "step": 92140 + }, + { + "epoch": 0.7622947429375025, + "grad_norm": 1110.6258544921875, + "learning_rate": 1.8581957067639e-06, + "loss": 110.9532, + "step": 92150 + }, + { + "epoch": 0.7623774661868719, + "grad_norm": 1796.15625, + "learning_rate": 1.8570986762004246e-06, + "loss": 90.3984, + "step": 92160 + }, + { + "epoch": 0.762460189436241, + "grad_norm": 778.6599731445312, + "learning_rate": 1.8560018957031816e-06, + "loss": 98.082, + "step": 92170 + }, + { + "epoch": 0.7625429126856103, + "grad_norm": 2528.74560546875, + "learning_rate": 1.8549053653594373e-06, + "loss": 116.1548, + "step": 92180 + }, + { + "epoch": 0.7626256359349796, + "grad_norm": 684.8113403320312, + "learning_rate": 1.8538090852564405e-06, + "loss": 72.5543, + "step": 92190 + }, + { + "epoch": 0.7627083591843488, + "grad_norm": 712.4026489257812, + "learning_rate": 1.852713055481416e-06, + "loss": 71.634, + "step": 92200 + }, + { + "epoch": 0.762791082433718, + "grad_norm": 856.0159301757812, + "learning_rate": 1.8516172761215695e-06, + "loss": 80.4302, + "step": 92210 + }, + { + "epoch": 0.7628738056830873, + "grad_norm": 690.3129272460938, + "learning_rate": 1.8505217472640868e-06, + "loss": 68.3549, + "step": 92220 + }, + { + "epoch": 0.7629565289324565, + "grad_norm": 787.6735229492188, + "learning_rate": 1.849426468996135e-06, + "loss": 87.8287, + "step": 92230 + }, + { + "epoch": 0.7630392521818257, + "grad_norm": 442.70745849609375, + "learning_rate": 1.8483314414048597e-06, + "loss": 102.3068, + "step": 92240 + }, + { + "epoch": 0.763121975431195, 
+ "grad_norm": 812.5242919921875, + "learning_rate": 1.8472366645773892e-06, + "loss": 117.692, + "step": 92250 + }, + { + "epoch": 0.7632046986805642, + "grad_norm": 605.50927734375, + "learning_rate": 1.846142138600826e-06, + "loss": 69.203, + "step": 92260 + }, + { + "epoch": 0.7632874219299334, + "grad_norm": 907.07568359375, + "learning_rate": 1.8450478635622592e-06, + "loss": 139.9277, + "step": 92270 + }, + { + "epoch": 0.7633701451793027, + "grad_norm": 1189.9276123046875, + "learning_rate": 1.8439538395487528e-06, + "loss": 87.1735, + "step": 92280 + }, + { + "epoch": 0.7634528684286719, + "grad_norm": 575.260498046875, + "learning_rate": 1.842860066647356e-06, + "loss": 58.7681, + "step": 92290 + }, + { + "epoch": 0.7635355916780411, + "grad_norm": 1187.8050537109375, + "learning_rate": 1.841766544945095e-06, + "loss": 131.3646, + "step": 92300 + }, + { + "epoch": 0.7636183149274104, + "grad_norm": 811.5830078125, + "learning_rate": 1.8406732745289757e-06, + "loss": 83.9469, + "step": 92310 + }, + { + "epoch": 0.7637010381767796, + "grad_norm": 928.5191040039062, + "learning_rate": 1.839580255485985e-06, + "loss": 91.3141, + "step": 92320 + }, + { + "epoch": 0.7637837614261488, + "grad_norm": 1056.0318603515625, + "learning_rate": 1.83848748790309e-06, + "loss": 89.1773, + "step": 92330 + }, + { + "epoch": 0.7638664846755181, + "grad_norm": 1123.9605712890625, + "learning_rate": 1.8373949718672345e-06, + "loss": 61.8712, + "step": 92340 + }, + { + "epoch": 0.7639492079248873, + "grad_norm": 712.9464721679688, + "learning_rate": 1.8363027074653473e-06, + "loss": 84.6685, + "step": 92350 + }, + { + "epoch": 0.7640319311742565, + "grad_norm": 621.0189208984375, + "learning_rate": 1.835210694784334e-06, + "loss": 127.7506, + "step": 92360 + }, + { + "epoch": 0.7641146544236258, + "grad_norm": 916.6598510742188, + "learning_rate": 1.8341189339110793e-06, + "loss": 72.2045, + "step": 92370 + }, + { + "epoch": 0.764197377672995, + "grad_norm": 1478.23681640625, + "learning_rate": 1.8330274249324537e-06, + "loss": 96.8306, + "step": 92380 + }, + { + "epoch": 0.7642801009223642, + "grad_norm": 838.5194702148438, + "learning_rate": 1.831936167935301e-06, + "loss": 117.9247, + "step": 92390 + }, + { + "epoch": 0.7643628241717335, + "grad_norm": 834.4838256835938, + "learning_rate": 1.8308451630064484e-06, + "loss": 98.4779, + "step": 92400 + }, + { + "epoch": 0.7644455474211027, + "grad_norm": 1103.1943359375, + "learning_rate": 1.8297544102327014e-06, + "loss": 117.0596, + "step": 92410 + }, + { + "epoch": 0.7645282706704719, + "grad_norm": 605.728515625, + "learning_rate": 1.8286639097008484e-06, + "loss": 87.3383, + "step": 92420 + }, + { + "epoch": 0.7646109939198412, + "grad_norm": 826.8703002929688, + "learning_rate": 1.827573661497652e-06, + "loss": 79.3055, + "step": 92430 + }, + { + "epoch": 0.7646937171692104, + "grad_norm": 797.113525390625, + "learning_rate": 1.8264836657098595e-06, + "loss": 94.8799, + "step": 92440 + }, + { + "epoch": 0.7647764404185796, + "grad_norm": 1154.000244140625, + "learning_rate": 1.8253939224241974e-06, + "loss": 84.2387, + "step": 92450 + }, + { + "epoch": 0.7648591636679489, + "grad_norm": 1213.9759521484375, + "learning_rate": 1.8243044317273717e-06, + "loss": 61.1165, + "step": 92460 + }, + { + "epoch": 0.7649418869173181, + "grad_norm": 634.3380737304688, + "learning_rate": 1.823215193706066e-06, + "loss": 87.2058, + "step": 92470 + }, + { + "epoch": 0.7650246101666873, + "grad_norm": 807.1724853515625, + "learning_rate": 1.82212620844695e-06, + 
"loss": 88.5422, + "step": 92480 + }, + { + "epoch": 0.7651073334160566, + "grad_norm": 1034.462158203125, + "learning_rate": 1.8210374760366662e-06, + "loss": 87.1022, + "step": 92490 + }, + { + "epoch": 0.7651900566654258, + "grad_norm": 922.4021606445312, + "learning_rate": 1.8199489965618433e-06, + "loss": 101.4626, + "step": 92500 + }, + { + "epoch": 0.765272779914795, + "grad_norm": 820.8185424804688, + "learning_rate": 1.8188607701090827e-06, + "loss": 80.0996, + "step": 92510 + }, + { + "epoch": 0.7653555031641643, + "grad_norm": 701.537841796875, + "learning_rate": 1.8177727967649705e-06, + "loss": 83.0882, + "step": 92520 + }, + { + "epoch": 0.7654382264135335, + "grad_norm": 935.3859252929688, + "learning_rate": 1.816685076616073e-06, + "loss": 68.9125, + "step": 92530 + }, + { + "epoch": 0.7655209496629027, + "grad_norm": 758.0145874023438, + "learning_rate": 1.8155976097489342e-06, + "loss": 80.4841, + "step": 92540 + }, + { + "epoch": 0.765603672912272, + "grad_norm": 479.95880126953125, + "learning_rate": 1.8145103962500792e-06, + "loss": 108.154, + "step": 92550 + }, + { + "epoch": 0.7656863961616412, + "grad_norm": 1647.0076904296875, + "learning_rate": 1.8134234362060128e-06, + "loss": 97.0746, + "step": 92560 + }, + { + "epoch": 0.7657691194110104, + "grad_norm": 589.4857177734375, + "learning_rate": 1.8123367297032175e-06, + "loss": 90.745, + "step": 92570 + }, + { + "epoch": 0.7658518426603796, + "grad_norm": 579.7984008789062, + "learning_rate": 1.8112502768281608e-06, + "loss": 82.4168, + "step": 92580 + }, + { + "epoch": 0.765934565909749, + "grad_norm": 426.4167785644531, + "learning_rate": 1.810164077667287e-06, + "loss": 84.039, + "step": 92590 + }, + { + "epoch": 0.7660172891591182, + "grad_norm": 865.5641479492188, + "learning_rate": 1.809078132307016e-06, + "loss": 79.1781, + "step": 92600 + }, + { + "epoch": 0.7661000124084874, + "grad_norm": 1119.6689453125, + "learning_rate": 1.807992440833754e-06, + "loss": 72.2057, + "step": 92610 + }, + { + "epoch": 0.7661827356578567, + "grad_norm": 851.24365234375, + "learning_rate": 1.8069070033338842e-06, + "loss": 87.8442, + "step": 92620 + }, + { + "epoch": 0.7662654589072259, + "grad_norm": 1012.4996337890625, + "learning_rate": 1.8058218198937695e-06, + "loss": 104.0138, + "step": 92630 + }, + { + "epoch": 0.7663481821565951, + "grad_norm": 2512.142578125, + "learning_rate": 1.8047368905997536e-06, + "loss": 90.26, + "step": 92640 + }, + { + "epoch": 0.7664309054059644, + "grad_norm": 612.3758544921875, + "learning_rate": 1.8036522155381592e-06, + "loss": 140.536, + "step": 92650 + }, + { + "epoch": 0.7665136286553336, + "grad_norm": 825.9151611328125, + "learning_rate": 1.8025677947952879e-06, + "loss": 106.3884, + "step": 92660 + }, + { + "epoch": 0.7665963519047028, + "grad_norm": 806.4419555664062, + "learning_rate": 1.8014836284574223e-06, + "loss": 96.8457, + "step": 92670 + }, + { + "epoch": 0.7666790751540721, + "grad_norm": 366.78570556640625, + "learning_rate": 1.8003997166108278e-06, + "loss": 76.3395, + "step": 92680 + }, + { + "epoch": 0.7667617984034413, + "grad_norm": 1280.156982421875, + "learning_rate": 1.7993160593417424e-06, + "loss": 104.953, + "step": 92690 + }, + { + "epoch": 0.7668445216528105, + "grad_norm": 1448.958984375, + "learning_rate": 1.798232656736389e-06, + "loss": 86.1547, + "step": 92700 + }, + { + "epoch": 0.7669272449021798, + "grad_norm": 430.016845703125, + "learning_rate": 1.7971495088809688e-06, + "loss": 80.0066, + "step": 92710 + }, + { + "epoch": 0.767009968151549, + 
"grad_norm": 611.8375244140625, + "learning_rate": 1.796066615861663e-06, + "loss": 82.3525, + "step": 92720 + }, + { + "epoch": 0.7670926914009182, + "grad_norm": 163.59913635253906, + "learning_rate": 1.7949839777646327e-06, + "loss": 87.4548, + "step": 92730 + }, + { + "epoch": 0.7671754146502875, + "grad_norm": 830.4721069335938, + "learning_rate": 1.7939015946760186e-06, + "loss": 94.6959, + "step": 92740 + }, + { + "epoch": 0.7672581378996567, + "grad_norm": 602.9876098632812, + "learning_rate": 1.7928194666819398e-06, + "loss": 62.9848, + "step": 92750 + }, + { + "epoch": 0.7673408611490259, + "grad_norm": 942.3362426757812, + "learning_rate": 1.7917375938684979e-06, + "loss": 89.7564, + "step": 92760 + }, + { + "epoch": 0.7674235843983952, + "grad_norm": 1193.48486328125, + "learning_rate": 1.7906559763217713e-06, + "loss": 79.8312, + "step": 92770 + }, + { + "epoch": 0.7675063076477644, + "grad_norm": 819.6219482421875, + "learning_rate": 1.7895746141278198e-06, + "loss": 80.9784, + "step": 92780 + }, + { + "epoch": 0.7675890308971336, + "grad_norm": 716.1319580078125, + "learning_rate": 1.7884935073726822e-06, + "loss": 96.7796, + "step": 92790 + }, + { + "epoch": 0.7676717541465029, + "grad_norm": 862.4302368164062, + "learning_rate": 1.7874126561423771e-06, + "loss": 66.9127, + "step": 92800 + }, + { + "epoch": 0.7677544773958721, + "grad_norm": 323.0167236328125, + "learning_rate": 1.786332060522904e-06, + "loss": 83.0131, + "step": 92810 + }, + { + "epoch": 0.7678372006452413, + "grad_norm": 819.0519409179688, + "learning_rate": 1.7852517206002396e-06, + "loss": 93.2481, + "step": 92820 + }, + { + "epoch": 0.7679199238946106, + "grad_norm": 551.5955810546875, + "learning_rate": 1.7841716364603423e-06, + "loss": 84.1821, + "step": 92830 + }, + { + "epoch": 0.7680026471439798, + "grad_norm": 791.5858154296875, + "learning_rate": 1.783091808189149e-06, + "loss": 107.4551, + "step": 92840 + }, + { + "epoch": 0.768085370393349, + "grad_norm": 1241.2384033203125, + "learning_rate": 1.7820122358725772e-06, + "loss": 107.9027, + "step": 92850 + }, + { + "epoch": 0.7681680936427183, + "grad_norm": 390.0361633300781, + "learning_rate": 1.780932919596523e-06, + "loss": 86.1765, + "step": 92860 + }, + { + "epoch": 0.7682508168920875, + "grad_norm": 390.01922607421875, + "learning_rate": 1.779853859446863e-06, + "loss": 97.645, + "step": 92870 + }, + { + "epoch": 0.7683335401414567, + "grad_norm": 877.824462890625, + "learning_rate": 1.778775055509453e-06, + "loss": 121.0466, + "step": 92880 + }, + { + "epoch": 0.768416263390826, + "grad_norm": 679.44580078125, + "learning_rate": 1.777696507870128e-06, + "loss": 88.3246, + "step": 92890 + }, + { + "epoch": 0.7684989866401952, + "grad_norm": 1261.235595703125, + "learning_rate": 1.776618216614704e-06, + "loss": 91.5801, + "step": 92900 + }, + { + "epoch": 0.7685817098895644, + "grad_norm": 1093.8907470703125, + "learning_rate": 1.7755401818289748e-06, + "loss": 82.9775, + "step": 92910 + }, + { + "epoch": 0.7686644331389337, + "grad_norm": 567.2011108398438, + "learning_rate": 1.774462403598715e-06, + "loss": 104.8191, + "step": 92920 + }, + { + "epoch": 0.7687471563883029, + "grad_norm": 864.6884155273438, + "learning_rate": 1.7733848820096789e-06, + "loss": 72.2253, + "step": 92930 + }, + { + "epoch": 0.7688298796376721, + "grad_norm": 781.83740234375, + "learning_rate": 1.7723076171475995e-06, + "loss": 97.3354, + "step": 92940 + }, + { + "epoch": 0.7689126028870414, + "grad_norm": 622.6500854492188, + "learning_rate": 
1.7712306090981896e-06, + "loss": 131.4019, + "step": 92950 + }, + { + "epoch": 0.7689953261364106, + "grad_norm": 603.605712890625, + "learning_rate": 1.7701538579471423e-06, + "loss": 64.7121, + "step": 92960 + }, + { + "epoch": 0.7690780493857798, + "grad_norm": 691.0977783203125, + "learning_rate": 1.7690773637801295e-06, + "loss": 89.4358, + "step": 92970 + }, + { + "epoch": 0.7691607726351491, + "grad_norm": 857.383056640625, + "learning_rate": 1.768001126682803e-06, + "loss": 98.0366, + "step": 92980 + }, + { + "epoch": 0.7692434958845183, + "grad_norm": 2012.945556640625, + "learning_rate": 1.7669251467407938e-06, + "loss": 76.4047, + "step": 92990 + }, + { + "epoch": 0.7693262191338875, + "grad_norm": 356.16473388671875, + "learning_rate": 1.7658494240397127e-06, + "loss": 73.393, + "step": 93000 + }, + { + "epoch": 0.7694089423832569, + "grad_norm": 853.9513549804688, + "learning_rate": 1.7647739586651508e-06, + "loss": 91.1541, + "step": 93010 + }, + { + "epoch": 0.769491665632626, + "grad_norm": 880.5347900390625, + "learning_rate": 1.7636987507026787e-06, + "loss": 117.1849, + "step": 93020 + }, + { + "epoch": 0.7695743888819953, + "grad_norm": 714.1282958984375, + "learning_rate": 1.762623800237841e-06, + "loss": 121.1683, + "step": 93030 + }, + { + "epoch": 0.7696571121313646, + "grad_norm": 2766.218994140625, + "learning_rate": 1.7615491073561714e-06, + "loss": 75.8225, + "step": 93040 + }, + { + "epoch": 0.7697398353807338, + "grad_norm": 723.0350952148438, + "learning_rate": 1.760474672143177e-06, + "loss": 83.1655, + "step": 93050 + }, + { + "epoch": 0.769822558630103, + "grad_norm": 864.4639892578125, + "learning_rate": 1.7594004946843458e-06, + "loss": 71.9456, + "step": 93060 + }, + { + "epoch": 0.7699052818794723, + "grad_norm": 1015.13330078125, + "learning_rate": 1.7583265750651446e-06, + "loss": 125.6941, + "step": 93070 + }, + { + "epoch": 0.7699880051288415, + "grad_norm": 551.4693603515625, + "learning_rate": 1.7572529133710204e-06, + "loss": 68.586, + "step": 93080 + }, + { + "epoch": 0.7700707283782107, + "grad_norm": 509.70941162109375, + "learning_rate": 1.7561795096874002e-06, + "loss": 66.0901, + "step": 93090 + }, + { + "epoch": 0.77015345162758, + "grad_norm": 1378.9339599609375, + "learning_rate": 1.755106364099689e-06, + "loss": 88.016, + "step": 93100 + }, + { + "epoch": 0.7702361748769492, + "grad_norm": 1190.9852294921875, + "learning_rate": 1.7540334766932738e-06, + "loss": 79.6336, + "step": 93110 + }, + { + "epoch": 0.7703188981263184, + "grad_norm": 805.7669677734375, + "learning_rate": 1.7529608475535165e-06, + "loss": 89.6155, + "step": 93120 + }, + { + "epoch": 0.7704016213756877, + "grad_norm": 520.67724609375, + "learning_rate": 1.7518884767657612e-06, + "loss": 62.7607, + "step": 93130 + }, + { + "epoch": 0.7704843446250569, + "grad_norm": 1119.815673828125, + "learning_rate": 1.7508163644153342e-06, + "loss": 97.3919, + "step": 93140 + }, + { + "epoch": 0.7705670678744261, + "grad_norm": 592.175048828125, + "learning_rate": 1.7497445105875377e-06, + "loss": 81.5456, + "step": 93150 + }, + { + "epoch": 0.7706497911237954, + "grad_norm": 603.5911254882812, + "learning_rate": 1.7486729153676536e-06, + "loss": 70.6593, + "step": 93160 + }, + { + "epoch": 0.7707325143731646, + "grad_norm": 762.583251953125, + "learning_rate": 1.7476015788409439e-06, + "loss": 93.7638, + "step": 93170 + }, + { + "epoch": 0.7708152376225338, + "grad_norm": 1937.641845703125, + "learning_rate": 1.7465305010926503e-06, + "loss": 100.976, + "step": 93180 + }, + { 
+ "epoch": 0.7708979608719031, + "grad_norm": 1821.3037109375, + "learning_rate": 1.745459682207995e-06, + "loss": 98.9063, + "step": 93190 + }, + { + "epoch": 0.7709806841212723, + "grad_norm": 418.82037353515625, + "learning_rate": 1.7443891222721749e-06, + "loss": 96.1245, + "step": 93200 + }, + { + "epoch": 0.7710634073706415, + "grad_norm": 596.7721557617188, + "learning_rate": 1.7433188213703712e-06, + "loss": 83.4011, + "step": 93210 + }, + { + "epoch": 0.7711461306200108, + "grad_norm": 824.0478515625, + "learning_rate": 1.7422487795877424e-06, + "loss": 104.4605, + "step": 93220 + }, + { + "epoch": 0.77122885386938, + "grad_norm": 780.7681884765625, + "learning_rate": 1.7411789970094257e-06, + "loss": 109.6282, + "step": 93230 + }, + { + "epoch": 0.7713115771187492, + "grad_norm": 721.4907836914062, + "learning_rate": 1.7401094737205415e-06, + "loss": 127.5015, + "step": 93240 + }, + { + "epoch": 0.7713943003681185, + "grad_norm": 0.0, + "learning_rate": 1.739040209806186e-06, + "loss": 103.458, + "step": 93250 + }, + { + "epoch": 0.7714770236174877, + "grad_norm": 1141.449462890625, + "learning_rate": 1.7379712053514352e-06, + "loss": 91.0078, + "step": 93260 + }, + { + "epoch": 0.7715597468668569, + "grad_norm": 780.21337890625, + "learning_rate": 1.736902460441345e-06, + "loss": 83.8483, + "step": 93270 + }, + { + "epoch": 0.7716424701162262, + "grad_norm": 1003.1300048828125, + "learning_rate": 1.735833975160952e-06, + "loss": 91.4468, + "step": 93280 + }, + { + "epoch": 0.7717251933655954, + "grad_norm": 672.5834350585938, + "learning_rate": 1.7347657495952675e-06, + "loss": 95.7494, + "step": 93290 + }, + { + "epoch": 0.7718079166149646, + "grad_norm": 1039.33544921875, + "learning_rate": 1.7336977838292867e-06, + "loss": 83.7675, + "step": 93300 + }, + { + "epoch": 0.7718906398643338, + "grad_norm": 1989.511474609375, + "learning_rate": 1.7326300779479826e-06, + "loss": 78.2224, + "step": 93310 + }, + { + "epoch": 0.7719733631137031, + "grad_norm": 427.48193359375, + "learning_rate": 1.731562632036307e-06, + "loss": 89.7143, + "step": 93320 + }, + { + "epoch": 0.7720560863630723, + "grad_norm": 573.6722412109375, + "learning_rate": 1.730495446179194e-06, + "loss": 85.7981, + "step": 93330 + }, + { + "epoch": 0.7721388096124415, + "grad_norm": 937.4099731445312, + "learning_rate": 1.7294285204615536e-06, + "loss": 76.9562, + "step": 93340 + }, + { + "epoch": 0.7722215328618108, + "grad_norm": 931.5322265625, + "learning_rate": 1.7283618549682757e-06, + "loss": 73.166, + "step": 93350 + }, + { + "epoch": 0.77230425611118, + "grad_norm": 915.3590087890625, + "learning_rate": 1.727295449784232e-06, + "loss": 78.7844, + "step": 93360 + }, + { + "epoch": 0.7723869793605492, + "grad_norm": 1507.6265869140625, + "learning_rate": 1.726229304994268e-06, + "loss": 77.3544, + "step": 93370 + }, + { + "epoch": 0.7724697026099185, + "grad_norm": 809.1603393554688, + "learning_rate": 1.7251634206832135e-06, + "loss": 81.6114, + "step": 93380 + }, + { + "epoch": 0.7725524258592877, + "grad_norm": 410.7778015136719, + "learning_rate": 1.7240977969358757e-06, + "loss": 87.7018, + "step": 93390 + }, + { + "epoch": 0.7726351491086569, + "grad_norm": 805.246826171875, + "learning_rate": 1.7230324338370425e-06, + "loss": 80.5176, + "step": 93400 + }, + { + "epoch": 0.7727178723580262, + "grad_norm": 687.7548828125, + "learning_rate": 1.721967331471479e-06, + "loss": 79.3853, + "step": 93410 + }, + { + "epoch": 0.7728005956073954, + "grad_norm": 941.4087524414062, + "learning_rate": 
1.7209024899239297e-06, + "loss": 81.4306, + "step": 93420 + }, + { + "epoch": 0.7728833188567646, + "grad_norm": 947.3975830078125, + "learning_rate": 1.7198379092791213e-06, + "loss": 79.1074, + "step": 93430 + }, + { + "epoch": 0.772966042106134, + "grad_norm": 1059.6492919921875, + "learning_rate": 1.7187735896217567e-06, + "loss": 75.3719, + "step": 93440 + }, + { + "epoch": 0.7730487653555032, + "grad_norm": 853.2528076171875, + "learning_rate": 1.7177095310365205e-06, + "loss": 69.0534, + "step": 93450 + }, + { + "epoch": 0.7731314886048724, + "grad_norm": 940.2296142578125, + "learning_rate": 1.7166457336080716e-06, + "loss": 70.2273, + "step": 93460 + }, + { + "epoch": 0.7732142118542417, + "grad_norm": 934.7996215820312, + "learning_rate": 1.715582197421053e-06, + "loss": 77.2213, + "step": 93470 + }, + { + "epoch": 0.7732969351036109, + "grad_norm": 550.6903076171875, + "learning_rate": 1.7145189225600856e-06, + "loss": 76.0427, + "step": 93480 + }, + { + "epoch": 0.7733796583529801, + "grad_norm": 1751.7384033203125, + "learning_rate": 1.7134559091097691e-06, + "loss": 95.2524, + "step": 93490 + }, + { + "epoch": 0.7734623816023494, + "grad_norm": 1242.5704345703125, + "learning_rate": 1.7123931571546826e-06, + "loss": 85.0259, + "step": 93500 + }, + { + "epoch": 0.7735451048517186, + "grad_norm": 1116.49365234375, + "learning_rate": 1.711330666779385e-06, + "loss": 93.0945, + "step": 93510 + }, + { + "epoch": 0.7736278281010878, + "grad_norm": 1309.2081298828125, + "learning_rate": 1.7102684380684109e-06, + "loss": 102.1733, + "step": 93520 + }, + { + "epoch": 0.7737105513504571, + "grad_norm": 610.8143920898438, + "learning_rate": 1.7092064711062816e-06, + "loss": 83.7248, + "step": 93530 + }, + { + "epoch": 0.7737932745998263, + "grad_norm": 656.4087524414062, + "learning_rate": 1.708144765977492e-06, + "loss": 83.5821, + "step": 93540 + }, + { + "epoch": 0.7738759978491955, + "grad_norm": 2868.773193359375, + "learning_rate": 1.7070833227665146e-06, + "loss": 86.4508, + "step": 93550 + }, + { + "epoch": 0.7739587210985648, + "grad_norm": 547.4402465820312, + "learning_rate": 1.7060221415578042e-06, + "loss": 96.0463, + "step": 93560 + }, + { + "epoch": 0.774041444347934, + "grad_norm": 983.4544067382812, + "learning_rate": 1.7049612224357954e-06, + "loss": 72.9841, + "step": 93570 + }, + { + "epoch": 0.7741241675973032, + "grad_norm": 447.38299560546875, + "learning_rate": 1.703900565484899e-06, + "loss": 75.4073, + "step": 93580 + }, + { + "epoch": 0.7742068908466725, + "grad_norm": 797.6534423828125, + "learning_rate": 1.7028401707895082e-06, + "loss": 72.9507, + "step": 93590 + }, + { + "epoch": 0.7742896140960417, + "grad_norm": 536.6268310546875, + "learning_rate": 1.7017800384339928e-06, + "loss": 96.6598, + "step": 93600 + }, + { + "epoch": 0.7743723373454109, + "grad_norm": 601.033935546875, + "learning_rate": 1.700720168502703e-06, + "loss": 94.863, + "step": 93610 + }, + { + "epoch": 0.7744550605947802, + "grad_norm": 1244.7003173828125, + "learning_rate": 1.6996605610799682e-06, + "loss": 89.0981, + "step": 93620 + }, + { + "epoch": 0.7745377838441494, + "grad_norm": 423.31646728515625, + "learning_rate": 1.6986012162500953e-06, + "loss": 90.755, + "step": 93630 + }, + { + "epoch": 0.7746205070935186, + "grad_norm": 583.6690673828125, + "learning_rate": 1.697542134097373e-06, + "loss": 102.8274, + "step": 93640 + }, + { + "epoch": 0.7747032303428879, + "grad_norm": 1013.60693359375, + "learning_rate": 1.6964833147060661e-06, + "loss": 76.9443, + "step": 93650 + 
}, + { + "epoch": 0.7747859535922571, + "grad_norm": 1234.383056640625, + "learning_rate": 1.6954247581604216e-06, + "loss": 84.067, + "step": 93660 + }, + { + "epoch": 0.7748686768416263, + "grad_norm": 1029.810791015625, + "learning_rate": 1.6943664645446622e-06, + "loss": 89.6185, + "step": 93670 + }, + { + "epoch": 0.7749514000909956, + "grad_norm": 1082.3262939453125, + "learning_rate": 1.6933084339429935e-06, + "loss": 88.7879, + "step": 93680 + }, + { + "epoch": 0.7750341233403648, + "grad_norm": 1071.9556884765625, + "learning_rate": 1.692250666439596e-06, + "loss": 78.9346, + "step": 93690 + }, + { + "epoch": 0.775116846589734, + "grad_norm": 677.65771484375, + "learning_rate": 1.6911931621186329e-06, + "loss": 82.8995, + "step": 93700 + }, + { + "epoch": 0.7751995698391033, + "grad_norm": 867.5599365234375, + "learning_rate": 1.6901359210642444e-06, + "loss": 78.1593, + "step": 93710 + }, + { + "epoch": 0.7752822930884725, + "grad_norm": 942.0477905273438, + "learning_rate": 1.6890789433605508e-06, + "loss": 94.003, + "step": 93720 + }, + { + "epoch": 0.7753650163378417, + "grad_norm": 519.2806396484375, + "learning_rate": 1.6880222290916503e-06, + "loss": 60.9571, + "step": 93730 + }, + { + "epoch": 0.775447739587211, + "grad_norm": 774.544921875, + "learning_rate": 1.686965778341621e-06, + "loss": 84.0111, + "step": 93740 + }, + { + "epoch": 0.7755304628365802, + "grad_norm": 667.4334716796875, + "learning_rate": 1.68590959119452e-06, + "loss": 76.5631, + "step": 93750 + }, + { + "epoch": 0.7756131860859494, + "grad_norm": 467.7774353027344, + "learning_rate": 1.6848536677343836e-06, + "loss": 60.4382, + "step": 93760 + }, + { + "epoch": 0.7756959093353187, + "grad_norm": 455.2456970214844, + "learning_rate": 1.683798008045226e-06, + "loss": 78.7938, + "step": 93770 + }, + { + "epoch": 0.7757786325846879, + "grad_norm": 1058.66943359375, + "learning_rate": 1.6827426122110412e-06, + "loss": 78.3543, + "step": 93780 + }, + { + "epoch": 0.7758613558340571, + "grad_norm": 635.2317504882812, + "learning_rate": 1.6816874803158034e-06, + "loss": 79.6428, + "step": 93790 + }, + { + "epoch": 0.7759440790834264, + "grad_norm": 562.6470947265625, + "learning_rate": 1.6806326124434634e-06, + "loss": 59.4104, + "step": 93800 + }, + { + "epoch": 0.7760268023327956, + "grad_norm": 832.704833984375, + "learning_rate": 1.679578008677953e-06, + "loss": 97.8018, + "step": 93810 + }, + { + "epoch": 0.7761095255821648, + "grad_norm": 826.8822631835938, + "learning_rate": 1.6785236691031808e-06, + "loss": 66.9371, + "step": 93820 + }, + { + "epoch": 0.7761922488315341, + "grad_norm": 717.45068359375, + "learning_rate": 1.6774695938030378e-06, + "loss": 60.8474, + "step": 93830 + }, + { + "epoch": 0.7762749720809033, + "grad_norm": 893.1639404296875, + "learning_rate": 1.6764157828613902e-06, + "loss": 94.3922, + "step": 93840 + }, + { + "epoch": 0.7763576953302725, + "grad_norm": 422.0210876464844, + "learning_rate": 1.675362236362086e-06, + "loss": 73.2281, + "step": 93850 + }, + { + "epoch": 0.7764404185796419, + "grad_norm": 989.2019653320312, + "learning_rate": 1.6743089543889502e-06, + "loss": 103.5385, + "step": 93860 + }, + { + "epoch": 0.776523141829011, + "grad_norm": 823.348388671875, + "learning_rate": 1.6732559370257884e-06, + "loss": 91.4836, + "step": 93870 + }, + { + "epoch": 0.7766058650783803, + "grad_norm": 1299.27392578125, + "learning_rate": 1.6722031843563836e-06, + "loss": 93.1182, + "step": 93880 + }, + { + "epoch": 0.7766885883277496, + "grad_norm": 577.8670654296875, + 
"learning_rate": 1.6711506964644992e-06, + "loss": 93.8182, + "step": 93890 + }, + { + "epoch": 0.7767713115771188, + "grad_norm": 502.0442810058594, + "learning_rate": 1.6700984734338765e-06, + "loss": 68.5818, + "step": 93900 + }, + { + "epoch": 0.776854034826488, + "grad_norm": 1039.2012939453125, + "learning_rate": 1.669046515348236e-06, + "loss": 85.6031, + "step": 93910 + }, + { + "epoch": 0.7769367580758573, + "grad_norm": 390.88385009765625, + "learning_rate": 1.6679948222912773e-06, + "loss": 79.0019, + "step": 93920 + }, + { + "epoch": 0.7770194813252265, + "grad_norm": 908.3690795898438, + "learning_rate": 1.6669433943466789e-06, + "loss": 125.9054, + "step": 93930 + }, + { + "epoch": 0.7771022045745957, + "grad_norm": 942.0368041992188, + "learning_rate": 1.6658922315980975e-06, + "loss": 101.4422, + "step": 93940 + }, + { + "epoch": 0.777184927823965, + "grad_norm": 817.743896484375, + "learning_rate": 1.6648413341291703e-06, + "loss": 79.908, + "step": 93950 + }, + { + "epoch": 0.7772676510733342, + "grad_norm": 882.8682250976562, + "learning_rate": 1.6637907020235117e-06, + "loss": 76.305, + "step": 93960 + }, + { + "epoch": 0.7773503743227034, + "grad_norm": 1031.4886474609375, + "learning_rate": 1.662740335364717e-06, + "loss": 126.5732, + "step": 93970 + }, + { + "epoch": 0.7774330975720727, + "grad_norm": 771.1046142578125, + "learning_rate": 1.661690234236355e-06, + "loss": 95.6885, + "step": 93980 + }, + { + "epoch": 0.7775158208214419, + "grad_norm": 716.6741943359375, + "learning_rate": 1.6606403987219815e-06, + "loss": 78.7987, + "step": 93990 + }, + { + "epoch": 0.7775985440708111, + "grad_norm": 630.847900390625, + "learning_rate": 1.6595908289051266e-06, + "loss": 90.0902, + "step": 94000 + }, + { + "epoch": 0.7776812673201804, + "grad_norm": 845.2430419921875, + "learning_rate": 1.6585415248692988e-06, + "loss": 88.3763, + "step": 94010 + }, + { + "epoch": 0.7777639905695496, + "grad_norm": 1075.101318359375, + "learning_rate": 1.6574924866979863e-06, + "loss": 54.1172, + "step": 94020 + }, + { + "epoch": 0.7778467138189188, + "grad_norm": 1161.794921875, + "learning_rate": 1.6564437144746564e-06, + "loss": 78.1358, + "step": 94030 + }, + { + "epoch": 0.777929437068288, + "grad_norm": 867.4984130859375, + "learning_rate": 1.6553952082827562e-06, + "loss": 96.2229, + "step": 94040 + }, + { + "epoch": 0.7780121603176573, + "grad_norm": 909.298583984375, + "learning_rate": 1.6543469682057105e-06, + "loss": 99.5179, + "step": 94050 + }, + { + "epoch": 0.7780948835670265, + "grad_norm": 684.060791015625, + "learning_rate": 1.6532989943269207e-06, + "loss": 81.0873, + "step": 94060 + }, + { + "epoch": 0.7781776068163957, + "grad_norm": 1213.9482421875, + "learning_rate": 1.6522512867297707e-06, + "loss": 106.0108, + "step": 94070 + }, + { + "epoch": 0.778260330065765, + "grad_norm": 980.2380981445312, + "learning_rate": 1.6512038454976198e-06, + "loss": 85.677, + "step": 94080 + }, + { + "epoch": 0.7783430533151342, + "grad_norm": 450.2137451171875, + "learning_rate": 1.6501566707138116e-06, + "loss": 65.2738, + "step": 94090 + }, + { + "epoch": 0.7784257765645034, + "grad_norm": 525.07177734375, + "learning_rate": 1.6491097624616637e-06, + "loss": 75.8729, + "step": 94100 + }, + { + "epoch": 0.7785084998138727, + "grad_norm": 908.7725219726562, + "learning_rate": 1.6480631208244735e-06, + "loss": 125.1627, + "step": 94110 + }, + { + "epoch": 0.7785912230632419, + "grad_norm": 468.6520690917969, + "learning_rate": 1.6470167458855174e-06, + "loss": 110.0471, + "step": 
94120 + }, + { + "epoch": 0.7786739463126111, + "grad_norm": 1324.9775390625, + "learning_rate": 1.645970637728051e-06, + "loss": 73.7453, + "step": 94130 + }, + { + "epoch": 0.7787566695619804, + "grad_norm": 1378.573486328125, + "learning_rate": 1.6449247964353094e-06, + "loss": 115.2216, + "step": 94140 + }, + { + "epoch": 0.7788393928113496, + "grad_norm": 1270.413330078125, + "learning_rate": 1.643879222090502e-06, + "loss": 75.4719, + "step": 94150 + }, + { + "epoch": 0.7789221160607188, + "grad_norm": 823.2882690429688, + "learning_rate": 1.642833914776823e-06, + "loss": 99.8761, + "step": 94160 + }, + { + "epoch": 0.7790048393100881, + "grad_norm": 1631.2496337890625, + "learning_rate": 1.6417888745774418e-06, + "loss": 93.0183, + "step": 94170 + }, + { + "epoch": 0.7790875625594573, + "grad_norm": 618.7994384765625, + "learning_rate": 1.640744101575506e-06, + "loss": 87.0626, + "step": 94180 + }, + { + "epoch": 0.7791702858088265, + "grad_norm": 874.3128051757812, + "learning_rate": 1.6396995958541468e-06, + "loss": 78.908, + "step": 94190 + }, + { + "epoch": 0.7792530090581958, + "grad_norm": 1032.2872314453125, + "learning_rate": 1.6386553574964691e-06, + "loss": 87.541, + "step": 94200 + }, + { + "epoch": 0.779335732307565, + "grad_norm": 711.1810302734375, + "learning_rate": 1.6376113865855585e-06, + "loss": 65.8986, + "step": 94210 + }, + { + "epoch": 0.7794184555569342, + "grad_norm": 1084.5020751953125, + "learning_rate": 1.6365676832044796e-06, + "loss": 92.7198, + "step": 94220 + }, + { + "epoch": 0.7795011788063035, + "grad_norm": 877.0111694335938, + "learning_rate": 1.6355242474362732e-06, + "loss": 100.024, + "step": 94230 + }, + { + "epoch": 0.7795839020556727, + "grad_norm": 1269.5721435546875, + "learning_rate": 1.634481079363961e-06, + "loss": 84.5142, + "step": 94240 + }, + { + "epoch": 0.7796666253050419, + "grad_norm": 683.024169921875, + "learning_rate": 1.6334381790705439e-06, + "loss": 81.7544, + "step": 94250 + }, + { + "epoch": 0.7797493485544112, + "grad_norm": 611.2015991210938, + "learning_rate": 1.6323955466390001e-06, + "loss": 94.3467, + "step": 94260 + }, + { + "epoch": 0.7798320718037804, + "grad_norm": 724.1978149414062, + "learning_rate": 1.6313531821522876e-06, + "loss": 93.952, + "step": 94270 + }, + { + "epoch": 0.7799147950531496, + "grad_norm": 540.2195434570312, + "learning_rate": 1.6303110856933413e-06, + "loss": 78.9152, + "step": 94280 + }, + { + "epoch": 0.779997518302519, + "grad_norm": 1860.32666015625, + "learning_rate": 1.629269257345078e-06, + "loss": 131.9263, + "step": 94290 + }, + { + "epoch": 0.7800802415518882, + "grad_norm": 987.336181640625, + "learning_rate": 1.628227697190391e-06, + "loss": 98.8791, + "step": 94300 + }, + { + "epoch": 0.7801629648012574, + "grad_norm": 608.0608520507812, + "learning_rate": 1.6271864053121528e-06, + "loss": 141.6185, + "step": 94310 + }, + { + "epoch": 0.7802456880506267, + "grad_norm": 752.9619140625, + "learning_rate": 1.6261453817932122e-06, + "loss": 61.8438, + "step": 94320 + }, + { + "epoch": 0.7803284112999959, + "grad_norm": 849.4782104492188, + "learning_rate": 1.6251046267163988e-06, + "loss": 94.6276, + "step": 94330 + }, + { + "epoch": 0.7804111345493651, + "grad_norm": 788.5552368164062, + "learning_rate": 1.6240641401645224e-06, + "loss": 96.3306, + "step": 94340 + }, + { + "epoch": 0.7804938577987344, + "grad_norm": 507.6255187988281, + "learning_rate": 1.6230239222203687e-06, + "loss": 103.3219, + "step": 94350 + }, + { + "epoch": 0.7805765810481036, + "grad_norm": 
920.311767578125, + "learning_rate": 1.621983972966703e-06, + "loss": 114.5354, + "step": 94360 + }, + { + "epoch": 0.7806593042974728, + "grad_norm": 1070.4920654296875, + "learning_rate": 1.6209442924862684e-06, + "loss": 107.5379, + "step": 94370 + }, + { + "epoch": 0.7807420275468421, + "grad_norm": 1888.6494140625, + "learning_rate": 1.6199048808617896e-06, + "loss": 89.8358, + "step": 94380 + }, + { + "epoch": 0.7808247507962113, + "grad_norm": 1030.0611572265625, + "learning_rate": 1.6188657381759676e-06, + "loss": 86.013, + "step": 94390 + }, + { + "epoch": 0.7809074740455805, + "grad_norm": 437.46112060546875, + "learning_rate": 1.6178268645114826e-06, + "loss": 73.3406, + "step": 94400 + }, + { + "epoch": 0.7809901972949498, + "grad_norm": 701.3662719726562, + "learning_rate": 1.6167882599509904e-06, + "loss": 88.5935, + "step": 94410 + }, + { + "epoch": 0.781072920544319, + "grad_norm": 639.0773315429688, + "learning_rate": 1.6157499245771296e-06, + "loss": 73.676, + "step": 94420 + }, + { + "epoch": 0.7811556437936882, + "grad_norm": 894.396484375, + "learning_rate": 1.6147118584725163e-06, + "loss": 90.9503, + "step": 94430 + }, + { + "epoch": 0.7812383670430575, + "grad_norm": 1062.1143798828125, + "learning_rate": 1.6136740617197433e-06, + "loss": 90.4783, + "step": 94440 + }, + { + "epoch": 0.7813210902924267, + "grad_norm": 959.5033569335938, + "learning_rate": 1.612636534401384e-06, + "loss": 61.4715, + "step": 94450 + }, + { + "epoch": 0.7814038135417959, + "grad_norm": 1169.7418212890625, + "learning_rate": 1.61159927659999e-06, + "loss": 77.9506, + "step": 94460 + }, + { + "epoch": 0.7814865367911652, + "grad_norm": 1380.6806640625, + "learning_rate": 1.6105622883980893e-06, + "loss": 80.2579, + "step": 94470 + }, + { + "epoch": 0.7815692600405344, + "grad_norm": 1116.7760009765625, + "learning_rate": 1.6095255698781954e-06, + "loss": 120.8729, + "step": 94480 + }, + { + "epoch": 0.7816519832899036, + "grad_norm": 709.1279907226562, + "learning_rate": 1.6084891211227899e-06, + "loss": 89.6381, + "step": 94490 + }, + { + "epoch": 0.7817347065392729, + "grad_norm": 626.1630859375, + "learning_rate": 1.6074529422143398e-06, + "loss": 82.1522, + "step": 94500 + }, + { + "epoch": 0.7818174297886421, + "grad_norm": 1719.0965576171875, + "learning_rate": 1.6064170332352897e-06, + "loss": 68.7441, + "step": 94510 + }, + { + "epoch": 0.7819001530380113, + "grad_norm": 716.7944946289062, + "learning_rate": 1.6053813942680618e-06, + "loss": 82.9248, + "step": 94520 + }, + { + "epoch": 0.7819828762873806, + "grad_norm": 828.8280029296875, + "learning_rate": 1.604346025395057e-06, + "loss": 79.9472, + "step": 94530 + }, + { + "epoch": 0.7820655995367498, + "grad_norm": 675.82568359375, + "learning_rate": 1.6033109266986552e-06, + "loss": 118.3841, + "step": 94540 + }, + { + "epoch": 0.782148322786119, + "grad_norm": 966.230224609375, + "learning_rate": 1.602276098261214e-06, + "loss": 103.3147, + "step": 94550 + }, + { + "epoch": 0.7822310460354883, + "grad_norm": 696.0429077148438, + "learning_rate": 1.6012415401650706e-06, + "loss": 98.5404, + "step": 94560 + }, + { + "epoch": 0.7823137692848575, + "grad_norm": 739.5772705078125, + "learning_rate": 1.6002072524925395e-06, + "loss": 78.0328, + "step": 94570 + }, + { + "epoch": 0.7823964925342267, + "grad_norm": 537.0844116210938, + "learning_rate": 1.5991732353259142e-06, + "loss": 76.4263, + "step": 94580 + }, + { + "epoch": 0.782479215783596, + "grad_norm": 462.9024658203125, + "learning_rate": 1.598139488747467e-06, + "loss": 
95.2623, + "step": 94590 + }, + { + "epoch": 0.7825619390329652, + "grad_norm": 698.7216186523438, + "learning_rate": 1.5971060128394483e-06, + "loss": 85.2989, + "step": 94600 + }, + { + "epoch": 0.7826446622823344, + "grad_norm": 887.9057006835938, + "learning_rate": 1.596072807684087e-06, + "loss": 112.7182, + "step": 94610 + }, + { + "epoch": 0.7827273855317037, + "grad_norm": 981.1459350585938, + "learning_rate": 1.5950398733635903e-06, + "loss": 94.8544, + "step": 94620 + }, + { + "epoch": 0.7828101087810729, + "grad_norm": 1017.7367553710938, + "learning_rate": 1.5940072099601446e-06, + "loss": 69.3493, + "step": 94630 + }, + { + "epoch": 0.7828928320304421, + "grad_norm": 593.1333618164062, + "learning_rate": 1.5929748175559135e-06, + "loss": 60.2959, + "step": 94640 + }, + { + "epoch": 0.7829755552798114, + "grad_norm": 808.7359008789062, + "learning_rate": 1.5919426962330398e-06, + "loss": 85.7781, + "step": 94650 + }, + { + "epoch": 0.7830582785291806, + "grad_norm": 1296.2574462890625, + "learning_rate": 1.5909108460736455e-06, + "loss": 82.9248, + "step": 94660 + }, + { + "epoch": 0.7831410017785498, + "grad_norm": 622.1264038085938, + "learning_rate": 1.589879267159829e-06, + "loss": 74.3232, + "step": 94670 + }, + { + "epoch": 0.7832237250279191, + "grad_norm": 381.82867431640625, + "learning_rate": 1.5888479595736695e-06, + "loss": 91.2148, + "step": 94680 + }, + { + "epoch": 0.7833064482772883, + "grad_norm": 378.500244140625, + "learning_rate": 1.5878169233972218e-06, + "loss": 89.309, + "step": 94690 + }, + { + "epoch": 0.7833891715266575, + "grad_norm": 641.9385375976562, + "learning_rate": 1.5867861587125228e-06, + "loss": 81.5497, + "step": 94700 + }, + { + "epoch": 0.7834718947760269, + "grad_norm": 2622.712890625, + "learning_rate": 1.5857556656015837e-06, + "loss": 101.5679, + "step": 94710 + }, + { + "epoch": 0.783554618025396, + "grad_norm": 1032.7587890625, + "learning_rate": 1.5847254441463978e-06, + "loss": 78.4932, + "step": 94720 + }, + { + "epoch": 0.7836373412747653, + "grad_norm": 776.2950439453125, + "learning_rate": 1.583695494428934e-06, + "loss": 71.536, + "step": 94730 + }, + { + "epoch": 0.7837200645241346, + "grad_norm": 942.0223999023438, + "learning_rate": 1.5826658165311409e-06, + "loss": 88.8115, + "step": 94740 + }, + { + "epoch": 0.7838027877735038, + "grad_norm": 739.5013427734375, + "learning_rate": 1.5816364105349451e-06, + "loss": 117.7168, + "step": 94750 + }, + { + "epoch": 0.783885511022873, + "grad_norm": 477.8927001953125, + "learning_rate": 1.5806072765222524e-06, + "loss": 96.1067, + "step": 94760 + }, + { + "epoch": 0.7839682342722422, + "grad_norm": 751.9795532226562, + "learning_rate": 1.5795784145749453e-06, + "loss": 78.0177, + "step": 94770 + }, + { + "epoch": 0.7840509575216115, + "grad_norm": 756.887451171875, + "learning_rate": 1.5785498247748864e-06, + "loss": 82.0907, + "step": 94780 + }, + { + "epoch": 0.7841336807709807, + "grad_norm": 814.6382446289062, + "learning_rate": 1.5775215072039157e-06, + "loss": 118.0077, + "step": 94790 + }, + { + "epoch": 0.7842164040203499, + "grad_norm": 492.3858337402344, + "learning_rate": 1.5764934619438515e-06, + "loss": 83.478, + "step": 94800 + }, + { + "epoch": 0.7842991272697192, + "grad_norm": 1426.8245849609375, + "learning_rate": 1.5754656890764912e-06, + "loss": 82.622, + "step": 94810 + }, + { + "epoch": 0.7843818505190884, + "grad_norm": 838.7455444335938, + "learning_rate": 1.5744381886836091e-06, + "loss": 99.2792, + "step": 94820 + }, + { + "epoch": 0.7844645737684576, + 
"grad_norm": 848.1531982421875, + "learning_rate": 1.5734109608469612e-06, + "loss": 92.1673, + "step": 94830 + }, + { + "epoch": 0.7845472970178269, + "grad_norm": 401.6071472167969, + "learning_rate": 1.5723840056482731e-06, + "loss": 79.848, + "step": 94840 + }, + { + "epoch": 0.7846300202671961, + "grad_norm": 879.9978637695312, + "learning_rate": 1.5713573231692613e-06, + "loss": 59.712, + "step": 94850 + }, + { + "epoch": 0.7847127435165653, + "grad_norm": 913.5989990234375, + "learning_rate": 1.5703309134916116e-06, + "loss": 74.9016, + "step": 94860 + }, + { + "epoch": 0.7847954667659346, + "grad_norm": 1169.180908203125, + "learning_rate": 1.5693047766969916e-06, + "loss": 88.2039, + "step": 94870 + }, + { + "epoch": 0.7848781900153038, + "grad_norm": 809.9779663085938, + "learning_rate": 1.568278912867045e-06, + "loss": 110.5675, + "step": 94880 + }, + { + "epoch": 0.784960913264673, + "grad_norm": 501.2241516113281, + "learning_rate": 1.5672533220833962e-06, + "loss": 67.3911, + "step": 94890 + }, + { + "epoch": 0.7850436365140423, + "grad_norm": 1112.9195556640625, + "learning_rate": 1.5662280044276467e-06, + "loss": 94.5414, + "step": 94900 + }, + { + "epoch": 0.7851263597634115, + "grad_norm": 1013.2802734375, + "learning_rate": 1.5652029599813773e-06, + "loss": 106.9486, + "step": 94910 + }, + { + "epoch": 0.7852090830127807, + "grad_norm": 693.358154296875, + "learning_rate": 1.564178188826143e-06, + "loss": 78.1435, + "step": 94920 + }, + { + "epoch": 0.78529180626215, + "grad_norm": 1105.666259765625, + "learning_rate": 1.5631536910434807e-06, + "loss": 99.9668, + "step": 94930 + }, + { + "epoch": 0.7853745295115192, + "grad_norm": 1162.7235107421875, + "learning_rate": 1.5621294667149079e-06, + "loss": 87.6308, + "step": 94940 + }, + { + "epoch": 0.7854572527608884, + "grad_norm": 539.553955078125, + "learning_rate": 1.561105515921915e-06, + "loss": 91.6051, + "step": 94950 + }, + { + "epoch": 0.7855399760102577, + "grad_norm": 722.3108520507812, + "learning_rate": 1.5600818387459748e-06, + "loss": 64.7316, + "step": 94960 + }, + { + "epoch": 0.7856226992596269, + "grad_norm": 381.7198486328125, + "learning_rate": 1.559058435268535e-06, + "loss": 66.9805, + "step": 94970 + }, + { + "epoch": 0.7857054225089961, + "grad_norm": 635.7542724609375, + "learning_rate": 1.558035305571024e-06, + "loss": 114.3884, + "step": 94980 + }, + { + "epoch": 0.7857881457583654, + "grad_norm": 1400.4791259765625, + "learning_rate": 1.5570124497348466e-06, + "loss": 110.485, + "step": 94990 + }, + { + "epoch": 0.7858708690077346, + "grad_norm": 678.18896484375, + "learning_rate": 1.5559898678413898e-06, + "loss": 96.3298, + "step": 95000 + }, + { + "epoch": 0.7859535922571038, + "grad_norm": 1071.6373291015625, + "learning_rate": 1.554967559972011e-06, + "loss": 98.7113, + "step": 95010 + }, + { + "epoch": 0.7860363155064731, + "grad_norm": 1374.9266357421875, + "learning_rate": 1.5539455262080534e-06, + "loss": 118.118, + "step": 95020 + }, + { + "epoch": 0.7861190387558423, + "grad_norm": 759.0629272460938, + "learning_rate": 1.5529237666308333e-06, + "loss": 93.9589, + "step": 95030 + }, + { + "epoch": 0.7862017620052115, + "grad_norm": 854.2042236328125, + "learning_rate": 1.551902281321651e-06, + "loss": 77.0455, + "step": 95040 + }, + { + "epoch": 0.7862844852545808, + "grad_norm": 812.8806762695312, + "learning_rate": 1.5508810703617794e-06, + "loss": 79.7987, + "step": 95050 + }, + { + "epoch": 0.78636720850395, + "grad_norm": 709.544677734375, + "learning_rate": 
1.5498601338324715e-06, + "loss": 79.4533, + "step": 95060 + }, + { + "epoch": 0.7864499317533192, + "grad_norm": 869.2487182617188, + "learning_rate": 1.5488394718149586e-06, + "loss": 70.6014, + "step": 95070 + }, + { + "epoch": 0.7865326550026885, + "grad_norm": 612.1897583007812, + "learning_rate": 1.5478190843904523e-06, + "loss": 80.7072, + "step": 95080 + }, + { + "epoch": 0.7866153782520577, + "grad_norm": 713.2626342773438, + "learning_rate": 1.546798971640136e-06, + "loss": 62.9009, + "step": 95090 + }, + { + "epoch": 0.7866981015014269, + "grad_norm": 761.9999389648438, + "learning_rate": 1.5457791336451777e-06, + "loss": 74.8367, + "step": 95100 + }, + { + "epoch": 0.7867808247507962, + "grad_norm": 1282.5313720703125, + "learning_rate": 1.5447595704867213e-06, + "loss": 114.0064, + "step": 95110 + }, + { + "epoch": 0.7868635480001654, + "grad_norm": 748.5642700195312, + "learning_rate": 1.543740282245888e-06, + "loss": 96.991, + "step": 95120 + }, + { + "epoch": 0.7869462712495346, + "grad_norm": 755.327392578125, + "learning_rate": 1.5427212690037774e-06, + "loss": 69.9441, + "step": 95130 + }, + { + "epoch": 0.787028994498904, + "grad_norm": 669.56201171875, + "learning_rate": 1.5417025308414695e-06, + "loss": 101.2025, + "step": 95140 + }, + { + "epoch": 0.7871117177482732, + "grad_norm": 710.7820434570312, + "learning_rate": 1.5406840678400204e-06, + "loss": 98.5515, + "step": 95150 + }, + { + "epoch": 0.7871944409976424, + "grad_norm": 1083.8662109375, + "learning_rate": 1.5396658800804632e-06, + "loss": 102.2657, + "step": 95160 + }, + { + "epoch": 0.7872771642470117, + "grad_norm": 1122.4599609375, + "learning_rate": 1.5386479676438132e-06, + "loss": 85.0763, + "step": 95170 + }, + { + "epoch": 0.7873598874963809, + "grad_norm": 1315.6075439453125, + "learning_rate": 1.5376303306110574e-06, + "loss": 93.9366, + "step": 95180 + }, + { + "epoch": 0.7874426107457501, + "grad_norm": 796.9031372070312, + "learning_rate": 1.536612969063166e-06, + "loss": 73.0806, + "step": 95190 + }, + { + "epoch": 0.7875253339951194, + "grad_norm": 571.8799438476562, + "learning_rate": 1.5355958830810858e-06, + "loss": 67.0068, + "step": 95200 + }, + { + "epoch": 0.7876080572444886, + "grad_norm": 558.7503051757812, + "learning_rate": 1.5345790727457416e-06, + "loss": 92.4161, + "step": 95210 + }, + { + "epoch": 0.7876907804938578, + "grad_norm": 538.6721801757812, + "learning_rate": 1.5335625381380364e-06, + "loss": 75.6145, + "step": 95220 + }, + { + "epoch": 0.7877735037432271, + "grad_norm": 1402.743408203125, + "learning_rate": 1.5325462793388502e-06, + "loss": 127.778, + "step": 95230 + }, + { + "epoch": 0.7878562269925963, + "grad_norm": 934.5457763671875, + "learning_rate": 1.531530296429044e-06, + "loss": 93.78, + "step": 95240 + }, + { + "epoch": 0.7879389502419655, + "grad_norm": 658.7655029296875, + "learning_rate": 1.5305145894894547e-06, + "loss": 99.2224, + "step": 95250 + }, + { + "epoch": 0.7880216734913348, + "grad_norm": 1240.9752197265625, + "learning_rate": 1.5294991586008977e-06, + "loss": 99.2145, + "step": 95260 + }, + { + "epoch": 0.788104396740704, + "grad_norm": 847.5444946289062, + "learning_rate": 1.528484003844164e-06, + "loss": 105.1305, + "step": 95270 + }, + { + "epoch": 0.7881871199900732, + "grad_norm": 1418.359130859375, + "learning_rate": 1.5274691253000257e-06, + "loss": 150.746, + "step": 95280 + }, + { + "epoch": 0.7882698432394425, + "grad_norm": 576.1387939453125, + "learning_rate": 1.5264545230492323e-06, + "loss": 72.2739, + "step": 95290 + }, + { 
+ "epoch": 0.7883525664888117, + "grad_norm": 1523.2347412109375, + "learning_rate": 1.5254401971725114e-06, + "loss": 76.5713, + "step": 95300 + }, + { + "epoch": 0.7884352897381809, + "grad_norm": 850.4096069335938, + "learning_rate": 1.5244261477505678e-06, + "loss": 56.1657, + "step": 95310 + }, + { + "epoch": 0.7885180129875502, + "grad_norm": 620.0430908203125, + "learning_rate": 1.5234123748640834e-06, + "loss": 91.6503, + "step": 95320 + }, + { + "epoch": 0.7886007362369194, + "grad_norm": 732.49267578125, + "learning_rate": 1.5223988785937222e-06, + "loss": 91.2292, + "step": 95330 + }, + { + "epoch": 0.7886834594862886, + "grad_norm": 1129.179443359375, + "learning_rate": 1.5213856590201241e-06, + "loss": 67.9968, + "step": 95340 + }, + { + "epoch": 0.7887661827356579, + "grad_norm": 744.8907470703125, + "learning_rate": 1.520372716223903e-06, + "loss": 109.5926, + "step": 95350 + }, + { + "epoch": 0.7888489059850271, + "grad_norm": 1017.0743408203125, + "learning_rate": 1.5193600502856548e-06, + "loss": 108.5264, + "step": 95360 + }, + { + "epoch": 0.7889316292343963, + "grad_norm": 150.6322021484375, + "learning_rate": 1.5183476612859538e-06, + "loss": 100.5434, + "step": 95370 + }, + { + "epoch": 0.7890143524837656, + "grad_norm": 1154.6719970703125, + "learning_rate": 1.5173355493053509e-06, + "loss": 91.2678, + "step": 95380 + }, + { + "epoch": 0.7890970757331348, + "grad_norm": 906.0249633789062, + "learning_rate": 1.5163237144243754e-06, + "loss": 119.1058, + "step": 95390 + }, + { + "epoch": 0.789179798982504, + "grad_norm": 976.3828735351562, + "learning_rate": 1.5153121567235334e-06, + "loss": 90.547, + "step": 95400 + }, + { + "epoch": 0.7892625222318733, + "grad_norm": 1162.3525390625, + "learning_rate": 1.5143008762833112e-06, + "loss": 95.5285, + "step": 95410 + }, + { + "epoch": 0.7893452454812425, + "grad_norm": 1161.5557861328125, + "learning_rate": 1.5132898731841689e-06, + "loss": 82.0131, + "step": 95420 + }, + { + "epoch": 0.7894279687306117, + "grad_norm": 785.49755859375, + "learning_rate": 1.512279147506553e-06, + "loss": 79.7298, + "step": 95430 + }, + { + "epoch": 0.789510691979981, + "grad_norm": 1093.42333984375, + "learning_rate": 1.5112686993308768e-06, + "loss": 117.2086, + "step": 95440 + }, + { + "epoch": 0.7895934152293502, + "grad_norm": 1063.107177734375, + "learning_rate": 1.5102585287375394e-06, + "loss": 89.8321, + "step": 95450 + }, + { + "epoch": 0.7896761384787194, + "grad_norm": 987.2368774414062, + "learning_rate": 1.5092486358069154e-06, + "loss": 105.4375, + "step": 95460 + }, + { + "epoch": 0.7897588617280887, + "grad_norm": 788.5397338867188, + "learning_rate": 1.508239020619357e-06, + "loss": 104.0471, + "step": 95470 + }, + { + "epoch": 0.7898415849774579, + "grad_norm": 926.9537963867188, + "learning_rate": 1.5072296832551942e-06, + "loss": 72.8565, + "step": 95480 + }, + { + "epoch": 0.7899243082268271, + "grad_norm": 943.4337158203125, + "learning_rate": 1.5062206237947363e-06, + "loss": 100.017, + "step": 95490 + }, + { + "epoch": 0.7900070314761963, + "grad_norm": 588.101806640625, + "learning_rate": 1.5052118423182688e-06, + "loss": 69.1182, + "step": 95500 + }, + { + "epoch": 0.7900897547255656, + "grad_norm": 267.3822021484375, + "learning_rate": 1.5042033389060563e-06, + "loss": 62.8242, + "step": 95510 + }, + { + "epoch": 0.7901724779749348, + "grad_norm": 601.46875, + "learning_rate": 1.5031951136383406e-06, + "loss": 107.1168, + "step": 95520 + }, + { + "epoch": 0.790255201224304, + "grad_norm": 795.5377197265625, + 
"learning_rate": 1.5021871665953414e-06, + "loss": 70.5595, + "step": 95530 + }, + { + "epoch": 0.7903379244736733, + "grad_norm": 1379.524658203125, + "learning_rate": 1.5011794978572568e-06, + "loss": 84.3169, + "step": 95540 + }, + { + "epoch": 0.7904206477230425, + "grad_norm": 967.1463012695312, + "learning_rate": 1.500172107504262e-06, + "loss": 78.8489, + "step": 95550 + }, + { + "epoch": 0.7905033709724117, + "grad_norm": 608.6145629882812, + "learning_rate": 1.4991649956165105e-06, + "loss": 64.6662, + "step": 95560 + }, + { + "epoch": 0.790586094221781, + "grad_norm": 695.8089599609375, + "learning_rate": 1.4981581622741337e-06, + "loss": 158.1259, + "step": 95570 + }, + { + "epoch": 0.7906688174711503, + "grad_norm": 959.3106079101562, + "learning_rate": 1.4971516075572407e-06, + "loss": 74.8305, + "step": 95580 + }, + { + "epoch": 0.7907515407205195, + "grad_norm": 1071.234375, + "learning_rate": 1.4961453315459184e-06, + "loss": 65.4966, + "step": 95590 + }, + { + "epoch": 0.7908342639698888, + "grad_norm": 680.824951171875, + "learning_rate": 1.4951393343202314e-06, + "loss": 92.0542, + "step": 95600 + }, + { + "epoch": 0.790916987219258, + "grad_norm": 884.766357421875, + "learning_rate": 1.4941336159602227e-06, + "loss": 81.2589, + "step": 95610 + }, + { + "epoch": 0.7909997104686272, + "grad_norm": 1236.22900390625, + "learning_rate": 1.4931281765459122e-06, + "loss": 88.8313, + "step": 95620 + }, + { + "epoch": 0.7910824337179965, + "grad_norm": 1996.257568359375, + "learning_rate": 1.492123016157298e-06, + "loss": 86.4051, + "step": 95630 + }, + { + "epoch": 0.7911651569673657, + "grad_norm": 818.218994140625, + "learning_rate": 1.4911181348743569e-06, + "loss": 82.9814, + "step": 95640 + }, + { + "epoch": 0.7912478802167349, + "grad_norm": 925.0620727539062, + "learning_rate": 1.490113532777042e-06, + "loss": 140.5014, + "step": 95650 + }, + { + "epoch": 0.7913306034661042, + "grad_norm": 568.860595703125, + "learning_rate": 1.4891092099452853e-06, + "loss": 60.901, + "step": 95660 + }, + { + "epoch": 0.7914133267154734, + "grad_norm": 1018.9000244140625, + "learning_rate": 1.4881051664589958e-06, + "loss": 94.0601, + "step": 95670 + }, + { + "epoch": 0.7914960499648426, + "grad_norm": 646.0346069335938, + "learning_rate": 1.4871014023980607e-06, + "loss": 74.5163, + "step": 95680 + }, + { + "epoch": 0.7915787732142119, + "grad_norm": 749.203857421875, + "learning_rate": 1.486097917842345e-06, + "loss": 76.4487, + "step": 95690 + }, + { + "epoch": 0.7916614964635811, + "grad_norm": 519.8616333007812, + "learning_rate": 1.4850947128716914e-06, + "loss": 76.2586, + "step": 95700 + }, + { + "epoch": 0.7917442197129503, + "grad_norm": 1079.2755126953125, + "learning_rate": 1.4840917875659206e-06, + "loss": 85.6422, + "step": 95710 + }, + { + "epoch": 0.7918269429623196, + "grad_norm": 606.11474609375, + "learning_rate": 1.4830891420048298e-06, + "loss": 91.0621, + "step": 95720 + }, + { + "epoch": 0.7919096662116888, + "grad_norm": 1178.3045654296875, + "learning_rate": 1.482086776268196e-06, + "loss": 99.66, + "step": 95730 + }, + { + "epoch": 0.791992389461058, + "grad_norm": 980.57763671875, + "learning_rate": 1.4810846904357722e-06, + "loss": 96.6671, + "step": 95740 + }, + { + "epoch": 0.7920751127104273, + "grad_norm": 1058.7889404296875, + "learning_rate": 1.48008288458729e-06, + "loss": 109.5006, + "step": 95750 + }, + { + "epoch": 0.7921578359597965, + "grad_norm": 529.0836181640625, + "learning_rate": 1.4790813588024584e-06, + "loss": 88.0496, + "step": 95760 + }, 
+ { + "epoch": 0.7922405592091657, + "grad_norm": 1165.0322265625, + "learning_rate": 1.4780801131609657e-06, + "loss": 111.1433, + "step": 95770 + }, + { + "epoch": 0.792323282458535, + "grad_norm": 773.7676391601562, + "learning_rate": 1.477079147742474e-06, + "loss": 106.7083, + "step": 95780 + }, + { + "epoch": 0.7924060057079042, + "grad_norm": 863.2052612304688, + "learning_rate": 1.476078462626624e-06, + "loss": 83.5902, + "step": 95790 + }, + { + "epoch": 0.7924887289572734, + "grad_norm": 709.7434692382812, + "learning_rate": 1.4750780578930402e-06, + "loss": 66.4846, + "step": 95800 + }, + { + "epoch": 0.7925714522066427, + "grad_norm": 703.7163696289062, + "learning_rate": 1.4740779336213178e-06, + "loss": 86.0505, + "step": 95810 + }, + { + "epoch": 0.7926541754560119, + "grad_norm": 1240.612060546875, + "learning_rate": 1.4730780898910329e-06, + "loss": 108.2829, + "step": 95820 + }, + { + "epoch": 0.7927368987053811, + "grad_norm": 702.4696655273438, + "learning_rate": 1.4720785267817378e-06, + "loss": 59.4141, + "step": 95830 + }, + { + "epoch": 0.7928196219547504, + "grad_norm": 966.9011840820312, + "learning_rate": 1.4710792443729633e-06, + "loss": 112.7589, + "step": 95840 + }, + { + "epoch": 0.7929023452041196, + "grad_norm": 1277.8905029296875, + "learning_rate": 1.470080242744218e-06, + "loss": 72.1038, + "step": 95850 + }, + { + "epoch": 0.7929850684534888, + "grad_norm": 1504.42529296875, + "learning_rate": 1.4690815219749887e-06, + "loss": 88.3772, + "step": 95860 + }, + { + "epoch": 0.7930677917028581, + "grad_norm": 663.6552734375, + "learning_rate": 1.4680830821447368e-06, + "loss": 80.6271, + "step": 95870 + }, + { + "epoch": 0.7931505149522273, + "grad_norm": 686.2540283203125, + "learning_rate": 1.4670849233329032e-06, + "loss": 85.9933, + "step": 95880 + }, + { + "epoch": 0.7932332382015965, + "grad_norm": 648.2404174804688, + "learning_rate": 1.4660870456189098e-06, + "loss": 91.8078, + "step": 95890 + }, + { + "epoch": 0.7933159614509658, + "grad_norm": 1184.7274169921875, + "learning_rate": 1.4650894490821515e-06, + "loss": 105.1896, + "step": 95900 + }, + { + "epoch": 0.793398684700335, + "grad_norm": 900.9071655273438, + "learning_rate": 1.464092133802003e-06, + "loss": 95.3142, + "step": 95910 + }, + { + "epoch": 0.7934814079497042, + "grad_norm": 786.411865234375, + "learning_rate": 1.4630950998578158e-06, + "loss": 71.5299, + "step": 95920 + }, + { + "epoch": 0.7935641311990735, + "grad_norm": 537.1038818359375, + "learning_rate": 1.4620983473289192e-06, + "loss": 106.0131, + "step": 95930 + }, + { + "epoch": 0.7936468544484427, + "grad_norm": 907.1212158203125, + "learning_rate": 1.4611018762946217e-06, + "loss": 67.942, + "step": 95940 + }, + { + "epoch": 0.7937295776978119, + "grad_norm": 532.5782470703125, + "learning_rate": 1.4601056868342051e-06, + "loss": 77.3442, + "step": 95950 + }, + { + "epoch": 0.7938123009471812, + "grad_norm": 1575.781005859375, + "learning_rate": 1.4591097790269333e-06, + "loss": 121.2645, + "step": 95960 + }, + { + "epoch": 0.7938950241965504, + "grad_norm": 569.5699462890625, + "learning_rate": 1.4581141529520455e-06, + "loss": 100.5707, + "step": 95970 + }, + { + "epoch": 0.7939777474459196, + "grad_norm": 680.08251953125, + "learning_rate": 1.4571188086887583e-06, + "loss": 87.8128, + "step": 95980 + }, + { + "epoch": 0.794060470695289, + "grad_norm": 462.587890625, + "learning_rate": 1.4561237463162693e-06, + "loss": 92.0846, + "step": 95990 + }, + { + "epoch": 0.7941431939446582, + "grad_norm": 552.5233154296875, 
+ "learning_rate": 1.4551289659137497e-06, + "loss": 64.9093, + "step": 96000 + }, + { + "epoch": 0.7942259171940274, + "grad_norm": 878.8954467773438, + "learning_rate": 1.454134467560349e-06, + "loss": 90.1892, + "step": 96010 + }, + { + "epoch": 0.7943086404433967, + "grad_norm": 1012.231689453125, + "learning_rate": 1.453140251335196e-06, + "loss": 85.0141, + "step": 96020 + }, + { + "epoch": 0.7943913636927659, + "grad_norm": 762.498291015625, + "learning_rate": 1.4521463173173966e-06, + "loss": 85.905, + "step": 96030 + }, + { + "epoch": 0.7944740869421351, + "grad_norm": 857.3758544921875, + "learning_rate": 1.4511526655860309e-06, + "loss": 94.0118, + "step": 96040 + }, + { + "epoch": 0.7945568101915044, + "grad_norm": 752.3488159179688, + "learning_rate": 1.4501592962201604e-06, + "loss": 79.3199, + "step": 96050 + }, + { + "epoch": 0.7946395334408736, + "grad_norm": 1673.278564453125, + "learning_rate": 1.4491662092988234e-06, + "loss": 145.9322, + "step": 96060 + }, + { + "epoch": 0.7947222566902428, + "grad_norm": 1270.6055908203125, + "learning_rate": 1.448173404901035e-06, + "loss": 113.5627, + "step": 96070 + }, + { + "epoch": 0.7948049799396121, + "grad_norm": 464.31982421875, + "learning_rate": 1.447180883105786e-06, + "loss": 75.3027, + "step": 96080 + }, + { + "epoch": 0.7948877031889813, + "grad_norm": 786.0927124023438, + "learning_rate": 1.446188643992051e-06, + "loss": 95.312, + "step": 96090 + }, + { + "epoch": 0.7949704264383505, + "grad_norm": 571.1138916015625, + "learning_rate": 1.4451966876387752e-06, + "loss": 61.4562, + "step": 96100 + }, + { + "epoch": 0.7950531496877198, + "grad_norm": 684.5860595703125, + "learning_rate": 1.4442050141248853e-06, + "loss": 63.8922, + "step": 96110 + }, + { + "epoch": 0.795135872937089, + "grad_norm": 753.0285034179688, + "learning_rate": 1.4432136235292848e-06, + "loss": 78.2068, + "step": 96120 + }, + { + "epoch": 0.7952185961864582, + "grad_norm": 1021.8491821289062, + "learning_rate": 1.442222515930851e-06, + "loss": 99.4391, + "step": 96130 + }, + { + "epoch": 0.7953013194358275, + "grad_norm": 650.785888671875, + "learning_rate": 1.441231691408444e-06, + "loss": 96.3516, + "step": 96140 + }, + { + "epoch": 0.7953840426851967, + "grad_norm": 446.81951904296875, + "learning_rate": 1.4402411500408985e-06, + "loss": 60.6617, + "step": 96150 + }, + { + "epoch": 0.7954667659345659, + "grad_norm": 918.9302978515625, + "learning_rate": 1.439250891907028e-06, + "loss": 92.1007, + "step": 96160 + }, + { + "epoch": 0.7955494891839352, + "grad_norm": 1528.4639892578125, + "learning_rate": 1.4382609170856222e-06, + "loss": 94.0727, + "step": 96170 + }, + { + "epoch": 0.7956322124333044, + "grad_norm": 991.0545654296875, + "learning_rate": 1.4372712256554471e-06, + "loss": 74.275, + "step": 96180 + }, + { + "epoch": 0.7957149356826736, + "grad_norm": 408.3299560546875, + "learning_rate": 1.436281817695252e-06, + "loss": 93.5512, + "step": 96190 + }, + { + "epoch": 0.7957976589320428, + "grad_norm": 305.2838439941406, + "learning_rate": 1.4352926932837591e-06, + "loss": 84.8283, + "step": 96200 + }, + { + "epoch": 0.7958803821814121, + "grad_norm": 1160.2215576171875, + "learning_rate": 1.4343038524996645e-06, + "loss": 73.67, + "step": 96210 + }, + { + "epoch": 0.7959631054307813, + "grad_norm": 838.9818725585938, + "learning_rate": 1.4333152954216483e-06, + "loss": 85.1046, + "step": 96220 + }, + { + "epoch": 0.7960458286801505, + "grad_norm": 1157.9669189453125, + "learning_rate": 1.4323270221283653e-06, + "loss": 96.3205, + 
"step": 96230 + }, + { + "epoch": 0.7961285519295198, + "grad_norm": 1076.052978515625, + "learning_rate": 1.4313390326984478e-06, + "loss": 111.263, + "step": 96240 + }, + { + "epoch": 0.796211275178889, + "grad_norm": 715.1856689453125, + "learning_rate": 1.4303513272105057e-06, + "loss": 78.668, + "step": 96250 + }, + { + "epoch": 0.7962939984282582, + "grad_norm": 793.4249877929688, + "learning_rate": 1.4293639057431258e-06, + "loss": 78.2108, + "step": 96260 + }, + { + "epoch": 0.7963767216776275, + "grad_norm": 841.6565551757812, + "learning_rate": 1.4283767683748711e-06, + "loss": 103.3483, + "step": 96270 + }, + { + "epoch": 0.7964594449269967, + "grad_norm": 1000.8982543945312, + "learning_rate": 1.4273899151842873e-06, + "loss": 90.5414, + "step": 96280 + }, + { + "epoch": 0.7965421681763659, + "grad_norm": 751.5543212890625, + "learning_rate": 1.4264033462498932e-06, + "loss": 72.9708, + "step": 96290 + }, + { + "epoch": 0.7966248914257352, + "grad_norm": 768.4910278320312, + "learning_rate": 1.4254170616501828e-06, + "loss": 84.1671, + "step": 96300 + }, + { + "epoch": 0.7967076146751044, + "grad_norm": 1033.3453369140625, + "learning_rate": 1.424431061463632e-06, + "loss": 81.5555, + "step": 96310 + }, + { + "epoch": 0.7967903379244736, + "grad_norm": 957.588623046875, + "learning_rate": 1.4234453457686914e-06, + "loss": 105.335, + "step": 96320 + }, + { + "epoch": 0.7968730611738429, + "grad_norm": 1199.724365234375, + "learning_rate": 1.4224599146437906e-06, + "loss": 87.4864, + "step": 96330 + }, + { + "epoch": 0.7969557844232121, + "grad_norm": 529.3333740234375, + "learning_rate": 1.4214747681673362e-06, + "loss": 83.0788, + "step": 96340 + }, + { + "epoch": 0.7970385076725813, + "grad_norm": 629.4002075195312, + "learning_rate": 1.4204899064177107e-06, + "loss": 89.5744, + "step": 96350 + }, + { + "epoch": 0.7971212309219506, + "grad_norm": 1154.12255859375, + "learning_rate": 1.4195053294732757e-06, + "loss": 110.1019, + "step": 96360 + }, + { + "epoch": 0.7972039541713198, + "grad_norm": 886.4882202148438, + "learning_rate": 1.4185210374123698e-06, + "loss": 64.0015, + "step": 96370 + }, + { + "epoch": 0.797286677420689, + "grad_norm": 1252.8187255859375, + "learning_rate": 1.417537030313308e-06, + "loss": 88.5995, + "step": 96380 + }, + { + "epoch": 0.7973694006700583, + "grad_norm": 1106.0509033203125, + "learning_rate": 1.416553308254383e-06, + "loss": 107.5624, + "step": 96390 + }, + { + "epoch": 0.7974521239194275, + "grad_norm": 608.2451782226562, + "learning_rate": 1.415569871313866e-06, + "loss": 101.158, + "step": 96400 + }, + { + "epoch": 0.7975348471687967, + "grad_norm": 2912.268798828125, + "learning_rate": 1.4145867195700036e-06, + "loss": 84.9362, + "step": 96410 + }, + { + "epoch": 0.797617570418166, + "grad_norm": 1384.496337890625, + "learning_rate": 1.4136038531010216e-06, + "loss": 102.7532, + "step": 96420 + }, + { + "epoch": 0.7977002936675353, + "grad_norm": 765.7507934570312, + "learning_rate": 1.4126212719851211e-06, + "loss": 101.0161, + "step": 96430 + }, + { + "epoch": 0.7977830169169045, + "grad_norm": 559.5115356445312, + "learning_rate": 1.411638976300483e-06, + "loss": 59.2422, + "step": 96440 + }, + { + "epoch": 0.7978657401662738, + "grad_norm": 683.1123046875, + "learning_rate": 1.4106569661252623e-06, + "loss": 78.2508, + "step": 96450 + }, + { + "epoch": 0.797948463415643, + "grad_norm": 1658.7459716796875, + "learning_rate": 1.4096752415375941e-06, + "loss": 71.4444, + "step": 96460 + }, + { + "epoch": 0.7980311866650122, + 
"grad_norm": 790.0877075195312, + "learning_rate": 1.40869380261559e-06, + "loss": 91.7176, + "step": 96470 + }, + { + "epoch": 0.7981139099143815, + "grad_norm": 841.804931640625, + "learning_rate": 1.4077126494373379e-06, + "loss": 81.0083, + "step": 96480 + }, + { + "epoch": 0.7981966331637507, + "grad_norm": 936.5332641601562, + "learning_rate": 1.406731782080904e-06, + "loss": 73.6172, + "step": 96490 + }, + { + "epoch": 0.7982793564131199, + "grad_norm": 620.3352661132812, + "learning_rate": 1.4057512006243312e-06, + "loss": 113.0523, + "step": 96500 + }, + { + "epoch": 0.7983620796624892, + "grad_norm": 785.4129638671875, + "learning_rate": 1.4047709051456398e-06, + "loss": 82.846, + "step": 96510 + }, + { + "epoch": 0.7984448029118584, + "grad_norm": 1122.926025390625, + "learning_rate": 1.403790895722828e-06, + "loss": 73.4351, + "step": 96520 + }, + { + "epoch": 0.7985275261612276, + "grad_norm": 1235.810546875, + "learning_rate": 1.40281117243387e-06, + "loss": 113.9662, + "step": 96530 + }, + { + "epoch": 0.7986102494105969, + "grad_norm": 608.0697021484375, + "learning_rate": 1.4018317353567185e-06, + "loss": 91.6859, + "step": 96540 + }, + { + "epoch": 0.7986929726599661, + "grad_norm": 847.255615234375, + "learning_rate": 1.4008525845693022e-06, + "loss": 103.3336, + "step": 96550 + }, + { + "epoch": 0.7987756959093353, + "grad_norm": 1009.511474609375, + "learning_rate": 1.3998737201495277e-06, + "loss": 79.1723, + "step": 96560 + }, + { + "epoch": 0.7988584191587046, + "grad_norm": 701.9151611328125, + "learning_rate": 1.3988951421752789e-06, + "loss": 81.0391, + "step": 96570 + }, + { + "epoch": 0.7989411424080738, + "grad_norm": 777.0698852539062, + "learning_rate": 1.3979168507244172e-06, + "loss": 94.3455, + "step": 96580 + }, + { + "epoch": 0.799023865657443, + "grad_norm": 523.3616943359375, + "learning_rate": 1.3969388458747802e-06, + "loss": 78.6874, + "step": 96590 + }, + { + "epoch": 0.7991065889068123, + "grad_norm": 876.171142578125, + "learning_rate": 1.3959611277041834e-06, + "loss": 85.3403, + "step": 96600 + }, + { + "epoch": 0.7991893121561815, + "grad_norm": 538.2999267578125, + "learning_rate": 1.394983696290419e-06, + "loss": 94.9753, + "step": 96610 + }, + { + "epoch": 0.7992720354055507, + "grad_norm": 890.1272583007812, + "learning_rate": 1.3940065517112579e-06, + "loss": 90.0231, + "step": 96620 + }, + { + "epoch": 0.79935475865492, + "grad_norm": 1206.276123046875, + "learning_rate": 1.3930296940444472e-06, + "loss": 85.5307, + "step": 96630 + }, + { + "epoch": 0.7994374819042892, + "grad_norm": 1130.0594482421875, + "learning_rate": 1.392053123367707e-06, + "loss": 89.702, + "step": 96640 + }, + { + "epoch": 0.7995202051536584, + "grad_norm": 827.4190063476562, + "learning_rate": 1.3910768397587427e-06, + "loss": 137.461, + "step": 96650 + }, + { + "epoch": 0.7996029284030277, + "grad_norm": 794.8823852539062, + "learning_rate": 1.3901008432952323e-06, + "loss": 111.7997, + "step": 96660 + }, + { + "epoch": 0.7996856516523969, + "grad_norm": 721.3433837890625, + "learning_rate": 1.38912513405483e-06, + "loss": 81.388, + "step": 96670 + }, + { + "epoch": 0.7997683749017661, + "grad_norm": 998.39990234375, + "learning_rate": 1.3881497121151694e-06, + "loss": 63.9328, + "step": 96680 + }, + { + "epoch": 0.7998510981511354, + "grad_norm": 1102.243408203125, + "learning_rate": 1.3871745775538598e-06, + "loss": 95.1184, + "step": 96690 + }, + { + "epoch": 0.7999338214005046, + "grad_norm": 737.71240234375, + "learning_rate": 1.3861997304484887e-06, + 
"loss": 87.6006, + "step": 96700 + }, + { + "epoch": 0.8000165446498738, + "grad_norm": 1060.644775390625, + "learning_rate": 1.3852251708766195e-06, + "loss": 82.6589, + "step": 96710 + }, + { + "epoch": 0.8000992678992431, + "grad_norm": 946.8339233398438, + "learning_rate": 1.384250898915796e-06, + "loss": 100.851, + "step": 96720 + }, + { + "epoch": 0.8001819911486123, + "grad_norm": 1012.8794555664062, + "learning_rate": 1.3832769146435327e-06, + "loss": 131.0507, + "step": 96730 + }, + { + "epoch": 0.8002647143979815, + "grad_norm": 1018.9071655273438, + "learning_rate": 1.3823032181373253e-06, + "loss": 84.4321, + "step": 96740 + }, + { + "epoch": 0.8003474376473508, + "grad_norm": 2128.4169921875, + "learning_rate": 1.3813298094746491e-06, + "loss": 97.8866, + "step": 96750 + }, + { + "epoch": 0.80043016089672, + "grad_norm": 691.6203002929688, + "learning_rate": 1.3803566887329528e-06, + "loss": 69.4865, + "step": 96760 + }, + { + "epoch": 0.8005128841460892, + "grad_norm": 894.628662109375, + "learning_rate": 1.3793838559896628e-06, + "loss": 97.1931, + "step": 96770 + }, + { + "epoch": 0.8005956073954585, + "grad_norm": 844.1419067382812, + "learning_rate": 1.3784113113221826e-06, + "loss": 85.3941, + "step": 96780 + }, + { + "epoch": 0.8006783306448277, + "grad_norm": 966.1434326171875, + "learning_rate": 1.3774390548078942e-06, + "loss": 91.669, + "step": 96790 + }, + { + "epoch": 0.8007610538941969, + "grad_norm": 842.3434448242188, + "learning_rate": 1.3764670865241557e-06, + "loss": 80.8415, + "step": 96800 + }, + { + "epoch": 0.8008437771435662, + "grad_norm": 607.7752685546875, + "learning_rate": 1.3754954065483006e-06, + "loss": 110.9996, + "step": 96810 + }, + { + "epoch": 0.8009265003929354, + "grad_norm": 1030.109130859375, + "learning_rate": 1.374524014957641e-06, + "loss": 79.0626, + "step": 96820 + }, + { + "epoch": 0.8010092236423046, + "grad_norm": 892.157470703125, + "learning_rate": 1.373552911829466e-06, + "loss": 96.9693, + "step": 96830 + }, + { + "epoch": 0.801091946891674, + "grad_norm": 1113.1424560546875, + "learning_rate": 1.3725820972410437e-06, + "loss": 83.1945, + "step": 96840 + }, + { + "epoch": 0.8011746701410432, + "grad_norm": 726.9800415039062, + "learning_rate": 1.3716115712696166e-06, + "loss": 92.1727, + "step": 96850 + }, + { + "epoch": 0.8012573933904124, + "grad_norm": 1125.0447998046875, + "learning_rate": 1.3706413339924047e-06, + "loss": 102.9361, + "step": 96860 + }, + { + "epoch": 0.8013401166397817, + "grad_norm": 452.7906799316406, + "learning_rate": 1.369671385486605e-06, + "loss": 91.2214, + "step": 96870 + }, + { + "epoch": 0.8014228398891509, + "grad_norm": 509.42926025390625, + "learning_rate": 1.3687017258293928e-06, + "loss": 82.8149, + "step": 96880 + }, + { + "epoch": 0.8015055631385201, + "grad_norm": 1304.8404541015625, + "learning_rate": 1.36773235509792e-06, + "loss": 123.0008, + "step": 96890 + }, + { + "epoch": 0.8015882863878894, + "grad_norm": 618.9688720703125, + "learning_rate": 1.366763273369312e-06, + "loss": 85.5639, + "step": 96900 + }, + { + "epoch": 0.8016710096372586, + "grad_norm": 613.8262329101562, + "learning_rate": 1.3657944807206764e-06, + "loss": 64.9996, + "step": 96910 + }, + { + "epoch": 0.8017537328866278, + "grad_norm": 841.429931640625, + "learning_rate": 1.3648259772290957e-06, + "loss": 109.687, + "step": 96920 + }, + { + "epoch": 0.801836456135997, + "grad_norm": 544.7089233398438, + "learning_rate": 1.3638577629716265e-06, + "loss": 68.7118, + "step": 96930 + }, + { + "epoch": 
0.8019191793853663, + "grad_norm": 917.5126342773438, + "learning_rate": 1.3628898380253092e-06, + "loss": 85.4107, + "step": 96940 + }, + { + "epoch": 0.8020019026347355, + "grad_norm": 790.5340576171875, + "learning_rate": 1.3619222024671557e-06, + "loss": 83.7136, + "step": 96950 + }, + { + "epoch": 0.8020846258841047, + "grad_norm": 1110.8216552734375, + "learning_rate": 1.360954856374156e-06, + "loss": 105.1742, + "step": 96960 + }, + { + "epoch": 0.802167349133474, + "grad_norm": 620.2880859375, + "learning_rate": 1.3599877998232768e-06, + "loss": 78.4277, + "step": 96970 + }, + { + "epoch": 0.8022500723828432, + "grad_norm": 1336.2269287109375, + "learning_rate": 1.359021032891465e-06, + "loss": 87.1557, + "step": 96980 + }, + { + "epoch": 0.8023327956322124, + "grad_norm": 723.6621704101562, + "learning_rate": 1.358054555655638e-06, + "loss": 84.3839, + "step": 96990 + }, + { + "epoch": 0.8024155188815817, + "grad_norm": 838.581298828125, + "learning_rate": 1.357088368192696e-06, + "loss": 111.4981, + "step": 97000 + }, + { + "epoch": 0.8024982421309509, + "grad_norm": 564.4741821289062, + "learning_rate": 1.356122470579514e-06, + "loss": 67.0325, + "step": 97010 + }, + { + "epoch": 0.8025809653803201, + "grad_norm": 722.4137573242188, + "learning_rate": 1.3551568628929434e-06, + "loss": 66.7703, + "step": 97020 + }, + { + "epoch": 0.8026636886296894, + "grad_norm": 958.3905029296875, + "learning_rate": 1.3541915452098126e-06, + "loss": 101.0116, + "step": 97030 + }, + { + "epoch": 0.8027464118790586, + "grad_norm": 1039.2890625, + "learning_rate": 1.3532265176069298e-06, + "loss": 88.899, + "step": 97040 + }, + { + "epoch": 0.8028291351284278, + "grad_norm": 715.7427368164062, + "learning_rate": 1.3522617801610767e-06, + "loss": 91.0962, + "step": 97050 + }, + { + "epoch": 0.8029118583777971, + "grad_norm": 973.7153930664062, + "learning_rate": 1.3512973329490137e-06, + "loss": 88.5474, + "step": 97060 + }, + { + "epoch": 0.8029945816271663, + "grad_norm": 1014.022705078125, + "learning_rate": 1.3503331760474759e-06, + "loss": 78.5856, + "step": 97070 + }, + { + "epoch": 0.8030773048765355, + "grad_norm": 1352.9793701171875, + "learning_rate": 1.3493693095331773e-06, + "loss": 103.864, + "step": 97080 + }, + { + "epoch": 0.8031600281259048, + "grad_norm": 384.0733642578125, + "learning_rate": 1.3484057334828088e-06, + "loss": 79.577, + "step": 97090 + }, + { + "epoch": 0.803242751375274, + "grad_norm": 1254.0048828125, + "learning_rate": 1.3474424479730375e-06, + "loss": 108.2203, + "step": 97100 + }, + { + "epoch": 0.8033254746246432, + "grad_norm": 641.9199829101562, + "learning_rate": 1.3464794530805076e-06, + "loss": 83.1312, + "step": 97110 + }, + { + "epoch": 0.8034081978740125, + "grad_norm": 770.1970825195312, + "learning_rate": 1.34551674888184e-06, + "loss": 89.1073, + "step": 97120 + }, + { + "epoch": 0.8034909211233817, + "grad_norm": 795.2720947265625, + "learning_rate": 1.3445543354536317e-06, + "loss": 134.2826, + "step": 97130 + }, + { + "epoch": 0.8035736443727509, + "grad_norm": 371.7003479003906, + "learning_rate": 1.3435922128724599e-06, + "loss": 85.9229, + "step": 97140 + }, + { + "epoch": 0.8036563676221202, + "grad_norm": 1355.1334228515625, + "learning_rate": 1.3426303812148766e-06, + "loss": 116.6411, + "step": 97150 + }, + { + "epoch": 0.8037390908714894, + "grad_norm": 1486.0423583984375, + "learning_rate": 1.3416688405574074e-06, + "loss": 83.7708, + "step": 97160 + }, + { + "epoch": 0.8038218141208586, + "grad_norm": 490.289794921875, + 
"learning_rate": 1.3407075909765593e-06, + "loss": 103.2564, + "step": 97170 + }, + { + "epoch": 0.8039045373702279, + "grad_norm": 699.4607543945312, + "learning_rate": 1.339746632548814e-06, + "loss": 106.1111, + "step": 97180 + }, + { + "epoch": 0.8039872606195971, + "grad_norm": 749.7698974609375, + "learning_rate": 1.3387859653506314e-06, + "loss": 96.1973, + "step": 97190 + }, + { + "epoch": 0.8040699838689663, + "grad_norm": 257.79119873046875, + "learning_rate": 1.3378255894584463e-06, + "loss": 93.4603, + "step": 97200 + }, + { + "epoch": 0.8041527071183356, + "grad_norm": 482.4723815917969, + "learning_rate": 1.3368655049486717e-06, + "loss": 114.9346, + "step": 97210 + }, + { + "epoch": 0.8042354303677048, + "grad_norm": 308.8788757324219, + "learning_rate": 1.3359057118976976e-06, + "loss": 88.0903, + "step": 97220 + }, + { + "epoch": 0.804318153617074, + "grad_norm": 932.05419921875, + "learning_rate": 1.3349462103818906e-06, + "loss": 76.6318, + "step": 97230 + }, + { + "epoch": 0.8044008768664433, + "grad_norm": 3546.006103515625, + "learning_rate": 1.3339870004775929e-06, + "loss": 82.4357, + "step": 97240 + }, + { + "epoch": 0.8044836001158125, + "grad_norm": 689.85302734375, + "learning_rate": 1.3330280822611246e-06, + "loss": 94.2522, + "step": 97250 + }, + { + "epoch": 0.8045663233651817, + "grad_norm": 322.86669921875, + "learning_rate": 1.3320694558087832e-06, + "loss": 76.2741, + "step": 97260 + }, + { + "epoch": 0.8046490466145511, + "grad_norm": 1147.7379150390625, + "learning_rate": 1.3311111211968414e-06, + "loss": 87.9512, + "step": 97270 + }, + { + "epoch": 0.8047317698639203, + "grad_norm": 778.215087890625, + "learning_rate": 1.3301530785015492e-06, + "loss": 68.0114, + "step": 97280 + }, + { + "epoch": 0.8048144931132895, + "grad_norm": 918.3253173828125, + "learning_rate": 1.329195327799135e-06, + "loss": 85.8742, + "step": 97290 + }, + { + "epoch": 0.8048972163626588, + "grad_norm": 1470.5067138671875, + "learning_rate": 1.3282378691658015e-06, + "loss": 80.2126, + "step": 97300 + }, + { + "epoch": 0.804979939612028, + "grad_norm": 682.7937622070312, + "learning_rate": 1.3272807026777302e-06, + "loss": 77.8562, + "step": 97310 + }, + { + "epoch": 0.8050626628613972, + "grad_norm": 711.0369262695312, + "learning_rate": 1.3263238284110769e-06, + "loss": 88.1487, + "step": 97320 + }, + { + "epoch": 0.8051453861107665, + "grad_norm": 711.9708251953125, + "learning_rate": 1.3253672464419776e-06, + "loss": 80.779, + "step": 97330 + }, + { + "epoch": 0.8052281093601357, + "grad_norm": 892.573486328125, + "learning_rate": 1.3244109568465414e-06, + "loss": 106.5878, + "step": 97340 + }, + { + "epoch": 0.8053108326095049, + "grad_norm": 895.7553100585938, + "learning_rate": 1.3234549597008572e-06, + "loss": 84.2314, + "step": 97350 + }, + { + "epoch": 0.8053935558588742, + "grad_norm": 486.6683349609375, + "learning_rate": 1.322499255080989e-06, + "loss": 66.8342, + "step": 97360 + }, + { + "epoch": 0.8054762791082434, + "grad_norm": 531.82763671875, + "learning_rate": 1.3215438430629774e-06, + "loss": 66.0651, + "step": 97370 + }, + { + "epoch": 0.8055590023576126, + "grad_norm": 902.6575317382812, + "learning_rate": 1.3205887237228399e-06, + "loss": 80.48, + "step": 97380 + }, + { + "epoch": 0.8056417256069819, + "grad_norm": 784.8746948242188, + "learning_rate": 1.3196338971365719e-06, + "loss": 80.4638, + "step": 97390 + }, + { + "epoch": 0.8057244488563511, + "grad_norm": 715.1857299804688, + "learning_rate": 1.3186793633801443e-06, + "loss": 91.9943, + "step": 
97400 + }, + { + "epoch": 0.8058071721057203, + "grad_norm": 620.5697631835938, + "learning_rate": 1.317725122529504e-06, + "loss": 79.546, + "step": 97410 + }, + { + "epoch": 0.8058898953550896, + "grad_norm": 688.434326171875, + "learning_rate": 1.3167711746605771e-06, + "loss": 88.8246, + "step": 97420 + }, + { + "epoch": 0.8059726186044588, + "grad_norm": 629.0154418945312, + "learning_rate": 1.315817519849264e-06, + "loss": 78.7018, + "step": 97430 + }, + { + "epoch": 0.806055341853828, + "grad_norm": 568.8397216796875, + "learning_rate": 1.3148641581714421e-06, + "loss": 64.2857, + "step": 97440 + }, + { + "epoch": 0.8061380651031973, + "grad_norm": 1156.6614990234375, + "learning_rate": 1.3139110897029672e-06, + "loss": 80.8269, + "step": 97450 + }, + { + "epoch": 0.8062207883525665, + "grad_norm": 939.9249267578125, + "learning_rate": 1.3129583145196701e-06, + "loss": 98.3824, + "step": 97460 + }, + { + "epoch": 0.8063035116019357, + "grad_norm": 677.0330810546875, + "learning_rate": 1.3120058326973583e-06, + "loss": 80.2276, + "step": 97470 + }, + { + "epoch": 0.806386234851305, + "grad_norm": 733.0352172851562, + "learning_rate": 1.3110536443118172e-06, + "loss": 100.2774, + "step": 97480 + }, + { + "epoch": 0.8064689581006742, + "grad_norm": 895.4539794921875, + "learning_rate": 1.3101017494388074e-06, + "loss": 88.9246, + "step": 97490 + }, + { + "epoch": 0.8065516813500434, + "grad_norm": 1752.5474853515625, + "learning_rate": 1.3091501481540676e-06, + "loss": 83.0293, + "step": 97500 + }, + { + "epoch": 0.8066344045994127, + "grad_norm": 781.7203979492188, + "learning_rate": 1.3081988405333106e-06, + "loss": 95.9738, + "step": 97510 + }, + { + "epoch": 0.8067171278487819, + "grad_norm": 1117.3587646484375, + "learning_rate": 1.3072478266522298e-06, + "loss": 97.9509, + "step": 97520 + }, + { + "epoch": 0.8067998510981511, + "grad_norm": 1128.07177734375, + "learning_rate": 1.3062971065864915e-06, + "loss": 90.2064, + "step": 97530 + }, + { + "epoch": 0.8068825743475204, + "grad_norm": 563.0820922851562, + "learning_rate": 1.305346680411741e-06, + "loss": 97.338, + "step": 97540 + }, + { + "epoch": 0.8069652975968896, + "grad_norm": 1097.416748046875, + "learning_rate": 1.3043965482035987e-06, + "loss": 94.4323, + "step": 97550 + }, + { + "epoch": 0.8070480208462588, + "grad_norm": 1162.277587890625, + "learning_rate": 1.3034467100376624e-06, + "loss": 112.2583, + "step": 97560 + }, + { + "epoch": 0.8071307440956281, + "grad_norm": 708.5651245117188, + "learning_rate": 1.3024971659895069e-06, + "loss": 72.2733, + "step": 97570 + }, + { + "epoch": 0.8072134673449973, + "grad_norm": 877.0707397460938, + "learning_rate": 1.3015479161346839e-06, + "loss": 72.6177, + "step": 97580 + }, + { + "epoch": 0.8072961905943665, + "grad_norm": 622.7434692382812, + "learning_rate": 1.3005989605487168e-06, + "loss": 85.8667, + "step": 97590 + }, + { + "epoch": 0.8073789138437358, + "grad_norm": 1124.309326171875, + "learning_rate": 1.2996502993071137e-06, + "loss": 128.3939, + "step": 97600 + }, + { + "epoch": 0.807461637093105, + "grad_norm": 959.4961547851562, + "learning_rate": 1.2987019324853539e-06, + "loss": 86.4192, + "step": 97610 + }, + { + "epoch": 0.8075443603424742, + "grad_norm": 926.9354248046875, + "learning_rate": 1.2977538601588951e-06, + "loss": 83.3312, + "step": 97620 + }, + { + "epoch": 0.8076270835918435, + "grad_norm": 765.6702270507812, + "learning_rate": 1.2968060824031704e-06, + "loss": 100.2946, + "step": 97630 + }, + { + "epoch": 0.8077098068412127, + "grad_norm": 
663.2040405273438, + "learning_rate": 1.29585859929359e-06, + "loss": 92.4886, + "step": 97640 + }, + { + "epoch": 0.8077925300905819, + "grad_norm": 1108.396484375, + "learning_rate": 1.2949114109055417e-06, + "loss": 64.0099, + "step": 97650 + }, + { + "epoch": 0.8078752533399511, + "grad_norm": 627.171142578125, + "learning_rate": 1.2939645173143894e-06, + "loss": 86.472, + "step": 97660 + }, + { + "epoch": 0.8079579765893204, + "grad_norm": 2170.154541015625, + "learning_rate": 1.293017918595471e-06, + "loss": 131.4853, + "step": 97670 + }, + { + "epoch": 0.8080406998386896, + "grad_norm": 752.8032836914062, + "learning_rate": 1.2920716148241036e-06, + "loss": 90.6649, + "step": 97680 + }, + { + "epoch": 0.8081234230880588, + "grad_norm": 707.9442138671875, + "learning_rate": 1.2911256060755794e-06, + "loss": 95.8097, + "step": 97690 + }, + { + "epoch": 0.8082061463374282, + "grad_norm": 716.3200073242188, + "learning_rate": 1.2901798924251712e-06, + "loss": 83.3109, + "step": 97700 + }, + { + "epoch": 0.8082888695867974, + "grad_norm": 769.82763671875, + "learning_rate": 1.2892344739481228e-06, + "loss": 91.1391, + "step": 97710 + }, + { + "epoch": 0.8083715928361666, + "grad_norm": 720.6710205078125, + "learning_rate": 1.288289350719657e-06, + "loss": 69.3394, + "step": 97720 + }, + { + "epoch": 0.8084543160855359, + "grad_norm": 1892.12451171875, + "learning_rate": 1.2873445228149733e-06, + "loss": 99.0175, + "step": 97730 + }, + { + "epoch": 0.8085370393349051, + "grad_norm": 1591.5841064453125, + "learning_rate": 1.2863999903092473e-06, + "loss": 87.3172, + "step": 97740 + }, + { + "epoch": 0.8086197625842743, + "grad_norm": 2348.568603515625, + "learning_rate": 1.2854557532776323e-06, + "loss": 88.4138, + "step": 97750 + }, + { + "epoch": 0.8087024858336436, + "grad_norm": 3484.744873046875, + "learning_rate": 1.2845118117952544e-06, + "loss": 120.2466, + "step": 97760 + }, + { + "epoch": 0.8087852090830128, + "grad_norm": 1318.188720703125, + "learning_rate": 1.2835681659372196e-06, + "loss": 66.7873, + "step": 97770 + }, + { + "epoch": 0.808867932332382, + "grad_norm": 612.43408203125, + "learning_rate": 1.282624815778608e-06, + "loss": 92.2864, + "step": 97780 + }, + { + "epoch": 0.8089506555817513, + "grad_norm": 984.0529174804688, + "learning_rate": 1.281681761394481e-06, + "loss": 67.4189, + "step": 97790 + }, + { + "epoch": 0.8090333788311205, + "grad_norm": 486.3183288574219, + "learning_rate": 1.2807390028598715e-06, + "loss": 81.0175, + "step": 97800 + }, + { + "epoch": 0.8091161020804897, + "grad_norm": 899.5704956054688, + "learning_rate": 1.2797965402497902e-06, + "loss": 67.1271, + "step": 97810 + }, + { + "epoch": 0.809198825329859, + "grad_norm": 440.5632019042969, + "learning_rate": 1.278854373639225e-06, + "loss": 75.3582, + "step": 97820 + }, + { + "epoch": 0.8092815485792282, + "grad_norm": 1131.36083984375, + "learning_rate": 1.2779125031031413e-06, + "loss": 95.9433, + "step": 97830 + }, + { + "epoch": 0.8093642718285974, + "grad_norm": 1148.0606689453125, + "learning_rate": 1.2769709287164755e-06, + "loss": 91.1547, + "step": 97840 + }, + { + "epoch": 0.8094469950779667, + "grad_norm": 1007.1502685546875, + "learning_rate": 1.2760296505541469e-06, + "loss": 128.5402, + "step": 97850 + }, + { + "epoch": 0.8095297183273359, + "grad_norm": 675.3720703125, + "learning_rate": 1.2750886686910485e-06, + "loss": 82.86, + "step": 97860 + }, + { + "epoch": 0.8096124415767051, + "grad_norm": 554.8408813476562, + "learning_rate": 1.2741479832020492e-06, + "loss": 
103.5147, + "step": 97870 + }, + { + "epoch": 0.8096951648260744, + "grad_norm": 1001.0286865234375, + "learning_rate": 1.2732075941619948e-06, + "loss": 82.1101, + "step": 97880 + }, + { + "epoch": 0.8097778880754436, + "grad_norm": 778.3543090820312, + "learning_rate": 1.2722675016457091e-06, + "loss": 81.8874, + "step": 97890 + }, + { + "epoch": 0.8098606113248128, + "grad_norm": 948.7130737304688, + "learning_rate": 1.271327705727991e-06, + "loss": 99.7648, + "step": 97900 + }, + { + "epoch": 0.8099433345741821, + "grad_norm": 3270.607421875, + "learning_rate": 1.2703882064836142e-06, + "loss": 103.626, + "step": 97910 + }, + { + "epoch": 0.8100260578235513, + "grad_norm": 953.6922607421875, + "learning_rate": 1.2694490039873336e-06, + "loss": 93.171, + "step": 97920 + }, + { + "epoch": 0.8101087810729205, + "grad_norm": 762.2303466796875, + "learning_rate": 1.2685100983138731e-06, + "loss": 63.3839, + "step": 97930 + }, + { + "epoch": 0.8101915043222898, + "grad_norm": 976.2236328125, + "learning_rate": 1.2675714895379387e-06, + "loss": 94.6773, + "step": 97940 + }, + { + "epoch": 0.810274227571659, + "grad_norm": 606.363525390625, + "learning_rate": 1.2666331777342112e-06, + "loss": 81.9096, + "step": 97950 + }, + { + "epoch": 0.8103569508210282, + "grad_norm": 1317.268798828125, + "learning_rate": 1.265695162977348e-06, + "loss": 93.898, + "step": 97960 + }, + { + "epoch": 0.8104396740703975, + "grad_norm": 738.7415161132812, + "learning_rate": 1.2647574453419826e-06, + "loss": 63.2881, + "step": 97970 + }, + { + "epoch": 0.8105223973197667, + "grad_norm": 1187.66162109375, + "learning_rate": 1.2638200249027233e-06, + "loss": 84.0641, + "step": 97980 + }, + { + "epoch": 0.8106051205691359, + "grad_norm": 1476.5244140625, + "learning_rate": 1.2628829017341594e-06, + "loss": 133.0605, + "step": 97990 + }, + { + "epoch": 0.8106878438185052, + "grad_norm": 747.8192749023438, + "learning_rate": 1.2619460759108521e-06, + "loss": 98.8314, + "step": 98000 + }, + { + "epoch": 0.8107705670678744, + "grad_norm": 760.4857177734375, + "learning_rate": 1.2610095475073415e-06, + "loss": 64.8025, + "step": 98010 + }, + { + "epoch": 0.8108532903172436, + "grad_norm": 1347.1624755859375, + "learning_rate": 1.26007331659814e-06, + "loss": 89.3215, + "step": 98020 + }, + { + "epoch": 0.8109360135666129, + "grad_norm": 778.6011352539062, + "learning_rate": 1.2591373832577408e-06, + "loss": 83.764, + "step": 98030 + }, + { + "epoch": 0.8110187368159821, + "grad_norm": 825.260498046875, + "learning_rate": 1.2582017475606117e-06, + "loss": 106.7728, + "step": 98040 + }, + { + "epoch": 0.8111014600653513, + "grad_norm": 1110.2823486328125, + "learning_rate": 1.2572664095811976e-06, + "loss": 93.7398, + "step": 98050 + }, + { + "epoch": 0.8111841833147206, + "grad_norm": 764.0325317382812, + "learning_rate": 1.2563313693939177e-06, + "loss": 89.7206, + "step": 98060 + }, + { + "epoch": 0.8112669065640898, + "grad_norm": 744.2551879882812, + "learning_rate": 1.25539662707317e-06, + "loss": 72.9195, + "step": 98070 + }, + { + "epoch": 0.811349629813459, + "grad_norm": 857.3899536132812, + "learning_rate": 1.2544621826933257e-06, + "loss": 84.4764, + "step": 98080 + }, + { + "epoch": 0.8114323530628283, + "grad_norm": 1150.891845703125, + "learning_rate": 1.2535280363287388e-06, + "loss": 73.521, + "step": 98090 + }, + { + "epoch": 0.8115150763121975, + "grad_norm": 585.0152587890625, + "learning_rate": 1.2525941880537307e-06, + "loss": 74.5685, + "step": 98100 + }, + { + "epoch": 0.8115977995615667, + 
"grad_norm": 927.9107666015625, + "learning_rate": 1.2516606379426044e-06, + "loss": 58.3165, + "step": 98110 + }, + { + "epoch": 0.8116805228109361, + "grad_norm": 997.0220336914062, + "learning_rate": 1.250727386069639e-06, + "loss": 102.8923, + "step": 98120 + }, + { + "epoch": 0.8117632460603053, + "grad_norm": 918.0977783203125, + "learning_rate": 1.2497944325090882e-06, + "loss": 92.9588, + "step": 98130 + }, + { + "epoch": 0.8118459693096745, + "grad_norm": 437.514404296875, + "learning_rate": 1.248861777335184e-06, + "loss": 74.5204, + "step": 98140 + }, + { + "epoch": 0.8119286925590438, + "grad_norm": 873.174072265625, + "learning_rate": 1.247929420622132e-06, + "loss": 76.6309, + "step": 98150 + }, + { + "epoch": 0.812011415808413, + "grad_norm": 1936.2078857421875, + "learning_rate": 1.2469973624441168e-06, + "loss": 133.0517, + "step": 98160 + }, + { + "epoch": 0.8120941390577822, + "grad_norm": 568.5994262695312, + "learning_rate": 1.2460656028752976e-06, + "loss": 93.9224, + "step": 98170 + }, + { + "epoch": 0.8121768623071515, + "grad_norm": 882.7687377929688, + "learning_rate": 1.24513414198981e-06, + "loss": 120.2803, + "step": 98180 + }, + { + "epoch": 0.8122595855565207, + "grad_norm": 753.4219970703125, + "learning_rate": 1.244202979861766e-06, + "loss": 78.88, + "step": 98190 + }, + { + "epoch": 0.8123423088058899, + "grad_norm": 552.3338012695312, + "learning_rate": 1.2432721165652544e-06, + "loss": 56.6679, + "step": 98200 + }, + { + "epoch": 0.8124250320552592, + "grad_norm": 948.5594482421875, + "learning_rate": 1.2423415521743392e-06, + "loss": 77.7815, + "step": 98210 + }, + { + "epoch": 0.8125077553046284, + "grad_norm": 668.4248657226562, + "learning_rate": 1.2414112867630619e-06, + "loss": 82.7063, + "step": 98220 + }, + { + "epoch": 0.8125904785539976, + "grad_norm": 507.14691162109375, + "learning_rate": 1.2404813204054383e-06, + "loss": 77.7765, + "step": 98230 + }, + { + "epoch": 0.8126732018033669, + "grad_norm": 1215.5714111328125, + "learning_rate": 1.239551653175462e-06, + "loss": 132.3814, + "step": 98240 + }, + { + "epoch": 0.8127559250527361, + "grad_norm": 711.6063232421875, + "learning_rate": 1.238622285147103e-06, + "loss": 81.4849, + "step": 98250 + }, + { + "epoch": 0.8128386483021053, + "grad_norm": 1013.1691284179688, + "learning_rate": 1.237693216394306e-06, + "loss": 117.3906, + "step": 98260 + }, + { + "epoch": 0.8129213715514746, + "grad_norm": 735.0127563476562, + "learning_rate": 1.236764446990994e-06, + "loss": 75.7862, + "step": 98270 + }, + { + "epoch": 0.8130040948008438, + "grad_norm": 728.4838256835938, + "learning_rate": 1.2358359770110634e-06, + "loss": 64.463, + "step": 98280 + }, + { + "epoch": 0.813086818050213, + "grad_norm": 746.21044921875, + "learning_rate": 1.2349078065283886e-06, + "loss": 84.8162, + "step": 98290 + }, + { + "epoch": 0.8131695412995823, + "grad_norm": 745.414306640625, + "learning_rate": 1.2339799356168207e-06, + "loss": 91.913, + "step": 98300 + }, + { + "epoch": 0.8132522645489515, + "grad_norm": 788.8545532226562, + "learning_rate": 1.2330523643501858e-06, + "loss": 73.4357, + "step": 98310 + }, + { + "epoch": 0.8133349877983207, + "grad_norm": 1031.7391357421875, + "learning_rate": 1.2321250928022855e-06, + "loss": 94.2501, + "step": 98320 + }, + { + "epoch": 0.81341771104769, + "grad_norm": 723.92919921875, + "learning_rate": 1.2311981210468998e-06, + "loss": 80.1034, + "step": 98330 + }, + { + "epoch": 0.8135004342970592, + "grad_norm": 1015.01220703125, + "learning_rate": 1.2302714491577834e-06, 
+ "loss": 126.1582, + "step": 98340 + }, + { + "epoch": 0.8135831575464284, + "grad_norm": 718.5742797851562, + "learning_rate": 1.2293450772086667e-06, + "loss": 60.2428, + "step": 98350 + }, + { + "epoch": 0.8136658807957977, + "grad_norm": 796.4895629882812, + "learning_rate": 1.2284190052732575e-06, + "loss": 75.6503, + "step": 98360 + }, + { + "epoch": 0.8137486040451669, + "grad_norm": 479.49462890625, + "learning_rate": 1.2274932334252387e-06, + "loss": 111.6113, + "step": 98370 + }, + { + "epoch": 0.8138313272945361, + "grad_norm": 723.4696655273438, + "learning_rate": 1.2265677617382698e-06, + "loss": 76.3011, + "step": 98380 + }, + { + "epoch": 0.8139140505439053, + "grad_norm": 791.28173828125, + "learning_rate": 1.2256425902859864e-06, + "loss": 78.6211, + "step": 98390 + }, + { + "epoch": 0.8139967737932746, + "grad_norm": 1258.62548828125, + "learning_rate": 1.224717719142e-06, + "loss": 89.4393, + "step": 98400 + }, + { + "epoch": 0.8140794970426438, + "grad_norm": 820.9175415039062, + "learning_rate": 1.2237931483798993e-06, + "loss": 107.2661, + "step": 98410 + }, + { + "epoch": 0.814162220292013, + "grad_norm": 1227.457275390625, + "learning_rate": 1.2228688780732463e-06, + "loss": 71.8221, + "step": 98420 + }, + { + "epoch": 0.8142449435413823, + "grad_norm": 548.6934204101562, + "learning_rate": 1.2219449082955825e-06, + "loss": 115.0666, + "step": 98430 + }, + { + "epoch": 0.8143276667907515, + "grad_norm": 997.916259765625, + "learning_rate": 1.2210212391204234e-06, + "loss": 82.2138, + "step": 98440 + }, + { + "epoch": 0.8144103900401207, + "grad_norm": 1067.23486328125, + "learning_rate": 1.2200978706212606e-06, + "loss": 70.915, + "step": 98450 + }, + { + "epoch": 0.81449311328949, + "grad_norm": 1409.0748291015625, + "learning_rate": 1.2191748028715632e-06, + "loss": 64.1739, + "step": 98460 + }, + { + "epoch": 0.8145758365388592, + "grad_norm": 1686.0634765625, + "learning_rate": 1.2182520359447753e-06, + "loss": 88.7429, + "step": 98470 + }, + { + "epoch": 0.8146585597882284, + "grad_norm": 990.0397338867188, + "learning_rate": 1.2173295699143172e-06, + "loss": 106.0889, + "step": 98480 + }, + { + "epoch": 0.8147412830375977, + "grad_norm": 991.7344360351562, + "learning_rate": 1.2164074048535846e-06, + "loss": 95.5769, + "step": 98490 + }, + { + "epoch": 0.8148240062869669, + "grad_norm": 1526.431396484375, + "learning_rate": 1.2154855408359507e-06, + "loss": 99.9895, + "step": 98500 + }, + { + "epoch": 0.8149067295363361, + "grad_norm": 802.585205078125, + "learning_rate": 1.2145639779347634e-06, + "loss": 83.8027, + "step": 98510 + }, + { + "epoch": 0.8149894527857054, + "grad_norm": 806.9586181640625, + "learning_rate": 1.2136427162233493e-06, + "loss": 82.421, + "step": 98520 + }, + { + "epoch": 0.8150721760350746, + "grad_norm": 810.6295166015625, + "learning_rate": 1.212721755775006e-06, + "loss": 98.6951, + "step": 98530 + }, + { + "epoch": 0.8151548992844438, + "grad_norm": 912.5408325195312, + "learning_rate": 1.2118010966630095e-06, + "loss": 83.3605, + "step": 98540 + }, + { + "epoch": 0.8152376225338132, + "grad_norm": 1080.7371826171875, + "learning_rate": 1.210880738960616e-06, + "loss": 84.3936, + "step": 98550 + }, + { + "epoch": 0.8153203457831824, + "grad_norm": 572.9903564453125, + "learning_rate": 1.2099606827410521e-06, + "loss": 64.3546, + "step": 98560 + }, + { + "epoch": 0.8154030690325516, + "grad_norm": 914.2869873046875, + "learning_rate": 1.2090409280775228e-06, + "loss": 94.1465, + "step": 98570 + }, + { + "epoch": 
0.8154857922819209, + "grad_norm": 880.4005126953125, + "learning_rate": 1.208121475043209e-06, + "loss": 79.7238, + "step": 98580 + }, + { + "epoch": 0.8155685155312901, + "grad_norm": 1575.5872802734375, + "learning_rate": 1.2072023237112668e-06, + "loss": 102.0344, + "step": 98590 + }, + { + "epoch": 0.8156512387806593, + "grad_norm": 1086.993408203125, + "learning_rate": 1.2062834741548291e-06, + "loss": 90.5145, + "step": 98600 + }, + { + "epoch": 0.8157339620300286, + "grad_norm": 759.2196655273438, + "learning_rate": 1.2053649264470064e-06, + "loss": 93.4888, + "step": 98610 + }, + { + "epoch": 0.8158166852793978, + "grad_norm": 939.29736328125, + "learning_rate": 1.2044466806608794e-06, + "loss": 66.8826, + "step": 98620 + }, + { + "epoch": 0.815899408528767, + "grad_norm": 691.4633178710938, + "learning_rate": 1.203528736869511e-06, + "loss": 75.7453, + "step": 98630 + }, + { + "epoch": 0.8159821317781363, + "grad_norm": 843.3115234375, + "learning_rate": 1.2026110951459364e-06, + "loss": 104.9937, + "step": 98640 + }, + { + "epoch": 0.8160648550275055, + "grad_norm": 707.0924682617188, + "learning_rate": 1.2016937555631702e-06, + "loss": 66.0707, + "step": 98650 + }, + { + "epoch": 0.8161475782768747, + "grad_norm": 1021.8479614257812, + "learning_rate": 1.2007767181942003e-06, + "loss": 99.7831, + "step": 98660 + }, + { + "epoch": 0.816230301526244, + "grad_norm": 850.4284057617188, + "learning_rate": 1.1998599831119912e-06, + "loss": 75.5687, + "step": 98670 + }, + { + "epoch": 0.8163130247756132, + "grad_norm": 1095.51318359375, + "learning_rate": 1.1989435503894826e-06, + "loss": 101.9039, + "step": 98680 + }, + { + "epoch": 0.8163957480249824, + "grad_norm": 1368.3948974609375, + "learning_rate": 1.1980274200995928e-06, + "loss": 91.4454, + "step": 98690 + }, + { + "epoch": 0.8164784712743517, + "grad_norm": 969.6019287109375, + "learning_rate": 1.197111592315211e-06, + "loss": 84.7942, + "step": 98700 + }, + { + "epoch": 0.8165611945237209, + "grad_norm": 531.8544311523438, + "learning_rate": 1.1961960671092066e-06, + "loss": 84.7319, + "step": 98710 + }, + { + "epoch": 0.8166439177730901, + "grad_norm": 626.1632080078125, + "learning_rate": 1.1952808445544246e-06, + "loss": 70.9973, + "step": 98720 + }, + { + "epoch": 0.8167266410224594, + "grad_norm": 718.9005737304688, + "learning_rate": 1.1943659247236838e-06, + "loss": 81.7739, + "step": 98730 + }, + { + "epoch": 0.8168093642718286, + "grad_norm": 826.1453247070312, + "learning_rate": 1.1934513076897798e-06, + "loss": 77.8973, + "step": 98740 + }, + { + "epoch": 0.8168920875211978, + "grad_norm": 670.5861206054688, + "learning_rate": 1.1925369935254872e-06, + "loss": 70.1664, + "step": 98750 + }, + { + "epoch": 0.8169748107705671, + "grad_norm": 1031.8236083984375, + "learning_rate": 1.191622982303552e-06, + "loss": 96.2467, + "step": 98760 + }, + { + "epoch": 0.8170575340199363, + "grad_norm": 862.9549560546875, + "learning_rate": 1.1907092740966976e-06, + "loss": 95.5136, + "step": 98770 + }, + { + "epoch": 0.8171402572693055, + "grad_norm": 1296.6314697265625, + "learning_rate": 1.1897958689776256e-06, + "loss": 125.511, + "step": 98780 + }, + { + "epoch": 0.8172229805186748, + "grad_norm": 840.3079833984375, + "learning_rate": 1.1888827670190085e-06, + "loss": 103.1509, + "step": 98790 + }, + { + "epoch": 0.817305703768044, + "grad_norm": 822.6774291992188, + "learning_rate": 1.1879699682934993e-06, + "loss": 89.4122, + "step": 98800 + }, + { + "epoch": 0.8173884270174132, + "grad_norm": 5291.2255859375, + 
"learning_rate": 1.1870574728737244e-06, + "loss": 141.3245, + "step": 98810 + }, + { + "epoch": 0.8174711502667825, + "grad_norm": 973.1185302734375, + "learning_rate": 1.1861452808322877e-06, + "loss": 87.4708, + "step": 98820 + }, + { + "epoch": 0.8175538735161517, + "grad_norm": 890.1282348632812, + "learning_rate": 1.1852333922417658e-06, + "loss": 85.0632, + "step": 98830 + }, + { + "epoch": 0.8176365967655209, + "grad_norm": 1301.4791259765625, + "learning_rate": 1.1843218071747171e-06, + "loss": 96.202, + "step": 98840 + }, + { + "epoch": 0.8177193200148902, + "grad_norm": 659.366455078125, + "learning_rate": 1.183410525703671e-06, + "loss": 86.4163, + "step": 98850 + }, + { + "epoch": 0.8178020432642594, + "grad_norm": 824.1502685546875, + "learning_rate": 1.182499547901133e-06, + "loss": 80.8635, + "step": 98860 + }, + { + "epoch": 0.8178847665136286, + "grad_norm": 1438.4332275390625, + "learning_rate": 1.1815888738395882e-06, + "loss": 104.6398, + "step": 98870 + }, + { + "epoch": 0.8179674897629979, + "grad_norm": 850.4103393554688, + "learning_rate": 1.1806785035914903e-06, + "loss": 112.7476, + "step": 98880 + }, + { + "epoch": 0.8180502130123671, + "grad_norm": 1077.782470703125, + "learning_rate": 1.1797684372292762e-06, + "loss": 61.9891, + "step": 98890 + }, + { + "epoch": 0.8181329362617363, + "grad_norm": 528.4021606445312, + "learning_rate": 1.1788586748253545e-06, + "loss": 92.2062, + "step": 98900 + }, + { + "epoch": 0.8182156595111056, + "grad_norm": 739.4981079101562, + "learning_rate": 1.1779492164521117e-06, + "loss": 67.1746, + "step": 98910 + }, + { + "epoch": 0.8182983827604748, + "grad_norm": 935.0463256835938, + "learning_rate": 1.177040062181909e-06, + "loss": 65.3778, + "step": 98920 + }, + { + "epoch": 0.818381106009844, + "grad_norm": 1494.3590087890625, + "learning_rate": 1.1761312120870822e-06, + "loss": 85.324, + "step": 98930 + }, + { + "epoch": 0.8184638292592133, + "grad_norm": 514.237060546875, + "learning_rate": 1.175222666239947e-06, + "loss": 91.5604, + "step": 98940 + }, + { + "epoch": 0.8185465525085825, + "grad_norm": 856.9160766601562, + "learning_rate": 1.174314424712792e-06, + "loss": 86.7412, + "step": 98950 + }, + { + "epoch": 0.8186292757579517, + "grad_norm": 849.8342895507812, + "learning_rate": 1.1734064875778795e-06, + "loss": 81.7563, + "step": 98960 + }, + { + "epoch": 0.8187119990073211, + "grad_norm": 648.2426147460938, + "learning_rate": 1.1724988549074506e-06, + "loss": 84.3063, + "step": 98970 + }, + { + "epoch": 0.8187947222566903, + "grad_norm": 736.9127197265625, + "learning_rate": 1.1715915267737228e-06, + "loss": 123.6431, + "step": 98980 + }, + { + "epoch": 0.8188774455060595, + "grad_norm": 2442.851318359375, + "learning_rate": 1.1706845032488867e-06, + "loss": 78.3283, + "step": 98990 + }, + { + "epoch": 0.8189601687554288, + "grad_norm": 895.6693115234375, + "learning_rate": 1.1697777844051105e-06, + "loss": 67.0833, + "step": 99000 + }, + { + "epoch": 0.819042892004798, + "grad_norm": 616.1843872070312, + "learning_rate": 1.1688713703145377e-06, + "loss": 97.3783, + "step": 99010 + }, + { + "epoch": 0.8191256152541672, + "grad_norm": 753.047119140625, + "learning_rate": 1.1679652610492875e-06, + "loss": 78.6516, + "step": 99020 + }, + { + "epoch": 0.8192083385035365, + "grad_norm": 1777.462890625, + "learning_rate": 1.1670594566814536e-06, + "loss": 78.5895, + "step": 99030 + }, + { + "epoch": 0.8192910617529057, + "grad_norm": 898.646484375, + "learning_rate": 1.1661539572831105e-06, + "loss": 67.8556, + "step": 
99040 + }, + { + "epoch": 0.8193737850022749, + "grad_norm": 1100.24267578125, + "learning_rate": 1.1652487629263003e-06, + "loss": 92.8213, + "step": 99050 + }, + { + "epoch": 0.8194565082516442, + "grad_norm": 727.767822265625, + "learning_rate": 1.1643438736830476e-06, + "loss": 87.5095, + "step": 99060 + }, + { + "epoch": 0.8195392315010134, + "grad_norm": 556.3981323242188, + "learning_rate": 1.1634392896253495e-06, + "loss": 98.1264, + "step": 99070 + }, + { + "epoch": 0.8196219547503826, + "grad_norm": 761.73828125, + "learning_rate": 1.1625350108251793e-06, + "loss": 85.1379, + "step": 99080 + }, + { + "epoch": 0.8197046779997519, + "grad_norm": 553.5272827148438, + "learning_rate": 1.1616310373544865e-06, + "loss": 72.9364, + "step": 99090 + }, + { + "epoch": 0.8197874012491211, + "grad_norm": 601.8910522460938, + "learning_rate": 1.1607273692851967e-06, + "loss": 87.1516, + "step": 99100 + }, + { + "epoch": 0.8198701244984903, + "grad_norm": 369.5881652832031, + "learning_rate": 1.1598240066892103e-06, + "loss": 66.5272, + "step": 99110 + }, + { + "epoch": 0.8199528477478595, + "grad_norm": 523.6664428710938, + "learning_rate": 1.1589209496384035e-06, + "loss": 84.0007, + "step": 99120 + }, + { + "epoch": 0.8200355709972288, + "grad_norm": 1775.257080078125, + "learning_rate": 1.1580181982046285e-06, + "loss": 100.826, + "step": 99130 + }, + { + "epoch": 0.820118294246598, + "grad_norm": 1454.958984375, + "learning_rate": 1.1571157524597137e-06, + "loss": 103.4577, + "step": 99140 + }, + { + "epoch": 0.8202010174959672, + "grad_norm": 673.6801147460938, + "learning_rate": 1.1562136124754613e-06, + "loss": 89.0967, + "step": 99150 + }, + { + "epoch": 0.8202837407453365, + "grad_norm": 514.0848388671875, + "learning_rate": 1.1553117783236516e-06, + "loss": 67.5358, + "step": 99160 + }, + { + "epoch": 0.8203664639947057, + "grad_norm": 888.429443359375, + "learning_rate": 1.154410250076039e-06, + "loss": 101.8155, + "step": 99170 + }, + { + "epoch": 0.8204491872440749, + "grad_norm": 515.0179443359375, + "learning_rate": 1.1535090278043538e-06, + "loss": 92.6118, + "step": 99180 + }, + { + "epoch": 0.8205319104934442, + "grad_norm": 659.7156372070312, + "learning_rate": 1.1526081115803017e-06, + "loss": 77.2248, + "step": 99190 + }, + { + "epoch": 0.8206146337428134, + "grad_norm": 680.0890502929688, + "learning_rate": 1.1517075014755657e-06, + "loss": 107.0121, + "step": 99200 + }, + { + "epoch": 0.8206973569921826, + "grad_norm": 1098.1448974609375, + "learning_rate": 1.1508071975618025e-06, + "loss": 96.3001, + "step": 99210 + }, + { + "epoch": 0.8207800802415519, + "grad_norm": 628.8194580078125, + "learning_rate": 1.1499071999106449e-06, + "loss": 94.3524, + "step": 99220 + }, + { + "epoch": 0.8208628034909211, + "grad_norm": 908.0435791015625, + "learning_rate": 1.149007508593702e-06, + "loss": 89.7701, + "step": 99230 + }, + { + "epoch": 0.8209455267402903, + "grad_norm": 635.8421630859375, + "learning_rate": 1.148108123682558e-06, + "loss": 69.7421, + "step": 99240 + }, + { + "epoch": 0.8210282499896596, + "grad_norm": 540.8094482421875, + "learning_rate": 1.1472090452487728e-06, + "loss": 83.0186, + "step": 99250 + }, + { + "epoch": 0.8211109732390288, + "grad_norm": 655.1044311523438, + "learning_rate": 1.146310273363882e-06, + "loss": 79.3086, + "step": 99260 + }, + { + "epoch": 0.821193696488398, + "grad_norm": 579.1731567382812, + "learning_rate": 1.1454118080993965e-06, + "loss": 86.8246, + "step": 99270 + }, + { + "epoch": 0.8212764197377673, + "grad_norm": 
1405.385498046875, + "learning_rate": 1.1445136495268033e-06, + "loss": 124.9013, + "step": 99280 + }, + { + "epoch": 0.8213591429871365, + "grad_norm": 553.7492065429688, + "learning_rate": 1.143615797717565e-06, + "loss": 80.3261, + "step": 99290 + }, + { + "epoch": 0.8214418662365057, + "grad_norm": 2029.3385009765625, + "learning_rate": 1.1427182527431192e-06, + "loss": 95.4808, + "step": 99300 + }, + { + "epoch": 0.821524589485875, + "grad_norm": 756.5120849609375, + "learning_rate": 1.1418210146748792e-06, + "loss": 67.8008, + "step": 99310 + }, + { + "epoch": 0.8216073127352442, + "grad_norm": 396.6879577636719, + "learning_rate": 1.1409240835842344e-06, + "loss": 70.0907, + "step": 99320 + }, + { + "epoch": 0.8216900359846134, + "grad_norm": 836.3045654296875, + "learning_rate": 1.1400274595425499e-06, + "loss": 81.1893, + "step": 99330 + }, + { + "epoch": 0.8217727592339827, + "grad_norm": 831.0084838867188, + "learning_rate": 1.139131142621165e-06, + "loss": 108.7785, + "step": 99340 + }, + { + "epoch": 0.8218554824833519, + "grad_norm": 594.3251953125, + "learning_rate": 1.1382351328913964e-06, + "loss": 90.5988, + "step": 99350 + }, + { + "epoch": 0.8219382057327211, + "grad_norm": 997.0202026367188, + "learning_rate": 1.137339430424535e-06, + "loss": 98.3652, + "step": 99360 + }, + { + "epoch": 0.8220209289820904, + "grad_norm": 1159.4931640625, + "learning_rate": 1.1364440352918482e-06, + "loss": 93.9449, + "step": 99370 + }, + { + "epoch": 0.8221036522314596, + "grad_norm": 1038.8775634765625, + "learning_rate": 1.1355489475645798e-06, + "loss": 109.6431, + "step": 99380 + }, + { + "epoch": 0.8221863754808288, + "grad_norm": 1000.264404296875, + "learning_rate": 1.1346541673139428e-06, + "loss": 81.506, + "step": 99390 + }, + { + "epoch": 0.8222690987301982, + "grad_norm": 986.3641967773438, + "learning_rate": 1.1337596946111357e-06, + "loss": 91.9383, + "step": 99400 + }, + { + "epoch": 0.8223518219795674, + "grad_norm": 548.0112915039062, + "learning_rate": 1.132865529527326e-06, + "loss": 79.2564, + "step": 99410 + }, + { + "epoch": 0.8224345452289366, + "grad_norm": 912.9034423828125, + "learning_rate": 1.1319716721336587e-06, + "loss": 88.9765, + "step": 99420 + }, + { + "epoch": 0.8225172684783059, + "grad_norm": 463.36883544921875, + "learning_rate": 1.1310781225012535e-06, + "loss": 58.9931, + "step": 99430 + }, + { + "epoch": 0.8225999917276751, + "grad_norm": 1589.856201171875, + "learning_rate": 1.130184880701206e-06, + "loss": 102.2906, + "step": 99440 + }, + { + "epoch": 0.8226827149770443, + "grad_norm": 653.0153198242188, + "learning_rate": 1.1292919468045876e-06, + "loss": 83.1269, + "step": 99450 + }, + { + "epoch": 0.8227654382264136, + "grad_norm": 1476.3031005859375, + "learning_rate": 1.128399320882445e-06, + "loss": 81.7267, + "step": 99460 + }, + { + "epoch": 0.8228481614757828, + "grad_norm": 633.2077026367188, + "learning_rate": 1.1275070030058016e-06, + "loss": 74.4383, + "step": 99470 + }, + { + "epoch": 0.822930884725152, + "grad_norm": 1069.526123046875, + "learning_rate": 1.1266149932456516e-06, + "loss": 93.1175, + "step": 99480 + }, + { + "epoch": 0.8230136079745213, + "grad_norm": 513.8902587890625, + "learning_rate": 1.1257232916729693e-06, + "loss": 78.2727, + "step": 99490 + }, + { + "epoch": 0.8230963312238905, + "grad_norm": 552.6843872070312, + "learning_rate": 1.1248318983587052e-06, + "loss": 77.5065, + "step": 99500 + }, + { + "epoch": 0.8231790544732597, + "grad_norm": 878.9365234375, + "learning_rate": 1.1239408133737828e-06, + 
"loss": 77.3874, + "step": 99510 + }, + { + "epoch": 0.823261777722629, + "grad_norm": 1088.0330810546875, + "learning_rate": 1.123050036789101e-06, + "loss": 93.4705, + "step": 99520 + }, + { + "epoch": 0.8233445009719982, + "grad_norm": 916.3295288085938, + "learning_rate": 1.122159568675535e-06, + "loss": 86.091, + "step": 99530 + }, + { + "epoch": 0.8234272242213674, + "grad_norm": 595.7514038085938, + "learning_rate": 1.121269409103935e-06, + "loss": 110.6302, + "step": 99540 + }, + { + "epoch": 0.8235099474707367, + "grad_norm": 819.598388671875, + "learning_rate": 1.1203795581451288e-06, + "loss": 116.6621, + "step": 99550 + }, + { + "epoch": 0.8235926707201059, + "grad_norm": 669.3980712890625, + "learning_rate": 1.1194900158699146e-06, + "loss": 83.4266, + "step": 99560 + }, + { + "epoch": 0.8236753939694751, + "grad_norm": 630.958251953125, + "learning_rate": 1.1186007823490708e-06, + "loss": 86.0332, + "step": 99570 + }, + { + "epoch": 0.8237581172188444, + "grad_norm": 1063.6295166015625, + "learning_rate": 1.1177118576533492e-06, + "loss": 99.3058, + "step": 99580 + }, + { + "epoch": 0.8238408404682136, + "grad_norm": 1337.9732666015625, + "learning_rate": 1.116823241853477e-06, + "loss": 54.1832, + "step": 99590 + }, + { + "epoch": 0.8239235637175828, + "grad_norm": 680.459228515625, + "learning_rate": 1.1159349350201587e-06, + "loss": 86.0068, + "step": 99600 + }, + { + "epoch": 0.8240062869669521, + "grad_norm": 976.7091674804688, + "learning_rate": 1.1150469372240724e-06, + "loss": 85.8299, + "step": 99610 + }, + { + "epoch": 0.8240890102163213, + "grad_norm": 839.8695678710938, + "learning_rate": 1.114159248535872e-06, + "loss": 85.7322, + "step": 99620 + }, + { + "epoch": 0.8241717334656905, + "grad_norm": 524.162841796875, + "learning_rate": 1.1132718690261868e-06, + "loss": 87.2623, + "step": 99630 + }, + { + "epoch": 0.8242544567150598, + "grad_norm": 808.6865234375, + "learning_rate": 1.1123847987656221e-06, + "loss": 89.9084, + "step": 99640 + }, + { + "epoch": 0.824337179964429, + "grad_norm": 1346.7198486328125, + "learning_rate": 1.1114980378247565e-06, + "loss": 73.3104, + "step": 99650 + }, + { + "epoch": 0.8244199032137982, + "grad_norm": 860.0714111328125, + "learning_rate": 1.1106115862741457e-06, + "loss": 136.1401, + "step": 99660 + }, + { + "epoch": 0.8245026264631675, + "grad_norm": 874.57666015625, + "learning_rate": 1.1097254441843215e-06, + "loss": 112.3625, + "step": 99670 + }, + { + "epoch": 0.8245853497125367, + "grad_norm": 788.482666015625, + "learning_rate": 1.1088396116257893e-06, + "loss": 54.8236, + "step": 99680 + }, + { + "epoch": 0.8246680729619059, + "grad_norm": 807.5397338867188, + "learning_rate": 1.10795408866903e-06, + "loss": 79.1535, + "step": 99690 + }, + { + "epoch": 0.8247507962112752, + "grad_norm": 1106.181396484375, + "learning_rate": 1.1070688753845032e-06, + "loss": 117.5441, + "step": 99700 + }, + { + "epoch": 0.8248335194606444, + "grad_norm": 782.138427734375, + "learning_rate": 1.1061839718426399e-06, + "loss": 92.2893, + "step": 99710 + }, + { + "epoch": 0.8249162427100136, + "grad_norm": 1031.1993408203125, + "learning_rate": 1.1052993781138477e-06, + "loss": 80.9152, + "step": 99720 + }, + { + "epoch": 0.8249989659593829, + "grad_norm": 443.52801513671875, + "learning_rate": 1.1044150942685112e-06, + "loss": 104.3299, + "step": 99730 + }, + { + "epoch": 0.8250816892087521, + "grad_norm": 1952.7318115234375, + "learning_rate": 1.1035311203769855e-06, + "loss": 135.8231, + "step": 99740 + }, + { + "epoch": 
0.8251644124581213, + "grad_norm": 863.5310668945312, + "learning_rate": 1.1026474565096068e-06, + "loss": 85.8996, + "step": 99750 + }, + { + "epoch": 0.8252471357074906, + "grad_norm": 756.4827880859375, + "learning_rate": 1.1017641027366832e-06, + "loss": 97.5026, + "step": 99760 + }, + { + "epoch": 0.8253298589568598, + "grad_norm": 881.189453125, + "learning_rate": 1.1008810591284997e-06, + "loss": 100.8524, + "step": 99770 + }, + { + "epoch": 0.825412582206229, + "grad_norm": 574.4223022460938, + "learning_rate": 1.0999983257553137e-06, + "loss": 82.259, + "step": 99780 + }, + { + "epoch": 0.8254953054555983, + "grad_norm": 1166.70166015625, + "learning_rate": 1.0991159026873643e-06, + "loss": 79.2827, + "step": 99790 + }, + { + "epoch": 0.8255780287049675, + "grad_norm": 712.7079467773438, + "learning_rate": 1.0982337899948603e-06, + "loss": 71.3755, + "step": 99800 + }, + { + "epoch": 0.8256607519543367, + "grad_norm": 921.7284545898438, + "learning_rate": 1.0973519877479878e-06, + "loss": 50.2585, + "step": 99810 + }, + { + "epoch": 0.8257434752037061, + "grad_norm": 480.2099914550781, + "learning_rate": 1.0964704960169054e-06, + "loss": 75.6909, + "step": 99820 + }, + { + "epoch": 0.8258261984530753, + "grad_norm": 659.5361328125, + "learning_rate": 1.0955893148717512e-06, + "loss": 92.5624, + "step": 99830 + }, + { + "epoch": 0.8259089217024445, + "grad_norm": 386.55810546875, + "learning_rate": 1.0947084443826361e-06, + "loss": 71.9917, + "step": 99840 + }, + { + "epoch": 0.8259916449518137, + "grad_norm": 968.19140625, + "learning_rate": 1.0938278846196471e-06, + "loss": 75.8837, + "step": 99850 + }, + { + "epoch": 0.826074368201183, + "grad_norm": 1425.4276123046875, + "learning_rate": 1.0929476356528469e-06, + "loss": 70.3875, + "step": 99860 + }, + { + "epoch": 0.8261570914505522, + "grad_norm": 895.6668090820312, + "learning_rate": 1.092067697552272e-06, + "loss": 91.4471, + "step": 99870 + }, + { + "epoch": 0.8262398146999214, + "grad_norm": 1033.364013671875, + "learning_rate": 1.091188070387934e-06, + "loss": 90.5695, + "step": 99880 + }, + { + "epoch": 0.8263225379492907, + "grad_norm": 660.748046875, + "learning_rate": 1.0903087542298241e-06, + "loss": 96.0515, + "step": 99890 + }, + { + "epoch": 0.8264052611986599, + "grad_norm": 480.1137390136719, + "learning_rate": 1.0894297491479044e-06, + "loss": 80.2592, + "step": 99900 + }, + { + "epoch": 0.8264879844480291, + "grad_norm": 643.3427124023438, + "learning_rate": 1.0885510552121115e-06, + "loss": 59.6039, + "step": 99910 + }, + { + "epoch": 0.8265707076973984, + "grad_norm": 1013.935546875, + "learning_rate": 1.0876726724923597e-06, + "loss": 77.1848, + "step": 99920 + }, + { + "epoch": 0.8266534309467676, + "grad_norm": 841.8418579101562, + "learning_rate": 1.086794601058538e-06, + "loss": 83.5782, + "step": 99930 + }, + { + "epoch": 0.8267361541961368, + "grad_norm": 337.9281311035156, + "learning_rate": 1.0859168409805109e-06, + "loss": 68.1758, + "step": 99940 + }, + { + "epoch": 0.8268188774455061, + "grad_norm": 665.4752807617188, + "learning_rate": 1.0850393923281176e-06, + "loss": 100.0561, + "step": 99950 + }, + { + "epoch": 0.8269016006948753, + "grad_norm": 569.7544555664062, + "learning_rate": 1.0841622551711728e-06, + "loss": 83.8357, + "step": 99960 + }, + { + "epoch": 0.8269843239442445, + "grad_norm": 661.4277954101562, + "learning_rate": 1.0832854295794659e-06, + "loss": 65.98, + "step": 99970 + }, + { + "epoch": 0.8270670471936138, + "grad_norm": 930.4299926757812, + "learning_rate": 
1.0824089156227624e-06, + "loss": 77.1078, + "step": 99980 + }, + { + "epoch": 0.827149770442983, + "grad_norm": 1039.9451904296875, + "learning_rate": 1.0815327133708015e-06, + "loss": 49.8599, + "step": 99990 + }, + { + "epoch": 0.8272324936923522, + "grad_norm": 907.8119506835938, + "learning_rate": 1.0806568228932995e-06, + "loss": 87.3194, + "step": 100000 + }, + { + "epoch": 0.8273152169417215, + "grad_norm": 665.1284790039062, + "learning_rate": 1.079781244259947e-06, + "loss": 113.8395, + "step": 100010 + }, + { + "epoch": 0.8273979401910907, + "grad_norm": 1413.6357421875, + "learning_rate": 1.0789059775404093e-06, + "loss": 98.0291, + "step": 100020 + }, + { + "epoch": 0.8274806634404599, + "grad_norm": 738.0913696289062, + "learning_rate": 1.0780310228043278e-06, + "loss": 97.2634, + "step": 100030 + }, + { + "epoch": 0.8275633866898292, + "grad_norm": 610.05859375, + "learning_rate": 1.0771563801213186e-06, + "loss": 86.15, + "step": 100040 + }, + { + "epoch": 0.8276461099391984, + "grad_norm": 723.9077758789062, + "learning_rate": 1.076282049560972e-06, + "loss": 92.4268, + "step": 100050 + }, + { + "epoch": 0.8277288331885676, + "grad_norm": 939.378662109375, + "learning_rate": 1.075408031192856e-06, + "loss": 82.4279, + "step": 100060 + }, + { + "epoch": 0.8278115564379369, + "grad_norm": 1257.1422119140625, + "learning_rate": 1.0745343250865114e-06, + "loss": 73.5913, + "step": 100070 + }, + { + "epoch": 0.8278942796873061, + "grad_norm": 568.9317626953125, + "learning_rate": 1.073660931311455e-06, + "loss": 89.3267, + "step": 100080 + }, + { + "epoch": 0.8279770029366753, + "grad_norm": 454.4713134765625, + "learning_rate": 1.0727878499371786e-06, + "loss": 71.951, + "step": 100090 + }, + { + "epoch": 0.8280597261860446, + "grad_norm": 692.2919311523438, + "learning_rate": 1.0719150810331497e-06, + "loss": 70.2479, + "step": 100100 + }, + { + "epoch": 0.8281424494354138, + "grad_norm": 1330.4888916015625, + "learning_rate": 1.0710426246688105e-06, + "loss": 102.4429, + "step": 100110 + }, + { + "epoch": 0.828225172684783, + "grad_norm": 739.1103515625, + "learning_rate": 1.0701704809135782e-06, + "loss": 78.6834, + "step": 100120 + }, + { + "epoch": 0.8283078959341523, + "grad_norm": 964.680908203125, + "learning_rate": 1.0692986498368452e-06, + "loss": 103.2081, + "step": 100130 + }, + { + "epoch": 0.8283906191835215, + "grad_norm": 928.7930908203125, + "learning_rate": 1.0684271315079786e-06, + "loss": 75.7119, + "step": 100140 + }, + { + "epoch": 0.8284733424328907, + "grad_norm": 771.4544677734375, + "learning_rate": 1.0675559259963226e-06, + "loss": 91.3507, + "step": 100150 + }, + { + "epoch": 0.82855606568226, + "grad_norm": 1169.62109375, + "learning_rate": 1.066685033371193e-06, + "loss": 69.6342, + "step": 100160 + }, + { + "epoch": 0.8286387889316292, + "grad_norm": 1027.3876953125, + "learning_rate": 1.0658144537018844e-06, + "loss": 68.9951, + "step": 100170 + }, + { + "epoch": 0.8287215121809984, + "grad_norm": 561.0545043945312, + "learning_rate": 1.0649441870576644e-06, + "loss": 63.7628, + "step": 100180 + }, + { + "epoch": 0.8288042354303677, + "grad_norm": 2845.06689453125, + "learning_rate": 1.0640742335077758e-06, + "loss": 74.9551, + "step": 100190 + }, + { + "epoch": 0.8288869586797369, + "grad_norm": 514.4091186523438, + "learning_rate": 1.0632045931214369e-06, + "loss": 69.5426, + "step": 100200 + }, + { + "epoch": 0.8289696819291061, + "grad_norm": 830.9058837890625, + "learning_rate": 1.0623352659678415e-06, + "loss": 88.3192, + "step": 100210 + 
}, + { + "epoch": 0.8290524051784754, + "grad_norm": 681.1116333007812, + "learning_rate": 1.0614662521161573e-06, + "loss": 73.3991, + "step": 100220 + }, + { + "epoch": 0.8291351284278446, + "grad_norm": 1170.43212890625, + "learning_rate": 1.0605975516355277e-06, + "loss": 56.5079, + "step": 100230 + }, + { + "epoch": 0.8292178516772138, + "grad_norm": 640.3706665039062, + "learning_rate": 1.059729164595073e-06, + "loss": 76.578, + "step": 100240 + }, + { + "epoch": 0.8293005749265832, + "grad_norm": 1166.9835205078125, + "learning_rate": 1.0588610910638825e-06, + "loss": 109.2789, + "step": 100250 + }, + { + "epoch": 0.8293832981759524, + "grad_norm": 928.90185546875, + "learning_rate": 1.057993331111029e-06, + "loss": 89.8654, + "step": 100260 + }, + { + "epoch": 0.8294660214253216, + "grad_norm": 927.5479736328125, + "learning_rate": 1.057125884805555e-06, + "loss": 82.5449, + "step": 100270 + }, + { + "epoch": 0.8295487446746909, + "grad_norm": 980.9613037109375, + "learning_rate": 1.0562587522164792e-06, + "loss": 93.9806, + "step": 100280 + }, + { + "epoch": 0.8296314679240601, + "grad_norm": 702.5833740234375, + "learning_rate": 1.0553919334127943e-06, + "loss": 67.3568, + "step": 100290 + }, + { + "epoch": 0.8297141911734293, + "grad_norm": 1179.7244873046875, + "learning_rate": 1.0545254284634703e-06, + "loss": 96.2675, + "step": 100300 + }, + { + "epoch": 0.8297969144227986, + "grad_norm": 885.7655639648438, + "learning_rate": 1.053659237437451e-06, + "loss": 112.9435, + "step": 100310 + }, + { + "epoch": 0.8298796376721678, + "grad_norm": 940.1737060546875, + "learning_rate": 1.052793360403655e-06, + "loss": 80.4012, + "step": 100320 + }, + { + "epoch": 0.829962360921537, + "grad_norm": 1659.75341796875, + "learning_rate": 1.0519277974309771e-06, + "loss": 96.4695, + "step": 100330 + }, + { + "epoch": 0.8300450841709063, + "grad_norm": 809.1747436523438, + "learning_rate": 1.0510625485882825e-06, + "loss": 90.7687, + "step": 100340 + }, + { + "epoch": 0.8301278074202755, + "grad_norm": 565.280029296875, + "learning_rate": 1.0501976139444191e-06, + "loss": 96.0154, + "step": 100350 + }, + { + "epoch": 0.8302105306696447, + "grad_norm": 1022.1803588867188, + "learning_rate": 1.0493329935682045e-06, + "loss": 84.8389, + "step": 100360 + }, + { + "epoch": 0.830293253919014, + "grad_norm": 615.2613525390625, + "learning_rate": 1.0484686875284323e-06, + "loss": 68.2956, + "step": 100370 + }, + { + "epoch": 0.8303759771683832, + "grad_norm": 573.2850952148438, + "learning_rate": 1.0476046958938719e-06, + "loss": 95.2249, + "step": 100380 + }, + { + "epoch": 0.8304587004177524, + "grad_norm": 504.07470703125, + "learning_rate": 1.0467410187332667e-06, + "loss": 77.4282, + "step": 100390 + }, + { + "epoch": 0.8305414236671217, + "grad_norm": 1332.99169921875, + "learning_rate": 1.045877656115335e-06, + "loss": 117.6646, + "step": 100400 + }, + { + "epoch": 0.8306241469164909, + "grad_norm": 665.0089721679688, + "learning_rate": 1.045014608108773e-06, + "loss": 91.1985, + "step": 100410 + }, + { + "epoch": 0.8307068701658601, + "grad_norm": 1073.398681640625, + "learning_rate": 1.0441518747822466e-06, + "loss": 96.1906, + "step": 100420 + }, + { + "epoch": 0.8307895934152294, + "grad_norm": 774.0573120117188, + "learning_rate": 1.0432894562044004e-06, + "loss": 83.1879, + "step": 100430 + }, + { + "epoch": 0.8308723166645986, + "grad_norm": 1170.0047607421875, + "learning_rate": 1.0424273524438521e-06, + "loss": 98.2264, + "step": 100440 + }, + { + "epoch": 0.8309550399139678, + 
"grad_norm": 567.7711181640625, + "learning_rate": 1.041565563569198e-06, + "loss": 72.0893, + "step": 100450 + }, + { + "epoch": 0.8310377631633371, + "grad_norm": 946.6346435546875, + "learning_rate": 1.040704089649005e-06, + "loss": 101.0679, + "step": 100460 + }, + { + "epoch": 0.8311204864127063, + "grad_norm": 609.6173095703125, + "learning_rate": 1.039842930751817e-06, + "loss": 93.027, + "step": 100470 + }, + { + "epoch": 0.8312032096620755, + "grad_norm": 693.0861206054688, + "learning_rate": 1.0389820869461525e-06, + "loss": 75.2829, + "step": 100480 + }, + { + "epoch": 0.8312859329114448, + "grad_norm": 987.9539184570312, + "learning_rate": 1.0381215583005043e-06, + "loss": 80.7349, + "step": 100490 + }, + { + "epoch": 0.831368656160814, + "grad_norm": 645.7682495117188, + "learning_rate": 1.0372613448833429e-06, + "loss": 103.9739, + "step": 100500 + }, + { + "epoch": 0.8314513794101832, + "grad_norm": 983.28271484375, + "learning_rate": 1.0364014467631078e-06, + "loss": 79.8077, + "step": 100510 + }, + { + "epoch": 0.8315341026595525, + "grad_norm": 754.6640625, + "learning_rate": 1.03554186400822e-06, + "loss": 88.8747, + "step": 100520 + }, + { + "epoch": 0.8316168259089217, + "grad_norm": 1055.522705078125, + "learning_rate": 1.034682596687071e-06, + "loss": 90.0075, + "step": 100530 + }, + { + "epoch": 0.8316995491582909, + "grad_norm": 1082.3468017578125, + "learning_rate": 1.0338236448680283e-06, + "loss": 82.0571, + "step": 100540 + }, + { + "epoch": 0.8317822724076602, + "grad_norm": 1203.2752685546875, + "learning_rate": 1.0329650086194371e-06, + "loss": 92.2786, + "step": 100550 + }, + { + "epoch": 0.8318649956570294, + "grad_norm": 513.8129272460938, + "learning_rate": 1.0321066880096142e-06, + "loss": 100.5733, + "step": 100560 + }, + { + "epoch": 0.8319477189063986, + "grad_norm": 637.0855712890625, + "learning_rate": 1.0312486831068518e-06, + "loss": 104.5459, + "step": 100570 + }, + { + "epoch": 0.8320304421557678, + "grad_norm": 712.1045532226562, + "learning_rate": 1.0303909939794176e-06, + "loss": 69.6618, + "step": 100580 + }, + { + "epoch": 0.8321131654051371, + "grad_norm": 686.3661499023438, + "learning_rate": 1.0295336206955553e-06, + "loss": 88.7248, + "step": 100590 + }, + { + "epoch": 0.8321958886545063, + "grad_norm": 864.8832397460938, + "learning_rate": 1.0286765633234795e-06, + "loss": 83.1856, + "step": 100600 + }, + { + "epoch": 0.8322786119038755, + "grad_norm": 716.6763916015625, + "learning_rate": 1.027819821931384e-06, + "loss": 115.8508, + "step": 100610 + }, + { + "epoch": 0.8323613351532448, + "grad_norm": 1016.16748046875, + "learning_rate": 1.0269633965874348e-06, + "loss": 84.6865, + "step": 100620 + }, + { + "epoch": 0.832444058402614, + "grad_norm": 910.2161254882812, + "learning_rate": 1.0261072873597744e-06, + "loss": 85.6898, + "step": 100630 + }, + { + "epoch": 0.8325267816519832, + "grad_norm": 555.4244384765625, + "learning_rate": 1.0252514943165188e-06, + "loss": 95.8644, + "step": 100640 + }, + { + "epoch": 0.8326095049013525, + "grad_norm": 991.6724853515625, + "learning_rate": 1.0243960175257605e-06, + "loss": 90.1952, + "step": 100650 + }, + { + "epoch": 0.8326922281507217, + "grad_norm": 1144.724609375, + "learning_rate": 1.0235408570555661e-06, + "loss": 81.0206, + "step": 100660 + }, + { + "epoch": 0.832774951400091, + "grad_norm": 1066.9254150390625, + "learning_rate": 1.022686012973978e-06, + "loss": 65.6117, + "step": 100670 + }, + { + "epoch": 0.8328576746494603, + "grad_norm": 1321.929931640625, + "learning_rate": 
1.0218314853490086e-06, + "loss": 115.5369, + "step": 100680 + }, + { + "epoch": 0.8329403978988295, + "grad_norm": 735.5868530273438, + "learning_rate": 1.0209772742486501e-06, + "loss": 69.8609, + "step": 100690 + }, + { + "epoch": 0.8330231211481987, + "grad_norm": 1540.8714599609375, + "learning_rate": 1.0201233797408694e-06, + "loss": 103.727, + "step": 100700 + }, + { + "epoch": 0.833105844397568, + "grad_norm": 1017.089111328125, + "learning_rate": 1.019269801893606e-06, + "loss": 84.9051, + "step": 100710 + }, + { + "epoch": 0.8331885676469372, + "grad_norm": 844.1512451171875, + "learning_rate": 1.0184165407747755e-06, + "loss": 62.8379, + "step": 100720 + }, + { + "epoch": 0.8332712908963064, + "grad_norm": 1083.6983642578125, + "learning_rate": 1.0175635964522661e-06, + "loss": 76.4931, + "step": 100730 + }, + { + "epoch": 0.8333540141456757, + "grad_norm": 1482.5361328125, + "learning_rate": 1.0167109689939459e-06, + "loss": 98.549, + "step": 100740 + }, + { + "epoch": 0.8334367373950449, + "grad_norm": 684.1696166992188, + "learning_rate": 1.0158586584676533e-06, + "loss": 69.7886, + "step": 100750 + }, + { + "epoch": 0.8335194606444141, + "grad_norm": 612.1447143554688, + "learning_rate": 1.0150066649412038e-06, + "loss": 65.4549, + "step": 100760 + }, + { + "epoch": 0.8336021838937834, + "grad_norm": 578.9083251953125, + "learning_rate": 1.0141549884823837e-06, + "loss": 81.0457, + "step": 100770 + }, + { + "epoch": 0.8336849071431526, + "grad_norm": 911.5106201171875, + "learning_rate": 1.0133036291589587e-06, + "loss": 87.0133, + "step": 100780 + }, + { + "epoch": 0.8337676303925218, + "grad_norm": 845.751708984375, + "learning_rate": 1.0124525870386676e-06, + "loss": 75.2599, + "step": 100790 + }, + { + "epoch": 0.8338503536418911, + "grad_norm": 547.631591796875, + "learning_rate": 1.0116018621892237e-06, + "loss": 111.8343, + "step": 100800 + }, + { + "epoch": 0.8339330768912603, + "grad_norm": 1028.4368896484375, + "learning_rate": 1.0107514546783154e-06, + "loss": 92.0812, + "step": 100810 + }, + { + "epoch": 0.8340158001406295, + "grad_norm": 653.3988037109375, + "learning_rate": 1.0099013645736056e-06, + "loss": 84.4328, + "step": 100820 + }, + { + "epoch": 0.8340985233899988, + "grad_norm": 479.5120544433594, + "learning_rate": 1.0090515919427308e-06, + "loss": 93.0216, + "step": 100830 + }, + { + "epoch": 0.834181246639368, + "grad_norm": 596.44677734375, + "learning_rate": 1.0082021368533078e-06, + "loss": 72.0628, + "step": 100840 + }, + { + "epoch": 0.8342639698887372, + "grad_norm": 1943.263916015625, + "learning_rate": 1.0073529993729191e-06, + "loss": 97.5844, + "step": 100850 + }, + { + "epoch": 0.8343466931381065, + "grad_norm": 601.1087646484375, + "learning_rate": 1.0065041795691289e-06, + "loss": 85.2571, + "step": 100860 + }, + { + "epoch": 0.8344294163874757, + "grad_norm": 903.16064453125, + "learning_rate": 1.0056556775094734e-06, + "loss": 106.5367, + "step": 100870 + }, + { + "epoch": 0.8345121396368449, + "grad_norm": 942.596923828125, + "learning_rate": 1.0048074932614637e-06, + "loss": 85.1813, + "step": 100880 + }, + { + "epoch": 0.8345948628862142, + "grad_norm": 885.8580322265625, + "learning_rate": 1.0039596268925867e-06, + "loss": 126.8213, + "step": 100890 + }, + { + "epoch": 0.8346775861355834, + "grad_norm": 819.2152709960938, + "learning_rate": 1.0031120784703025e-06, + "loss": 100.7738, + "step": 100900 + }, + { + "epoch": 0.8347603093849526, + "grad_norm": 1307.6453857421875, + "learning_rate": 1.0022648480620474e-06, + "loss": 
91.3189, + "step": 100910 + }, + { + "epoch": 0.8348430326343219, + "grad_norm": 1590.2379150390625, + "learning_rate": 1.001417935735231e-06, + "loss": 102.9999, + "step": 100920 + }, + { + "epoch": 0.8349257558836911, + "grad_norm": 919.4576416015625, + "learning_rate": 1.0005713415572383e-06, + "loss": 103.8122, + "step": 100930 + }, + { + "epoch": 0.8350084791330603, + "grad_norm": 835.6314697265625, + "learning_rate": 9.99725065595429e-07, + "loss": 95.0695, + "step": 100940 + }, + { + "epoch": 0.8350912023824296, + "grad_norm": 1234.212646484375, + "learning_rate": 9.988791079171378e-07, + "loss": 119.7262, + "step": 100950 + }, + { + "epoch": 0.8351739256317988, + "grad_norm": 558.1318969726562, + "learning_rate": 9.98033468589673e-07, + "loss": 114.4958, + "step": 100960 + }, + { + "epoch": 0.835256648881168, + "grad_norm": 611.1209106445312, + "learning_rate": 9.971881476803185e-07, + "loss": 76.4033, + "step": 100970 + }, + { + "epoch": 0.8353393721305373, + "grad_norm": 1021.4954223632812, + "learning_rate": 9.963431452563331e-07, + "loss": 86.9654, + "step": 100980 + }, + { + "epoch": 0.8354220953799065, + "grad_norm": 538.6202392578125, + "learning_rate": 9.954984613849488e-07, + "loss": 93.7995, + "step": 100990 + }, + { + "epoch": 0.8355048186292757, + "grad_norm": 643.6083984375, + "learning_rate": 9.94654096133374e-07, + "loss": 80.0093, + "step": 101000 + }, + { + "epoch": 0.835587541878645, + "grad_norm": 822.85791015625, + "learning_rate": 9.938100495687907e-07, + "loss": 74.7564, + "step": 101010 + }, + { + "epoch": 0.8356702651280142, + "grad_norm": 732.5307006835938, + "learning_rate": 9.929663217583562e-07, + "loss": 98.5391, + "step": 101020 + }, + { + "epoch": 0.8357529883773834, + "grad_norm": 762.5067138671875, + "learning_rate": 9.92122912769201e-07, + "loss": 108.4045, + "step": 101030 + }, + { + "epoch": 0.8358357116267527, + "grad_norm": 957.1695556640625, + "learning_rate": 9.912798226684322e-07, + "loss": 86.2965, + "step": 101040 + }, + { + "epoch": 0.8359184348761219, + "grad_norm": 614.9725952148438, + "learning_rate": 9.90437051523131e-07, + "loss": 68.6436, + "step": 101050 + }, + { + "epoch": 0.8360011581254911, + "grad_norm": 594.4864501953125, + "learning_rate": 9.895945994003514e-07, + "loss": 76.7402, + "step": 101060 + }, + { + "epoch": 0.8360838813748604, + "grad_norm": 601.7037353515625, + "learning_rate": 9.887524663671243e-07, + "loss": 92.365, + "step": 101070 + }, + { + "epoch": 0.8361666046242296, + "grad_norm": 1368.8843994140625, + "learning_rate": 9.879106524904547e-07, + "loss": 100.576, + "step": 101080 + }, + { + "epoch": 0.8362493278735988, + "grad_norm": 971.6827392578125, + "learning_rate": 9.870691578373216e-07, + "loss": 72.7704, + "step": 101090 + }, + { + "epoch": 0.8363320511229682, + "grad_norm": 824.6112670898438, + "learning_rate": 9.862279824746784e-07, + "loss": 66.5189, + "step": 101100 + }, + { + "epoch": 0.8364147743723374, + "grad_norm": 811.2408447265625, + "learning_rate": 9.853871264694536e-07, + "loss": 101.336, + "step": 101110 + }, + { + "epoch": 0.8364974976217066, + "grad_norm": 577.3998413085938, + "learning_rate": 9.845465898885509e-07, + "loss": 91.787, + "step": 101120 + }, + { + "epoch": 0.8365802208710759, + "grad_norm": 623.9453735351562, + "learning_rate": 9.837063727988478e-07, + "loss": 64.0953, + "step": 101130 + }, + { + "epoch": 0.8366629441204451, + "grad_norm": 773.7106323242188, + "learning_rate": 9.828664752671963e-07, + "loss": 73.4519, + "step": 101140 + }, + { + "epoch": 
0.8367456673698143, + "grad_norm": 1168.3428955078125, + "learning_rate": 9.82026897360423e-07, + "loss": 84.8574, + "step": 101150 + }, + { + "epoch": 0.8368283906191836, + "grad_norm": 1095.8922119140625, + "learning_rate": 9.811876391453296e-07, + "loss": 95.7534, + "step": 101160 + }, + { + "epoch": 0.8369111138685528, + "grad_norm": 932.7236938476562, + "learning_rate": 9.803487006886914e-07, + "loss": 102.7611, + "step": 101170 + }, + { + "epoch": 0.836993837117922, + "grad_norm": 983.7799072265625, + "learning_rate": 9.795100820572601e-07, + "loss": 72.2526, + "step": 101180 + }, + { + "epoch": 0.8370765603672913, + "grad_norm": 440.9442138671875, + "learning_rate": 9.78671783317761e-07, + "loss": 82.0223, + "step": 101190 + }, + { + "epoch": 0.8371592836166605, + "grad_norm": 806.7423706054688, + "learning_rate": 9.778338045368901e-07, + "loss": 94.5054, + "step": 101200 + }, + { + "epoch": 0.8372420068660297, + "grad_norm": 1106.8468017578125, + "learning_rate": 9.769961457813254e-07, + "loss": 93.8625, + "step": 101210 + }, + { + "epoch": 0.837324730115399, + "grad_norm": 983.9953002929688, + "learning_rate": 9.761588071177141e-07, + "loss": 109.6925, + "step": 101220 + }, + { + "epoch": 0.8374074533647682, + "grad_norm": 850.5468139648438, + "learning_rate": 9.753217886126797e-07, + "loss": 84.854, + "step": 101230 + }, + { + "epoch": 0.8374901766141374, + "grad_norm": 1075.742919921875, + "learning_rate": 9.7448509033282e-07, + "loss": 79.7995, + "step": 101240 + }, + { + "epoch": 0.8375728998635067, + "grad_norm": 621.546875, + "learning_rate": 9.73648712344707e-07, + "loss": 81.4679, + "step": 101250 + }, + { + "epoch": 0.8376556231128759, + "grad_norm": 409.54095458984375, + "learning_rate": 9.728126547148875e-07, + "loss": 86.3328, + "step": 101260 + }, + { + "epoch": 0.8377383463622451, + "grad_norm": 1011.2172241210938, + "learning_rate": 9.719769175098842e-07, + "loss": 70.703, + "step": 101270 + }, + { + "epoch": 0.8378210696116144, + "grad_norm": 919.0086669921875, + "learning_rate": 9.711415007961899e-07, + "loss": 94.6781, + "step": 101280 + }, + { + "epoch": 0.8379037928609836, + "grad_norm": 908.69189453125, + "learning_rate": 9.70306404640275e-07, + "loss": 133.2232, + "step": 101290 + }, + { + "epoch": 0.8379865161103528, + "grad_norm": 736.891845703125, + "learning_rate": 9.69471629108587e-07, + "loss": 87.2299, + "step": 101300 + }, + { + "epoch": 0.838069239359722, + "grad_norm": 376.5754699707031, + "learning_rate": 9.686371742675443e-07, + "loss": 78.6543, + "step": 101310 + }, + { + "epoch": 0.8381519626090913, + "grad_norm": 6189.4169921875, + "learning_rate": 9.678030401835399e-07, + "loss": 124.2069, + "step": 101320 + }, + { + "epoch": 0.8382346858584605, + "grad_norm": 1200.981689453125, + "learning_rate": 9.66969226922942e-07, + "loss": 96.2958, + "step": 101330 + }, + { + "epoch": 0.8383174091078297, + "grad_norm": 1382.705078125, + "learning_rate": 9.66135734552094e-07, + "loss": 82.9574, + "step": 101340 + }, + { + "epoch": 0.838400132357199, + "grad_norm": 671.8123168945312, + "learning_rate": 9.653025631373125e-07, + "loss": 62.2783, + "step": 101350 + }, + { + "epoch": 0.8384828556065682, + "grad_norm": 720.500732421875, + "learning_rate": 9.644697127448904e-07, + "loss": 67.8807, + "step": 101360 + }, + { + "epoch": 0.8385655788559374, + "grad_norm": 402.5303955078125, + "learning_rate": 9.636371834410918e-07, + "loss": 76.3787, + "step": 101370 + }, + { + "epoch": 0.8386483021053067, + "grad_norm": 698.8262329101562, + "learning_rate": 
9.62804975292158e-07, + "loss": 90.8034, + "step": 101380 + }, + { + "epoch": 0.8387310253546759, + "grad_norm": 715.2416381835938, + "learning_rate": 9.619730883643026e-07, + "loss": 70.7118, + "step": 101390 + }, + { + "epoch": 0.8388137486040451, + "grad_norm": 1074.1124267578125, + "learning_rate": 9.611415227237181e-07, + "loss": 67.4193, + "step": 101400 + }, + { + "epoch": 0.8388964718534144, + "grad_norm": 1017.0978393554688, + "learning_rate": 9.603102784365664e-07, + "loss": 95.0126, + "step": 101410 + }, + { + "epoch": 0.8389791951027836, + "grad_norm": 695.4487915039062, + "learning_rate": 9.594793555689868e-07, + "loss": 113.1844, + "step": 101420 + }, + { + "epoch": 0.8390619183521528, + "grad_norm": 556.859130859375, + "learning_rate": 9.58648754187091e-07, + "loss": 102.4575, + "step": 101430 + }, + { + "epoch": 0.8391446416015221, + "grad_norm": 1115.348388671875, + "learning_rate": 9.578184743569662e-07, + "loss": 101.0772, + "step": 101440 + }, + { + "epoch": 0.8392273648508913, + "grad_norm": 611.0310668945312, + "learning_rate": 9.569885161446762e-07, + "loss": 75.4577, + "step": 101450 + }, + { + "epoch": 0.8393100881002605, + "grad_norm": 886.7157592773438, + "learning_rate": 9.56158879616253e-07, + "loss": 94.1332, + "step": 101460 + }, + { + "epoch": 0.8393928113496298, + "grad_norm": 683.062744140625, + "learning_rate": 9.553295648377097e-07, + "loss": 63.7114, + "step": 101470 + }, + { + "epoch": 0.839475534598999, + "grad_norm": 957.2645874023438, + "learning_rate": 9.5450057187503e-07, + "loss": 87.1377, + "step": 101480 + }, + { + "epoch": 0.8395582578483682, + "grad_norm": 845.6005249023438, + "learning_rate": 9.536719007941725e-07, + "loss": 78.0765, + "step": 101490 + }, + { + "epoch": 0.8396409810977375, + "grad_norm": 448.1517639160156, + "learning_rate": 9.528435516610729e-07, + "loss": 96.6448, + "step": 101500 + }, + { + "epoch": 0.8397237043471067, + "grad_norm": 999.4203491210938, + "learning_rate": 9.520155245416379e-07, + "loss": 84.303, + "step": 101510 + }, + { + "epoch": 0.839806427596476, + "grad_norm": 661.683349609375, + "learning_rate": 9.511878195017499e-07, + "loss": 71.9918, + "step": 101520 + }, + { + "epoch": 0.8398891508458453, + "grad_norm": 1106.2100830078125, + "learning_rate": 9.503604366072666e-07, + "loss": 80.9662, + "step": 101530 + }, + { + "epoch": 0.8399718740952145, + "grad_norm": 1053.9813232421875, + "learning_rate": 9.495333759240171e-07, + "loss": 99.4201, + "step": 101540 + }, + { + "epoch": 0.8400545973445837, + "grad_norm": 1770.6314697265625, + "learning_rate": 9.487066375178078e-07, + "loss": 97.2852, + "step": 101550 + }, + { + "epoch": 0.840137320593953, + "grad_norm": 831.5025634765625, + "learning_rate": 9.47880221454418e-07, + "loss": 87.7667, + "step": 101560 + }, + { + "epoch": 0.8402200438433222, + "grad_norm": 619.336181640625, + "learning_rate": 9.470541277996026e-07, + "loss": 105.6826, + "step": 101570 + }, + { + "epoch": 0.8403027670926914, + "grad_norm": 590.185302734375, + "learning_rate": 9.462283566190894e-07, + "loss": 116.2632, + "step": 101580 + }, + { + "epoch": 0.8403854903420607, + "grad_norm": 652.5518798828125, + "learning_rate": 9.454029079785809e-07, + "loss": 90.1779, + "step": 101590 + }, + { + "epoch": 0.8404682135914299, + "grad_norm": 699.6583251953125, + "learning_rate": 9.445777819437557e-07, + "loss": 91.0062, + "step": 101600 + }, + { + "epoch": 0.8405509368407991, + "grad_norm": 601.4156494140625, + "learning_rate": 9.437529785802647e-07, + "loss": 69.9341, + "step": 101610 + }, 
+ { + "epoch": 0.8406336600901684, + "grad_norm": 309.7095642089844, + "learning_rate": 9.429284979537346e-07, + "loss": 72.0386, + "step": 101620 + }, + { + "epoch": 0.8407163833395376, + "grad_norm": 1362.3778076171875, + "learning_rate": 9.421043401297636e-07, + "loss": 86.1247, + "step": 101630 + }, + { + "epoch": 0.8407991065889068, + "grad_norm": 1056.7972412109375, + "learning_rate": 9.412805051739266e-07, + "loss": 94.5386, + "step": 101640 + }, + { + "epoch": 0.8408818298382761, + "grad_norm": 394.115478515625, + "learning_rate": 9.404569931517726e-07, + "loss": 57.8183, + "step": 101650 + }, + { + "epoch": 0.8409645530876453, + "grad_norm": 1883.9283447265625, + "learning_rate": 9.396338041288255e-07, + "loss": 83.5486, + "step": 101660 + }, + { + "epoch": 0.8410472763370145, + "grad_norm": 773.7887573242188, + "learning_rate": 9.388109381705817e-07, + "loss": 64.771, + "step": 101670 + }, + { + "epoch": 0.8411299995863838, + "grad_norm": 1021.2868041992188, + "learning_rate": 9.379883953425134e-07, + "loss": 84.9474, + "step": 101680 + }, + { + "epoch": 0.841212722835753, + "grad_norm": 808.8427734375, + "learning_rate": 9.371661757100648e-07, + "loss": 94.7058, + "step": 101690 + }, + { + "epoch": 0.8412954460851222, + "grad_norm": 811.6118774414062, + "learning_rate": 9.363442793386606e-07, + "loss": 95.701, + "step": 101700 + }, + { + "epoch": 0.8413781693344915, + "grad_norm": 923.9641723632812, + "learning_rate": 9.355227062936912e-07, + "loss": 101.2932, + "step": 101710 + }, + { + "epoch": 0.8414608925838607, + "grad_norm": 789.2193603515625, + "learning_rate": 9.34701456640526e-07, + "loss": 69.8859, + "step": 101720 + }, + { + "epoch": 0.8415436158332299, + "grad_norm": 676.7222900390625, + "learning_rate": 9.338805304445092e-07, + "loss": 77.5232, + "step": 101730 + }, + { + "epoch": 0.8416263390825992, + "grad_norm": 793.80615234375, + "learning_rate": 9.330599277709579e-07, + "loss": 83.3514, + "step": 101740 + }, + { + "epoch": 0.8417090623319684, + "grad_norm": 857.0948486328125, + "learning_rate": 9.322396486851626e-07, + "loss": 94.8482, + "step": 101750 + }, + { + "epoch": 0.8417917855813376, + "grad_norm": 867.2514038085938, + "learning_rate": 9.314196932523906e-07, + "loss": 96.5802, + "step": 101760 + }, + { + "epoch": 0.8418745088307069, + "grad_norm": 1006.9816284179688, + "learning_rate": 9.306000615378813e-07, + "loss": 89.8353, + "step": 101770 + }, + { + "epoch": 0.8419572320800761, + "grad_norm": 811.1846923828125, + "learning_rate": 9.297807536068476e-07, + "loss": 98.205, + "step": 101780 + }, + { + "epoch": 0.8420399553294453, + "grad_norm": 760.5066528320312, + "learning_rate": 9.289617695244818e-07, + "loss": 109.3908, + "step": 101790 + }, + { + "epoch": 0.8421226785788146, + "grad_norm": 855.790771484375, + "learning_rate": 9.281431093559439e-07, + "loss": 89.0774, + "step": 101800 + }, + { + "epoch": 0.8422054018281838, + "grad_norm": 901.5059814453125, + "learning_rate": 9.273247731663709e-07, + "loss": 77.9359, + "step": 101810 + }, + { + "epoch": 0.842288125077553, + "grad_norm": 856.0442504882812, + "learning_rate": 9.26506761020875e-07, + "loss": 103.1285, + "step": 101820 + }, + { + "epoch": 0.8423708483269223, + "grad_norm": 919.891357421875, + "learning_rate": 9.256890729845414e-07, + "loss": 96.1979, + "step": 101830 + }, + { + "epoch": 0.8424535715762915, + "grad_norm": 742.4312133789062, + "learning_rate": 9.248717091224291e-07, + "loss": 88.905, + "step": 101840 + }, + { + "epoch": 0.8425362948256607, + "grad_norm": 
697.6671142578125, + "learning_rate": 9.240546694995733e-07, + "loss": 71.3341, + "step": 101850 + }, + { + "epoch": 0.84261901807503, + "grad_norm": 611.2695922851562, + "learning_rate": 9.23237954180981e-07, + "loss": 75.6786, + "step": 101860 + }, + { + "epoch": 0.8427017413243992, + "grad_norm": 730.123779296875, + "learning_rate": 9.224215632316346e-07, + "loss": 69.9281, + "step": 101870 + }, + { + "epoch": 0.8427844645737684, + "grad_norm": 824.8983764648438, + "learning_rate": 9.216054967164916e-07, + "loss": 95.3742, + "step": 101880 + }, + { + "epoch": 0.8428671878231377, + "grad_norm": 783.8893432617188, + "learning_rate": 9.207897547004812e-07, + "loss": 81.9008, + "step": 101890 + }, + { + "epoch": 0.8429499110725069, + "grad_norm": 996.7938842773438, + "learning_rate": 9.199743372485093e-07, + "loss": 117.712, + "step": 101900 + }, + { + "epoch": 0.8430326343218761, + "grad_norm": 677.0623779296875, + "learning_rate": 9.191592444254549e-07, + "loss": 76.7643, + "step": 101910 + }, + { + "epoch": 0.8431153575712454, + "grad_norm": 1035.1934814453125, + "learning_rate": 9.183444762961702e-07, + "loss": 91.2087, + "step": 101920 + }, + { + "epoch": 0.8431980808206146, + "grad_norm": 940.440185546875, + "learning_rate": 9.175300329254839e-07, + "loss": 106.9464, + "step": 101930 + }, + { + "epoch": 0.8432808040699838, + "grad_norm": 685.4142456054688, + "learning_rate": 9.167159143781967e-07, + "loss": 85.0712, + "step": 101940 + }, + { + "epoch": 0.8433635273193532, + "grad_norm": 931.5978393554688, + "learning_rate": 9.159021207190843e-07, + "loss": 80.0363, + "step": 101950 + }, + { + "epoch": 0.8434462505687224, + "grad_norm": 683.4605102539062, + "learning_rate": 9.150886520128966e-07, + "loss": 107.6678, + "step": 101960 + }, + { + "epoch": 0.8435289738180916, + "grad_norm": 1009.03369140625, + "learning_rate": 9.142755083243577e-07, + "loss": 84.9778, + "step": 101970 + }, + { + "epoch": 0.8436116970674609, + "grad_norm": 847.3112182617188, + "learning_rate": 9.134626897181659e-07, + "loss": 75.6202, + "step": 101980 + }, + { + "epoch": 0.8436944203168301, + "grad_norm": 609.8668823242188, + "learning_rate": 9.126501962589928e-07, + "loss": 90.4189, + "step": 101990 + }, + { + "epoch": 0.8437771435661993, + "grad_norm": 860.5015869140625, + "learning_rate": 9.118380280114858e-07, + "loss": 82.1258, + "step": 102000 + }, + { + "epoch": 0.8438598668155685, + "grad_norm": 771.5186157226562, + "learning_rate": 9.110261850402641e-07, + "loss": 77.5802, + "step": 102010 + }, + { + "epoch": 0.8439425900649378, + "grad_norm": 814.3161010742188, + "learning_rate": 9.102146674099232e-07, + "loss": 74.584, + "step": 102020 + }, + { + "epoch": 0.844025313314307, + "grad_norm": 399.5432434082031, + "learning_rate": 9.094034751850317e-07, + "loss": 119.564, + "step": 102030 + }, + { + "epoch": 0.8441080365636762, + "grad_norm": 472.89300537109375, + "learning_rate": 9.085926084301327e-07, + "loss": 73.9548, + "step": 102040 + }, + { + "epoch": 0.8441907598130455, + "grad_norm": 1127.3804931640625, + "learning_rate": 9.077820672097426e-07, + "loss": 69.2523, + "step": 102050 + }, + { + "epoch": 0.8442734830624147, + "grad_norm": 555.0116577148438, + "learning_rate": 9.069718515883524e-07, + "loss": 92.0813, + "step": 102060 + }, + { + "epoch": 0.8443562063117839, + "grad_norm": 739.4345703125, + "learning_rate": 9.06161961630428e-07, + "loss": 74.4284, + "step": 102070 + }, + { + "epoch": 0.8444389295611532, + "grad_norm": 1793.3900146484375, + "learning_rate": 9.053523974004075e-07, + 
"loss": 115.2249, + "step": 102080 + }, + { + "epoch": 0.8445216528105224, + "grad_norm": 1008.2157592773438, + "learning_rate": 9.045431589627052e-07, + "loss": 80.6738, + "step": 102090 + }, + { + "epoch": 0.8446043760598916, + "grad_norm": 867.9142456054688, + "learning_rate": 9.037342463817084e-07, + "loss": 84.0623, + "step": 102100 + }, + { + "epoch": 0.8446870993092609, + "grad_norm": 1016.9595336914062, + "learning_rate": 9.029256597217778e-07, + "loss": 97.6914, + "step": 102110 + }, + { + "epoch": 0.8447698225586301, + "grad_norm": 538.5012817382812, + "learning_rate": 9.021173990472498e-07, + "loss": 72.0225, + "step": 102120 + }, + { + "epoch": 0.8448525458079993, + "grad_norm": 910.0997314453125, + "learning_rate": 9.013094644224346e-07, + "loss": 87.6451, + "step": 102130 + }, + { + "epoch": 0.8449352690573686, + "grad_norm": 567.3370361328125, + "learning_rate": 9.005018559116135e-07, + "loss": 68.3961, + "step": 102140 + }, + { + "epoch": 0.8450179923067378, + "grad_norm": 592.2027587890625, + "learning_rate": 8.996945735790447e-07, + "loss": 95.2749, + "step": 102150 + }, + { + "epoch": 0.845100715556107, + "grad_norm": 781.4312133789062, + "learning_rate": 8.988876174889616e-07, + "loss": 62.1267, + "step": 102160 + }, + { + "epoch": 0.8451834388054763, + "grad_norm": 1090.7720947265625, + "learning_rate": 8.980809877055696e-07, + "loss": 76.1743, + "step": 102170 + }, + { + "epoch": 0.8452661620548455, + "grad_norm": 1055.83447265625, + "learning_rate": 8.97274684293048e-07, + "loss": 90.5386, + "step": 102180 + }, + { + "epoch": 0.8453488853042147, + "grad_norm": 443.5655822753906, + "learning_rate": 8.964687073155509e-07, + "loss": 100.6941, + "step": 102190 + }, + { + "epoch": 0.845431608553584, + "grad_norm": 919.127685546875, + "learning_rate": 8.95663056837206e-07, + "loss": 86.7603, + "step": 102200 + }, + { + "epoch": 0.8455143318029532, + "grad_norm": 703.267822265625, + "learning_rate": 8.948577329221153e-07, + "loss": 92.9238, + "step": 102210 + }, + { + "epoch": 0.8455970550523224, + "grad_norm": 597.8628540039062, + "learning_rate": 8.940527356343564e-07, + "loss": 80.7923, + "step": 102220 + }, + { + "epoch": 0.8456797783016917, + "grad_norm": 985.700927734375, + "learning_rate": 8.93248065037976e-07, + "loss": 72.7414, + "step": 102230 + }, + { + "epoch": 0.8457625015510609, + "grad_norm": 608.7626953125, + "learning_rate": 8.924437211969983e-07, + "loss": 95.9518, + "step": 102240 + }, + { + "epoch": 0.8458452248004301, + "grad_norm": 734.87841796875, + "learning_rate": 8.916397041754238e-07, + "loss": 93.5361, + "step": 102250 + }, + { + "epoch": 0.8459279480497994, + "grad_norm": 707.2564697265625, + "learning_rate": 8.90836014037223e-07, + "loss": 87.7497, + "step": 102260 + }, + { + "epoch": 0.8460106712991686, + "grad_norm": 466.18157958984375, + "learning_rate": 8.900326508463425e-07, + "loss": 99.7546, + "step": 102270 + }, + { + "epoch": 0.8460933945485378, + "grad_norm": 665.1851806640625, + "learning_rate": 8.892296146667018e-07, + "loss": 98.5748, + "step": 102280 + }, + { + "epoch": 0.8461761177979071, + "grad_norm": 1132.5751953125, + "learning_rate": 8.884269055621941e-07, + "loss": 91.5124, + "step": 102290 + }, + { + "epoch": 0.8462588410472763, + "grad_norm": 954.9869384765625, + "learning_rate": 8.876245235966884e-07, + "loss": 124.0621, + "step": 102300 + }, + { + "epoch": 0.8463415642966455, + "grad_norm": 1059.5760498046875, + "learning_rate": 8.868224688340277e-07, + "loss": 87.5652, + "step": 102310 + }, + { + "epoch": 
0.8464242875460148, + "grad_norm": 886.4304809570312, + "learning_rate": 8.860207413380245e-07, + "loss": 93.7244, + "step": 102320 + }, + { + "epoch": 0.846507010795384, + "grad_norm": 954.0653076171875, + "learning_rate": 8.852193411724702e-07, + "loss": 94.8709, + "step": 102330 + }, + { + "epoch": 0.8465897340447532, + "grad_norm": 650.5189208984375, + "learning_rate": 8.844182684011276e-07, + "loss": 68.5645, + "step": 102340 + }, + { + "epoch": 0.8466724572941225, + "grad_norm": 680.576904296875, + "learning_rate": 8.83617523087737e-07, + "loss": 66.5604, + "step": 102350 + }, + { + "epoch": 0.8467551805434917, + "grad_norm": 767.3767700195312, + "learning_rate": 8.828171052960077e-07, + "loss": 114.503, + "step": 102360 + }, + { + "epoch": 0.846837903792861, + "grad_norm": 712.951171875, + "learning_rate": 8.820170150896268e-07, + "loss": 114.4278, + "step": 102370 + }, + { + "epoch": 0.8469206270422303, + "grad_norm": 753.8101806640625, + "learning_rate": 8.812172525322527e-07, + "loss": 66.3853, + "step": 102380 + }, + { + "epoch": 0.8470033502915995, + "grad_norm": 927.0662231445312, + "learning_rate": 8.8041781768752e-07, + "loss": 63.6904, + "step": 102390 + }, + { + "epoch": 0.8470860735409687, + "grad_norm": 1022.492919921875, + "learning_rate": 8.796187106190346e-07, + "loss": 80.9226, + "step": 102400 + }, + { + "epoch": 0.847168796790338, + "grad_norm": 647.39697265625, + "learning_rate": 8.788199313903778e-07, + "loss": 103.7165, + "step": 102410 + }, + { + "epoch": 0.8472515200397072, + "grad_norm": 835.044189453125, + "learning_rate": 8.78021480065106e-07, + "loss": 99.1959, + "step": 102420 + }, + { + "epoch": 0.8473342432890764, + "grad_norm": 1518.077880859375, + "learning_rate": 8.772233567067473e-07, + "loss": 88.389, + "step": 102430 + }, + { + "epoch": 0.8474169665384457, + "grad_norm": 717.4688110351562, + "learning_rate": 8.764255613788037e-07, + "loss": 77.5931, + "step": 102440 + }, + { + "epoch": 0.8474996897878149, + "grad_norm": 487.7947998046875, + "learning_rate": 8.756280941447554e-07, + "loss": 83.3511, + "step": 102450 + }, + { + "epoch": 0.8475824130371841, + "grad_norm": 1047.3568115234375, + "learning_rate": 8.748309550680506e-07, + "loss": 115.5404, + "step": 102460 + }, + { + "epoch": 0.8476651362865534, + "grad_norm": 601.6625366210938, + "learning_rate": 8.740341442121153e-07, + "loss": 80.3495, + "step": 102470 + }, + { + "epoch": 0.8477478595359226, + "grad_norm": 533.0794067382812, + "learning_rate": 8.732376616403487e-07, + "loss": 76.5713, + "step": 102480 + }, + { + "epoch": 0.8478305827852918, + "grad_norm": 918.6082153320312, + "learning_rate": 8.724415074161207e-07, + "loss": 88.8103, + "step": 102490 + }, + { + "epoch": 0.8479133060346611, + "grad_norm": 590.0209350585938, + "learning_rate": 8.716456816027791e-07, + "loss": 80.2414, + "step": 102500 + }, + { + "epoch": 0.8479960292840303, + "grad_norm": 869.5015869140625, + "learning_rate": 8.708501842636441e-07, + "loss": 91.9651, + "step": 102510 + }, + { + "epoch": 0.8480787525333995, + "grad_norm": 636.9760131835938, + "learning_rate": 8.700550154620091e-07, + "loss": 70.51, + "step": 102520 + }, + { + "epoch": 0.8481614757827688, + "grad_norm": 512.7169189453125, + "learning_rate": 8.692601752611435e-07, + "loss": 74.1003, + "step": 102530 + }, + { + "epoch": 0.848244199032138, + "grad_norm": 1306.0889892578125, + "learning_rate": 8.684656637242866e-07, + "loss": 87.0951, + "step": 102540 + }, + { + "epoch": 0.8483269222815072, + "grad_norm": 2022.1649169921875, + 
"learning_rate": 8.676714809146569e-07, + "loss": 118.8545, + "step": 102550 + }, + { + "epoch": 0.8484096455308765, + "grad_norm": 810.895751953125, + "learning_rate": 8.668776268954437e-07, + "loss": 87.6652, + "step": 102560 + }, + { + "epoch": 0.8484923687802457, + "grad_norm": 636.8590698242188, + "learning_rate": 8.660841017298082e-07, + "loss": 66.9613, + "step": 102570 + }, + { + "epoch": 0.8485750920296149, + "grad_norm": 1104.5308837890625, + "learning_rate": 8.652909054808884e-07, + "loss": 99.1564, + "step": 102580 + }, + { + "epoch": 0.8486578152789842, + "grad_norm": 802.7866821289062, + "learning_rate": 8.644980382117956e-07, + "loss": 89.1793, + "step": 102590 + }, + { + "epoch": 0.8487405385283534, + "grad_norm": 734.8566284179688, + "learning_rate": 8.637054999856148e-07, + "loss": 60.249, + "step": 102600 + }, + { + "epoch": 0.8488232617777226, + "grad_norm": 743.7099609375, + "learning_rate": 8.629132908654042e-07, + "loss": 92.0852, + "step": 102610 + }, + { + "epoch": 0.8489059850270919, + "grad_norm": 810.8662719726562, + "learning_rate": 8.621214109141962e-07, + "loss": 84.611, + "step": 102620 + }, + { + "epoch": 0.8489887082764611, + "grad_norm": 723.8987426757812, + "learning_rate": 8.613298601949971e-07, + "loss": 98.8016, + "step": 102630 + }, + { + "epoch": 0.8490714315258303, + "grad_norm": 1335.8057861328125, + "learning_rate": 8.605386387707865e-07, + "loss": 79.3897, + "step": 102640 + }, + { + "epoch": 0.8491541547751996, + "grad_norm": 916.5572509765625, + "learning_rate": 8.597477467045207e-07, + "loss": 84.2059, + "step": 102650 + }, + { + "epoch": 0.8492368780245688, + "grad_norm": 1256.606689453125, + "learning_rate": 8.589571840591243e-07, + "loss": 95.1835, + "step": 102660 + }, + { + "epoch": 0.849319601273938, + "grad_norm": 850.797119140625, + "learning_rate": 8.581669508975005e-07, + "loss": 84.3254, + "step": 102670 + }, + { + "epoch": 0.8494023245233073, + "grad_norm": 709.500244140625, + "learning_rate": 8.573770472825233e-07, + "loss": 73.6143, + "step": 102680 + }, + { + "epoch": 0.8494850477726765, + "grad_norm": 595.16650390625, + "learning_rate": 8.565874732770429e-07, + "loss": 72.705, + "step": 102690 + }, + { + "epoch": 0.8495677710220457, + "grad_norm": 871.81005859375, + "learning_rate": 8.55798228943881e-07, + "loss": 92.2849, + "step": 102700 + }, + { + "epoch": 0.849650494271415, + "grad_norm": 950.0880737304688, + "learning_rate": 8.550093143458355e-07, + "loss": 90.0403, + "step": 102710 + }, + { + "epoch": 0.8497332175207842, + "grad_norm": 855.10888671875, + "learning_rate": 8.542207295456751e-07, + "loss": 91.0368, + "step": 102720 + }, + { + "epoch": 0.8498159407701534, + "grad_norm": 846.3700561523438, + "learning_rate": 8.53432474606144e-07, + "loss": 101.0878, + "step": 102730 + }, + { + "epoch": 0.8498986640195226, + "grad_norm": 732.4088745117188, + "learning_rate": 8.526445495899627e-07, + "loss": 95.0499, + "step": 102740 + }, + { + "epoch": 0.8499813872688919, + "grad_norm": 723.0433349609375, + "learning_rate": 8.518569545598198e-07, + "loss": 65.2936, + "step": 102750 + }, + { + "epoch": 0.8500641105182611, + "grad_norm": 791.6629638671875, + "learning_rate": 8.510696895783821e-07, + "loss": 92.8047, + "step": 102760 + }, + { + "epoch": 0.8501468337676303, + "grad_norm": 669.09521484375, + "learning_rate": 8.502827547082876e-07, + "loss": 79.3404, + "step": 102770 + }, + { + "epoch": 0.8502295570169996, + "grad_norm": 292.19622802734375, + "learning_rate": 8.494961500121501e-07, + "loss": 74.0627, + "step": 
102780 + }, + { + "epoch": 0.8503122802663688, + "grad_norm": 881.7793579101562, + "learning_rate": 8.487098755525552e-07, + "loss": 85.533, + "step": 102790 + }, + { + "epoch": 0.850395003515738, + "grad_norm": 861.0848999023438, + "learning_rate": 8.47923931392064e-07, + "loss": 66.5782, + "step": 102800 + }, + { + "epoch": 0.8504777267651074, + "grad_norm": 857.7160034179688, + "learning_rate": 8.471383175932102e-07, + "loss": 99.694, + "step": 102810 + }, + { + "epoch": 0.8505604500144766, + "grad_norm": 544.4356689453125, + "learning_rate": 8.463530342185011e-07, + "loss": 83.8572, + "step": 102820 + }, + { + "epoch": 0.8506431732638458, + "grad_norm": 837.5228881835938, + "learning_rate": 8.455680813304185e-07, + "loss": 65.1481, + "step": 102830 + }, + { + "epoch": 0.8507258965132151, + "grad_norm": 898.0479736328125, + "learning_rate": 8.447834589914172e-07, + "loss": 82.4079, + "step": 102840 + }, + { + "epoch": 0.8508086197625843, + "grad_norm": 715.7451782226562, + "learning_rate": 8.439991672639264e-07, + "loss": 89.7485, + "step": 102850 + }, + { + "epoch": 0.8508913430119535, + "grad_norm": 1106.7410888671875, + "learning_rate": 8.432152062103488e-07, + "loss": 75.6558, + "step": 102860 + }, + { + "epoch": 0.8509740662613228, + "grad_norm": 957.0466918945312, + "learning_rate": 8.424315758930596e-07, + "loss": 94.4857, + "step": 102870 + }, + { + "epoch": 0.851056789510692, + "grad_norm": 787.596435546875, + "learning_rate": 8.416482763744093e-07, + "loss": 130.3268, + "step": 102880 + }, + { + "epoch": 0.8511395127600612, + "grad_norm": 982.1759033203125, + "learning_rate": 8.408653077167217e-07, + "loss": 101.9882, + "step": 102890 + }, + { + "epoch": 0.8512222360094305, + "grad_norm": 746.855224609375, + "learning_rate": 8.400826699822933e-07, + "loss": 100.449, + "step": 102900 + }, + { + "epoch": 0.8513049592587997, + "grad_norm": 440.57867431640625, + "learning_rate": 8.393003632333957e-07, + "loss": 70.7138, + "step": 102910 + }, + { + "epoch": 0.8513876825081689, + "grad_norm": 402.388427734375, + "learning_rate": 8.385183875322733e-07, + "loss": 111.9875, + "step": 102920 + }, + { + "epoch": 0.8514704057575382, + "grad_norm": 730.9739990234375, + "learning_rate": 8.377367429411443e-07, + "loss": 68.7929, + "step": 102930 + }, + { + "epoch": 0.8515531290069074, + "grad_norm": 1171.7159423828125, + "learning_rate": 8.369554295221999e-07, + "loss": 123.8448, + "step": 102940 + }, + { + "epoch": 0.8516358522562766, + "grad_norm": 828.75048828125, + "learning_rate": 8.361744473376066e-07, + "loss": 78.1169, + "step": 102950 + }, + { + "epoch": 0.8517185755056459, + "grad_norm": 694.2513427734375, + "learning_rate": 8.353937964495029e-07, + "loss": 103.3453, + "step": 102960 + }, + { + "epoch": 0.8518012987550151, + "grad_norm": 1413.7930908203125, + "learning_rate": 8.346134769200021e-07, + "loss": 83.3088, + "step": 102970 + }, + { + "epoch": 0.8518840220043843, + "grad_norm": 860.365234375, + "learning_rate": 8.338334888111899e-07, + "loss": 77.3941, + "step": 102980 + }, + { + "epoch": 0.8519667452537536, + "grad_norm": 750.07177734375, + "learning_rate": 8.330538321851284e-07, + "loss": 126.0141, + "step": 102990 + }, + { + "epoch": 0.8520494685031228, + "grad_norm": 850.6483764648438, + "learning_rate": 8.322745071038474e-07, + "loss": 96.3906, + "step": 103000 + }, + { + "epoch": 0.852132191752492, + "grad_norm": 1176.2943115234375, + "learning_rate": 8.314955136293579e-07, + "loss": 80.5075, + "step": 103010 + }, + { + "epoch": 0.8522149150018613, + "grad_norm": 
1178.8140869140625, + "learning_rate": 8.307168518236391e-07, + "loss": 113.7988, + "step": 103020 + }, + { + "epoch": 0.8522976382512305, + "grad_norm": 824.7714233398438, + "learning_rate": 8.299385217486466e-07, + "loss": 103.9782, + "step": 103030 + }, + { + "epoch": 0.8523803615005997, + "grad_norm": 816.1295776367188, + "learning_rate": 8.291605234663075e-07, + "loss": 102.6652, + "step": 103040 + }, + { + "epoch": 0.852463084749969, + "grad_norm": 942.5098876953125, + "learning_rate": 8.283828570385239e-07, + "loss": 83.2088, + "step": 103050 + }, + { + "epoch": 0.8525458079993382, + "grad_norm": 1104.326904296875, + "learning_rate": 8.276055225271718e-07, + "loss": 79.5488, + "step": 103060 + }, + { + "epoch": 0.8526285312487074, + "grad_norm": 845.7022705078125, + "learning_rate": 8.26828519994099e-07, + "loss": 79.7386, + "step": 103070 + }, + { + "epoch": 0.8527112544980767, + "grad_norm": 627.2059326171875, + "learning_rate": 8.260518495011299e-07, + "loss": 104.046, + "step": 103080 + }, + { + "epoch": 0.8527939777474459, + "grad_norm": 1893.8099365234375, + "learning_rate": 8.25275511110058e-07, + "loss": 90.7877, + "step": 103090 + }, + { + "epoch": 0.8528767009968151, + "grad_norm": 1187.659912109375, + "learning_rate": 8.244995048826532e-07, + "loss": 103.0294, + "step": 103100 + }, + { + "epoch": 0.8529594242461844, + "grad_norm": 673.2490844726562, + "learning_rate": 8.237238308806611e-07, + "loss": 96.1213, + "step": 103110 + }, + { + "epoch": 0.8530421474955536, + "grad_norm": 440.6217041015625, + "learning_rate": 8.229484891657974e-07, + "loss": 80.7047, + "step": 103120 + }, + { + "epoch": 0.8531248707449228, + "grad_norm": 816.9833984375, + "learning_rate": 8.221734797997522e-07, + "loss": 72.0308, + "step": 103130 + }, + { + "epoch": 0.8532075939942921, + "grad_norm": 1029.72900390625, + "learning_rate": 8.213988028441893e-07, + "loss": 90.9839, + "step": 103140 + }, + { + "epoch": 0.8532903172436613, + "grad_norm": 952.5328979492188, + "learning_rate": 8.20624458360747e-07, + "loss": 89.0053, + "step": 103150 + }, + { + "epoch": 0.8533730404930305, + "grad_norm": 1081.2255859375, + "learning_rate": 8.198504464110358e-07, + "loss": 122.348, + "step": 103160 + }, + { + "epoch": 0.8534557637423998, + "grad_norm": 1057.4853515625, + "learning_rate": 8.190767670566407e-07, + "loss": 92.9125, + "step": 103170 + }, + { + "epoch": 0.853538486991769, + "grad_norm": 975.9583129882812, + "learning_rate": 8.183034203591189e-07, + "loss": 117.357, + "step": 103180 + }, + { + "epoch": 0.8536212102411382, + "grad_norm": 674.9142456054688, + "learning_rate": 8.175304063800021e-07, + "loss": 82.4939, + "step": 103190 + }, + { + "epoch": 0.8537039334905075, + "grad_norm": 896.0360717773438, + "learning_rate": 8.167577251807951e-07, + "loss": 71.3201, + "step": 103200 + }, + { + "epoch": 0.8537866567398767, + "grad_norm": 779.8267211914062, + "learning_rate": 8.159853768229786e-07, + "loss": 78.3528, + "step": 103210 + }, + { + "epoch": 0.853869379989246, + "grad_norm": 607.8330078125, + "learning_rate": 8.152133613680035e-07, + "loss": 90.1494, + "step": 103220 + }, + { + "epoch": 0.8539521032386153, + "grad_norm": 634.585205078125, + "learning_rate": 8.144416788772957e-07, + "loss": 72.4637, + "step": 103230 + }, + { + "epoch": 0.8540348264879845, + "grad_norm": 672.9807739257812, + "learning_rate": 8.136703294122544e-07, + "loss": 74.1094, + "step": 103240 + }, + { + "epoch": 0.8541175497373537, + "grad_norm": 954.1567993164062, + "learning_rate": 8.128993130342538e-07, + "loss": 
98.1181, + "step": 103250 + }, + { + "epoch": 0.854200272986723, + "grad_norm": 597.5408935546875, + "learning_rate": 8.121286298046372e-07, + "loss": 86.6912, + "step": 103260 + }, + { + "epoch": 0.8542829962360922, + "grad_norm": 522.8579711914062, + "learning_rate": 8.113582797847252e-07, + "loss": 96.0704, + "step": 103270 + }, + { + "epoch": 0.8543657194854614, + "grad_norm": 649.9797973632812, + "learning_rate": 8.105882630358125e-07, + "loss": 78.3974, + "step": 103280 + }, + { + "epoch": 0.8544484427348307, + "grad_norm": 538.1500244140625, + "learning_rate": 8.098185796191632e-07, + "loss": 62.7917, + "step": 103290 + }, + { + "epoch": 0.8545311659841999, + "grad_norm": 425.60430908203125, + "learning_rate": 8.090492295960206e-07, + "loss": 67.8472, + "step": 103300 + }, + { + "epoch": 0.8546138892335691, + "grad_norm": 498.4320068359375, + "learning_rate": 8.082802130275968e-07, + "loss": 80.5484, + "step": 103310 + }, + { + "epoch": 0.8546966124829384, + "grad_norm": 679.7952880859375, + "learning_rate": 8.075115299750797e-07, + "loss": 92.3465, + "step": 103320 + }, + { + "epoch": 0.8547793357323076, + "grad_norm": 643.3114013671875, + "learning_rate": 8.067431804996284e-07, + "loss": 77.1838, + "step": 103330 + }, + { + "epoch": 0.8548620589816768, + "grad_norm": 746.61865234375, + "learning_rate": 8.059751646623792e-07, + "loss": 91.1856, + "step": 103340 + }, + { + "epoch": 0.8549447822310461, + "grad_norm": 1265.4454345703125, + "learning_rate": 8.052074825244371e-07, + "loss": 99.7866, + "step": 103350 + }, + { + "epoch": 0.8550275054804153, + "grad_norm": 791.6976318359375, + "learning_rate": 8.044401341468839e-07, + "loss": 75.7532, + "step": 103360 + }, + { + "epoch": 0.8551102287297845, + "grad_norm": 963.1607055664062, + "learning_rate": 8.036731195907743e-07, + "loss": 87.9717, + "step": 103370 + }, + { + "epoch": 0.8551929519791538, + "grad_norm": 702.797119140625, + "learning_rate": 8.029064389171365e-07, + "loss": 77.9525, + "step": 103380 + }, + { + "epoch": 0.855275675228523, + "grad_norm": 794.2588500976562, + "learning_rate": 8.021400921869693e-07, + "loss": 62.4597, + "step": 103390 + }, + { + "epoch": 0.8553583984778922, + "grad_norm": 654.7899780273438, + "learning_rate": 8.013740794612512e-07, + "loss": 90.647, + "step": 103400 + }, + { + "epoch": 0.8554411217272615, + "grad_norm": 317.69580078125, + "learning_rate": 8.006084008009285e-07, + "loss": 68.5561, + "step": 103410 + }, + { + "epoch": 0.8555238449766307, + "grad_norm": 737.1478271484375, + "learning_rate": 7.998430562669234e-07, + "loss": 94.218, + "step": 103420 + }, + { + "epoch": 0.8556065682259999, + "grad_norm": 625.2897338867188, + "learning_rate": 7.990780459201291e-07, + "loss": 68.3049, + "step": 103430 + }, + { + "epoch": 0.8556892914753692, + "grad_norm": 955.1998291015625, + "learning_rate": 7.983133698214158e-07, + "loss": 98.8367, + "step": 103440 + }, + { + "epoch": 0.8557720147247384, + "grad_norm": 1487.3785400390625, + "learning_rate": 7.975490280316239e-07, + "loss": 124.9303, + "step": 103450 + }, + { + "epoch": 0.8558547379741076, + "grad_norm": 822.9546508789062, + "learning_rate": 7.96785020611569e-07, + "loss": 81.8008, + "step": 103460 + }, + { + "epoch": 0.8559374612234768, + "grad_norm": 357.85772705078125, + "learning_rate": 7.960213476220402e-07, + "loss": 73.6397, + "step": 103470 + }, + { + "epoch": 0.8560201844728461, + "grad_norm": 1243.547119140625, + "learning_rate": 7.952580091237993e-07, + "loss": 73.8588, + "step": 103480 + }, + { + "epoch": 
0.8561029077222153, + "grad_norm": 1284.897705078125, + "learning_rate": 7.944950051775802e-07, + "loss": 114.3679, + "step": 103490 + }, + { + "epoch": 0.8561856309715845, + "grad_norm": 482.4344482421875, + "learning_rate": 7.937323358440935e-07, + "loss": 61.2101, + "step": 103500 + }, + { + "epoch": 0.8562683542209538, + "grad_norm": 644.4739990234375, + "learning_rate": 7.929700011840225e-07, + "loss": 59.5683, + "step": 103510 + }, + { + "epoch": 0.856351077470323, + "grad_norm": 894.75341796875, + "learning_rate": 7.922080012580191e-07, + "loss": 64.3507, + "step": 103520 + }, + { + "epoch": 0.8564338007196922, + "grad_norm": 336.1860046386719, + "learning_rate": 7.914463361267144e-07, + "loss": 77.5185, + "step": 103530 + }, + { + "epoch": 0.8565165239690615, + "grad_norm": 769.1115112304688, + "learning_rate": 7.906850058507098e-07, + "loss": 70.1408, + "step": 103540 + }, + { + "epoch": 0.8565992472184307, + "grad_norm": 759.4151000976562, + "learning_rate": 7.899240104905814e-07, + "loss": 62.1766, + "step": 103550 + }, + { + "epoch": 0.8566819704677999, + "grad_norm": 1259.5947265625, + "learning_rate": 7.891633501068774e-07, + "loss": 83.4048, + "step": 103560 + }, + { + "epoch": 0.8567646937171692, + "grad_norm": 948.8060302734375, + "learning_rate": 7.88403024760121e-07, + "loss": 82.6916, + "step": 103570 + }, + { + "epoch": 0.8568474169665384, + "grad_norm": 881.0368041992188, + "learning_rate": 7.876430345108072e-07, + "loss": 92.3865, + "step": 103580 + }, + { + "epoch": 0.8569301402159076, + "grad_norm": 609.7398681640625, + "learning_rate": 7.868833794194048e-07, + "loss": 55.702, + "step": 103590 + }, + { + "epoch": 0.8570128634652769, + "grad_norm": 967.415771484375, + "learning_rate": 7.861240595463565e-07, + "loss": 66.2445, + "step": 103600 + }, + { + "epoch": 0.8570955867146461, + "grad_norm": 916.1266479492188, + "learning_rate": 7.853650749520775e-07, + "loss": 95.8712, + "step": 103610 + }, + { + "epoch": 0.8571783099640153, + "grad_norm": 1089.77783203125, + "learning_rate": 7.846064256969571e-07, + "loss": 96.1446, + "step": 103620 + }, + { + "epoch": 0.8572610332133846, + "grad_norm": 1048.322265625, + "learning_rate": 7.838481118413571e-07, + "loss": 81.8681, + "step": 103630 + }, + { + "epoch": 0.8573437564627538, + "grad_norm": 375.7430419921875, + "learning_rate": 7.830901334456137e-07, + "loss": 88.0491, + "step": 103640 + }, + { + "epoch": 0.857426479712123, + "grad_norm": 1264.2864990234375, + "learning_rate": 7.823324905700352e-07, + "loss": 87.9994, + "step": 103650 + }, + { + "epoch": 0.8575092029614924, + "grad_norm": 961.72216796875, + "learning_rate": 7.815751832749035e-07, + "loss": 85.5537, + "step": 103660 + }, + { + "epoch": 0.8575919262108616, + "grad_norm": 1320.927978515625, + "learning_rate": 7.808182116204755e-07, + "loss": 114.779, + "step": 103670 + }, + { + "epoch": 0.8576746494602308, + "grad_norm": 738.6021728515625, + "learning_rate": 7.800615756669783e-07, + "loss": 100.2147, + "step": 103680 + }, + { + "epoch": 0.8577573727096001, + "grad_norm": 623.4457397460938, + "learning_rate": 7.793052754746144e-07, + "loss": 89.7, + "step": 103690 + }, + { + "epoch": 0.8578400959589693, + "grad_norm": 870.4207763671875, + "learning_rate": 7.785493111035597e-07, + "loss": 102.5554, + "step": 103700 + }, + { + "epoch": 0.8579228192083385, + "grad_norm": 1017.7730102539062, + "learning_rate": 7.777936826139626e-07, + "loss": 99.2322, + "step": 103710 + }, + { + "epoch": 0.8580055424577078, + "grad_norm": 630.4139404296875, + 
"learning_rate": 7.770383900659451e-07, + "loss": 73.928, + "step": 103720 + }, + { + "epoch": 0.858088265707077, + "grad_norm": 926.4790649414062, + "learning_rate": 7.762834335196013e-07, + "loss": 78.6408, + "step": 103730 + }, + { + "epoch": 0.8581709889564462, + "grad_norm": 1010.821044921875, + "learning_rate": 7.755288130350008e-07, + "loss": 70.0562, + "step": 103740 + }, + { + "epoch": 0.8582537122058155, + "grad_norm": 1223.12060546875, + "learning_rate": 7.747745286721852e-07, + "loss": 103.973, + "step": 103750 + }, + { + "epoch": 0.8583364354551847, + "grad_norm": 652.41796875, + "learning_rate": 7.740205804911693e-07, + "loss": 83.6676, + "step": 103760 + }, + { + "epoch": 0.8584191587045539, + "grad_norm": 588.4229736328125, + "learning_rate": 7.732669685519406e-07, + "loss": 99.9844, + "step": 103770 + }, + { + "epoch": 0.8585018819539232, + "grad_norm": 514.46728515625, + "learning_rate": 7.725136929144617e-07, + "loss": 77.1376, + "step": 103780 + }, + { + "epoch": 0.8585846052032924, + "grad_norm": 480.8714294433594, + "learning_rate": 7.717607536386662e-07, + "loss": 62.5537, + "step": 103790 + }, + { + "epoch": 0.8586673284526616, + "grad_norm": 789.140380859375, + "learning_rate": 7.71008150784463e-07, + "loss": 91.3066, + "step": 103800 + }, + { + "epoch": 0.8587500517020309, + "grad_norm": 514.1470947265625, + "learning_rate": 7.702558844117325e-07, + "loss": 103.8474, + "step": 103810 + }, + { + "epoch": 0.8588327749514001, + "grad_norm": 347.6160888671875, + "learning_rate": 7.695039545803295e-07, + "loss": 95.0633, + "step": 103820 + }, + { + "epoch": 0.8589154982007693, + "grad_norm": 526.0089721679688, + "learning_rate": 7.687523613500814e-07, + "loss": 83.8635, + "step": 103830 + }, + { + "epoch": 0.8589982214501386, + "grad_norm": 982.0831909179688, + "learning_rate": 7.680011047807894e-07, + "loss": 118.7206, + "step": 103840 + }, + { + "epoch": 0.8590809446995078, + "grad_norm": 723.9911499023438, + "learning_rate": 7.672501849322266e-07, + "loss": 83.4975, + "step": 103850 + }, + { + "epoch": 0.859163667948877, + "grad_norm": 953.7279663085938, + "learning_rate": 7.664996018641413e-07, + "loss": 72.9386, + "step": 103860 + }, + { + "epoch": 0.8592463911982463, + "grad_norm": 819.978271484375, + "learning_rate": 7.657493556362539e-07, + "loss": 84.9272, + "step": 103870 + }, + { + "epoch": 0.8593291144476155, + "grad_norm": 1022.4807739257812, + "learning_rate": 7.649994463082572e-07, + "loss": 93.3535, + "step": 103880 + }, + { + "epoch": 0.8594118376969847, + "grad_norm": 627.8077392578125, + "learning_rate": 7.642498739398185e-07, + "loss": 118.7882, + "step": 103890 + }, + { + "epoch": 0.859494560946354, + "grad_norm": 927.2117309570312, + "learning_rate": 7.63500638590578e-07, + "loss": 72.8843, + "step": 103900 + }, + { + "epoch": 0.8595772841957232, + "grad_norm": 905.3977661132812, + "learning_rate": 7.62751740320149e-07, + "loss": 96.9559, + "step": 103910 + }, + { + "epoch": 0.8596600074450924, + "grad_norm": 507.61529541015625, + "learning_rate": 7.620031791881172e-07, + "loss": 89.9055, + "step": 103920 + }, + { + "epoch": 0.8597427306944617, + "grad_norm": 831.4810180664062, + "learning_rate": 7.612549552540426e-07, + "loss": 68.1213, + "step": 103930 + }, + { + "epoch": 0.8598254539438309, + "grad_norm": 1053.16943359375, + "learning_rate": 7.605070685774596e-07, + "loss": 113.8502, + "step": 103940 + }, + { + "epoch": 0.8599081771932001, + "grad_norm": 908.6838989257812, + "learning_rate": 7.597595192178702e-07, + "loss": 100.7114, + "step": 
103950 + }, + { + "epoch": 0.8599909004425694, + "grad_norm": 800.2693481445312, + "learning_rate": 7.590123072347566e-07, + "loss": 98.1212, + "step": 103960 + }, + { + "epoch": 0.8600736236919386, + "grad_norm": 559.2039794921875, + "learning_rate": 7.582654326875705e-07, + "loss": 84.4858, + "step": 103970 + }, + { + "epoch": 0.8601563469413078, + "grad_norm": 799.5015258789062, + "learning_rate": 7.575188956357371e-07, + "loss": 88.0666, + "step": 103980 + }, + { + "epoch": 0.8602390701906771, + "grad_norm": 413.585693359375, + "learning_rate": 7.567726961386546e-07, + "loss": 68.5193, + "step": 103990 + }, + { + "epoch": 0.8603217934400463, + "grad_norm": 689.4683837890625, + "learning_rate": 7.560268342556948e-07, + "loss": 68.5218, + "step": 104000 + }, + { + "epoch": 0.8604045166894155, + "grad_norm": 1106.870361328125, + "learning_rate": 7.552813100462025e-07, + "loss": 97.9523, + "step": 104010 + }, + { + "epoch": 0.8604872399387848, + "grad_norm": 949.8282470703125, + "learning_rate": 7.54536123569497e-07, + "loss": 75.4276, + "step": 104020 + }, + { + "epoch": 0.860569963188154, + "grad_norm": 1082.73828125, + "learning_rate": 7.537912748848669e-07, + "loss": 102.4075, + "step": 104030 + }, + { + "epoch": 0.8606526864375232, + "grad_norm": 1304.8778076171875, + "learning_rate": 7.530467640515782e-07, + "loss": 101.8359, + "step": 104040 + }, + { + "epoch": 0.8607354096868925, + "grad_norm": 732.41015625, + "learning_rate": 7.523025911288656e-07, + "loss": 90.7578, + "step": 104050 + }, + { + "epoch": 0.8608181329362617, + "grad_norm": 1173.3736572265625, + "learning_rate": 7.51558756175943e-07, + "loss": 85.3348, + "step": 104060 + }, + { + "epoch": 0.860900856185631, + "grad_norm": 673.0076904296875, + "learning_rate": 7.508152592519924e-07, + "loss": 98.3655, + "step": 104070 + }, + { + "epoch": 0.8609835794350003, + "grad_norm": 3371.377197265625, + "learning_rate": 7.500721004161709e-07, + "loss": 105.9698, + "step": 104080 + }, + { + "epoch": 0.8610663026843695, + "grad_norm": 945.2011108398438, + "learning_rate": 7.493292797276075e-07, + "loss": 106.223, + "step": 104090 + }, + { + "epoch": 0.8611490259337387, + "grad_norm": 777.4385375976562, + "learning_rate": 7.485867972454053e-07, + "loss": 71.3168, + "step": 104100 + }, + { + "epoch": 0.861231749183108, + "grad_norm": 1511.415283203125, + "learning_rate": 7.478446530286415e-07, + "loss": 109.106, + "step": 104110 + }, + { + "epoch": 0.8613144724324772, + "grad_norm": 477.33001708984375, + "learning_rate": 7.471028471363628e-07, + "loss": 119.4962, + "step": 104120 + }, + { + "epoch": 0.8613971956818464, + "grad_norm": 616.1210327148438, + "learning_rate": 7.463613796275921e-07, + "loss": 104.9887, + "step": 104130 + }, + { + "epoch": 0.8614799189312157, + "grad_norm": 676.0572509765625, + "learning_rate": 7.456202505613252e-07, + "loss": 79.0803, + "step": 104140 + }, + { + "epoch": 0.8615626421805849, + "grad_norm": 776.3839111328125, + "learning_rate": 7.448794599965286e-07, + "loss": 73.2689, + "step": 104150 + }, + { + "epoch": 0.8616453654299541, + "grad_norm": 814.0975341796875, + "learning_rate": 7.441390079921463e-07, + "loss": 71.7772, + "step": 104160 + }, + { + "epoch": 0.8617280886793234, + "grad_norm": 962.8041381835938, + "learning_rate": 7.433988946070913e-07, + "loss": 76.7952, + "step": 104170 + }, + { + "epoch": 0.8618108119286926, + "grad_norm": 901.0029296875, + "learning_rate": 7.426591199002514e-07, + "loss": 142.9842, + "step": 104180 + }, + { + "epoch": 0.8618935351780618, + "grad_norm": 
527.9906005859375, + "learning_rate": 7.419196839304865e-07, + "loss": 70.0582, + "step": 104190 + }, + { + "epoch": 0.861976258427431, + "grad_norm": 820.9356079101562, + "learning_rate": 7.411805867566319e-07, + "loss": 69.29, + "step": 104200 + }, + { + "epoch": 0.8620589816768003, + "grad_norm": 788.0999145507812, + "learning_rate": 7.404418284374909e-07, + "loss": 103.0304, + "step": 104210 + }, + { + "epoch": 0.8621417049261695, + "grad_norm": 770.0556640625, + "learning_rate": 7.397034090318455e-07, + "loss": 82.4049, + "step": 104220 + }, + { + "epoch": 0.8622244281755387, + "grad_norm": 595.1284790039062, + "learning_rate": 7.389653285984471e-07, + "loss": 77.7128, + "step": 104230 + }, + { + "epoch": 0.862307151424908, + "grad_norm": 597.3800048828125, + "learning_rate": 7.382275871960215e-07, + "loss": 73.3765, + "step": 104240 + }, + { + "epoch": 0.8623898746742772, + "grad_norm": 1349.270263671875, + "learning_rate": 7.374901848832683e-07, + "loss": 98.555, + "step": 104250 + }, + { + "epoch": 0.8624725979236464, + "grad_norm": 962.1214599609375, + "learning_rate": 7.367531217188595e-07, + "loss": 66.6915, + "step": 104260 + }, + { + "epoch": 0.8625553211730157, + "grad_norm": 757.8329467773438, + "learning_rate": 7.360163977614388e-07, + "loss": 112.147, + "step": 104270 + }, + { + "epoch": 0.8626380444223849, + "grad_norm": 1464.066650390625, + "learning_rate": 7.352800130696253e-07, + "loss": 84.9319, + "step": 104280 + }, + { + "epoch": 0.8627207676717541, + "grad_norm": 862.40478515625, + "learning_rate": 7.345439677020077e-07, + "loss": 94.3516, + "step": 104290 + }, + { + "epoch": 0.8628034909211234, + "grad_norm": 282.1973876953125, + "learning_rate": 7.33808261717151e-07, + "loss": 82.4753, + "step": 104300 + }, + { + "epoch": 0.8628862141704926, + "grad_norm": 608.2290649414062, + "learning_rate": 7.330728951735916e-07, + "loss": 79.4648, + "step": 104310 + }, + { + "epoch": 0.8629689374198618, + "grad_norm": 794.7872924804688, + "learning_rate": 7.323378681298394e-07, + "loss": 76.8241, + "step": 104320 + }, + { + "epoch": 0.8630516606692311, + "grad_norm": 795.65380859375, + "learning_rate": 7.316031806443774e-07, + "loss": 68.2953, + "step": 104330 + }, + { + "epoch": 0.8631343839186003, + "grad_norm": 975.4625244140625, + "learning_rate": 7.308688327756591e-07, + "loss": 89.0946, + "step": 104340 + }, + { + "epoch": 0.8632171071679695, + "grad_norm": 435.83013916015625, + "learning_rate": 7.301348245821172e-07, + "loss": 83.3241, + "step": 104350 + }, + { + "epoch": 0.8632998304173388, + "grad_norm": 991.8388061523438, + "learning_rate": 7.294011561221503e-07, + "loss": 82.6674, + "step": 104360 + }, + { + "epoch": 0.863382553666708, + "grad_norm": 756.9240112304688, + "learning_rate": 7.286678274541358e-07, + "loss": 61.5385, + "step": 104370 + }, + { + "epoch": 0.8634652769160772, + "grad_norm": 836.5737915039062, + "learning_rate": 7.279348386364182e-07, + "loss": 88.6052, + "step": 104380 + }, + { + "epoch": 0.8635480001654465, + "grad_norm": 563.5185546875, + "learning_rate": 7.272021897273196e-07, + "loss": 77.5748, + "step": 104390 + }, + { + "epoch": 0.8636307234148157, + "grad_norm": 952.6004638671875, + "learning_rate": 7.264698807851328e-07, + "loss": 80.778, + "step": 104400 + }, + { + "epoch": 0.8637134466641849, + "grad_norm": 484.9126281738281, + "learning_rate": 7.257379118681251e-07, + "loss": 74.0515, + "step": 104410 + }, + { + "epoch": 0.8637961699135542, + "grad_norm": 680.9312133789062, + "learning_rate": 7.250062830345356e-07, + "loss": 
96.0891, + "step": 104420 + }, + { + "epoch": 0.8638788931629234, + "grad_norm": 759.776123046875, + "learning_rate": 7.242749943425765e-07, + "loss": 110.3812, + "step": 104430 + }, + { + "epoch": 0.8639616164122926, + "grad_norm": 1156.90380859375, + "learning_rate": 7.235440458504317e-07, + "loss": 98.4978, + "step": 104440 + }, + { + "epoch": 0.8640443396616619, + "grad_norm": 910.2420654296875, + "learning_rate": 7.228134376162632e-07, + "loss": 121.0294, + "step": 104450 + }, + { + "epoch": 0.8641270629110311, + "grad_norm": 1492.252197265625, + "learning_rate": 7.22083169698199e-07, + "loss": 107.3472, + "step": 104460 + }, + { + "epoch": 0.8642097861604003, + "grad_norm": 1519.6361083984375, + "learning_rate": 7.21353242154344e-07, + "loss": 108.2701, + "step": 104470 + }, + { + "epoch": 0.8642925094097696, + "grad_norm": 1137.7952880859375, + "learning_rate": 7.206236550427747e-07, + "loss": 70.6361, + "step": 104480 + }, + { + "epoch": 0.8643752326591388, + "grad_norm": 538.6994018554688, + "learning_rate": 7.198944084215421e-07, + "loss": 59.6236, + "step": 104490 + }, + { + "epoch": 0.864457955908508, + "grad_norm": 511.9731140136719, + "learning_rate": 7.191655023486682e-07, + "loss": 84.1222, + "step": 104500 + }, + { + "epoch": 0.8645406791578774, + "grad_norm": 897.7769775390625, + "learning_rate": 7.184369368821486e-07, + "loss": 84.8233, + "step": 104510 + }, + { + "epoch": 0.8646234024072466, + "grad_norm": 597.1415405273438, + "learning_rate": 7.177087120799525e-07, + "loss": 68.9195, + "step": 104520 + }, + { + "epoch": 0.8647061256566158, + "grad_norm": 973.8169555664062, + "learning_rate": 7.169808280000213e-07, + "loss": 130.055, + "step": 104530 + }, + { + "epoch": 0.8647888489059851, + "grad_norm": 729.9190673828125, + "learning_rate": 7.16253284700269e-07, + "loss": 54.8949, + "step": 104540 + }, + { + "epoch": 0.8648715721553543, + "grad_norm": 1125.672119140625, + "learning_rate": 7.155260822385828e-07, + "loss": 103.3253, + "step": 104550 + }, + { + "epoch": 0.8649542954047235, + "grad_norm": 1073.4981689453125, + "learning_rate": 7.147992206728238e-07, + "loss": 69.4759, + "step": 104560 + }, + { + "epoch": 0.8650370186540928, + "grad_norm": 947.5101318359375, + "learning_rate": 7.140727000608239e-07, + "loss": 90.7275, + "step": 104570 + }, + { + "epoch": 0.865119741903462, + "grad_norm": 874.85302734375, + "learning_rate": 7.133465204603895e-07, + "loss": 91.6719, + "step": 104580 + }, + { + "epoch": 0.8652024651528312, + "grad_norm": 994.0018310546875, + "learning_rate": 7.126206819292997e-07, + "loss": 79.112, + "step": 104590 + }, + { + "epoch": 0.8652851884022005, + "grad_norm": 875.634521484375, + "learning_rate": 7.118951845253053e-07, + "loss": 75.0624, + "step": 104600 + }, + { + "epoch": 0.8653679116515697, + "grad_norm": 905.0895385742188, + "learning_rate": 7.111700283061318e-07, + "loss": 60.7013, + "step": 104610 + }, + { + "epoch": 0.8654506349009389, + "grad_norm": 526.9947509765625, + "learning_rate": 7.104452133294759e-07, + "loss": 88.9498, + "step": 104620 + }, + { + "epoch": 0.8655333581503082, + "grad_norm": 724.978515625, + "learning_rate": 7.097207396530081e-07, + "loss": 81.7083, + "step": 104630 + }, + { + "epoch": 0.8656160813996774, + "grad_norm": 626.593017578125, + "learning_rate": 7.089966073343712e-07, + "loss": 86.5442, + "step": 104640 + }, + { + "epoch": 0.8656988046490466, + "grad_norm": 718.5762329101562, + "learning_rate": 7.082728164311814e-07, + "loss": 77.1169, + "step": 104650 + }, + { + "epoch": 0.8657815278984159, 
+ "grad_norm": 908.7042236328125, + "learning_rate": 7.07549367001027e-07, + "loss": 97.2458, + "step": 104660 + }, + { + "epoch": 0.8658642511477851, + "grad_norm": 863.9970092773438, + "learning_rate": 7.068262591014696e-07, + "loss": 97.336, + "step": 104670 + }, + { + "epoch": 0.8659469743971543, + "grad_norm": 558.906494140625, + "learning_rate": 7.06103492790044e-07, + "loss": 70.4578, + "step": 104680 + }, + { + "epoch": 0.8660296976465236, + "grad_norm": 552.6804809570312, + "learning_rate": 7.053810681242573e-07, + "loss": 70.8659, + "step": 104690 + }, + { + "epoch": 0.8661124208958928, + "grad_norm": 508.5573425292969, + "learning_rate": 7.046589851615893e-07, + "loss": 64.113, + "step": 104700 + }, + { + "epoch": 0.866195144145262, + "grad_norm": 806.9329833984375, + "learning_rate": 7.039372439594927e-07, + "loss": 76.6169, + "step": 104710 + }, + { + "epoch": 0.8662778673946313, + "grad_norm": 811.2073974609375, + "learning_rate": 7.032158445753934e-07, + "loss": 89.2829, + "step": 104720 + }, + { + "epoch": 0.8663605906440005, + "grad_norm": 810.32421875, + "learning_rate": 7.024947870666899e-07, + "loss": 82.497, + "step": 104730 + }, + { + "epoch": 0.8664433138933697, + "grad_norm": 758.9779052734375, + "learning_rate": 7.017740714907534e-07, + "loss": 82.4294, + "step": 104740 + }, + { + "epoch": 0.866526037142739, + "grad_norm": 794.656982421875, + "learning_rate": 7.010536979049277e-07, + "loss": 96.2241, + "step": 104750 + }, + { + "epoch": 0.8666087603921082, + "grad_norm": 494.71063232421875, + "learning_rate": 7.003336663665294e-07, + "loss": 83.7569, + "step": 104760 + }, + { + "epoch": 0.8666914836414774, + "grad_norm": 880.26318359375, + "learning_rate": 6.996139769328492e-07, + "loss": 84.9818, + "step": 104770 + }, + { + "epoch": 0.8667742068908467, + "grad_norm": 766.883544921875, + "learning_rate": 6.988946296611482e-07, + "loss": 80.0834, + "step": 104780 + }, + { + "epoch": 0.8668569301402159, + "grad_norm": 1070.8145751953125, + "learning_rate": 6.981756246086623e-07, + "loss": 81.7685, + "step": 104790 + }, + { + "epoch": 0.8669396533895851, + "grad_norm": 951.6563110351562, + "learning_rate": 6.974569618325993e-07, + "loss": 87.9841, + "step": 104800 + }, + { + "epoch": 0.8670223766389544, + "grad_norm": 1062.390869140625, + "learning_rate": 6.967386413901395e-07, + "loss": 88.5198, + "step": 104810 + }, + { + "epoch": 0.8671050998883236, + "grad_norm": 774.4631958007812, + "learning_rate": 6.96020663338437e-07, + "loss": 91.2737, + "step": 104820 + }, + { + "epoch": 0.8671878231376928, + "grad_norm": 432.4591979980469, + "learning_rate": 6.953030277346179e-07, + "loss": 77.8616, + "step": 104830 + }, + { + "epoch": 0.8672705463870621, + "grad_norm": 691.8195190429688, + "learning_rate": 6.945857346357804e-07, + "loss": 122.5289, + "step": 104840 + }, + { + "epoch": 0.8673532696364313, + "grad_norm": 952.2196044921875, + "learning_rate": 6.938687840989972e-07, + "loss": 63.169, + "step": 104850 + }, + { + "epoch": 0.8674359928858005, + "grad_norm": 0.0, + "learning_rate": 6.931521761813126e-07, + "loss": 88.435, + "step": 104860 + }, + { + "epoch": 0.8675187161351698, + "grad_norm": 733.5023193359375, + "learning_rate": 6.924359109397433e-07, + "loss": 74.1316, + "step": 104870 + }, + { + "epoch": 0.867601439384539, + "grad_norm": 641.714111328125, + "learning_rate": 6.917199884312809e-07, + "loss": 88.4582, + "step": 104880 + }, + { + "epoch": 0.8676841626339082, + "grad_norm": 790.0752563476562, + "learning_rate": 6.910044087128848e-07, + "loss": 
84.1604, + "step": 104890 + }, + { + "epoch": 0.8677668858832776, + "grad_norm": 663.8312377929688, + "learning_rate": 6.902891718414916e-07, + "loss": 88.1632, + "step": 104900 + }, + { + "epoch": 0.8678496091326467, + "grad_norm": 591.2030639648438, + "learning_rate": 6.895742778740117e-07, + "loss": 81.408, + "step": 104910 + }, + { + "epoch": 0.867932332382016, + "grad_norm": 624.845458984375, + "learning_rate": 6.888597268673236e-07, + "loss": 62.2073, + "step": 104920 + }, + { + "epoch": 0.8680150556313851, + "grad_norm": 794.0993041992188, + "learning_rate": 6.881455188782821e-07, + "loss": 77.9982, + "step": 104930 + }, + { + "epoch": 0.8680977788807545, + "grad_norm": 666.7411499023438, + "learning_rate": 6.874316539637127e-07, + "loss": 97.2312, + "step": 104940 + }, + { + "epoch": 0.8681805021301237, + "grad_norm": 1049.414306640625, + "learning_rate": 6.867181321804145e-07, + "loss": 65.9971, + "step": 104950 + }, + { + "epoch": 0.8682632253794929, + "grad_norm": 1004.761474609375, + "learning_rate": 6.860049535851593e-07, + "loss": 101.2232, + "step": 104960 + }, + { + "epoch": 0.8683459486288622, + "grad_norm": 1056.7117919921875, + "learning_rate": 6.852921182346927e-07, + "loss": 89.3601, + "step": 104970 + }, + { + "epoch": 0.8684286718782314, + "grad_norm": 801.506103515625, + "learning_rate": 6.84579626185729e-07, + "loss": 115.8205, + "step": 104980 + }, + { + "epoch": 0.8685113951276006, + "grad_norm": 475.1383056640625, + "learning_rate": 6.838674774949594e-07, + "loss": 71.0523, + "step": 104990 + }, + { + "epoch": 0.8685941183769699, + "grad_norm": 2474.7802734375, + "learning_rate": 6.831556722190453e-07, + "loss": 73.0823, + "step": 105000 + }, + { + "epoch": 0.8686768416263391, + "grad_norm": 897.792724609375, + "learning_rate": 6.82444210414624e-07, + "loss": 100.9971, + "step": 105010 + }, + { + "epoch": 0.8687595648757083, + "grad_norm": 886.6505737304688, + "learning_rate": 6.817330921383014e-07, + "loss": 82.7632, + "step": 105020 + }, + { + "epoch": 0.8688422881250776, + "grad_norm": 768.843505859375, + "learning_rate": 6.81022317446659e-07, + "loss": 80.1767, + "step": 105030 + }, + { + "epoch": 0.8689250113744468, + "grad_norm": 497.87469482421875, + "learning_rate": 6.803118863962488e-07, + "loss": 81.78, + "step": 105040 + }, + { + "epoch": 0.869007734623816, + "grad_norm": 491.0223083496094, + "learning_rate": 6.796017990435977e-07, + "loss": 47.6318, + "step": 105050 + }, + { + "epoch": 0.8690904578731853, + "grad_norm": 710.4462890625, + "learning_rate": 6.788920554452044e-07, + "loss": 94.524, + "step": 105060 + }, + { + "epoch": 0.8691731811225545, + "grad_norm": 816.4830932617188, + "learning_rate": 6.781826556575377e-07, + "loss": 103.3359, + "step": 105070 + }, + { + "epoch": 0.8692559043719237, + "grad_norm": 654.7847290039062, + "learning_rate": 6.77473599737043e-07, + "loss": 90.9469, + "step": 105080 + }, + { + "epoch": 0.869338627621293, + "grad_norm": 850.1752319335938, + "learning_rate": 6.767648877401361e-07, + "loss": 75.9913, + "step": 105090 + }, + { + "epoch": 0.8694213508706622, + "grad_norm": 577.8858032226562, + "learning_rate": 6.76056519723205e-07, + "loss": 108.0124, + "step": 105100 + }, + { + "epoch": 0.8695040741200314, + "grad_norm": 1150.85205078125, + "learning_rate": 6.753484957426132e-07, + "loss": 108.4332, + "step": 105110 + }, + { + "epoch": 0.8695867973694007, + "grad_norm": 380.7832336425781, + "learning_rate": 6.746408158546947e-07, + "loss": 58.2194, + "step": 105120 + }, + { + "epoch": 0.8696695206187699, + 
"grad_norm": 683.9119873046875, + "learning_rate": 6.739334801157554e-07, + "loss": 94.9094, + "step": 105130 + }, + { + "epoch": 0.8697522438681391, + "grad_norm": 695.6708984375, + "learning_rate": 6.732264885820761e-07, + "loss": 90.9071, + "step": 105140 + }, + { + "epoch": 0.8698349671175084, + "grad_norm": 586.7554321289062, + "learning_rate": 6.725198413099071e-07, + "loss": 68.9317, + "step": 105150 + }, + { + "epoch": 0.8699176903668776, + "grad_norm": 791.099609375, + "learning_rate": 6.718135383554736e-07, + "loss": 86.9691, + "step": 105160 + }, + { + "epoch": 0.8700004136162468, + "grad_norm": 785.0557250976562, + "learning_rate": 6.711075797749733e-07, + "loss": 89.6558, + "step": 105170 + }, + { + "epoch": 0.8700831368656161, + "grad_norm": 899.1886596679688, + "learning_rate": 6.704019656245764e-07, + "loss": 93.6414, + "step": 105180 + }, + { + "epoch": 0.8701658601149853, + "grad_norm": 802.5755615234375, + "learning_rate": 6.696966959604234e-07, + "loss": 108.0222, + "step": 105190 + }, + { + "epoch": 0.8702485833643545, + "grad_norm": 1072.0621337890625, + "learning_rate": 6.689917708386317e-07, + "loss": 87.7261, + "step": 105200 + }, + { + "epoch": 0.8703313066137238, + "grad_norm": 519.7999267578125, + "learning_rate": 6.682871903152888e-07, + "loss": 99.5766, + "step": 105210 + }, + { + "epoch": 0.870414029863093, + "grad_norm": 964.565673828125, + "learning_rate": 6.675829544464535e-07, + "loss": 81.1996, + "step": 105220 + }, + { + "epoch": 0.8704967531124622, + "grad_norm": 692.7098388671875, + "learning_rate": 6.668790632881611e-07, + "loss": 78.7892, + "step": 105230 + }, + { + "epoch": 0.8705794763618315, + "grad_norm": 1007.2482299804688, + "learning_rate": 6.66175516896414e-07, + "loss": 140.3374, + "step": 105240 + }, + { + "epoch": 0.8706621996112007, + "grad_norm": 1265.0731201171875, + "learning_rate": 6.654723153271913e-07, + "loss": 86.9678, + "step": 105250 + }, + { + "epoch": 0.8707449228605699, + "grad_norm": 632.7286376953125, + "learning_rate": 6.64769458636444e-07, + "loss": 93.2092, + "step": 105260 + }, + { + "epoch": 0.8708276461099392, + "grad_norm": 836.0169067382812, + "learning_rate": 6.640669468800947e-07, + "loss": 92.5722, + "step": 105270 + }, + { + "epoch": 0.8709103693593084, + "grad_norm": 658.7850952148438, + "learning_rate": 6.633647801140391e-07, + "loss": 100.487, + "step": 105280 + }, + { + "epoch": 0.8709930926086776, + "grad_norm": 587.75146484375, + "learning_rate": 6.626629583941447e-07, + "loss": 68.9679, + "step": 105290 + }, + { + "epoch": 0.8710758158580469, + "grad_norm": 817.0744018554688, + "learning_rate": 6.619614817762537e-07, + "loss": 73.4202, + "step": 105300 + }, + { + "epoch": 0.8711585391074161, + "grad_norm": 1041.8173828125, + "learning_rate": 6.612603503161802e-07, + "loss": 64.3145, + "step": 105310 + }, + { + "epoch": 0.8712412623567853, + "grad_norm": 1404.5458984375, + "learning_rate": 6.605595640697071e-07, + "loss": 99.786, + "step": 105320 + }, + { + "epoch": 0.8713239856061546, + "grad_norm": 771.7532958984375, + "learning_rate": 6.598591230925943e-07, + "loss": 82.4178, + "step": 105330 + }, + { + "epoch": 0.8714067088555238, + "grad_norm": 751.2391357421875, + "learning_rate": 6.591590274405723e-07, + "loss": 122.4575, + "step": 105340 + }, + { + "epoch": 0.871489432104893, + "grad_norm": 911.8988037109375, + "learning_rate": 6.584592771693449e-07, + "loss": 81.5654, + "step": 105350 + }, + { + "epoch": 0.8715721553542624, + "grad_norm": 1637.146484375, + "learning_rate": 6.57759872334588e-07, + 
"loss": 97.9183, + "step": 105360 + }, + { + "epoch": 0.8716548786036316, + "grad_norm": 329.8784484863281, + "learning_rate": 6.570608129919492e-07, + "loss": 62.8334, + "step": 105370 + }, + { + "epoch": 0.8717376018530008, + "grad_norm": 1938.16455078125, + "learning_rate": 6.563620991970509e-07, + "loss": 76.7647, + "step": 105380 + }, + { + "epoch": 0.8718203251023701, + "grad_norm": 717.9798583984375, + "learning_rate": 6.556637310054842e-07, + "loss": 82.9026, + "step": 105390 + }, + { + "epoch": 0.8719030483517393, + "grad_norm": 1446.15966796875, + "learning_rate": 6.54965708472819e-07, + "loss": 78.1929, + "step": 105400 + }, + { + "epoch": 0.8719857716011085, + "grad_norm": 1189.690673828125, + "learning_rate": 6.542680316545902e-07, + "loss": 72.0123, + "step": 105410 + }, + { + "epoch": 0.8720684948504778, + "grad_norm": 613.6715698242188, + "learning_rate": 6.535707006063097e-07, + "loss": 61.0173, + "step": 105420 + }, + { + "epoch": 0.872151218099847, + "grad_norm": 1527.14990234375, + "learning_rate": 6.528737153834613e-07, + "loss": 130.8081, + "step": 105430 + }, + { + "epoch": 0.8722339413492162, + "grad_norm": 1041.12109375, + "learning_rate": 6.521770760415008e-07, + "loss": 93.4483, + "step": 105440 + }, + { + "epoch": 0.8723166645985855, + "grad_norm": 852.52197265625, + "learning_rate": 6.514807826358566e-07, + "loss": 90.9408, + "step": 105450 + }, + { + "epoch": 0.8723993878479547, + "grad_norm": 647.1378173828125, + "learning_rate": 6.507848352219299e-07, + "loss": 67.9603, + "step": 105460 + }, + { + "epoch": 0.8724821110973239, + "grad_norm": 1192.1534423828125, + "learning_rate": 6.500892338550929e-07, + "loss": 111.3489, + "step": 105470 + }, + { + "epoch": 0.8725648343466932, + "grad_norm": 814.5704345703125, + "learning_rate": 6.493939785906928e-07, + "loss": 87.0574, + "step": 105480 + }, + { + "epoch": 0.8726475575960624, + "grad_norm": 741.8197021484375, + "learning_rate": 6.486990694840467e-07, + "loss": 68.0563, + "step": 105490 + }, + { + "epoch": 0.8727302808454316, + "grad_norm": 948.8922729492188, + "learning_rate": 6.480045065904461e-07, + "loss": 89.7507, + "step": 105500 + }, + { + "epoch": 0.8728130040948009, + "grad_norm": 1155.1693115234375, + "learning_rate": 6.47310289965154e-07, + "loss": 81.7769, + "step": 105510 + }, + { + "epoch": 0.8728957273441701, + "grad_norm": 919.3316040039062, + "learning_rate": 6.466164196634056e-07, + "loss": 97.3234, + "step": 105520 + }, + { + "epoch": 0.8729784505935393, + "grad_norm": 987.2399291992188, + "learning_rate": 6.459228957404101e-07, + "loss": 97.3726, + "step": 105530 + }, + { + "epoch": 0.8730611738429086, + "grad_norm": 838.5607299804688, + "learning_rate": 6.452297182513468e-07, + "loss": 69.4488, + "step": 105540 + }, + { + "epoch": 0.8731438970922778, + "grad_norm": 1150.2685546875, + "learning_rate": 6.445368872513691e-07, + "loss": 72.3736, + "step": 105550 + }, + { + "epoch": 0.873226620341647, + "grad_norm": 687.7527465820312, + "learning_rate": 6.438444027956026e-07, + "loss": 68.8998, + "step": 105560 + }, + { + "epoch": 0.8733093435910163, + "grad_norm": 455.3255920410156, + "learning_rate": 6.431522649391447e-07, + "loss": 94.7914, + "step": 105570 + }, + { + "epoch": 0.8733920668403855, + "grad_norm": 599.0716552734375, + "learning_rate": 6.42460473737066e-07, + "loss": 77.5691, + "step": 105580 + }, + { + "epoch": 0.8734747900897547, + "grad_norm": 982.269775390625, + "learning_rate": 6.417690292444084e-07, + "loss": 88.5389, + "step": 105590 + }, + { + "epoch": 
0.873557513339124, + "grad_norm": 304.9704895019531, + "learning_rate": 6.410779315161885e-07, + "loss": 74.6791, + "step": 105600 + }, + { + "epoch": 0.8736402365884932, + "grad_norm": 840.72021484375, + "learning_rate": 6.403871806073924e-07, + "loss": 66.5112, + "step": 105610 + }, + { + "epoch": 0.8737229598378624, + "grad_norm": 1154.091552734375, + "learning_rate": 6.396967765729806e-07, + "loss": 92.7399, + "step": 105620 + }, + { + "epoch": 0.8738056830872317, + "grad_norm": 875.4635009765625, + "learning_rate": 6.390067194678851e-07, + "loss": 72.3126, + "step": 105630 + }, + { + "epoch": 0.8738884063366009, + "grad_norm": 776.973876953125, + "learning_rate": 6.383170093470103e-07, + "loss": 86.9083, + "step": 105640 + }, + { + "epoch": 0.8739711295859701, + "grad_norm": 1360.440673828125, + "learning_rate": 6.376276462652342e-07, + "loss": 85.9056, + "step": 105650 + }, + { + "epoch": 0.8740538528353393, + "grad_norm": 1393.611572265625, + "learning_rate": 6.36938630277405e-07, + "loss": 97.7015, + "step": 105660 + }, + { + "epoch": 0.8741365760847086, + "grad_norm": 762.5498046875, + "learning_rate": 6.36249961438346e-07, + "loss": 111.3497, + "step": 105670 + }, + { + "epoch": 0.8742192993340778, + "grad_norm": 832.800048828125, + "learning_rate": 6.355616398028502e-07, + "loss": 81.6632, + "step": 105680 + }, + { + "epoch": 0.874302022583447, + "grad_norm": 661.8202514648438, + "learning_rate": 6.348736654256848e-07, + "loss": 86.8958, + "step": 105690 + }, + { + "epoch": 0.8743847458328163, + "grad_norm": 882.3600463867188, + "learning_rate": 6.341860383615889e-07, + "loss": 83.2556, + "step": 105700 + }, + { + "epoch": 0.8744674690821855, + "grad_norm": 867.7281494140625, + "learning_rate": 6.33498758665273e-07, + "loss": 94.6405, + "step": 105710 + }, + { + "epoch": 0.8745501923315547, + "grad_norm": 554.966796875, + "learning_rate": 6.328118263914218e-07, + "loss": 82.4325, + "step": 105720 + }, + { + "epoch": 0.874632915580924, + "grad_norm": 895.0468139648438, + "learning_rate": 6.321252415946904e-07, + "loss": 65.7037, + "step": 105730 + }, + { + "epoch": 0.8747156388302932, + "grad_norm": 812.9765014648438, + "learning_rate": 6.314390043297092e-07, + "loss": 88.4549, + "step": 105740 + }, + { + "epoch": 0.8747983620796624, + "grad_norm": 640.9434204101562, + "learning_rate": 6.307531146510754e-07, + "loss": 86.2904, + "step": 105750 + }, + { + "epoch": 0.8748810853290317, + "grad_norm": 553.9396362304688, + "learning_rate": 6.300675726133648e-07, + "loss": 75.9555, + "step": 105760 + }, + { + "epoch": 0.874963808578401, + "grad_norm": 419.89013671875, + "learning_rate": 6.293823782711222e-07, + "loss": 118.8286, + "step": 105770 + }, + { + "epoch": 0.8750465318277701, + "grad_norm": 733.7764892578125, + "learning_rate": 6.286975316788657e-07, + "loss": 60.0976, + "step": 105780 + }, + { + "epoch": 0.8751292550771395, + "grad_norm": 1776.192138671875, + "learning_rate": 6.280130328910849e-07, + "loss": 106.2316, + "step": 105790 + }, + { + "epoch": 0.8752119783265087, + "grad_norm": 659.7675170898438, + "learning_rate": 6.27328881962242e-07, + "loss": 86.0116, + "step": 105800 + }, + { + "epoch": 0.8752947015758779, + "grad_norm": 1190.6595458984375, + "learning_rate": 6.266450789467727e-07, + "loss": 82.2913, + "step": 105810 + }, + { + "epoch": 0.8753774248252472, + "grad_norm": 1189.8983154296875, + "learning_rate": 6.259616238990828e-07, + "loss": 50.2323, + "step": 105820 + }, + { + "epoch": 0.8754601480746164, + "grad_norm": 1355.994873046875, + "learning_rate": 
6.252785168735537e-07, + "loss": 82.7705, + "step": 105830 + }, + { + "epoch": 0.8755428713239856, + "grad_norm": 752.8018798828125, + "learning_rate": 6.245957579245349e-07, + "loss": 67.2084, + "step": 105840 + }, + { + "epoch": 0.8756255945733549, + "grad_norm": 1385.89306640625, + "learning_rate": 6.239133471063502e-07, + "loss": 72.485, + "step": 105850 + }, + { + "epoch": 0.8757083178227241, + "grad_norm": 467.3131103515625, + "learning_rate": 6.23231284473298e-07, + "loss": 92.7437, + "step": 105860 + }, + { + "epoch": 0.8757910410720933, + "grad_norm": 937.4329833984375, + "learning_rate": 6.225495700796452e-07, + "loss": 98.6699, + "step": 105870 + }, + { + "epoch": 0.8758737643214626, + "grad_norm": 530.5836791992188, + "learning_rate": 6.218682039796343e-07, + "loss": 79.1359, + "step": 105880 + }, + { + "epoch": 0.8759564875708318, + "grad_norm": 652.8727416992188, + "learning_rate": 6.211871862274765e-07, + "loss": 91.6557, + "step": 105890 + }, + { + "epoch": 0.876039210820201, + "grad_norm": 818.7814331054688, + "learning_rate": 6.205065168773589e-07, + "loss": 101.5155, + "step": 105900 + }, + { + "epoch": 0.8761219340695703, + "grad_norm": 876.2640380859375, + "learning_rate": 6.198261959834384e-07, + "loss": 103.2004, + "step": 105910 + }, + { + "epoch": 0.8762046573189395, + "grad_norm": 516.7633056640625, + "learning_rate": 6.191462235998463e-07, + "loss": 77.5895, + "step": 105920 + }, + { + "epoch": 0.8762873805683087, + "grad_norm": 646.3327026367188, + "learning_rate": 6.184665997806832e-07, + "loss": 56.7936, + "step": 105930 + }, + { + "epoch": 0.876370103817678, + "grad_norm": 576.1641235351562, + "learning_rate": 6.177873245800237e-07, + "loss": 70.1931, + "step": 105940 + }, + { + "epoch": 0.8764528270670472, + "grad_norm": 1594.127685546875, + "learning_rate": 6.171083980519138e-07, + "loss": 94.1285, + "step": 105950 + }, + { + "epoch": 0.8765355503164164, + "grad_norm": 1294.6268310546875, + "learning_rate": 6.164298202503754e-07, + "loss": 101.6075, + "step": 105960 + }, + { + "epoch": 0.8766182735657857, + "grad_norm": 540.323974609375, + "learning_rate": 6.157515912293982e-07, + "loss": 87.1123, + "step": 105970 + }, + { + "epoch": 0.8767009968151549, + "grad_norm": 570.9243774414062, + "learning_rate": 6.150737110429461e-07, + "loss": 74.8245, + "step": 105980 + }, + { + "epoch": 0.8767837200645241, + "grad_norm": 906.1620483398438, + "learning_rate": 6.143961797449549e-07, + "loss": 71.5885, + "step": 105990 + }, + { + "epoch": 0.8768664433138934, + "grad_norm": 878.2374267578125, + "learning_rate": 6.137189973893331e-07, + "loss": 73.5436, + "step": 106000 + }, + { + "epoch": 0.8769491665632626, + "grad_norm": 873.0944213867188, + "learning_rate": 6.130421640299594e-07, + "loss": 61.5041, + "step": 106010 + }, + { + "epoch": 0.8770318898126318, + "grad_norm": 734.4179077148438, + "learning_rate": 6.123656797206873e-07, + "loss": 85.497, + "step": 106020 + }, + { + "epoch": 0.8771146130620011, + "grad_norm": 1206.48193359375, + "learning_rate": 6.116895445153415e-07, + "loss": 61.9635, + "step": 106030 + }, + { + "epoch": 0.8771973363113703, + "grad_norm": 976.5135498046875, + "learning_rate": 6.11013758467719e-07, + "loss": 101.1229, + "step": 106040 + }, + { + "epoch": 0.8772800595607395, + "grad_norm": 1087.355224609375, + "learning_rate": 6.103383216315883e-07, + "loss": 75.2539, + "step": 106050 + }, + { + "epoch": 0.8773627828101088, + "grad_norm": 674.3126220703125, + "learning_rate": 6.096632340606922e-07, + "loss": 77.9221, + "step": 106060 + 
}, + { + "epoch": 0.877445506059478, + "grad_norm": 753.2930297851562, + "learning_rate": 6.089884958087439e-07, + "loss": 87.5172, + "step": 106070 + }, + { + "epoch": 0.8775282293088472, + "grad_norm": 640.8326416015625, + "learning_rate": 6.083141069294285e-07, + "loss": 74.8427, + "step": 106080 + }, + { + "epoch": 0.8776109525582165, + "grad_norm": 772.4714965820312, + "learning_rate": 6.07640067476406e-07, + "loss": 132.3394, + "step": 106090 + }, + { + "epoch": 0.8776936758075857, + "grad_norm": 566.04248046875, + "learning_rate": 6.069663775033041e-07, + "loss": 65.6599, + "step": 106100 + }, + { + "epoch": 0.8777763990569549, + "grad_norm": 780.9393920898438, + "learning_rate": 6.06293037063726e-07, + "loss": 80.8205, + "step": 106110 + }, + { + "epoch": 0.8778591223063242, + "grad_norm": 654.2213745117188, + "learning_rate": 6.056200462112466e-07, + "loss": 98.3166, + "step": 106120 + }, + { + "epoch": 0.8779418455556934, + "grad_norm": 1010.9708862304688, + "learning_rate": 6.049474049994125e-07, + "loss": 77.5668, + "step": 106130 + }, + { + "epoch": 0.8780245688050626, + "grad_norm": 871.8776245117188, + "learning_rate": 6.042751134817431e-07, + "loss": 87.4441, + "step": 106140 + }, + { + "epoch": 0.8781072920544319, + "grad_norm": 801.5313110351562, + "learning_rate": 6.03603171711728e-07, + "loss": 70.5738, + "step": 106150 + }, + { + "epoch": 0.8781900153038011, + "grad_norm": 602.4784545898438, + "learning_rate": 6.029315797428331e-07, + "loss": 87.0834, + "step": 106160 + }, + { + "epoch": 0.8782727385531703, + "grad_norm": 798.4912109375, + "learning_rate": 6.02260337628493e-07, + "loss": 63.067, + "step": 106170 + }, + { + "epoch": 0.8783554618025396, + "grad_norm": 1035.9962158203125, + "learning_rate": 6.015894454221143e-07, + "loss": 102.9226, + "step": 106180 + }, + { + "epoch": 0.8784381850519088, + "grad_norm": 891.0628051757812, + "learning_rate": 6.00918903177078e-07, + "loss": 80.5977, + "step": 106190 + }, + { + "epoch": 0.878520908301278, + "grad_norm": 597.5098876953125, + "learning_rate": 6.002487109467347e-07, + "loss": 72.3878, + "step": 106200 + }, + { + "epoch": 0.8786036315506474, + "grad_norm": 920.417236328125, + "learning_rate": 5.995788687844095e-07, + "loss": 87.8151, + "step": 106210 + }, + { + "epoch": 0.8786863548000166, + "grad_norm": 683.3025512695312, + "learning_rate": 5.98909376743399e-07, + "loss": 95.9658, + "step": 106220 + }, + { + "epoch": 0.8787690780493858, + "grad_norm": 583.1339111328125, + "learning_rate": 5.982402348769706e-07, + "loss": 85.8737, + "step": 106230 + }, + { + "epoch": 0.8788518012987551, + "grad_norm": 894.7290649414062, + "learning_rate": 5.975714432383645e-07, + "loss": 148.6901, + "step": 106240 + }, + { + "epoch": 0.8789345245481243, + "grad_norm": 651.498291015625, + "learning_rate": 5.969030018807953e-07, + "loss": 69.2491, + "step": 106250 + }, + { + "epoch": 0.8790172477974935, + "grad_norm": 544.6094360351562, + "learning_rate": 5.962349108574478e-07, + "loss": 72.5222, + "step": 106260 + }, + { + "epoch": 0.8790999710468628, + "grad_norm": 1018.2208251953125, + "learning_rate": 5.955671702214765e-07, + "loss": 109.5223, + "step": 106270 + }, + { + "epoch": 0.879182694296232, + "grad_norm": 892.130615234375, + "learning_rate": 5.948997800260125e-07, + "loss": 109.8219, + "step": 106280 + }, + { + "epoch": 0.8792654175456012, + "grad_norm": 1923.7149658203125, + "learning_rate": 5.94232740324156e-07, + "loss": 79.6006, + "step": 106290 + }, + { + "epoch": 0.8793481407949705, + "grad_norm": 
582.2818603515625, + "learning_rate": 5.935660511689805e-07, + "loss": 97.3011, + "step": 106300 + }, + { + "epoch": 0.8794308640443397, + "grad_norm": 1395.069580078125, + "learning_rate": 5.928997126135317e-07, + "loss": 101.4692, + "step": 106310 + }, + { + "epoch": 0.8795135872937089, + "grad_norm": 628.81298828125, + "learning_rate": 5.922337247108267e-07, + "loss": 88.9791, + "step": 106320 + }, + { + "epoch": 0.8795963105430782, + "grad_norm": 1247.3292236328125, + "learning_rate": 5.915680875138558e-07, + "loss": 81.8968, + "step": 106330 + }, + { + "epoch": 0.8796790337924474, + "grad_norm": 532.4841918945312, + "learning_rate": 5.909028010755786e-07, + "loss": 105.6303, + "step": 106340 + }, + { + "epoch": 0.8797617570418166, + "grad_norm": 748.1610717773438, + "learning_rate": 5.902378654489327e-07, + "loss": 93.8092, + "step": 106350 + }, + { + "epoch": 0.8798444802911859, + "grad_norm": 1042.5313720703125, + "learning_rate": 5.89573280686821e-07, + "loss": 82.0542, + "step": 106360 + }, + { + "epoch": 0.8799272035405551, + "grad_norm": 775.1285400390625, + "learning_rate": 5.889090468421216e-07, + "loss": 109.767, + "step": 106370 + }, + { + "epoch": 0.8800099267899243, + "grad_norm": 807.12744140625, + "learning_rate": 5.882451639676856e-07, + "loss": 85.814, + "step": 106380 + }, + { + "epoch": 0.8800926500392935, + "grad_norm": 712.1463012695312, + "learning_rate": 5.875816321163346e-07, + "loss": 80.7074, + "step": 106390 + }, + { + "epoch": 0.8801753732886628, + "grad_norm": 937.6873168945312, + "learning_rate": 5.869184513408633e-07, + "loss": 99.54, + "step": 106400 + }, + { + "epoch": 0.880258096538032, + "grad_norm": 647.0883178710938, + "learning_rate": 5.862556216940368e-07, + "loss": 66.8527, + "step": 106410 + }, + { + "epoch": 0.8803408197874012, + "grad_norm": 748.40673828125, + "learning_rate": 5.85593143228595e-07, + "loss": 90.8509, + "step": 106420 + }, + { + "epoch": 0.8804235430367705, + "grad_norm": 1141.7353515625, + "learning_rate": 5.849310159972466e-07, + "loss": 66.829, + "step": 106430 + }, + { + "epoch": 0.8805062662861397, + "grad_norm": 880.968994140625, + "learning_rate": 5.84269240052675e-07, + "loss": 92.991, + "step": 106440 + }, + { + "epoch": 0.8805889895355089, + "grad_norm": 803.4313354492188, + "learning_rate": 5.836078154475349e-07, + "loss": 126.6737, + "step": 106450 + }, + { + "epoch": 0.8806717127848782, + "grad_norm": 478.593994140625, + "learning_rate": 5.82946742234452e-07, + "loss": 79.7044, + "step": 106460 + }, + { + "epoch": 0.8807544360342474, + "grad_norm": 715.75146484375, + "learning_rate": 5.822860204660253e-07, + "loss": 70.8862, + "step": 106470 + }, + { + "epoch": 0.8808371592836166, + "grad_norm": 524.4168090820312, + "learning_rate": 5.816256501948259e-07, + "loss": 90.5394, + "step": 106480 + }, + { + "epoch": 0.8809198825329859, + "grad_norm": 752.4254150390625, + "learning_rate": 5.809656314733953e-07, + "loss": 86.0278, + "step": 106490 + }, + { + "epoch": 0.8810026057823551, + "grad_norm": 623.67529296875, + "learning_rate": 5.803059643542491e-07, + "loss": 67.2863, + "step": 106500 + }, + { + "epoch": 0.8810853290317243, + "grad_norm": 909.5820922851562, + "learning_rate": 5.796466488898733e-07, + "loss": 94.9997, + "step": 106510 + }, + { + "epoch": 0.8811680522810936, + "grad_norm": 1068.1527099609375, + "learning_rate": 5.789876851327275e-07, + "loss": 110.8501, + "step": 106520 + }, + { + "epoch": 0.8812507755304628, + "grad_norm": 2067.63671875, + "learning_rate": 5.783290731352415e-07, + "loss": 74.2425, 
+ "step": 106530 + }, + { + "epoch": 0.881333498779832, + "grad_norm": 889.1475830078125, + "learning_rate": 5.776708129498188e-07, + "loss": 105.7229, + "step": 106540 + }, + { + "epoch": 0.8814162220292013, + "grad_norm": 828.7091674804688, + "learning_rate": 5.770129046288331e-07, + "loss": 111.1305, + "step": 106550 + }, + { + "epoch": 0.8814989452785705, + "grad_norm": 997.931640625, + "learning_rate": 5.76355348224632e-07, + "loss": 110.2826, + "step": 106560 + }, + { + "epoch": 0.8815816685279397, + "grad_norm": 1100.512451171875, + "learning_rate": 5.756981437895342e-07, + "loss": 103.0764, + "step": 106570 + }, + { + "epoch": 0.881664391777309, + "grad_norm": 890.1248779296875, + "learning_rate": 5.750412913758307e-07, + "loss": 87.1903, + "step": 106580 + }, + { + "epoch": 0.8817471150266782, + "grad_norm": 1222.4423828125, + "learning_rate": 5.743847910357836e-07, + "loss": 80.1742, + "step": 106590 + }, + { + "epoch": 0.8818298382760474, + "grad_norm": 637.3331298828125, + "learning_rate": 5.737286428216288e-07, + "loss": 63.0052, + "step": 106600 + }, + { + "epoch": 0.8819125615254167, + "grad_norm": 919.8414306640625, + "learning_rate": 5.730728467855695e-07, + "loss": 85.3765, + "step": 106610 + }, + { + "epoch": 0.881995284774786, + "grad_norm": 661.7064208984375, + "learning_rate": 5.724174029797886e-07, + "loss": 64.6699, + "step": 106620 + }, + { + "epoch": 0.8820780080241551, + "grad_norm": 944.8833618164062, + "learning_rate": 5.717623114564347e-07, + "loss": 80.0257, + "step": 106630 + }, + { + "epoch": 0.8821607312735245, + "grad_norm": 992.529296875, + "learning_rate": 5.711075722676312e-07, + "loss": 102.9055, + "step": 106640 + }, + { + "epoch": 0.8822434545228937, + "grad_norm": 691.0933227539062, + "learning_rate": 5.704531854654721e-07, + "loss": 75.375, + "step": 106650 + }, + { + "epoch": 0.8823261777722629, + "grad_norm": 519.4947509765625, + "learning_rate": 5.69799151102024e-07, + "loss": 76.9458, + "step": 106660 + }, + { + "epoch": 0.8824089010216322, + "grad_norm": 3443.64794921875, + "learning_rate": 5.691454692293258e-07, + "loss": 99.6053, + "step": 106670 + }, + { + "epoch": 0.8824916242710014, + "grad_norm": 666.6217041015625, + "learning_rate": 5.684921398993875e-07, + "loss": 58.0222, + "step": 106680 + }, + { + "epoch": 0.8825743475203706, + "grad_norm": 765.2488403320312, + "learning_rate": 5.678391631641933e-07, + "loss": 69.1236, + "step": 106690 + }, + { + "epoch": 0.8826570707697399, + "grad_norm": 797.377197265625, + "learning_rate": 5.671865390756948e-07, + "loss": 73.226, + "step": 106700 + }, + { + "epoch": 0.8827397940191091, + "grad_norm": 577.6158447265625, + "learning_rate": 5.665342676858182e-07, + "loss": 68.9044, + "step": 106710 + }, + { + "epoch": 0.8828225172684783, + "grad_norm": 503.404541015625, + "learning_rate": 5.658823490464638e-07, + "loss": 60.142, + "step": 106720 + }, + { + "epoch": 0.8829052405178476, + "grad_norm": 959.497314453125, + "learning_rate": 5.652307832095016e-07, + "loss": 98.6422, + "step": 106730 + }, + { + "epoch": 0.8829879637672168, + "grad_norm": 983.735595703125, + "learning_rate": 5.645795702267731e-07, + "loss": 76.1658, + "step": 106740 + }, + { + "epoch": 0.883070687016586, + "grad_norm": 817.6513671875, + "learning_rate": 5.639287101500923e-07, + "loss": 102.0763, + "step": 106750 + }, + { + "epoch": 0.8831534102659553, + "grad_norm": 574.2067260742188, + "learning_rate": 5.63278203031245e-07, + "loss": 80.8163, + "step": 106760 + }, + { + "epoch": 0.8832361335153245, + "grad_norm": 
823.816650390625, + "learning_rate": 5.626280489219893e-07, + "loss": 62.7599, + "step": 106770 + }, + { + "epoch": 0.8833188567646937, + "grad_norm": 1204.035888671875, + "learning_rate": 5.619782478740565e-07, + "loss": 77.9808, + "step": 106780 + }, + { + "epoch": 0.883401580014063, + "grad_norm": 1444.1622314453125, + "learning_rate": 5.613287999391453e-07, + "loss": 104.0911, + "step": 106790 + }, + { + "epoch": 0.8834843032634322, + "grad_norm": 976.5982666015625, + "learning_rate": 5.606797051689294e-07, + "loss": 66.8928, + "step": 106800 + }, + { + "epoch": 0.8835670265128014, + "grad_norm": 785.950927734375, + "learning_rate": 5.600309636150575e-07, + "loss": 96.6358, + "step": 106810 + }, + { + "epoch": 0.8836497497621707, + "grad_norm": 736.114990234375, + "learning_rate": 5.593825753291443e-07, + "loss": 73.0294, + "step": 106820 + }, + { + "epoch": 0.8837324730115399, + "grad_norm": 567.788330078125, + "learning_rate": 5.587345403627803e-07, + "loss": 88.5388, + "step": 106830 + }, + { + "epoch": 0.8838151962609091, + "grad_norm": 1305.841064453125, + "learning_rate": 5.580868587675265e-07, + "loss": 75.9461, + "step": 106840 + }, + { + "epoch": 0.8838979195102784, + "grad_norm": 732.5675048828125, + "learning_rate": 5.574395305949148e-07, + "loss": 97.3696, + "step": 106850 + }, + { + "epoch": 0.8839806427596476, + "grad_norm": 1100.3834228515625, + "learning_rate": 5.567925558964532e-07, + "loss": 80.1282, + "step": 106860 + }, + { + "epoch": 0.8840633660090168, + "grad_norm": 1010.8685913085938, + "learning_rate": 5.561459347236148e-07, + "loss": 90.0034, + "step": 106870 + }, + { + "epoch": 0.8841460892583861, + "grad_norm": 371.01641845703125, + "learning_rate": 5.554996671278495e-07, + "loss": 85.3001, + "step": 106880 + }, + { + "epoch": 0.8842288125077553, + "grad_norm": 580.9638671875, + "learning_rate": 5.548537531605785e-07, + "loss": 85.6992, + "step": 106890 + }, + { + "epoch": 0.8843115357571245, + "grad_norm": 764.029052734375, + "learning_rate": 5.542081928731929e-07, + "loss": 57.3023, + "step": 106900 + }, + { + "epoch": 0.8843942590064938, + "grad_norm": 610.1265258789062, + "learning_rate": 5.535629863170594e-07, + "loss": 80.0151, + "step": 106910 + }, + { + "epoch": 0.884476982255863, + "grad_norm": 615.563720703125, + "learning_rate": 5.529181335435124e-07, + "loss": 81.8557, + "step": 106920 + }, + { + "epoch": 0.8845597055052322, + "grad_norm": 1012.834716796875, + "learning_rate": 5.522736346038598e-07, + "loss": 95.8173, + "step": 106930 + }, + { + "epoch": 0.8846424287546015, + "grad_norm": 1021.8489990234375, + "learning_rate": 5.516294895493824e-07, + "loss": 78.0766, + "step": 106940 + }, + { + "epoch": 0.8847251520039707, + "grad_norm": 827.3118286132812, + "learning_rate": 5.509856984313316e-07, + "loss": 80.5865, + "step": 106950 + }, + { + "epoch": 0.8848078752533399, + "grad_norm": 990.0963745117188, + "learning_rate": 5.503422613009296e-07, + "loss": 69.4347, + "step": 106960 + }, + { + "epoch": 0.8848905985027092, + "grad_norm": 593.1627197265625, + "learning_rate": 5.49699178209373e-07, + "loss": 66.5936, + "step": 106970 + }, + { + "epoch": 0.8849733217520784, + "grad_norm": 835.3819580078125, + "learning_rate": 5.490564492078287e-07, + "loss": 93.1555, + "step": 106980 + }, + { + "epoch": 0.8850560450014476, + "grad_norm": 934.9071044921875, + "learning_rate": 5.484140743474359e-07, + "loss": 92.1782, + "step": 106990 + }, + { + "epoch": 0.8851387682508169, + "grad_norm": 756.7869873046875, + "learning_rate": 5.477720536793035e-07, + 
"loss": 87.1874, + "step": 107000 + }, + { + "epoch": 0.8852214915001861, + "grad_norm": 781.40771484375, + "learning_rate": 5.471303872545175e-07, + "loss": 97.5913, + "step": 107010 + }, + { + "epoch": 0.8853042147495553, + "grad_norm": 991.211181640625, + "learning_rate": 5.464890751241303e-07, + "loss": 74.9666, + "step": 107020 + }, + { + "epoch": 0.8853869379989247, + "grad_norm": 1911.62109375, + "learning_rate": 5.458481173391694e-07, + "loss": 87.3059, + "step": 107030 + }, + { + "epoch": 0.8854696612482938, + "grad_norm": 1125.0557861328125, + "learning_rate": 5.452075139506314e-07, + "loss": 97.0107, + "step": 107040 + }, + { + "epoch": 0.885552384497663, + "grad_norm": 1039.9718017578125, + "learning_rate": 5.445672650094863e-07, + "loss": 114.9293, + "step": 107050 + }, + { + "epoch": 0.8856351077470324, + "grad_norm": 1257.056396484375, + "learning_rate": 5.43927370566677e-07, + "loss": 100.9882, + "step": 107060 + }, + { + "epoch": 0.8857178309964016, + "grad_norm": 506.9411926269531, + "learning_rate": 5.432878306731154e-07, + "loss": 72.05, + "step": 107070 + }, + { + "epoch": 0.8858005542457708, + "grad_norm": 1244.661376953125, + "learning_rate": 5.426486453796881e-07, + "loss": 103.0482, + "step": 107080 + }, + { + "epoch": 0.8858832774951401, + "grad_norm": 558.5795288085938, + "learning_rate": 5.420098147372515e-07, + "loss": 87.9579, + "step": 107090 + }, + { + "epoch": 0.8859660007445093, + "grad_norm": 582.4910278320312, + "learning_rate": 5.413713387966329e-07, + "loss": 90.7297, + "step": 107100 + }, + { + "epoch": 0.8860487239938785, + "grad_norm": 797.0523681640625, + "learning_rate": 5.407332176086367e-07, + "loss": 72.4927, + "step": 107110 + }, + { + "epoch": 0.8861314472432477, + "grad_norm": 889.9099731445312, + "learning_rate": 5.400954512240331e-07, + "loss": 59.4978, + "step": 107120 + }, + { + "epoch": 0.886214170492617, + "grad_norm": 705.6456909179688, + "learning_rate": 5.394580396935656e-07, + "loss": 71.2413, + "step": 107130 + }, + { + "epoch": 0.8862968937419862, + "grad_norm": 1448.95751953125, + "learning_rate": 5.388209830679508e-07, + "loss": 86.1324, + "step": 107140 + }, + { + "epoch": 0.8863796169913554, + "grad_norm": 619.7808227539062, + "learning_rate": 5.381842813978766e-07, + "loss": 88.696, + "step": 107150 + }, + { + "epoch": 0.8864623402407247, + "grad_norm": 811.8013916015625, + "learning_rate": 5.375479347340018e-07, + "loss": 69.7523, + "step": 107160 + }, + { + "epoch": 0.8865450634900939, + "grad_norm": 882.953369140625, + "learning_rate": 5.369119431269582e-07, + "loss": 73.939, + "step": 107170 + }, + { + "epoch": 0.8866277867394631, + "grad_norm": 563.3268432617188, + "learning_rate": 5.362763066273479e-07, + "loss": 89.5063, + "step": 107180 + }, + { + "epoch": 0.8867105099888324, + "grad_norm": 741.295166015625, + "learning_rate": 5.356410252857458e-07, + "loss": 136.3258, + "step": 107190 + }, + { + "epoch": 0.8867932332382016, + "grad_norm": 918.1881103515625, + "learning_rate": 5.350060991526996e-07, + "loss": 87.6655, + "step": 107200 + }, + { + "epoch": 0.8868759564875708, + "grad_norm": 571.861572265625, + "learning_rate": 5.343715282787271e-07, + "loss": 80.5383, + "step": 107210 + }, + { + "epoch": 0.8869586797369401, + "grad_norm": 1086.863037109375, + "learning_rate": 5.33737312714317e-07, + "loss": 95.8323, + "step": 107220 + }, + { + "epoch": 0.8870414029863093, + "grad_norm": 937.6774291992188, + "learning_rate": 5.33103452509931e-07, + "loss": 79.3969, + "step": 107230 + }, + { + "epoch": 
0.8871241262356785, + "grad_norm": 990.2754516601562, + "learning_rate": 5.32469947716004e-07, + "loss": 83.5339, + "step": 107240 + }, + { + "epoch": 0.8872068494850478, + "grad_norm": 578.4818115234375, + "learning_rate": 5.318367983829393e-07, + "loss": 88.6761, + "step": 107250 + }, + { + "epoch": 0.887289572734417, + "grad_norm": 930.7289428710938, + "learning_rate": 5.312040045611144e-07, + "loss": 108.0735, + "step": 107260 + }, + { + "epoch": 0.8873722959837862, + "grad_norm": 776.857666015625, + "learning_rate": 5.305715663008781e-07, + "loss": 90.2431, + "step": 107270 + }, + { + "epoch": 0.8874550192331555, + "grad_norm": 1201.76171875, + "learning_rate": 5.299394836525507e-07, + "loss": 78.6439, + "step": 107280 + }, + { + "epoch": 0.8875377424825247, + "grad_norm": 609.2925415039062, + "learning_rate": 5.293077566664234e-07, + "loss": 98.0536, + "step": 107290 + }, + { + "epoch": 0.8876204657318939, + "grad_norm": 1422.3968505859375, + "learning_rate": 5.286763853927601e-07, + "loss": 97.6561, + "step": 107300 + }, + { + "epoch": 0.8877031889812632, + "grad_norm": 1007.32568359375, + "learning_rate": 5.280453698817961e-07, + "loss": 73.6183, + "step": 107310 + }, + { + "epoch": 0.8877859122306324, + "grad_norm": 1175.0106201171875, + "learning_rate": 5.27414710183739e-07, + "loss": 96.213, + "step": 107320 + }, + { + "epoch": 0.8878686354800016, + "grad_norm": 509.9411926269531, + "learning_rate": 5.26784406348767e-07, + "loss": 113.796, + "step": 107330 + }, + { + "epoch": 0.8879513587293709, + "grad_norm": 646.4354858398438, + "learning_rate": 5.261544584270301e-07, + "loss": 102.4905, + "step": 107340 + }, + { + "epoch": 0.8880340819787401, + "grad_norm": 795.3406372070312, + "learning_rate": 5.255248664686507e-07, + "loss": 91.493, + "step": 107350 + }, + { + "epoch": 0.8881168052281093, + "grad_norm": 746.2446899414062, + "learning_rate": 5.24895630523723e-07, + "loss": 75.7493, + "step": 107360 + }, + { + "epoch": 0.8881995284774786, + "grad_norm": 759.6536865234375, + "learning_rate": 5.242667506423122e-07, + "loss": 78.0097, + "step": 107370 + }, + { + "epoch": 0.8882822517268478, + "grad_norm": 706.584228515625, + "learning_rate": 5.236382268744544e-07, + "loss": 65.1131, + "step": 107380 + }, + { + "epoch": 0.888364974976217, + "grad_norm": 904.0101928710938, + "learning_rate": 5.230100592701598e-07, + "loss": 94.9032, + "step": 107390 + }, + { + "epoch": 0.8884476982255863, + "grad_norm": 1153.80908203125, + "learning_rate": 5.223822478794083e-07, + "loss": 92.7698, + "step": 107400 + }, + { + "epoch": 0.8885304214749555, + "grad_norm": 827.2999877929688, + "learning_rate": 5.217547927521515e-07, + "loss": 87.4861, + "step": 107410 + }, + { + "epoch": 0.8886131447243247, + "grad_norm": 802.9961547851562, + "learning_rate": 5.211276939383136e-07, + "loss": 74.549, + "step": 107420 + }, + { + "epoch": 0.888695867973694, + "grad_norm": 1088.9659423828125, + "learning_rate": 5.205009514877895e-07, + "loss": 94.3598, + "step": 107430 + }, + { + "epoch": 0.8887785912230632, + "grad_norm": 792.8547973632812, + "learning_rate": 5.198745654504472e-07, + "loss": 89.176, + "step": 107440 + }, + { + "epoch": 0.8888613144724324, + "grad_norm": 344.9277038574219, + "learning_rate": 5.19248535876124e-07, + "loss": 91.6492, + "step": 107450 + }, + { + "epoch": 0.8889440377218017, + "grad_norm": 0.0, + "learning_rate": 5.186228628146317e-07, + "loss": 69.1638, + "step": 107460 + }, + { + "epoch": 0.889026760971171, + "grad_norm": 1069.0447998046875, + "learning_rate": 
5.179975463157511e-07, + "loss": 114.1318, + "step": 107470 + }, + { + "epoch": 0.8891094842205401, + "grad_norm": 491.0845642089844, + "learning_rate": 5.173725864292356e-07, + "loss": 81.6012, + "step": 107480 + }, + { + "epoch": 0.8891922074699095, + "grad_norm": 585.45654296875, + "learning_rate": 5.167479832048117e-07, + "loss": 101.9051, + "step": 107490 + }, + { + "epoch": 0.8892749307192787, + "grad_norm": 1258.58984375, + "learning_rate": 5.16123736692175e-07, + "loss": 72.9357, + "step": 107500 + }, + { + "epoch": 0.8893576539686479, + "grad_norm": 1067.1220703125, + "learning_rate": 5.154998469409945e-07, + "loss": 106.5081, + "step": 107510 + }, + { + "epoch": 0.8894403772180172, + "grad_norm": 908.0804443359375, + "learning_rate": 5.1487631400091e-07, + "loss": 80.5405, + "step": 107520 + }, + { + "epoch": 0.8895231004673864, + "grad_norm": 528.5298461914062, + "learning_rate": 5.142531379215338e-07, + "loss": 68.822, + "step": 107530 + }, + { + "epoch": 0.8896058237167556, + "grad_norm": 997.2656860351562, + "learning_rate": 5.136303187524478e-07, + "loss": 88.9139, + "step": 107540 + }, + { + "epoch": 0.8896885469661249, + "grad_norm": 1044.582275390625, + "learning_rate": 5.130078565432089e-07, + "loss": 82.6202, + "step": 107550 + }, + { + "epoch": 0.8897712702154941, + "grad_norm": 683.0887451171875, + "learning_rate": 5.123857513433406e-07, + "loss": 77.2961, + "step": 107560 + }, + { + "epoch": 0.8898539934648633, + "grad_norm": 307.61688232421875, + "learning_rate": 5.117640032023436e-07, + "loss": 82.64, + "step": 107570 + }, + { + "epoch": 0.8899367167142326, + "grad_norm": 549.88720703125, + "learning_rate": 5.111426121696866e-07, + "loss": 84.3351, + "step": 107580 + }, + { + "epoch": 0.8900194399636018, + "grad_norm": 685.7962646484375, + "learning_rate": 5.105215782948108e-07, + "loss": 86.4093, + "step": 107590 + }, + { + "epoch": 0.890102163212971, + "grad_norm": 4017.52880859375, + "learning_rate": 5.099009016271295e-07, + "loss": 93.5784, + "step": 107600 + }, + { + "epoch": 0.8901848864623403, + "grad_norm": 562.3822631835938, + "learning_rate": 5.092805822160262e-07, + "loss": 70.4941, + "step": 107610 + }, + { + "epoch": 0.8902676097117095, + "grad_norm": 987.6434936523438, + "learning_rate": 5.086606201108574e-07, + "loss": 84.2584, + "step": 107620 + }, + { + "epoch": 0.8903503329610787, + "grad_norm": 864.2491455078125, + "learning_rate": 5.080410153609511e-07, + "loss": 103.629, + "step": 107630 + }, + { + "epoch": 0.890433056210448, + "grad_norm": 553.91064453125, + "learning_rate": 5.074217680156062e-07, + "loss": 72.658, + "step": 107640 + }, + { + "epoch": 0.8905157794598172, + "grad_norm": 913.7005615234375, + "learning_rate": 5.068028781240925e-07, + "loss": 83.6153, + "step": 107650 + }, + { + "epoch": 0.8905985027091864, + "grad_norm": 643.3348388671875, + "learning_rate": 5.061843457356519e-07, + "loss": 92.5445, + "step": 107660 + }, + { + "epoch": 0.8906812259585557, + "grad_norm": 616.33984375, + "learning_rate": 5.055661708994996e-07, + "loss": 101.3816, + "step": 107670 + }, + { + "epoch": 0.8907639492079249, + "grad_norm": 926.9981079101562, + "learning_rate": 5.049483536648209e-07, + "loss": 75.4115, + "step": 107680 + }, + { + "epoch": 0.8908466724572941, + "grad_norm": 888.5517578125, + "learning_rate": 5.043308940807717e-07, + "loss": 90.7422, + "step": 107690 + }, + { + "epoch": 0.8909293957066634, + "grad_norm": 884.9143676757812, + "learning_rate": 5.037137921964814e-07, + "loss": 93.778, + "step": 107700 + }, + { + "epoch": 
0.8910121189560326, + "grad_norm": 1310.1082763671875, + "learning_rate": 5.030970480610492e-07, + "loss": 92.942, + "step": 107710 + }, + { + "epoch": 0.8910948422054018, + "grad_norm": 1047.660888671875, + "learning_rate": 5.024806617235484e-07, + "loss": 86.2382, + "step": 107720 + }, + { + "epoch": 0.8911775654547711, + "grad_norm": 638.484619140625, + "learning_rate": 5.01864633233019e-07, + "loss": 103.3557, + "step": 107730 + }, + { + "epoch": 0.8912602887041403, + "grad_norm": 1222.545654296875, + "learning_rate": 5.01248962638477e-07, + "loss": 81.7074, + "step": 107740 + }, + { + "epoch": 0.8913430119535095, + "grad_norm": 648.8272705078125, + "learning_rate": 5.006336499889075e-07, + "loss": 87.4129, + "step": 107750 + }, + { + "epoch": 0.8914257352028788, + "grad_norm": 611.2053833007812, + "learning_rate": 5.000186953332709e-07, + "loss": 77.1965, + "step": 107760 + }, + { + "epoch": 0.891508458452248, + "grad_norm": 863.5994262695312, + "learning_rate": 4.99404098720494e-07, + "loss": 83.2366, + "step": 107770 + }, + { + "epoch": 0.8915911817016172, + "grad_norm": 1112.973388671875, + "learning_rate": 4.987898601994778e-07, + "loss": 94.6965, + "step": 107780 + }, + { + "epoch": 0.8916739049509865, + "grad_norm": 807.7054443359375, + "learning_rate": 4.981759798190949e-07, + "loss": 70.4171, + "step": 107790 + }, + { + "epoch": 0.8917566282003557, + "grad_norm": 845.5135498046875, + "learning_rate": 4.97562457628189e-07, + "loss": 95.9467, + "step": 107800 + }, + { + "epoch": 0.8918393514497249, + "grad_norm": 1642.2303466796875, + "learning_rate": 4.969492936755759e-07, + "loss": 71.8787, + "step": 107810 + }, + { + "epoch": 0.8919220746990941, + "grad_norm": 796.374755859375, + "learning_rate": 4.963364880100402e-07, + "loss": 104.153, + "step": 107820 + }, + { + "epoch": 0.8920047979484634, + "grad_norm": 420.7682800292969, + "learning_rate": 4.957240406803409e-07, + "loss": 78.5665, + "step": 107830 + }, + { + "epoch": 0.8920875211978326, + "grad_norm": 785.2117309570312, + "learning_rate": 4.951119517352082e-07, + "loss": 84.3331, + "step": 107840 + }, + { + "epoch": 0.8921702444472018, + "grad_norm": 341.3686828613281, + "learning_rate": 4.945002212233412e-07, + "loss": 83.6106, + "step": 107850 + }, + { + "epoch": 0.8922529676965711, + "grad_norm": 785.5858154296875, + "learning_rate": 4.938888491934158e-07, + "loss": 72.6205, + "step": 107860 + }, + { + "epoch": 0.8923356909459403, + "grad_norm": 1233.1529541015625, + "learning_rate": 4.932778356940743e-07, + "loss": 132.9701, + "step": 107870 + }, + { + "epoch": 0.8924184141953095, + "grad_norm": 772.5987548828125, + "learning_rate": 4.926671807739319e-07, + "loss": 69.4721, + "step": 107880 + }, + { + "epoch": 0.8925011374446788, + "grad_norm": 575.5115966796875, + "learning_rate": 4.920568844815776e-07, + "loss": 90.4714, + "step": 107890 + }, + { + "epoch": 0.892583860694048, + "grad_norm": 895.2702026367188, + "learning_rate": 4.914469468655675e-07, + "loss": 64.3026, + "step": 107900 + }, + { + "epoch": 0.8926665839434172, + "grad_norm": 973.0512084960938, + "learning_rate": 4.908373679744316e-07, + "loss": 69.2861, + "step": 107910 + }, + { + "epoch": 0.8927493071927866, + "grad_norm": 948.7655639648438, + "learning_rate": 4.902281478566728e-07, + "loss": 86.1326, + "step": 107920 + }, + { + "epoch": 0.8928320304421558, + "grad_norm": 656.8770751953125, + "learning_rate": 4.896192865607629e-07, + "loss": 78.8228, + "step": 107930 + }, + { + "epoch": 0.892914753691525, + "grad_norm": 1080.7999267578125, + 
"learning_rate": 4.890107841351466e-07, + "loss": 86.0108, + "step": 107940 + }, + { + "epoch": 0.8929974769408943, + "grad_norm": 1222.696533203125, + "learning_rate": 4.884026406282383e-07, + "loss": 82.2702, + "step": 107950 + }, + { + "epoch": 0.8930802001902635, + "grad_norm": 1044.7401123046875, + "learning_rate": 4.877948560884277e-07, + "loss": 83.305, + "step": 107960 + }, + { + "epoch": 0.8931629234396327, + "grad_norm": 748.3582153320312, + "learning_rate": 4.871874305640723e-07, + "loss": 77.1618, + "step": 107970 + }, + { + "epoch": 0.893245646689002, + "grad_norm": 833.1705932617188, + "learning_rate": 4.865803641035027e-07, + "loss": 83.6409, + "step": 107980 + }, + { + "epoch": 0.8933283699383712, + "grad_norm": 888.131591796875, + "learning_rate": 4.859736567550188e-07, + "loss": 99.5259, + "step": 107990 + }, + { + "epoch": 0.8934110931877404, + "grad_norm": 908.0950927734375, + "learning_rate": 4.853673085668947e-07, + "loss": 103.253, + "step": 108000 + }, + { + "epoch": 0.8934938164371097, + "grad_norm": 908.1394653320312, + "learning_rate": 4.84761319587374e-07, + "loss": 63.2051, + "step": 108010 + }, + { + "epoch": 0.8935765396864789, + "grad_norm": 761.0054931640625, + "learning_rate": 4.841556898646732e-07, + "loss": 73.0463, + "step": 108020 + }, + { + "epoch": 0.8936592629358481, + "grad_norm": 886.8889770507812, + "learning_rate": 4.835504194469792e-07, + "loss": 85.4193, + "step": 108030 + }, + { + "epoch": 0.8937419861852174, + "grad_norm": 672.312744140625, + "learning_rate": 4.829455083824508e-07, + "loss": 91.8696, + "step": 108040 + }, + { + "epoch": 0.8938247094345866, + "grad_norm": 700.14599609375, + "learning_rate": 4.823409567192172e-07, + "loss": 88.3726, + "step": 108050 + }, + { + "epoch": 0.8939074326839558, + "grad_norm": 1457.9686279296875, + "learning_rate": 4.817367645053806e-07, + "loss": 94.6632, + "step": 108060 + }, + { + "epoch": 0.8939901559333251, + "grad_norm": 1704.705810546875, + "learning_rate": 4.811329317890151e-07, + "loss": 94.3083, + "step": 108070 + }, + { + "epoch": 0.8940728791826943, + "grad_norm": 720.1951293945312, + "learning_rate": 4.805294586181624e-07, + "loss": 87.1297, + "step": 108080 + }, + { + "epoch": 0.8941556024320635, + "grad_norm": 581.7968139648438, + "learning_rate": 4.799263450408386e-07, + "loss": 66.5786, + "step": 108090 + }, + { + "epoch": 0.8942383256814328, + "grad_norm": 1700.38232421875, + "learning_rate": 4.79323591105032e-07, + "loss": 110.1113, + "step": 108100 + }, + { + "epoch": 0.894321048930802, + "grad_norm": 746.1917724609375, + "learning_rate": 4.787211968586996e-07, + "loss": 78.2092, + "step": 108110 + }, + { + "epoch": 0.8944037721801712, + "grad_norm": 1086.206298828125, + "learning_rate": 4.781191623497716e-07, + "loss": 102.1627, + "step": 108120 + }, + { + "epoch": 0.8944864954295405, + "grad_norm": 983.9944458007812, + "learning_rate": 4.775174876261496e-07, + "loss": 101.4622, + "step": 108130 + }, + { + "epoch": 0.8945692186789097, + "grad_norm": 724.5037231445312, + "learning_rate": 4.769161727357047e-07, + "loss": 98.8229, + "step": 108140 + }, + { + "epoch": 0.8946519419282789, + "grad_norm": 1058.3212890625, + "learning_rate": 4.763152177262836e-07, + "loss": 78.5956, + "step": 108150 + }, + { + "epoch": 0.8947346651776482, + "grad_norm": 1093.287841796875, + "learning_rate": 4.757146226456988e-07, + "loss": 128.6798, + "step": 108160 + }, + { + "epoch": 0.8948173884270174, + "grad_norm": 1089.8331298828125, + "learning_rate": 4.75114387541738e-07, + "loss": 102.0988, + 
"step": 108170 + }, + { + "epoch": 0.8949001116763866, + "grad_norm": 221.10438537597656, + "learning_rate": 4.7451451246215863e-07, + "loss": 87.4969, + "step": 108180 + }, + { + "epoch": 0.8949828349257559, + "grad_norm": 798.0653076171875, + "learning_rate": 4.7391499745469026e-07, + "loss": 77.1766, + "step": 108190 + }, + { + "epoch": 0.8950655581751251, + "grad_norm": 767.3575439453125, + "learning_rate": 4.733158425670342e-07, + "loss": 78.9256, + "step": 108200 + }, + { + "epoch": 0.8951482814244943, + "grad_norm": 1510.5670166015625, + "learning_rate": 4.727170478468612e-07, + "loss": 112.9961, + "step": 108210 + }, + { + "epoch": 0.8952310046738636, + "grad_norm": 2023.6324462890625, + "learning_rate": 4.721186133418154e-07, + "loss": 107.9993, + "step": 108220 + }, + { + "epoch": 0.8953137279232328, + "grad_norm": 434.2220458984375, + "learning_rate": 4.71520539099512e-07, + "loss": 86.038, + "step": 108230 + }, + { + "epoch": 0.895396451172602, + "grad_norm": 890.50048828125, + "learning_rate": 4.709228251675357e-07, + "loss": 90.7458, + "step": 108240 + }, + { + "epoch": 0.8954791744219713, + "grad_norm": 873.815673828125, + "learning_rate": 4.7032547159344466e-07, + "loss": 91.8802, + "step": 108250 + }, + { + "epoch": 0.8955618976713405, + "grad_norm": 899.265869140625, + "learning_rate": 4.6972847842476743e-07, + "loss": 121.3053, + "step": 108260 + }, + { + "epoch": 0.8956446209207097, + "grad_norm": 987.6566162109375, + "learning_rate": 4.6913184570900436e-07, + "loss": 125.955, + "step": 108270 + }, + { + "epoch": 0.895727344170079, + "grad_norm": 746.0098876953125, + "learning_rate": 4.685355734936264e-07, + "loss": 93.6544, + "step": 108280 + }, + { + "epoch": 0.8958100674194482, + "grad_norm": 577.643310546875, + "learning_rate": 4.6793966182607564e-07, + "loss": 85.8028, + "step": 108290 + }, + { + "epoch": 0.8958927906688174, + "grad_norm": 748.9794921875, + "learning_rate": 4.673441107537674e-07, + "loss": 79.7184, + "step": 108300 + }, + { + "epoch": 0.8959755139181868, + "grad_norm": 652.255126953125, + "learning_rate": 4.6674892032408605e-07, + "loss": 81.2928, + "step": 108310 + }, + { + "epoch": 0.896058237167556, + "grad_norm": 1240.67822265625, + "learning_rate": 4.661540905843881e-07, + "loss": 94.0372, + "step": 108320 + }, + { + "epoch": 0.8961409604169251, + "grad_norm": 807.7527465820312, + "learning_rate": 4.655596215820013e-07, + "loss": 78.2325, + "step": 108330 + }, + { + "epoch": 0.8962236836662945, + "grad_norm": 633.8656616210938, + "learning_rate": 4.649655133642256e-07, + "loss": 96.0483, + "step": 108340 + }, + { + "epoch": 0.8963064069156637, + "grad_norm": 622.1317749023438, + "learning_rate": 4.643717659783309e-07, + "loss": 88.281, + "step": 108350 + }, + { + "epoch": 0.8963891301650329, + "grad_norm": 937.6145629882812, + "learning_rate": 4.637783794715589e-07, + "loss": 81.1072, + "step": 108360 + }, + { + "epoch": 0.8964718534144022, + "grad_norm": 1903.2171630859375, + "learning_rate": 4.6318535389112296e-07, + "loss": 86.6768, + "step": 108370 + }, + { + "epoch": 0.8965545766637714, + "grad_norm": 1076.609619140625, + "learning_rate": 4.6259268928420753e-07, + "loss": 98.4922, + "step": 108380 + }, + { + "epoch": 0.8966372999131406, + "grad_norm": 897.179443359375, + "learning_rate": 4.620003856979671e-07, + "loss": 92.1115, + "step": 108390 + }, + { + "epoch": 0.8967200231625099, + "grad_norm": 866.747314453125, + "learning_rate": 4.6140844317953013e-07, + "loss": 92.1406, + "step": 108400 + }, + { + "epoch": 0.8968027464118791, + 
"grad_norm": 500.00457763671875, + "learning_rate": 4.6081686177599395e-07, + "loss": 66.3908, + "step": 108410 + }, + { + "epoch": 0.8968854696612483, + "grad_norm": 782.8357543945312, + "learning_rate": 4.602256415344275e-07, + "loss": 64.6097, + "step": 108420 + }, + { + "epoch": 0.8969681929106176, + "grad_norm": 1170.851318359375, + "learning_rate": 4.5963478250187266e-07, + "loss": 92.5552, + "step": 108430 + }, + { + "epoch": 0.8970509161599868, + "grad_norm": 1039.9666748046875, + "learning_rate": 4.5904428472534014e-07, + "loss": 93.8967, + "step": 108440 + }, + { + "epoch": 0.897133639409356, + "grad_norm": 638.94091796875, + "learning_rate": 4.58454148251814e-07, + "loss": 75.4576, + "step": 108450 + }, + { + "epoch": 0.8972163626587253, + "grad_norm": 805.0771484375, + "learning_rate": 4.578643731282484e-07, + "loss": 77.5966, + "step": 108460 + }, + { + "epoch": 0.8972990859080945, + "grad_norm": 896.1200561523438, + "learning_rate": 4.5727495940156906e-07, + "loss": 89.5274, + "step": 108470 + }, + { + "epoch": 0.8973818091574637, + "grad_norm": 995.3990478515625, + "learning_rate": 4.56685907118673e-07, + "loss": 81.9692, + "step": 108480 + }, + { + "epoch": 0.897464532406833, + "grad_norm": 967.8817749023438, + "learning_rate": 4.560972163264282e-07, + "loss": 85.0128, + "step": 108490 + }, + { + "epoch": 0.8975472556562022, + "grad_norm": 1025.315185546875, + "learning_rate": 4.5550888707167505e-07, + "loss": 98.1567, + "step": 108500 + }, + { + "epoch": 0.8976299789055714, + "grad_norm": 477.4637756347656, + "learning_rate": 4.549209194012216e-07, + "loss": 67.9016, + "step": 108510 + }, + { + "epoch": 0.8977127021549407, + "grad_norm": 873.9866943359375, + "learning_rate": 4.543333133618522e-07, + "loss": 98.0505, + "step": 108520 + }, + { + "epoch": 0.8977954254043099, + "grad_norm": 1049.532958984375, + "learning_rate": 4.537460690003198e-07, + "loss": 67.647, + "step": 108530 + }, + { + "epoch": 0.8978781486536791, + "grad_norm": 1115.316162109375, + "learning_rate": 4.531591863633478e-07, + "loss": 82.5628, + "step": 108540 + }, + { + "epoch": 0.8979608719030483, + "grad_norm": 532.7764892578125, + "learning_rate": 4.5257266549763203e-07, + "loss": 51.0, + "step": 108550 + }, + { + "epoch": 0.8980435951524176, + "grad_norm": 729.6134033203125, + "learning_rate": 4.5198650644983965e-07, + "loss": 114.1188, + "step": 108560 + }, + { + "epoch": 0.8981263184017868, + "grad_norm": 739.4089965820312, + "learning_rate": 4.514007092666084e-07, + "loss": 87.889, + "step": 108570 + }, + { + "epoch": 0.898209041651156, + "grad_norm": 662.3689575195312, + "learning_rate": 4.5081527399454814e-07, + "loss": 102.5013, + "step": 108580 + }, + { + "epoch": 0.8982917649005253, + "grad_norm": 909.9256591796875, + "learning_rate": 4.502302006802378e-07, + "loss": 82.6323, + "step": 108590 + }, + { + "epoch": 0.8983744881498945, + "grad_norm": 909.2214965820312, + "learning_rate": 4.496454893702301e-07, + "loss": 78.7108, + "step": 108600 + }, + { + "epoch": 0.8984572113992637, + "grad_norm": 580.1622314453125, + "learning_rate": 4.4906114011104616e-07, + "loss": 59.7331, + "step": 108610 + }, + { + "epoch": 0.898539934648633, + "grad_norm": 1207.3009033203125, + "learning_rate": 4.4847715294918215e-07, + "loss": 84.35, + "step": 108620 + }, + { + "epoch": 0.8986226578980022, + "grad_norm": 520.7291259765625, + "learning_rate": 4.478935279311031e-07, + "loss": 91.6451, + "step": 108630 + }, + { + "epoch": 0.8987053811473714, + "grad_norm": 703.8441162109375, + "learning_rate": 
4.4731026510324406e-07, + "loss": 97.7126, + "step": 108640 + }, + { + "epoch": 0.8987881043967407, + "grad_norm": 707.9328002929688, + "learning_rate": 4.4672736451201347e-07, + "loss": 93.7411, + "step": 108650 + }, + { + "epoch": 0.8988708276461099, + "grad_norm": 912.7407836914062, + "learning_rate": 4.461448262037893e-07, + "loss": 107.5078, + "step": 108660 + }, + { + "epoch": 0.8989535508954791, + "grad_norm": 891.9956665039062, + "learning_rate": 4.455626502249233e-07, + "loss": 66.6602, + "step": 108670 + }, + { + "epoch": 0.8990362741448484, + "grad_norm": 736.6570434570312, + "learning_rate": 4.4498083662173396e-07, + "loss": 52.5633, + "step": 108680 + }, + { + "epoch": 0.8991189973942176, + "grad_norm": 798.45703125, + "learning_rate": 4.443993854405154e-07, + "loss": 95.3546, + "step": 108690 + }, + { + "epoch": 0.8992017206435868, + "grad_norm": 643.7161865234375, + "learning_rate": 4.4381829672752896e-07, + "loss": 111.5154, + "step": 108700 + }, + { + "epoch": 0.8992844438929561, + "grad_norm": 1088.9283447265625, + "learning_rate": 4.4323757052901153e-07, + "loss": 87.3449, + "step": 108710 + }, + { + "epoch": 0.8993671671423253, + "grad_norm": 626.2893676757812, + "learning_rate": 4.4265720689116776e-07, + "loss": 87.1247, + "step": 108720 + }, + { + "epoch": 0.8994498903916945, + "grad_norm": 608.726806640625, + "learning_rate": 4.420772058601747e-07, + "loss": 86.0277, + "step": 108730 + }, + { + "epoch": 0.8995326136410638, + "grad_norm": 433.5987243652344, + "learning_rate": 4.414975674821803e-07, + "loss": 73.3416, + "step": 108740 + }, + { + "epoch": 0.899615336890433, + "grad_norm": 1192.2998046875, + "learning_rate": 4.4091829180330503e-07, + "loss": 65.4931, + "step": 108750 + }, + { + "epoch": 0.8996980601398022, + "grad_norm": 438.9178466796875, + "learning_rate": 4.40339378869637e-07, + "loss": 76.4078, + "step": 108760 + }, + { + "epoch": 0.8997807833891716, + "grad_norm": 786.5850219726562, + "learning_rate": 4.3976082872723814e-07, + "loss": 68.2923, + "step": 108770 + }, + { + "epoch": 0.8998635066385408, + "grad_norm": 1577.2021484375, + "learning_rate": 4.3918264142214173e-07, + "loss": 98.498, + "step": 108780 + }, + { + "epoch": 0.89994622988791, + "grad_norm": 880.0924072265625, + "learning_rate": 4.3860481700035096e-07, + "loss": 84.358, + "step": 108790 + }, + { + "epoch": 0.9000289531372793, + "grad_norm": 2280.10693359375, + "learning_rate": 4.3802735550784014e-07, + "loss": 75.876, + "step": 108800 + }, + { + "epoch": 0.9001116763866485, + "grad_norm": 687.7626953125, + "learning_rate": 4.37450256990557e-07, + "loss": 121.7229, + "step": 108810 + }, + { + "epoch": 0.9001943996360177, + "grad_norm": 1037.4595947265625, + "learning_rate": 4.368735214944181e-07, + "loss": 87.6477, + "step": 108820 + }, + { + "epoch": 0.900277122885387, + "grad_norm": 5595.93359375, + "learning_rate": 4.362971490653106e-07, + "loss": 120.0495, + "step": 108830 + }, + { + "epoch": 0.9003598461347562, + "grad_norm": 1015.2440795898438, + "learning_rate": 4.357211397490951e-07, + "loss": 81.7864, + "step": 108840 + }, + { + "epoch": 0.9004425693841254, + "grad_norm": 632.1942749023438, + "learning_rate": 4.351454935916011e-07, + "loss": 83.9828, + "step": 108850 + }, + { + "epoch": 0.9005252926334947, + "grad_norm": 1923.382568359375, + "learning_rate": 4.3457021063862957e-07, + "loss": 84.9657, + "step": 108860 + }, + { + "epoch": 0.9006080158828639, + "grad_norm": 621.4093627929688, + "learning_rate": 4.339952909359546e-07, + "loss": 86.5398, + "step": 108870 + }, + 
{ + "epoch": 0.9006907391322331, + "grad_norm": 1436.72021484375, + "learning_rate": 4.3342073452931845e-07, + "loss": 89.1011, + "step": 108880 + }, + { + "epoch": 0.9007734623816024, + "grad_norm": 388.8465881347656, + "learning_rate": 4.328465414644373e-07, + "loss": 75.2237, + "step": 108890 + }, + { + "epoch": 0.9008561856309716, + "grad_norm": 778.0177612304688, + "learning_rate": 4.322727117869951e-07, + "loss": 99.6206, + "step": 108900 + }, + { + "epoch": 0.9009389088803408, + "grad_norm": 582.366455078125, + "learning_rate": 4.3169924554265165e-07, + "loss": 82.2605, + "step": 108910 + }, + { + "epoch": 0.9010216321297101, + "grad_norm": 1931.376953125, + "learning_rate": 4.3112614277703304e-07, + "loss": 106.5273, + "step": 108920 + }, + { + "epoch": 0.9011043553790793, + "grad_norm": 899.9615478515625, + "learning_rate": 4.3055340353574004e-07, + "loss": 89.0014, + "step": 108930 + }, + { + "epoch": 0.9011870786284485, + "grad_norm": 740.9810791015625, + "learning_rate": 4.299810278643407e-07, + "loss": 104.6826, + "step": 108940 + }, + { + "epoch": 0.9012698018778178, + "grad_norm": 413.1753845214844, + "learning_rate": 4.2940901580837744e-07, + "loss": 94.5246, + "step": 108950 + }, + { + "epoch": 0.901352525127187, + "grad_norm": 717.9979858398438, + "learning_rate": 4.2883736741336277e-07, + "loss": 57.4638, + "step": 108960 + }, + { + "epoch": 0.9014352483765562, + "grad_norm": 962.7039184570312, + "learning_rate": 4.282660827247803e-07, + "loss": 105.8891, + "step": 108970 + }, + { + "epoch": 0.9015179716259255, + "grad_norm": 647.7130737304688, + "learning_rate": 4.2769516178808366e-07, + "loss": 62.1071, + "step": 108980 + }, + { + "epoch": 0.9016006948752947, + "grad_norm": 995.0176391601562, + "learning_rate": 4.2712460464869934e-07, + "loss": 63.1329, + "step": 108990 + }, + { + "epoch": 0.9016834181246639, + "grad_norm": 1008.2059326171875, + "learning_rate": 4.26554411352022e-07, + "loss": 88.6648, + "step": 109000 + }, + { + "epoch": 0.9017661413740332, + "grad_norm": 1263.0606689453125, + "learning_rate": 4.259845819434233e-07, + "loss": 86.4794, + "step": 109010 + }, + { + "epoch": 0.9018488646234024, + "grad_norm": 1270.1148681640625, + "learning_rate": 4.254151164682385e-07, + "loss": 106.5339, + "step": 109020 + }, + { + "epoch": 0.9019315878727716, + "grad_norm": 885.9868774414062, + "learning_rate": 4.248460149717781e-07, + "loss": 76.8595, + "step": 109030 + }, + { + "epoch": 0.9020143111221409, + "grad_norm": 845.4027099609375, + "learning_rate": 4.242772774993237e-07, + "loss": 83.7632, + "step": 109040 + }, + { + "epoch": 0.9020970343715101, + "grad_norm": 1098.0489501953125, + "learning_rate": 4.237089040961262e-07, + "loss": 64.5871, + "step": 109050 + }, + { + "epoch": 0.9021797576208793, + "grad_norm": 846.4615478515625, + "learning_rate": 4.2314089480740893e-07, + "loss": 79.8472, + "step": 109060 + }, + { + "epoch": 0.9022624808702486, + "grad_norm": 858.0891723632812, + "learning_rate": 4.2257324967836575e-07, + "loss": 93.2545, + "step": 109070 + }, + { + "epoch": 0.9023452041196178, + "grad_norm": 1338.1405029296875, + "learning_rate": 4.2200596875416165e-07, + "loss": 91.2463, + "step": 109080 + }, + { + "epoch": 0.902427927368987, + "grad_norm": 1132.8909912109375, + "learning_rate": 4.2143905207993217e-07, + "loss": 94.3591, + "step": 109090 + }, + { + "epoch": 0.9025106506183563, + "grad_norm": 1090.1063232421875, + "learning_rate": 4.2087249970078513e-07, + "loss": 80.4746, + "step": 109100 + }, + { + "epoch": 0.9025933738677255, + 
"grad_norm": 697.7597045898438, + "learning_rate": 4.2030631166179727e-07, + "loss": 69.0265, + "step": 109110 + }, + { + "epoch": 0.9026760971170947, + "grad_norm": 925.8981323242188, + "learning_rate": 4.197404880080186e-07, + "loss": 97.9635, + "step": 109120 + }, + { + "epoch": 0.902758820366464, + "grad_norm": 705.66552734375, + "learning_rate": 4.191750287844687e-07, + "loss": 96.6312, + "step": 109130 + }, + { + "epoch": 0.9028415436158332, + "grad_norm": 497.7852478027344, + "learning_rate": 4.186099340361383e-07, + "loss": 87.5341, + "step": 109140 + }, + { + "epoch": 0.9029242668652024, + "grad_norm": 537.1528930664062, + "learning_rate": 4.180452038079902e-07, + "loss": 82.3161, + "step": 109150 + }, + { + "epoch": 0.9030069901145718, + "grad_norm": 627.737548828125, + "learning_rate": 4.174808381449563e-07, + "loss": 84.3565, + "step": 109160 + }, + { + "epoch": 0.903089713363941, + "grad_norm": 743.4111328125, + "learning_rate": 4.1691683709194184e-07, + "loss": 62.4872, + "step": 109170 + }, + { + "epoch": 0.9031724366133101, + "grad_norm": 784.0700073242188, + "learning_rate": 4.163532006938209e-07, + "loss": 76.4367, + "step": 109180 + }, + { + "epoch": 0.9032551598626795, + "grad_norm": 770.558837890625, + "learning_rate": 4.1578992899543926e-07, + "loss": 90.6556, + "step": 109190 + }, + { + "epoch": 0.9033378831120487, + "grad_norm": 586.0490112304688, + "learning_rate": 4.1522702204161493e-07, + "loss": 84.0449, + "step": 109200 + }, + { + "epoch": 0.9034206063614179, + "grad_norm": 923.1376953125, + "learning_rate": 4.146644798771349e-07, + "loss": 118.2938, + "step": 109210 + }, + { + "epoch": 0.9035033296107872, + "grad_norm": 749.9999389648438, + "learning_rate": 4.141023025467583e-07, + "loss": 104.1386, + "step": 109220 + }, + { + "epoch": 0.9035860528601564, + "grad_norm": 1027.83349609375, + "learning_rate": 4.1354049009521504e-07, + "loss": 80.9892, + "step": 109230 + }, + { + "epoch": 0.9036687761095256, + "grad_norm": 886.0511474609375, + "learning_rate": 4.1297904256720646e-07, + "loss": 76.1879, + "step": 109240 + }, + { + "epoch": 0.9037514993588949, + "grad_norm": 880.5858764648438, + "learning_rate": 4.1241796000740296e-07, + "loss": 60.6476, + "step": 109250 + }, + { + "epoch": 0.9038342226082641, + "grad_norm": 1270.9674072265625, + "learning_rate": 4.118572424604489e-07, + "loss": 69.0217, + "step": 109260 + }, + { + "epoch": 0.9039169458576333, + "grad_norm": 513.490234375, + "learning_rate": 4.112968899709574e-07, + "loss": 70.7013, + "step": 109270 + }, + { + "epoch": 0.9039996691070025, + "grad_norm": 1045.45654296875, + "learning_rate": 4.1073690258351287e-07, + "loss": 71.1828, + "step": 109280 + }, + { + "epoch": 0.9040823923563718, + "grad_norm": 977.5629272460938, + "learning_rate": 4.101772803426707e-07, + "loss": 104.8837, + "step": 109290 + }, + { + "epoch": 0.904165115605741, + "grad_norm": 690.1253662109375, + "learning_rate": 4.0961802329295864e-07, + "loss": 70.6142, + "step": 109300 + }, + { + "epoch": 0.9042478388551102, + "grad_norm": 1407.6588134765625, + "learning_rate": 4.090591314788728e-07, + "loss": 83.4057, + "step": 109310 + }, + { + "epoch": 0.9043305621044795, + "grad_norm": 595.9920654296875, + "learning_rate": 4.085006049448825e-07, + "loss": 56.3994, + "step": 109320 + }, + { + "epoch": 0.9044132853538487, + "grad_norm": 965.2755126953125, + "learning_rate": 4.0794244373542736e-07, + "loss": 99.3913, + "step": 109330 + }, + { + "epoch": 0.9044960086032179, + "grad_norm": 831.5714111328125, + "learning_rate": 
4.0738464789491673e-07, + "loss": 81.9469, + "step": 109340 + }, + { + "epoch": 0.9045787318525872, + "grad_norm": 581.9131469726562, + "learning_rate": 4.0682721746773346e-07, + "loss": 74.2238, + "step": 109350 + }, + { + "epoch": 0.9046614551019564, + "grad_norm": 733.6002197265625, + "learning_rate": 4.062701524982271e-07, + "loss": 83.6481, + "step": 109360 + }, + { + "epoch": 0.9047441783513256, + "grad_norm": 720.9714965820312, + "learning_rate": 4.057134530307233e-07, + "loss": 66.0066, + "step": 109370 + }, + { + "epoch": 0.9048269016006949, + "grad_norm": 1211.7706298828125, + "learning_rate": 4.0515711910951436e-07, + "loss": 103.6463, + "step": 109380 + }, + { + "epoch": 0.9049096248500641, + "grad_norm": 887.6861572265625, + "learning_rate": 4.0460115077886665e-07, + "loss": 74.1794, + "step": 109390 + }, + { + "epoch": 0.9049923480994333, + "grad_norm": 725.7745361328125, + "learning_rate": 4.0404554808301523e-07, + "loss": 86.6203, + "step": 109400 + }, + { + "epoch": 0.9050750713488026, + "grad_norm": 892.6699829101562, + "learning_rate": 4.0349031106616697e-07, + "loss": 83.9293, + "step": 109410 + }, + { + "epoch": 0.9051577945981718, + "grad_norm": 1044.3946533203125, + "learning_rate": 4.029354397724994e-07, + "loss": 81.0904, + "step": 109420 + }, + { + "epoch": 0.905240517847541, + "grad_norm": 426.7649230957031, + "learning_rate": 4.023809342461615e-07, + "loss": 81.0433, + "step": 109430 + }, + { + "epoch": 0.9053232410969103, + "grad_norm": 915.0441284179688, + "learning_rate": 4.0182679453127316e-07, + "loss": 117.5908, + "step": 109440 + }, + { + "epoch": 0.9054059643462795, + "grad_norm": 572.4617919921875, + "learning_rate": 4.0127302067192285e-07, + "loss": 74.3403, + "step": 109450 + }, + { + "epoch": 0.9054886875956487, + "grad_norm": 1104.9822998046875, + "learning_rate": 4.007196127121726e-07, + "loss": 92.4061, + "step": 109460 + }, + { + "epoch": 0.905571410845018, + "grad_norm": 1072.473876953125, + "learning_rate": 4.001665706960556e-07, + "loss": 92.3097, + "step": 109470 + }, + { + "epoch": 0.9056541340943872, + "grad_norm": 716.0008544921875, + "learning_rate": 3.996138946675737e-07, + "loss": 90.4532, + "step": 109480 + }, + { + "epoch": 0.9057368573437564, + "grad_norm": 509.80621337890625, + "learning_rate": 3.9906158467070187e-07, + "loss": 87.5559, + "step": 109490 + }, + { + "epoch": 0.9058195805931257, + "grad_norm": 1042.047607421875, + "learning_rate": 3.985096407493838e-07, + "loss": 98.2946, + "step": 109500 + }, + { + "epoch": 0.9059023038424949, + "grad_norm": 788.8461303710938, + "learning_rate": 3.97958062947536e-07, + "loss": 78.6691, + "step": 109510 + }, + { + "epoch": 0.9059850270918641, + "grad_norm": 560.147216796875, + "learning_rate": 3.9740685130904455e-07, + "loss": 60.2149, + "step": 109520 + }, + { + "epoch": 0.9060677503412334, + "grad_norm": 1752.591796875, + "learning_rate": 3.9685600587776815e-07, + "loss": 108.2573, + "step": 109530 + }, + { + "epoch": 0.9061504735906026, + "grad_norm": 870.2755737304688, + "learning_rate": 3.9630552669753243e-07, + "loss": 70.9846, + "step": 109540 + }, + { + "epoch": 0.9062331968399718, + "grad_norm": 1336.35400390625, + "learning_rate": 3.9575541381213776e-07, + "loss": 69.7969, + "step": 109550 + }, + { + "epoch": 0.9063159200893411, + "grad_norm": 897.2123413085938, + "learning_rate": 3.9520566726535367e-07, + "loss": 82.9407, + "step": 109560 + }, + { + "epoch": 0.9063986433387103, + "grad_norm": 1298.525390625, + "learning_rate": 3.9465628710092185e-07, + "loss": 114.8549, + 
"step": 109570 + }, + { + "epoch": 0.9064813665880795, + "grad_norm": 1084.1280517578125, + "learning_rate": 3.9410727336255396e-07, + "loss": 84.6156, + "step": 109580 + }, + { + "epoch": 0.9065640898374488, + "grad_norm": 580.8189697265625, + "learning_rate": 3.935586260939322e-07, + "loss": 58.7614, + "step": 109590 + }, + { + "epoch": 0.906646813086818, + "grad_norm": 892.2899169921875, + "learning_rate": 3.930103453387096e-07, + "loss": 102.3395, + "step": 109600 + }, + { + "epoch": 0.9067295363361872, + "grad_norm": 429.3028564453125, + "learning_rate": 3.9246243114051174e-07, + "loss": 77.4244, + "step": 109610 + }, + { + "epoch": 0.9068122595855566, + "grad_norm": 469.5216979980469, + "learning_rate": 3.919148835429315e-07, + "loss": 64.1674, + "step": 109620 + }, + { + "epoch": 0.9068949828349258, + "grad_norm": 779.257568359375, + "learning_rate": 3.913677025895357e-07, + "loss": 84.8305, + "step": 109630 + }, + { + "epoch": 0.906977706084295, + "grad_norm": 1248.734375, + "learning_rate": 3.9082088832386124e-07, + "loss": 85.1336, + "step": 109640 + }, + { + "epoch": 0.9070604293336643, + "grad_norm": 785.3364868164062, + "learning_rate": 3.9027444078941435e-07, + "loss": 110.6684, + "step": 109650 + }, + { + "epoch": 0.9071431525830335, + "grad_norm": 1085.676513671875, + "learning_rate": 3.897283600296753e-07, + "loss": 77.48, + "step": 109660 + }, + { + "epoch": 0.9072258758324027, + "grad_norm": 958.9440307617188, + "learning_rate": 3.8918264608809207e-07, + "loss": 78.6371, + "step": 109670 + }, + { + "epoch": 0.907308599081772, + "grad_norm": 1247.623046875, + "learning_rate": 3.886372990080856e-07, + "loss": 88.1574, + "step": 109680 + }, + { + "epoch": 0.9073913223311412, + "grad_norm": 860.1593017578125, + "learning_rate": 3.8809231883304544e-07, + "loss": 73.0786, + "step": 109690 + }, + { + "epoch": 0.9074740455805104, + "grad_norm": 1131.9002685546875, + "learning_rate": 3.875477056063343e-07, + "loss": 99.7562, + "step": 109700 + }, + { + "epoch": 0.9075567688298797, + "grad_norm": 578.1716918945312, + "learning_rate": 3.8700345937128346e-07, + "loss": 74.0123, + "step": 109710 + }, + { + "epoch": 0.9076394920792489, + "grad_norm": 990.4799194335938, + "learning_rate": 3.864595801711968e-07, + "loss": 96.1826, + "step": 109720 + }, + { + "epoch": 0.9077222153286181, + "grad_norm": 1159.52490234375, + "learning_rate": 3.8591606804934733e-07, + "loss": 113.0236, + "step": 109730 + }, + { + "epoch": 0.9078049385779874, + "grad_norm": 732.7988891601562, + "learning_rate": 3.853729230489811e-07, + "loss": 95.0779, + "step": 109740 + }, + { + "epoch": 0.9078876618273566, + "grad_norm": 947.6547241210938, + "learning_rate": 3.8483014521331184e-07, + "loss": 64.9452, + "step": 109750 + }, + { + "epoch": 0.9079703850767258, + "grad_norm": 954.159912109375, + "learning_rate": 3.8428773458552835e-07, + "loss": 96.2663, + "step": 109760 + }, + { + "epoch": 0.9080531083260951, + "grad_norm": 807.883056640625, + "learning_rate": 3.837456912087867e-07, + "loss": 105.7476, + "step": 109770 + }, + { + "epoch": 0.9081358315754643, + "grad_norm": 719.7168579101562, + "learning_rate": 3.8320401512621505e-07, + "loss": 90.5569, + "step": 109780 + }, + { + "epoch": 0.9082185548248335, + "grad_norm": 927.4055786132812, + "learning_rate": 3.826627063809113e-07, + "loss": 55.1157, + "step": 109790 + }, + { + "epoch": 0.9083012780742028, + "grad_norm": 900.8541870117188, + "learning_rate": 3.821217650159453e-07, + "loss": 66.976, + "step": 109800 + }, + { + "epoch": 0.908384001323572, + 
"grad_norm": 1139.78125, + "learning_rate": 3.8158119107435667e-07, + "loss": 83.4938, + "step": 109810 + }, + { + "epoch": 0.9084667245729412, + "grad_norm": 928.9935302734375, + "learning_rate": 3.810409845991575e-07, + "loss": 81.5093, + "step": 109820 + }, + { + "epoch": 0.9085494478223105, + "grad_norm": 866.55712890625, + "learning_rate": 3.805011456333285e-07, + "loss": 72.1152, + "step": 109830 + }, + { + "epoch": 0.9086321710716797, + "grad_norm": 1261.871826171875, + "learning_rate": 3.799616742198231e-07, + "loss": 81.8657, + "step": 109840 + }, + { + "epoch": 0.9087148943210489, + "grad_norm": 633.0125122070312, + "learning_rate": 3.794225704015636e-07, + "loss": 115.4469, + "step": 109850 + }, + { + "epoch": 0.9087976175704182, + "grad_norm": 1088.3365478515625, + "learning_rate": 3.7888383422144517e-07, + "loss": 96.6127, + "step": 109860 + }, + { + "epoch": 0.9088803408197874, + "grad_norm": 1705.669677734375, + "learning_rate": 3.7834546572233287e-07, + "loss": 90.1035, + "step": 109870 + }, + { + "epoch": 0.9089630640691566, + "grad_norm": 1340.4810791015625, + "learning_rate": 3.778074649470603e-07, + "loss": 97.6526, + "step": 109880 + }, + { + "epoch": 0.9090457873185259, + "grad_norm": 471.0425720214844, + "learning_rate": 3.772698319384349e-07, + "loss": 75.8749, + "step": 109890 + }, + { + "epoch": 0.9091285105678951, + "grad_norm": 664.8509521484375, + "learning_rate": 3.7673256673923356e-07, + "loss": 78.4327, + "step": 109900 + }, + { + "epoch": 0.9092112338172643, + "grad_norm": 1128.6776123046875, + "learning_rate": 3.7619566939220363e-07, + "loss": 109.4718, + "step": 109910 + }, + { + "epoch": 0.9092939570666336, + "grad_norm": 811.8029174804688, + "learning_rate": 3.7565913994006386e-07, + "loss": 80.7976, + "step": 109920 + }, + { + "epoch": 0.9093766803160028, + "grad_norm": 873.7833251953125, + "learning_rate": 3.751229784255039e-07, + "loss": 89.7391, + "step": 109930 + }, + { + "epoch": 0.909459403565372, + "grad_norm": 1071.7000732421875, + "learning_rate": 3.745871848911831e-07, + "loss": 93.3303, + "step": 109940 + }, + { + "epoch": 0.9095421268147413, + "grad_norm": 740.0640869140625, + "learning_rate": 3.7405175937973103e-07, + "loss": 81.3143, + "step": 109950 + }, + { + "epoch": 0.9096248500641105, + "grad_norm": 1029.9522705078125, + "learning_rate": 3.735167019337527e-07, + "loss": 94.7903, + "step": 109960 + }, + { + "epoch": 0.9097075733134797, + "grad_norm": 1091.040283203125, + "learning_rate": 3.7298201259581615e-07, + "loss": 78.4557, + "step": 109970 + }, + { + "epoch": 0.909790296562849, + "grad_norm": 554.0980834960938, + "learning_rate": 3.724476914084657e-07, + "loss": 78.7385, + "step": 109980 + }, + { + "epoch": 0.9098730198122182, + "grad_norm": 1428.4512939453125, + "learning_rate": 3.719137384142152e-07, + "loss": 104.3615, + "step": 109990 + }, + { + "epoch": 0.9099557430615874, + "grad_norm": 995.0647583007812, + "learning_rate": 3.7138015365554834e-07, + "loss": 110.6832, + "step": 110000 + }, + { + "epoch": 0.9100384663109566, + "grad_norm": 580.6385498046875, + "learning_rate": 3.7084693717492016e-07, + "loss": 56.6644, + "step": 110010 + }, + { + "epoch": 0.910121189560326, + "grad_norm": 1886.46240234375, + "learning_rate": 3.7031408901475605e-07, + "loss": 68.3266, + "step": 110020 + }, + { + "epoch": 0.9102039128096951, + "grad_norm": 573.3003540039062, + "learning_rate": 3.6978160921745277e-07, + "loss": 82.59, + "step": 110030 + }, + { + "epoch": 0.9102866360590643, + "grad_norm": 722.798828125, + "learning_rate": 
3.692494978253769e-07, + "loss": 111.005, + "step": 110040 + }, + { + "epoch": 0.9103693593084337, + "grad_norm": 463.8318786621094, + "learning_rate": 3.6871775488086624e-07, + "loss": 60.8547, + "step": 110050 + }, + { + "epoch": 0.9104520825578029, + "grad_norm": 783.125, + "learning_rate": 3.681863804262292e-07, + "loss": 91.4471, + "step": 110060 + }, + { + "epoch": 0.910534805807172, + "grad_norm": 773.2374267578125, + "learning_rate": 3.676553745037448e-07, + "loss": 104.682, + "step": 110070 + }, + { + "epoch": 0.9106175290565414, + "grad_norm": 917.3580322265625, + "learning_rate": 3.671247371556624e-07, + "loss": 94.0389, + "step": 110080 + }, + { + "epoch": 0.9107002523059106, + "grad_norm": 892.2453002929688, + "learning_rate": 3.665944684242029e-07, + "loss": 98.2737, + "step": 110090 + }, + { + "epoch": 0.9107829755552798, + "grad_norm": 978.7361450195312, + "learning_rate": 3.660645683515568e-07, + "loss": 78.8953, + "step": 110100 + }, + { + "epoch": 0.9108656988046491, + "grad_norm": 538.931640625, + "learning_rate": 3.65535036979886e-07, + "loss": 89.924, + "step": 110110 + }, + { + "epoch": 0.9109484220540183, + "grad_norm": 516.4479370117188, + "learning_rate": 3.650058743513235e-07, + "loss": 87.3976, + "step": 110120 + }, + { + "epoch": 0.9110311453033875, + "grad_norm": 1158.6689453125, + "learning_rate": 3.644770805079717e-07, + "loss": 77.452, + "step": 110130 + }, + { + "epoch": 0.9111138685527568, + "grad_norm": 1315.6121826171875, + "learning_rate": 3.639486554919042e-07, + "loss": 106.9289, + "step": 110140 + }, + { + "epoch": 0.911196591802126, + "grad_norm": 816.3543701171875, + "learning_rate": 3.634205993451656e-07, + "loss": 82.4566, + "step": 110150 + }, + { + "epoch": 0.9112793150514952, + "grad_norm": 838.74267578125, + "learning_rate": 3.628929121097707e-07, + "loss": 87.7295, + "step": 110160 + }, + { + "epoch": 0.9113620383008645, + "grad_norm": 713.4924926757812, + "learning_rate": 3.6236559382770597e-07, + "loss": 71.8615, + "step": 110170 + }, + { + "epoch": 0.9114447615502337, + "grad_norm": 1672.6297607421875, + "learning_rate": 3.6183864454092664e-07, + "loss": 99.8394, + "step": 110180 + }, + { + "epoch": 0.9115274847996029, + "grad_norm": 982.43896484375, + "learning_rate": 3.6131206429135977e-07, + "loss": 80.5979, + "step": 110190 + }, + { + "epoch": 0.9116102080489722, + "grad_norm": 1182.8560791015625, + "learning_rate": 3.607858531209035e-07, + "loss": 89.0724, + "step": 110200 + }, + { + "epoch": 0.9116929312983414, + "grad_norm": 1181.8760986328125, + "learning_rate": 3.6026001107142606e-07, + "loss": 103.4322, + "step": 110210 + }, + { + "epoch": 0.9117756545477106, + "grad_norm": 418.0647277832031, + "learning_rate": 3.597345381847656e-07, + "loss": 89.5779, + "step": 110220 + }, + { + "epoch": 0.9118583777970799, + "grad_norm": 929.8257446289062, + "learning_rate": 3.592094345027325e-07, + "loss": 77.7989, + "step": 110230 + }, + { + "epoch": 0.9119411010464491, + "grad_norm": 1179.2841796875, + "learning_rate": 3.5868470006710564e-07, + "loss": 121.445, + "step": 110240 + }, + { + "epoch": 0.9120238242958183, + "grad_norm": 564.2163696289062, + "learning_rate": 3.581603349196372e-07, + "loss": 64.8469, + "step": 110250 + }, + { + "epoch": 0.9121065475451876, + "grad_norm": 596.6292114257812, + "learning_rate": 3.576363391020471e-07, + "loss": 93.1684, + "step": 110260 + }, + { + "epoch": 0.9121892707945568, + "grad_norm": 695.0462646484375, + "learning_rate": 3.5711271265602807e-07, + "loss": 77.7925, + "step": 110270 + }, + { + 
"epoch": 0.912271994043926, + "grad_norm": 707.590087890625, + "learning_rate": 3.56589455623243e-07, + "loss": 71.3893, + "step": 110280 + }, + { + "epoch": 0.9123547172932953, + "grad_norm": 0.0, + "learning_rate": 3.56066568045324e-07, + "loss": 64.9731, + "step": 110290 + }, + { + "epoch": 0.9124374405426645, + "grad_norm": 654.4577026367188, + "learning_rate": 3.555440499638768e-07, + "loss": 80.6185, + "step": 110300 + }, + { + "epoch": 0.9125201637920337, + "grad_norm": 691.3319702148438, + "learning_rate": 3.55021901420472e-07, + "loss": 61.5873, + "step": 110310 + }, + { + "epoch": 0.912602887041403, + "grad_norm": 966.6505737304688, + "learning_rate": 3.54500122456658e-07, + "loss": 73.3954, + "step": 110320 + }, + { + "epoch": 0.9126856102907722, + "grad_norm": 1062.859130859375, + "learning_rate": 3.5397871311394937e-07, + "loss": 88.4193, + "step": 110330 + }, + { + "epoch": 0.9127683335401414, + "grad_norm": 1225.390625, + "learning_rate": 3.534576734338324e-07, + "loss": 85.1301, + "step": 110340 + }, + { + "epoch": 0.9128510567895107, + "grad_norm": 604.2843017578125, + "learning_rate": 3.529370034577634e-07, + "loss": 74.7762, + "step": 110350 + }, + { + "epoch": 0.9129337800388799, + "grad_norm": 659.66357421875, + "learning_rate": 3.5241670322717025e-07, + "loss": 113.1879, + "step": 110360 + }, + { + "epoch": 0.9130165032882491, + "grad_norm": 746.4213256835938, + "learning_rate": 3.518967727834499e-07, + "loss": 81.8533, + "step": 110370 + }, + { + "epoch": 0.9130992265376184, + "grad_norm": 654.5377807617188, + "learning_rate": 3.513772121679715e-07, + "loss": 65.3405, + "step": 110380 + }, + { + "epoch": 0.9131819497869876, + "grad_norm": 734.1673583984375, + "learning_rate": 3.508580214220753e-07, + "loss": 102.2626, + "step": 110390 + }, + { + "epoch": 0.9132646730363568, + "grad_norm": 512.5213012695312, + "learning_rate": 3.503392005870687e-07, + "loss": 81.4534, + "step": 110400 + }, + { + "epoch": 0.9133473962857261, + "grad_norm": 852.9417114257812, + "learning_rate": 3.498207497042316e-07, + "loss": 85.8927, + "step": 110410 + }, + { + "epoch": 0.9134301195350953, + "grad_norm": 632.1807250976562, + "learning_rate": 3.4930266881481714e-07, + "loss": 124.6005, + "step": 110420 + }, + { + "epoch": 0.9135128427844645, + "grad_norm": 901.0861206054688, + "learning_rate": 3.487849579600455e-07, + "loss": 94.3462, + "step": 110430 + }, + { + "epoch": 0.9135955660338339, + "grad_norm": 660.1860961914062, + "learning_rate": 3.482676171811089e-07, + "loss": 86.6812, + "step": 110440 + }, + { + "epoch": 0.913678289283203, + "grad_norm": 1478.7293701171875, + "learning_rate": 3.4775064651916877e-07, + "loss": 68.7283, + "step": 110450 + }, + { + "epoch": 0.9137610125325722, + "grad_norm": 927.5498657226562, + "learning_rate": 3.472340460153595e-07, + "loss": 72.09, + "step": 110460 + }, + { + "epoch": 0.9138437357819416, + "grad_norm": 589.6502685546875, + "learning_rate": 3.4671781571078424e-07, + "loss": 97.7405, + "step": 110470 + }, + { + "epoch": 0.9139264590313108, + "grad_norm": 614.3450927734375, + "learning_rate": 3.462019556465157e-07, + "loss": 126.026, + "step": 110480 + }, + { + "epoch": 0.91400918228068, + "grad_norm": 394.06805419921875, + "learning_rate": 3.4568646586359944e-07, + "loss": 83.8727, + "step": 110490 + }, + { + "epoch": 0.9140919055300493, + "grad_norm": 824.491455078125, + "learning_rate": 3.4517134640305097e-07, + "loss": 100.4316, + "step": 110500 + }, + { + "epoch": 0.9141746287794185, + "grad_norm": 895.6577758789062, + 
"learning_rate": 3.446565973058552e-07, + "loss": 83.8836, + "step": 110510 + }, + { + "epoch": 0.9142573520287877, + "grad_norm": 1477.3956298828125, + "learning_rate": 3.441422186129689e-07, + "loss": 109.6496, + "step": 110520 + }, + { + "epoch": 0.914340075278157, + "grad_norm": 818.725830078125, + "learning_rate": 3.4362821036531936e-07, + "loss": 93.3673, + "step": 110530 + }, + { + "epoch": 0.9144227985275262, + "grad_norm": 812.5512084960938, + "learning_rate": 3.431145726038032e-07, + "loss": 72.2729, + "step": 110540 + }, + { + "epoch": 0.9145055217768954, + "grad_norm": 812.0310668945312, + "learning_rate": 3.426013053692878e-07, + "loss": 97.3781, + "step": 110550 + }, + { + "epoch": 0.9145882450262647, + "grad_norm": 372.3507385253906, + "learning_rate": 3.4208840870261326e-07, + "loss": 81.1851, + "step": 110560 + }, + { + "epoch": 0.9146709682756339, + "grad_norm": 1096.9320068359375, + "learning_rate": 3.415758826445864e-07, + "loss": 82.4156, + "step": 110570 + }, + { + "epoch": 0.9147536915250031, + "grad_norm": 1314.9417724609375, + "learning_rate": 3.410637272359868e-07, + "loss": 97.9378, + "step": 110580 + }, + { + "epoch": 0.9148364147743724, + "grad_norm": 835.7549438476562, + "learning_rate": 3.405519425175652e-07, + "loss": 110.182, + "step": 110590 + }, + { + "epoch": 0.9149191380237416, + "grad_norm": 819.3499145507812, + "learning_rate": 3.400405285300412e-07, + "loss": 71.8763, + "step": 110600 + }, + { + "epoch": 0.9150018612731108, + "grad_norm": 969.0990600585938, + "learning_rate": 3.3952948531410566e-07, + "loss": 73.1048, + "step": 110610 + }, + { + "epoch": 0.9150845845224801, + "grad_norm": 1323.8681640625, + "learning_rate": 3.390188129104205e-07, + "loss": 111.2788, + "step": 110620 + }, + { + "epoch": 0.9151673077718493, + "grad_norm": 1279.118408203125, + "learning_rate": 3.3850851135961814e-07, + "loss": 119.1711, + "step": 110630 + }, + { + "epoch": 0.9152500310212185, + "grad_norm": 767.4146118164062, + "learning_rate": 3.379985807023001e-07, + "loss": 123.3025, + "step": 110640 + }, + { + "epoch": 0.9153327542705878, + "grad_norm": 2297.595703125, + "learning_rate": 3.3748902097903936e-07, + "loss": 93.4889, + "step": 110650 + }, + { + "epoch": 0.915415477519957, + "grad_norm": 851.3316040039062, + "learning_rate": 3.369798322303786e-07, + "loss": 81.7054, + "step": 110660 + }, + { + "epoch": 0.9154982007693262, + "grad_norm": 671.036865234375, + "learning_rate": 3.364710144968325e-07, + "loss": 84.0587, + "step": 110670 + }, + { + "epoch": 0.9155809240186955, + "grad_norm": 1349.0550537109375, + "learning_rate": 3.359625678188849e-07, + "loss": 91.475, + "step": 110680 + }, + { + "epoch": 0.9156636472680647, + "grad_norm": 758.9757690429688, + "learning_rate": 3.3545449223699065e-07, + "loss": 79.7885, + "step": 110690 + }, + { + "epoch": 0.9157463705174339, + "grad_norm": 755.66064453125, + "learning_rate": 3.3494678779157464e-07, + "loss": 81.5341, + "step": 110700 + }, + { + "epoch": 0.9158290937668032, + "grad_norm": 864.9046630859375, + "learning_rate": 3.3443945452303337e-07, + "loss": 98.7411, + "step": 110710 + }, + { + "epoch": 0.9159118170161724, + "grad_norm": 536.4815063476562, + "learning_rate": 3.33932492471733e-07, + "loss": 98.7579, + "step": 110720 + }, + { + "epoch": 0.9159945402655416, + "grad_norm": 550.0960693359375, + "learning_rate": 3.334259016780106e-07, + "loss": 84.7401, + "step": 110730 + }, + { + "epoch": 0.9160772635149108, + "grad_norm": 1865.9971923828125, + "learning_rate": 3.3291968218217175e-07, + "loss": 
84.6278, + "step": 110740 + }, + { + "epoch": 0.9161599867642801, + "grad_norm": 598.4445190429688, + "learning_rate": 3.324138340244948e-07, + "loss": 57.0423, + "step": 110750 + }, + { + "epoch": 0.9162427100136493, + "grad_norm": 871.3580932617188, + "learning_rate": 3.319083572452275e-07, + "loss": 72.3384, + "step": 110760 + }, + { + "epoch": 0.9163254332630185, + "grad_norm": 476.9991760253906, + "learning_rate": 3.314032518845889e-07, + "loss": 68.3785, + "step": 110770 + }, + { + "epoch": 0.9164081565123878, + "grad_norm": 693.8629760742188, + "learning_rate": 3.308985179827673e-07, + "loss": 114.7213, + "step": 110780 + }, + { + "epoch": 0.916490879761757, + "grad_norm": 797.1781616210938, + "learning_rate": 3.303941555799223e-07, + "loss": 86.4564, + "step": 110790 + }, + { + "epoch": 0.9165736030111262, + "grad_norm": 313.2153015136719, + "learning_rate": 3.298901647161834e-07, + "loss": 91.0484, + "step": 110800 + }, + { + "epoch": 0.9166563262604955, + "grad_norm": 865.4092407226562, + "learning_rate": 3.293865454316514e-07, + "loss": 105.9156, + "step": 110810 + }, + { + "epoch": 0.9167390495098647, + "grad_norm": 1219.156005859375, + "learning_rate": 3.2888329776639807e-07, + "loss": 95.9394, + "step": 110820 + }, + { + "epoch": 0.9168217727592339, + "grad_norm": 866.1614379882812, + "learning_rate": 3.283804217604619e-07, + "loss": 96.5862, + "step": 110830 + }, + { + "epoch": 0.9169044960086032, + "grad_norm": 650.52490234375, + "learning_rate": 3.27877917453856e-07, + "loss": 89.6044, + "step": 110840 + }, + { + "epoch": 0.9169872192579724, + "grad_norm": 1170.3040771484375, + "learning_rate": 3.273757848865622e-07, + "loss": 94.1275, + "step": 110850 + }, + { + "epoch": 0.9170699425073416, + "grad_norm": 1928.0880126953125, + "learning_rate": 3.2687402409853243e-07, + "loss": 109.4509, + "step": 110860 + }, + { + "epoch": 0.917152665756711, + "grad_norm": 754.0807495117188, + "learning_rate": 3.2637263512969033e-07, + "loss": 69.4093, + "step": 110870 + }, + { + "epoch": 0.9172353890060801, + "grad_norm": 695.7767333984375, + "learning_rate": 3.258716180199278e-07, + "loss": 82.0896, + "step": 110880 + }, + { + "epoch": 0.9173181122554493, + "grad_norm": 1156.6104736328125, + "learning_rate": 3.253709728091098e-07, + "loss": 72.8865, + "step": 110890 + }, + { + "epoch": 0.9174008355048187, + "grad_norm": 551.685546875, + "learning_rate": 3.2487069953706983e-07, + "loss": 74.3535, + "step": 110900 + }, + { + "epoch": 0.9174835587541879, + "grad_norm": 657.1426391601562, + "learning_rate": 3.243707982436123e-07, + "loss": 67.814, + "step": 110910 + }, + { + "epoch": 0.917566282003557, + "grad_norm": 796.87109375, + "learning_rate": 3.238712689685125e-07, + "loss": 61.9698, + "step": 110920 + }, + { + "epoch": 0.9176490052529264, + "grad_norm": 1190.6385498046875, + "learning_rate": 3.2337211175151484e-07, + "loss": 91.3083, + "step": 110930 + }, + { + "epoch": 0.9177317285022956, + "grad_norm": 977.4860229492188, + "learning_rate": 3.2287332663233527e-07, + "loss": 74.0361, + "step": 110940 + }, + { + "epoch": 0.9178144517516648, + "grad_norm": 757.3435668945312, + "learning_rate": 3.223749136506604e-07, + "loss": 80.6914, + "step": 110950 + }, + { + "epoch": 0.9178971750010341, + "grad_norm": 827.2467041015625, + "learning_rate": 3.218768728461458e-07, + "loss": 111.4018, + "step": 110960 + }, + { + "epoch": 0.9179798982504033, + "grad_norm": 609.2623291015625, + "learning_rate": 3.2137920425841907e-07, + "loss": 99.3807, + "step": 110970 + }, + { + "epoch": 
0.9180626214997725, + "grad_norm": 1110.6324462890625, + "learning_rate": 3.2088190792707696e-07, + "loss": 80.4231, + "step": 110980 + }, + { + "epoch": 0.9181453447491418, + "grad_norm": 689.8076782226562, + "learning_rate": 3.2038498389168724e-07, + "loss": 65.6498, + "step": 110990 + }, + { + "epoch": 0.918228067998511, + "grad_norm": 941.228515625, + "learning_rate": 3.1988843219178776e-07, + "loss": 85.3875, + "step": 111000 + }, + { + "epoch": 0.9183107912478802, + "grad_norm": 887.41845703125, + "learning_rate": 3.193922528668869e-07, + "loss": 69.6305, + "step": 111010 + }, + { + "epoch": 0.9183935144972495, + "grad_norm": 713.7828369140625, + "learning_rate": 3.188964459564636e-07, + "loss": 81.9818, + "step": 111020 + }, + { + "epoch": 0.9184762377466187, + "grad_norm": 539.0556030273438, + "learning_rate": 3.184010114999664e-07, + "loss": 83.2385, + "step": 111030 + }, + { + "epoch": 0.9185589609959879, + "grad_norm": 917.137451171875, + "learning_rate": 3.179059495368153e-07, + "loss": 101.1152, + "step": 111040 + }, + { + "epoch": 0.9186416842453572, + "grad_norm": 666.9295043945312, + "learning_rate": 3.174112601064e-07, + "loss": 64.8105, + "step": 111050 + }, + { + "epoch": 0.9187244074947264, + "grad_norm": 525.5652465820312, + "learning_rate": 3.1691694324808063e-07, + "loss": 69.7607, + "step": 111060 + }, + { + "epoch": 0.9188071307440956, + "grad_norm": 1084.0616455078125, + "learning_rate": 3.1642299900118743e-07, + "loss": 113.7904, + "step": 111070 + }, + { + "epoch": 0.9188898539934649, + "grad_norm": 1473.717041015625, + "learning_rate": 3.159294274050212e-07, + "loss": 114.3284, + "step": 111080 + }, + { + "epoch": 0.9189725772428341, + "grad_norm": 789.0966186523438, + "learning_rate": 3.154362284988538e-07, + "loss": 85.0554, + "step": 111090 + }, + { + "epoch": 0.9190553004922033, + "grad_norm": 970.8892211914062, + "learning_rate": 3.1494340232192667e-07, + "loss": 77.95, + "step": 111100 + }, + { + "epoch": 0.9191380237415726, + "grad_norm": 856.5325927734375, + "learning_rate": 3.144509489134512e-07, + "loss": 85.2512, + "step": 111110 + }, + { + "epoch": 0.9192207469909418, + "grad_norm": 685.9720458984375, + "learning_rate": 3.139588683126099e-07, + "loss": 74.0177, + "step": 111120 + }, + { + "epoch": 0.919303470240311, + "grad_norm": 782.5280151367188, + "learning_rate": 3.134671605585554e-07, + "loss": 73.0808, + "step": 111130 + }, + { + "epoch": 0.9193861934896803, + "grad_norm": 421.8550720214844, + "learning_rate": 3.129758256904109e-07, + "loss": 67.5404, + "step": 111140 + }, + { + "epoch": 0.9194689167390495, + "grad_norm": 855.1493530273438, + "learning_rate": 3.1248486374726884e-07, + "loss": 86.1914, + "step": 111150 + }, + { + "epoch": 0.9195516399884187, + "grad_norm": 581.30029296875, + "learning_rate": 3.119942747681948e-07, + "loss": 95.9546, + "step": 111160 + }, + { + "epoch": 0.919634363237788, + "grad_norm": 637.724853515625, + "learning_rate": 3.1150405879221965e-07, + "loss": 97.2694, + "step": 111170 + }, + { + "epoch": 0.9197170864871572, + "grad_norm": 706.0328369140625, + "learning_rate": 3.110142158583496e-07, + "loss": 94.9882, + "step": 111180 + }, + { + "epoch": 0.9197998097365264, + "grad_norm": 560.701416015625, + "learning_rate": 3.1052474600555936e-07, + "loss": 66.4736, + "step": 111190 + }, + { + "epoch": 0.9198825329858957, + "grad_norm": 1017.1104125976562, + "learning_rate": 3.100356492727929e-07, + "loss": 84.1691, + "step": 111200 + }, + { + "epoch": 0.9199652562352649, + "grad_norm": 745.77587890625, + 
"learning_rate": 3.0954692569896585e-07, + "loss": 68.3992, + "step": 111210 + }, + { + "epoch": 0.9200479794846341, + "grad_norm": 499.56640625, + "learning_rate": 3.0905857532296414e-07, + "loss": 87.3954, + "step": 111220 + }, + { + "epoch": 0.9201307027340034, + "grad_norm": 1444.1029052734375, + "learning_rate": 3.085705981836423e-07, + "loss": 81.4259, + "step": 111230 + }, + { + "epoch": 0.9202134259833726, + "grad_norm": 508.9635314941406, + "learning_rate": 3.080829943198277e-07, + "loss": 89.5248, + "step": 111240 + }, + { + "epoch": 0.9202961492327418, + "grad_norm": 996.2418212890625, + "learning_rate": 3.0759576377031697e-07, + "loss": 69.8985, + "step": 111250 + }, + { + "epoch": 0.9203788724821111, + "grad_norm": 1550.9212646484375, + "learning_rate": 3.071089065738747e-07, + "loss": 69.4487, + "step": 111260 + }, + { + "epoch": 0.9204615957314803, + "grad_norm": 1241.609130859375, + "learning_rate": 3.0662242276923993e-07, + "loss": 112.2663, + "step": 111270 + }, + { + "epoch": 0.9205443189808495, + "grad_norm": 1453.5501708984375, + "learning_rate": 3.061363123951189e-07, + "loss": 87.4852, + "step": 111280 + }, + { + "epoch": 0.9206270422302189, + "grad_norm": 792.2757568359375, + "learning_rate": 3.0565057549019005e-07, + "loss": 89.3375, + "step": 111290 + }, + { + "epoch": 0.920709765479588, + "grad_norm": 365.10528564453125, + "learning_rate": 3.051652120931003e-07, + "loss": 77.1843, + "step": 111300 + }, + { + "epoch": 0.9207924887289572, + "grad_norm": 1038.219482421875, + "learning_rate": 3.0468022224246886e-07, + "loss": 100.4181, + "step": 111310 + }, + { + "epoch": 0.9208752119783266, + "grad_norm": 1549.9176025390625, + "learning_rate": 3.04195605976883e-07, + "loss": 101.2689, + "step": 111320 + }, + { + "epoch": 0.9209579352276958, + "grad_norm": 1094.44873046875, + "learning_rate": 3.0371136333490315e-07, + "loss": 62.7427, + "step": 111330 + }, + { + "epoch": 0.921040658477065, + "grad_norm": 764.5133056640625, + "learning_rate": 3.0322749435505563e-07, + "loss": 117.9781, + "step": 111340 + }, + { + "epoch": 0.9211233817264343, + "grad_norm": 922.7494506835938, + "learning_rate": 3.027439990758418e-07, + "loss": 87.756, + "step": 111350 + }, + { + "epoch": 0.9212061049758035, + "grad_norm": 2155.5322265625, + "learning_rate": 3.022608775357294e-07, + "loss": 109.2952, + "step": 111360 + }, + { + "epoch": 0.9212888282251727, + "grad_norm": 639.3640747070312, + "learning_rate": 3.017781297731598e-07, + "loss": 65.4102, + "step": 111370 + }, + { + "epoch": 0.921371551474542, + "grad_norm": 401.3136901855469, + "learning_rate": 3.012957558265428e-07, + "loss": 55.7189, + "step": 111380 + }, + { + "epoch": 0.9214542747239112, + "grad_norm": 815.2987060546875, + "learning_rate": 3.008137557342583e-07, + "loss": 89.766, + "step": 111390 + }, + { + "epoch": 0.9215369979732804, + "grad_norm": 848.1900024414062, + "learning_rate": 3.003321295346573e-07, + "loss": 70.3423, + "step": 111400 + }, + { + "epoch": 0.9216197212226497, + "grad_norm": 1112.6951904296875, + "learning_rate": 2.9985087726605965e-07, + "loss": 96.3883, + "step": 111410 + }, + { + "epoch": 0.9217024444720189, + "grad_norm": 863.7384643554688, + "learning_rate": 2.9936999896675757e-07, + "loss": 77.8842, + "step": 111420 + }, + { + "epoch": 0.9217851677213881, + "grad_norm": 549.550537109375, + "learning_rate": 2.988894946750115e-07, + "loss": 97.5944, + "step": 111430 + }, + { + "epoch": 0.9218678909707574, + "grad_norm": 348.6497497558594, + "learning_rate": 2.9840936442905253e-07, + "loss": 
81.6903, + "step": 111440 + }, + { + "epoch": 0.9219506142201266, + "grad_norm": 1831.6419677734375, + "learning_rate": 2.979296082670835e-07, + "loss": 112.7743, + "step": 111450 + }, + { + "epoch": 0.9220333374694958, + "grad_norm": 490.64111328125, + "learning_rate": 2.974502262272749e-07, + "loss": 80.2675, + "step": 111460 + }, + { + "epoch": 0.922116060718865, + "grad_norm": 768.78076171875, + "learning_rate": 2.969712183477713e-07, + "loss": 74.6032, + "step": 111470 + }, + { + "epoch": 0.9221987839682343, + "grad_norm": 886.2664184570312, + "learning_rate": 2.964925846666833e-07, + "loss": 52.6146, + "step": 111480 + }, + { + "epoch": 0.9222815072176035, + "grad_norm": 648.3212280273438, + "learning_rate": 2.960143252220943e-07, + "loss": 88.3323, + "step": 111490 + }, + { + "epoch": 0.9223642304669727, + "grad_norm": 646.9230346679688, + "learning_rate": 2.955364400520583e-07, + "loss": 85.1518, + "step": 111500 + }, + { + "epoch": 0.922446953716342, + "grad_norm": 783.1369018554688, + "learning_rate": 2.950589291945954e-07, + "loss": 83.9216, + "step": 111510 + }, + { + "epoch": 0.9225296769657112, + "grad_norm": 624.5678100585938, + "learning_rate": 2.9458179268770147e-07, + "loss": 67.9207, + "step": 111520 + }, + { + "epoch": 0.9226124002150804, + "grad_norm": 933.9120483398438, + "learning_rate": 2.941050305693394e-07, + "loss": 96.648, + "step": 111530 + }, + { + "epoch": 0.9226951234644497, + "grad_norm": 759.1103515625, + "learning_rate": 2.9362864287744266e-07, + "loss": 79.1249, + "step": 111540 + }, + { + "epoch": 0.9227778467138189, + "grad_norm": 709.6221923828125, + "learning_rate": 2.931526296499154e-07, + "loss": 75.1609, + "step": 111550 + }, + { + "epoch": 0.9228605699631881, + "grad_norm": 1090.783203125, + "learning_rate": 2.926769909246313e-07, + "loss": 90.4893, + "step": 111560 + }, + { + "epoch": 0.9229432932125574, + "grad_norm": 1291.6949462890625, + "learning_rate": 2.9220172673943615e-07, + "loss": 121.0514, + "step": 111570 + }, + { + "epoch": 0.9230260164619266, + "grad_norm": 1319.6103515625, + "learning_rate": 2.9172683713214354e-07, + "loss": 81.1168, + "step": 111580 + }, + { + "epoch": 0.9231087397112958, + "grad_norm": 887.0220947265625, + "learning_rate": 2.9125232214053946e-07, + "loss": 77.7259, + "step": 111590 + }, + { + "epoch": 0.9231914629606651, + "grad_norm": 823.8740234375, + "learning_rate": 2.9077818180237693e-07, + "loss": 83.7634, + "step": 111600 + }, + { + "epoch": 0.9232741862100343, + "grad_norm": 1067.777099609375, + "learning_rate": 2.90304416155382e-07, + "loss": 77.3156, + "step": 111610 + }, + { + "epoch": 0.9233569094594035, + "grad_norm": 533.4111328125, + "learning_rate": 2.898310252372505e-07, + "loss": 81.9363, + "step": 111620 + }, + { + "epoch": 0.9234396327087728, + "grad_norm": 800.60888671875, + "learning_rate": 2.893580090856474e-07, + "loss": 112.345, + "step": 111630 + }, + { + "epoch": 0.923522355958142, + "grad_norm": 962.5048217773438, + "learning_rate": 2.888853677382081e-07, + "loss": 88.3361, + "step": 111640 + }, + { + "epoch": 0.9236050792075112, + "grad_norm": 889.5928344726562, + "learning_rate": 2.8841310123253865e-07, + "loss": 81.9884, + "step": 111650 + }, + { + "epoch": 0.9236878024568805, + "grad_norm": 697.1818237304688, + "learning_rate": 2.879412096062162e-07, + "loss": 121.3172, + "step": 111660 + }, + { + "epoch": 0.9237705257062497, + "grad_norm": 667.9248657226562, + "learning_rate": 2.874696928967863e-07, + "loss": 97.8055, + "step": 111670 + }, + { + "epoch": 0.9238532489556189, + 
"grad_norm": 442.2999572753906, + "learning_rate": 2.869985511417661e-07, + "loss": 54.4177, + "step": 111680 + }, + { + "epoch": 0.9239359722049882, + "grad_norm": 686.0079956054688, + "learning_rate": 2.8652778437864016e-07, + "loss": 87.2172, + "step": 111690 + }, + { + "epoch": 0.9240186954543574, + "grad_norm": 684.615966796875, + "learning_rate": 2.8605739264486733e-07, + "loss": 89.1697, + "step": 111700 + }, + { + "epoch": 0.9241014187037266, + "grad_norm": 1551.7012939453125, + "learning_rate": 2.8558737597787324e-07, + "loss": 115.9352, + "step": 111710 + }, + { + "epoch": 0.924184141953096, + "grad_norm": 1213.67236328125, + "learning_rate": 2.851177344150552e-07, + "loss": 97.0064, + "step": 111720 + }, + { + "epoch": 0.9242668652024651, + "grad_norm": 768.0172729492188, + "learning_rate": 2.846484679937811e-07, + "loss": 81.57, + "step": 111730 + }, + { + "epoch": 0.9243495884518343, + "grad_norm": 486.9347229003906, + "learning_rate": 2.841795767513877e-07, + "loss": 72.622, + "step": 111740 + }, + { + "epoch": 0.9244323117012037, + "grad_norm": 799.0153198242188, + "learning_rate": 2.8371106072518194e-07, + "loss": 95.9192, + "step": 111750 + }, + { + "epoch": 0.9245150349505729, + "grad_norm": 778.710205078125, + "learning_rate": 2.8324291995244333e-07, + "loss": 74.0421, + "step": 111760 + }, + { + "epoch": 0.924597758199942, + "grad_norm": 1426.642822265625, + "learning_rate": 2.8277515447041827e-07, + "loss": 101.7019, + "step": 111770 + }, + { + "epoch": 0.9246804814493114, + "grad_norm": 934.9050903320312, + "learning_rate": 2.8230776431632523e-07, + "loss": 93.3299, + "step": 111780 + }, + { + "epoch": 0.9247632046986806, + "grad_norm": 839.4944458007812, + "learning_rate": 2.8184074952735176e-07, + "loss": 76.1551, + "step": 111790 + }, + { + "epoch": 0.9248459279480498, + "grad_norm": 733.8991088867188, + "learning_rate": 2.813741101406564e-07, + "loss": 83.5848, + "step": 111800 + }, + { + "epoch": 0.9249286511974191, + "grad_norm": 1104.2376708984375, + "learning_rate": 2.8090784619336784e-07, + "loss": 109.4974, + "step": 111810 + }, + { + "epoch": 0.9250113744467883, + "grad_norm": 541.585205078125, + "learning_rate": 2.804419577225842e-07, + "loss": 99.5181, + "step": 111820 + }, + { + "epoch": 0.9250940976961575, + "grad_norm": 1171.6307373046875, + "learning_rate": 2.7997644476537353e-07, + "loss": 105.5711, + "step": 111830 + }, + { + "epoch": 0.9251768209455268, + "grad_norm": 1111.163818359375, + "learning_rate": 2.7951130735877576e-07, + "loss": 100.552, + "step": 111840 + }, + { + "epoch": 0.925259544194896, + "grad_norm": 465.4991149902344, + "learning_rate": 2.790465455397989e-07, + "loss": 91.685, + "step": 111850 + }, + { + "epoch": 0.9253422674442652, + "grad_norm": 768.08740234375, + "learning_rate": 2.785821593454224e-07, + "loss": 105.3449, + "step": 111860 + }, + { + "epoch": 0.9254249906936345, + "grad_norm": 497.09466552734375, + "learning_rate": 2.7811814881259503e-07, + "loss": 67.444, + "step": 111870 + }, + { + "epoch": 0.9255077139430037, + "grad_norm": 693.8814697265625, + "learning_rate": 2.776545139782361e-07, + "loss": 61.2311, + "step": 111880 + }, + { + "epoch": 0.9255904371923729, + "grad_norm": 892.4473266601562, + "learning_rate": 2.771912548792344e-07, + "loss": 87.7277, + "step": 111890 + }, + { + "epoch": 0.9256731604417422, + "grad_norm": 1138.6724853515625, + "learning_rate": 2.767283715524505e-07, + "loss": 122.5125, + "step": 111900 + }, + { + "epoch": 0.9257558836911114, + "grad_norm": 755.6520385742188, + 
"learning_rate": 2.762658640347127e-07, + "loss": 69.8308, + "step": 111910 + }, + { + "epoch": 0.9258386069404806, + "grad_norm": 983.39990234375, + "learning_rate": 2.75803732362821e-07, + "loss": 58.8145, + "step": 111920 + }, + { + "epoch": 0.9259213301898499, + "grad_norm": 895.5469360351562, + "learning_rate": 2.7534197657354543e-07, + "loss": 91.3707, + "step": 111930 + }, + { + "epoch": 0.9260040534392191, + "grad_norm": 1143.3978271484375, + "learning_rate": 2.7488059670362553e-07, + "loss": 80.0223, + "step": 111940 + }, + { + "epoch": 0.9260867766885883, + "grad_norm": 666.8565673828125, + "learning_rate": 2.744195927897708e-07, + "loss": 119.4018, + "step": 111950 + }, + { + "epoch": 0.9261694999379576, + "grad_norm": 1519.14404296875, + "learning_rate": 2.7395896486866193e-07, + "loss": 69.0608, + "step": 111960 + }, + { + "epoch": 0.9262522231873268, + "grad_norm": 1167.140625, + "learning_rate": 2.7349871297694895e-07, + "loss": 95.0523, + "step": 111970 + }, + { + "epoch": 0.926334946436696, + "grad_norm": 715.0806274414062, + "learning_rate": 2.73038837151251e-07, + "loss": 73.4228, + "step": 111980 + }, + { + "epoch": 0.9264176696860653, + "grad_norm": 602.8383178710938, + "learning_rate": 2.725793374281593e-07, + "loss": 103.7122, + "step": 111990 + }, + { + "epoch": 0.9265003929354345, + "grad_norm": 575.5548706054688, + "learning_rate": 2.7212021384423415e-07, + "loss": 59.8035, + "step": 112000 + }, + { + "epoch": 0.9265831161848037, + "grad_norm": 687.127197265625, + "learning_rate": 2.716614664360051e-07, + "loss": 53.2006, + "step": 112010 + }, + { + "epoch": 0.926665839434173, + "grad_norm": 912.4276123046875, + "learning_rate": 2.71203095239973e-07, + "loss": 71.3291, + "step": 112020 + }, + { + "epoch": 0.9267485626835422, + "grad_norm": 720.695068359375, + "learning_rate": 2.7074510029260814e-07, + "loss": 92.2435, + "step": 112030 + }, + { + "epoch": 0.9268312859329114, + "grad_norm": 881.0599975585938, + "learning_rate": 2.702874816303519e-07, + "loss": 83.2978, + "step": 112040 + }, + { + "epoch": 0.9269140091822807, + "grad_norm": 1031.5977783203125, + "learning_rate": 2.6983023928961406e-07, + "loss": 91.2726, + "step": 112050 + }, + { + "epoch": 0.9269967324316499, + "grad_norm": 976.7826538085938, + "learning_rate": 2.6937337330677547e-07, + "loss": 74.995, + "step": 112060 + }, + { + "epoch": 0.9270794556810191, + "grad_norm": 916.8348388671875, + "learning_rate": 2.689168837181866e-07, + "loss": 101.2523, + "step": 112070 + }, + { + "epoch": 0.9271621789303884, + "grad_norm": 889.0336303710938, + "learning_rate": 2.684607705601688e-07, + "loss": 84.4417, + "step": 112080 + }, + { + "epoch": 0.9272449021797576, + "grad_norm": 1171.827392578125, + "learning_rate": 2.680050338690132e-07, + "loss": 133.0743, + "step": 112090 + }, + { + "epoch": 0.9273276254291268, + "grad_norm": 868.6085205078125, + "learning_rate": 2.675496736809796e-07, + "loss": 84.3478, + "step": 112100 + }, + { + "epoch": 0.9274103486784961, + "grad_norm": 571.0438842773438, + "learning_rate": 2.6709469003230013e-07, + "loss": 91.2716, + "step": 112110 + }, + { + "epoch": 0.9274930719278653, + "grad_norm": 457.09393310546875, + "learning_rate": 2.6664008295917364e-07, + "loss": 72.2116, + "step": 112120 + }, + { + "epoch": 0.9275757951772345, + "grad_norm": 1358.0347900390625, + "learning_rate": 2.661858524977734e-07, + "loss": 93.3441, + "step": 112130 + }, + { + "epoch": 0.9276585184266039, + "grad_norm": 667.2669067382812, + "learning_rate": 2.657319986842394e-07, + "loss": 
79.6386, + "step": 112140 + }, + { + "epoch": 0.927741241675973, + "grad_norm": 1060.2071533203125, + "learning_rate": 2.6527852155468327e-07, + "loss": 94.3927, + "step": 112150 + }, + { + "epoch": 0.9278239649253422, + "grad_norm": 1009.9804077148438, + "learning_rate": 2.648254211451856e-07, + "loss": 63.4351, + "step": 112160 + }, + { + "epoch": 0.9279066881747116, + "grad_norm": 910.8653564453125, + "learning_rate": 2.643726974917976e-07, + "loss": 94.569, + "step": 112170 + }, + { + "epoch": 0.9279894114240808, + "grad_norm": 451.7293701171875, + "learning_rate": 2.6392035063054043e-07, + "loss": 98.4698, + "step": 112180 + }, + { + "epoch": 0.92807213467345, + "grad_norm": 976.0929565429688, + "learning_rate": 2.634683805974059e-07, + "loss": 69.7292, + "step": 112190 + }, + { + "epoch": 0.9281548579228192, + "grad_norm": 999.5797119140625, + "learning_rate": 2.63016787428354e-07, + "loss": 100.5, + "step": 112200 + }, + { + "epoch": 0.9282375811721885, + "grad_norm": 620.88623046875, + "learning_rate": 2.6256557115931613e-07, + "loss": 76.7702, + "step": 112210 + }, + { + "epoch": 0.9283203044215577, + "grad_norm": 725.8551635742188, + "learning_rate": 2.6211473182619405e-07, + "loss": 74.561, + "step": 112220 + }, + { + "epoch": 0.9284030276709269, + "grad_norm": 1008.1248168945312, + "learning_rate": 2.616642694648591e-07, + "loss": 58.9289, + "step": 112230 + }, + { + "epoch": 0.9284857509202962, + "grad_norm": 1029.631591796875, + "learning_rate": 2.612141841111521e-07, + "loss": 83.1139, + "step": 112240 + }, + { + "epoch": 0.9285684741696654, + "grad_norm": 921.6142578125, + "learning_rate": 2.6076447580088426e-07, + "loss": 60.3248, + "step": 112250 + }, + { + "epoch": 0.9286511974190346, + "grad_norm": 684.411376953125, + "learning_rate": 2.603151445698371e-07, + "loss": 82.2432, + "step": 112260 + }, + { + "epoch": 0.9287339206684039, + "grad_norm": 1127.3673095703125, + "learning_rate": 2.598661904537619e-07, + "loss": 102.584, + "step": 112270 + }, + { + "epoch": 0.9288166439177731, + "grad_norm": 949.6649780273438, + "learning_rate": 2.5941761348837966e-07, + "loss": 110.4898, + "step": 112280 + }, + { + "epoch": 0.9288993671671423, + "grad_norm": 1033.648193359375, + "learning_rate": 2.5896941370938177e-07, + "loss": 92.9099, + "step": 112290 + }, + { + "epoch": 0.9289820904165116, + "grad_norm": 798.4220581054688, + "learning_rate": 2.5852159115242857e-07, + "loss": 71.5822, + "step": 112300 + }, + { + "epoch": 0.9290648136658808, + "grad_norm": 766.9046630859375, + "learning_rate": 2.580741458531516e-07, + "loss": 79.5017, + "step": 112310 + }, + { + "epoch": 0.92914753691525, + "grad_norm": 547.7767944335938, + "learning_rate": 2.576270778471529e-07, + "loss": 78.4615, + "step": 112320 + }, + { + "epoch": 0.9292302601646193, + "grad_norm": 636.5359497070312, + "learning_rate": 2.571803871700029e-07, + "loss": 64.6448, + "step": 112330 + }, + { + "epoch": 0.9293129834139885, + "grad_norm": 1199.5556640625, + "learning_rate": 2.567340738572427e-07, + "loss": 102.5773, + "step": 112340 + }, + { + "epoch": 0.9293957066633577, + "grad_norm": 1170.7523193359375, + "learning_rate": 2.5628813794438434e-07, + "loss": 80.3686, + "step": 112350 + }, + { + "epoch": 0.929478429912727, + "grad_norm": 1347.653564453125, + "learning_rate": 2.5584257946690836e-07, + "loss": 81.7604, + "step": 112360 + }, + { + "epoch": 0.9295611531620962, + "grad_norm": 786.822509765625, + "learning_rate": 2.553973984602648e-07, + "loss": 84.814, + "step": 112370 + }, + { + "epoch": 
0.9296438764114654, + "grad_norm": 1360.4071044921875, + "learning_rate": 2.5495259495987576e-07, + "loss": 107.1304, + "step": 112380 + }, + { + "epoch": 0.9297265996608347, + "grad_norm": 1530.94873046875, + "learning_rate": 2.545081690011314e-07, + "loss": 98.4278, + "step": 112390 + }, + { + "epoch": 0.9298093229102039, + "grad_norm": 977.4533081054688, + "learning_rate": 2.540641206193939e-07, + "loss": 78.7725, + "step": 112400 + }, + { + "epoch": 0.9298920461595731, + "grad_norm": 600.151611328125, + "learning_rate": 2.536204498499922e-07, + "loss": 96.0168, + "step": 112410 + }, + { + "epoch": 0.9299747694089424, + "grad_norm": 640.1029052734375, + "learning_rate": 2.5317715672822927e-07, + "loss": 85.9019, + "step": 112420 + }, + { + "epoch": 0.9300574926583116, + "grad_norm": 747.0780029296875, + "learning_rate": 2.527342412893746e-07, + "loss": 62.0823, + "step": 112430 + }, + { + "epoch": 0.9301402159076808, + "grad_norm": 1375.25146484375, + "learning_rate": 2.5229170356867005e-07, + "loss": 96.1561, + "step": 112440 + }, + { + "epoch": 0.9302229391570501, + "grad_norm": 782.2132568359375, + "learning_rate": 2.5184954360132574e-07, + "loss": 89.7107, + "step": 112450 + }, + { + "epoch": 0.9303056624064193, + "grad_norm": 2209.59521484375, + "learning_rate": 2.514077614225219e-07, + "loss": 105.6186, + "step": 112460 + }, + { + "epoch": 0.9303883856557885, + "grad_norm": 583.348388671875, + "learning_rate": 2.509663570674087e-07, + "loss": 80.0686, + "step": 112470 + }, + { + "epoch": 0.9304711089051578, + "grad_norm": 459.78302001953125, + "learning_rate": 2.5052533057110805e-07, + "loss": 93.2552, + "step": 112480 + }, + { + "epoch": 0.930553832154527, + "grad_norm": 617.0430908203125, + "learning_rate": 2.500846819687097e-07, + "loss": 93.8821, + "step": 112490 + }, + { + "epoch": 0.9306365554038962, + "grad_norm": 0.0, + "learning_rate": 2.4964441129527337e-07, + "loss": 83.9168, + "step": 112500 + }, + { + "epoch": 0.9307192786532655, + "grad_norm": 957.689453125, + "learning_rate": 2.4920451858582997e-07, + "loss": 81.6168, + "step": 112510 + }, + { + "epoch": 0.9308020019026347, + "grad_norm": 1347.580810546875, + "learning_rate": 2.487650038753803e-07, + "loss": 84.5013, + "step": 112520 + }, + { + "epoch": 0.9308847251520039, + "grad_norm": 840.2437744140625, + "learning_rate": 2.483258671988942e-07, + "loss": 90.0643, + "step": 112530 + }, + { + "epoch": 0.9309674484013732, + "grad_norm": 803.8649291992188, + "learning_rate": 2.4788710859131203e-07, + "loss": 95.1621, + "step": 112540 + }, + { + "epoch": 0.9310501716507424, + "grad_norm": 775.64599609375, + "learning_rate": 2.474487280875426e-07, + "loss": 82.0533, + "step": 112550 + }, + { + "epoch": 0.9311328949001116, + "grad_norm": 648.1033325195312, + "learning_rate": 2.470107257224669e-07, + "loss": 69.88, + "step": 112560 + }, + { + "epoch": 0.931215618149481, + "grad_norm": 769.5602416992188, + "learning_rate": 2.4657310153093475e-07, + "loss": 101.4659, + "step": 112570 + }, + { + "epoch": 0.9312983413988501, + "grad_norm": 654.4227905273438, + "learning_rate": 2.46135855547765e-07, + "loss": 79.7583, + "step": 112580 + }, + { + "epoch": 0.9313810646482193, + "grad_norm": 909.7418823242188, + "learning_rate": 2.4569898780774816e-07, + "loss": 77.9152, + "step": 112590 + }, + { + "epoch": 0.9314637878975887, + "grad_norm": 475.4342041015625, + "learning_rate": 2.452624983456431e-07, + "loss": 87.7683, + "step": 112600 + }, + { + "epoch": 0.9315465111469579, + "grad_norm": 750.6339111328125, + "learning_rate": 
2.4482638719618037e-07, + "loss": 95.7583, + "step": 112610 + }, + { + "epoch": 0.931629234396327, + "grad_norm": 719.8361206054688, + "learning_rate": 2.443906543940594e-07, + "loss": 89.19, + "step": 112620 + }, + { + "epoch": 0.9317119576456964, + "grad_norm": 788.4232788085938, + "learning_rate": 2.439552999739475e-07, + "loss": 64.0623, + "step": 112630 + }, + { + "epoch": 0.9317946808950656, + "grad_norm": 1180.428955078125, + "learning_rate": 2.4352032397048584e-07, + "loss": 77.4437, + "step": 112640 + }, + { + "epoch": 0.9318774041444348, + "grad_norm": 702.971923828125, + "learning_rate": 2.4308572641828234e-07, + "loss": 86.0467, + "step": 112650 + }, + { + "epoch": 0.9319601273938041, + "grad_norm": 467.1812438964844, + "learning_rate": 2.4265150735191644e-07, + "loss": 76.0462, + "step": 112660 + }, + { + "epoch": 0.9320428506431733, + "grad_norm": 1010.5810546875, + "learning_rate": 2.422176668059367e-07, + "loss": 71.2109, + "step": 112670 + }, + { + "epoch": 0.9321255738925425, + "grad_norm": 1030.048583984375, + "learning_rate": 2.417842048148622e-07, + "loss": 98.5793, + "step": 112680 + }, + { + "epoch": 0.9322082971419118, + "grad_norm": 608.4972534179688, + "learning_rate": 2.4135112141318084e-07, + "loss": 86.8852, + "step": 112690 + }, + { + "epoch": 0.932291020391281, + "grad_norm": 1069.7056884765625, + "learning_rate": 2.409184166353512e-07, + "loss": 73.6224, + "step": 112700 + }, + { + "epoch": 0.9323737436406502, + "grad_norm": 620.8689575195312, + "learning_rate": 2.4048609051580295e-07, + "loss": 73.3166, + "step": 112710 + }, + { + "epoch": 0.9324564668900195, + "grad_norm": 1390.774169921875, + "learning_rate": 2.4005414308893304e-07, + "loss": 93.2582, + "step": 112720 + }, + { + "epoch": 0.9325391901393887, + "grad_norm": 1095.233642578125, + "learning_rate": 2.396225743891095e-07, + "loss": 79.1661, + "step": 112730 + }, + { + "epoch": 0.9326219133887579, + "grad_norm": 651.0794677734375, + "learning_rate": 2.3919138445067045e-07, + "loss": 74.3248, + "step": 112740 + }, + { + "epoch": 0.9327046366381272, + "grad_norm": 694.2014770507812, + "learning_rate": 2.3876057330792344e-07, + "loss": 72.9786, + "step": 112750 + }, + { + "epoch": 0.9327873598874964, + "grad_norm": 845.7319946289062, + "learning_rate": 2.3833014099514716e-07, + "loss": 67.2363, + "step": 112760 + }, + { + "epoch": 0.9328700831368656, + "grad_norm": 579.4317016601562, + "learning_rate": 2.3790008754658811e-07, + "loss": 75.9026, + "step": 112770 + }, + { + "epoch": 0.9329528063862349, + "grad_norm": 1336.7401123046875, + "learning_rate": 2.3747041299646389e-07, + "loss": 100.3919, + "step": 112780 + }, + { + "epoch": 0.9330355296356041, + "grad_norm": 782.5462646484375, + "learning_rate": 2.3704111737896218e-07, + "loss": 66.6842, + "step": 112790 + }, + { + "epoch": 0.9331182528849733, + "grad_norm": 1084.5948486328125, + "learning_rate": 2.3661220072823953e-07, + "loss": 86.8441, + "step": 112800 + }, + { + "epoch": 0.9332009761343426, + "grad_norm": 873.84228515625, + "learning_rate": 2.3618366307842312e-07, + "loss": 80.4352, + "step": 112810 + }, + { + "epoch": 0.9332836993837118, + "grad_norm": 935.0689697265625, + "learning_rate": 2.3575550446360952e-07, + "loss": 74.7977, + "step": 112820 + }, + { + "epoch": 0.933366422633081, + "grad_norm": 1264.7552490234375, + "learning_rate": 2.3532772491786537e-07, + "loss": 62.4919, + "step": 112830 + }, + { + "epoch": 0.9334491458824503, + "grad_norm": 760.6544799804688, + "learning_rate": 2.3490032447522792e-07, + "loss": 78.2148, + 
"step": 112840 + }, + { + "epoch": 0.9335318691318195, + "grad_norm": 3588.122314453125, + "learning_rate": 2.3447330316970218e-07, + "loss": 120.6314, + "step": 112850 + }, + { + "epoch": 0.9336145923811887, + "grad_norm": 653.4447021484375, + "learning_rate": 2.3404666103526542e-07, + "loss": 97.4798, + "step": 112860 + }, + { + "epoch": 0.933697315630558, + "grad_norm": 705.934814453125, + "learning_rate": 2.3362039810586267e-07, + "loss": 94.1974, + "step": 112870 + }, + { + "epoch": 0.9337800388799272, + "grad_norm": 616.7807006835938, + "learning_rate": 2.3319451441541018e-07, + "loss": 67.858, + "step": 112880 + }, + { + "epoch": 0.9338627621292964, + "grad_norm": 934.6239624023438, + "learning_rate": 2.3276900999779305e-07, + "loss": 85.3062, + "step": 112890 + }, + { + "epoch": 0.9339454853786656, + "grad_norm": 563.1575317382812, + "learning_rate": 2.323438848868681e-07, + "loss": 75.0296, + "step": 112900 + }, + { + "epoch": 0.9340282086280349, + "grad_norm": 1033.3599853515625, + "learning_rate": 2.319191391164588e-07, + "loss": 86.1594, + "step": 112910 + }, + { + "epoch": 0.9341109318774041, + "grad_norm": 551.6663818359375, + "learning_rate": 2.3149477272036146e-07, + "loss": 72.7008, + "step": 112920 + }, + { + "epoch": 0.9341936551267733, + "grad_norm": 884.0999755859375, + "learning_rate": 2.3107078573234077e-07, + "loss": 88.6581, + "step": 112930 + }, + { + "epoch": 0.9342763783761426, + "grad_norm": 766.043701171875, + "learning_rate": 2.306471781861308e-07, + "loss": 75.3607, + "step": 112940 + }, + { + "epoch": 0.9343591016255118, + "grad_norm": 6711.1845703125, + "learning_rate": 2.3022395011543687e-07, + "loss": 123.3523, + "step": 112950 + }, + { + "epoch": 0.934441824874881, + "grad_norm": 771.6279296875, + "learning_rate": 2.2980110155393253e-07, + "loss": 97.1575, + "step": 112960 + }, + { + "epoch": 0.9345245481242503, + "grad_norm": 1057.9940185546875, + "learning_rate": 2.293786325352626e-07, + "loss": 113.2081, + "step": 112970 + }, + { + "epoch": 0.9346072713736195, + "grad_norm": 1071.353271484375, + "learning_rate": 2.289565430930407e-07, + "loss": 87.6715, + "step": 112980 + }, + { + "epoch": 0.9346899946229887, + "grad_norm": 862.3544921875, + "learning_rate": 2.2853483326085002e-07, + "loss": 79.9122, + "step": 112990 + }, + { + "epoch": 0.934772717872358, + "grad_norm": 895.1743774414062, + "learning_rate": 2.2811350307224534e-07, + "loss": 113.0911, + "step": 113000 + }, + { + "epoch": 0.9348554411217272, + "grad_norm": 753.2420654296875, + "learning_rate": 2.2769255256074874e-07, + "loss": 84.552, + "step": 113010 + }, + { + "epoch": 0.9349381643710964, + "grad_norm": 1342.393798828125, + "learning_rate": 2.27271981759854e-07, + "loss": 97.5154, + "step": 113020 + }, + { + "epoch": 0.9350208876204658, + "grad_norm": 1412.8348388671875, + "learning_rate": 2.2685179070302377e-07, + "loss": 82.4936, + "step": 113030 + }, + { + "epoch": 0.935103610869835, + "grad_norm": 668.9525756835938, + "learning_rate": 2.2643197942369022e-07, + "loss": 94.1694, + "step": 113040 + }, + { + "epoch": 0.9351863341192042, + "grad_norm": 772.8831176757812, + "learning_rate": 2.2601254795525774e-07, + "loss": 81.6293, + "step": 113050 + }, + { + "epoch": 0.9352690573685735, + "grad_norm": 1103.180908203125, + "learning_rate": 2.2559349633109629e-07, + "loss": 83.7612, + "step": 113060 + }, + { + "epoch": 0.9353517806179427, + "grad_norm": 819.5826416015625, + "learning_rate": 2.2517482458454808e-07, + "loss": 88.8579, + "step": 113070 + }, + { + "epoch": 
0.9354345038673119, + "grad_norm": 2683.924072265625, + "learning_rate": 2.2475653274892594e-07, + "loss": 102.4598, + "step": 113080 + }, + { + "epoch": 0.9355172271166812, + "grad_norm": 808.376220703125, + "learning_rate": 2.2433862085751157e-07, + "loss": 86.0561, + "step": 113090 + }, + { + "epoch": 0.9355999503660504, + "grad_norm": 618.5714111328125, + "learning_rate": 2.2392108894355557e-07, + "loss": 45.9369, + "step": 113100 + }, + { + "epoch": 0.9356826736154196, + "grad_norm": 823.1011962890625, + "learning_rate": 2.2350393704027917e-07, + "loss": 98.3395, + "step": 113110 + }, + { + "epoch": 0.9357653968647889, + "grad_norm": 572.7033081054688, + "learning_rate": 2.230871651808736e-07, + "loss": 94.4214, + "step": 113120 + }, + { + "epoch": 0.9358481201141581, + "grad_norm": 2472.20947265625, + "learning_rate": 2.226707733984995e-07, + "loss": 115.9153, + "step": 113130 + }, + { + "epoch": 0.9359308433635273, + "grad_norm": 665.7183837890625, + "learning_rate": 2.2225476172628714e-07, + "loss": 60.923, + "step": 113140 + }, + { + "epoch": 0.9360135666128966, + "grad_norm": 926.3880615234375, + "learning_rate": 2.2183913019733605e-07, + "loss": 104.7108, + "step": 113150 + }, + { + "epoch": 0.9360962898622658, + "grad_norm": 866.1433715820312, + "learning_rate": 2.2142387884471593e-07, + "loss": 74.1606, + "step": 113160 + }, + { + "epoch": 0.936179013111635, + "grad_norm": 2463.356201171875, + "learning_rate": 2.210090077014676e-07, + "loss": 73.1271, + "step": 113170 + }, + { + "epoch": 0.9362617363610043, + "grad_norm": 2465.159912109375, + "learning_rate": 2.2059451680059962e-07, + "loss": 136.0056, + "step": 113180 + }, + { + "epoch": 0.9363444596103735, + "grad_norm": 1126.7469482421875, + "learning_rate": 2.2018040617509174e-07, + "loss": 108.5757, + "step": 113190 + }, + { + "epoch": 0.9364271828597427, + "grad_norm": 704.0745849609375, + "learning_rate": 2.1976667585789257e-07, + "loss": 93.8709, + "step": 113200 + }, + { + "epoch": 0.936509906109112, + "grad_norm": 623.0414428710938, + "learning_rate": 2.193533258819208e-07, + "loss": 100.2674, + "step": 113210 + }, + { + "epoch": 0.9365926293584812, + "grad_norm": 956.225830078125, + "learning_rate": 2.1894035628006517e-07, + "loss": 93.8058, + "step": 113220 + }, + { + "epoch": 0.9366753526078504, + "grad_norm": 1069.036376953125, + "learning_rate": 2.1852776708518265e-07, + "loss": 84.0381, + "step": 113230 + }, + { + "epoch": 0.9367580758572197, + "grad_norm": 1028.188720703125, + "learning_rate": 2.18115558330102e-07, + "loss": 62.8961, + "step": 113240 + }, + { + "epoch": 0.9368407991065889, + "grad_norm": 656.3768310546875, + "learning_rate": 2.1770373004762035e-07, + "loss": 98.3898, + "step": 113250 + }, + { + "epoch": 0.9369235223559581, + "grad_norm": 1091.6727294921875, + "learning_rate": 2.1729228227050426e-07, + "loss": 91.2749, + "step": 113260 + }, + { + "epoch": 0.9370062456053274, + "grad_norm": 1016.352783203125, + "learning_rate": 2.1688121503149195e-07, + "loss": 81.7884, + "step": 113270 + }, + { + "epoch": 0.9370889688546966, + "grad_norm": 699.8380737304688, + "learning_rate": 2.1647052836329065e-07, + "loss": 81.0783, + "step": 113280 + }, + { + "epoch": 0.9371716921040658, + "grad_norm": 887.7781372070312, + "learning_rate": 2.1606022229857525e-07, + "loss": 126.7601, + "step": 113290 + }, + { + "epoch": 0.9372544153534351, + "grad_norm": 768.95166015625, + "learning_rate": 2.1565029686999306e-07, + "loss": 73.0002, + "step": 113300 + }, + { + "epoch": 0.9373371386028043, + "grad_norm": 
584.2427368164062, + "learning_rate": 2.1524075211016014e-07, + "loss": 74.3468, + "step": 113310 + }, + { + "epoch": 0.9374198618521735, + "grad_norm": 1104.49755859375, + "learning_rate": 2.148315880516605e-07, + "loss": 71.5521, + "step": 113320 + }, + { + "epoch": 0.9375025851015428, + "grad_norm": 663.4890747070312, + "learning_rate": 2.144228047270508e-07, + "loss": 82.779, + "step": 113330 + }, + { + "epoch": 0.937585308350912, + "grad_norm": 746.1346435546875, + "learning_rate": 2.140144021688556e-07, + "loss": 62.6374, + "step": 113340 + }, + { + "epoch": 0.9376680316002812, + "grad_norm": 1149.3975830078125, + "learning_rate": 2.1360638040957004e-07, + "loss": 114.1942, + "step": 113350 + }, + { + "epoch": 0.9377507548496505, + "grad_norm": 617.9911499023438, + "learning_rate": 2.1319873948165704e-07, + "loss": 67.7726, + "step": 113360 + }, + { + "epoch": 0.9378334780990197, + "grad_norm": 4376.74658203125, + "learning_rate": 2.1279147941755284e-07, + "loss": 123.5628, + "step": 113370 + }, + { + "epoch": 0.9379162013483889, + "grad_norm": 1298.1807861328125, + "learning_rate": 2.123846002496599e-07, + "loss": 91.0785, + "step": 113380 + }, + { + "epoch": 0.9379989245977582, + "grad_norm": 737.0429077148438, + "learning_rate": 2.119781020103523e-07, + "loss": 78.1234, + "step": 113390 + }, + { + "epoch": 0.9380816478471274, + "grad_norm": 1079.584716796875, + "learning_rate": 2.1157198473197417e-07, + "loss": 83.1591, + "step": 113400 + }, + { + "epoch": 0.9381643710964966, + "grad_norm": 733.552001953125, + "learning_rate": 2.111662484468363e-07, + "loss": 92.7453, + "step": 113410 + }, + { + "epoch": 0.938247094345866, + "grad_norm": 328.62188720703125, + "learning_rate": 2.1076089318722237e-07, + "loss": 84.1476, + "step": 113420 + }, + { + "epoch": 0.9383298175952351, + "grad_norm": 764.5359497070312, + "learning_rate": 2.1035591898538432e-07, + "loss": 115.1742, + "step": 113430 + }, + { + "epoch": 0.9384125408446043, + "grad_norm": 606.0637817382812, + "learning_rate": 2.0995132587354416e-07, + "loss": 91.7363, + "step": 113440 + }, + { + "epoch": 0.9384952640939737, + "grad_norm": 631.8139038085938, + "learning_rate": 2.0954711388389392e-07, + "loss": 70.5418, + "step": 113450 + }, + { + "epoch": 0.9385779873433429, + "grad_norm": 742.7326049804688, + "learning_rate": 2.09143283048594e-07, + "loss": 83.6848, + "step": 113460 + }, + { + "epoch": 0.938660710592712, + "grad_norm": 670.2506103515625, + "learning_rate": 2.08739833399777e-07, + "loss": 64.4276, + "step": 113470 + }, + { + "epoch": 0.9387434338420814, + "grad_norm": 569.9981079101562, + "learning_rate": 2.0833676496954225e-07, + "loss": 73.9018, + "step": 113480 + }, + { + "epoch": 0.9388261570914506, + "grad_norm": 843.3419189453125, + "learning_rate": 2.0793407778996021e-07, + "loss": 66.5153, + "step": 113490 + }, + { + "epoch": 0.9389088803408198, + "grad_norm": 1188.6783447265625, + "learning_rate": 2.0753177189307138e-07, + "loss": 82.2647, + "step": 113500 + }, + { + "epoch": 0.9389916035901891, + "grad_norm": 814.5333862304688, + "learning_rate": 2.071298473108846e-07, + "loss": 97.839, + "step": 113510 + }, + { + "epoch": 0.9390743268395583, + "grad_norm": 490.64227294921875, + "learning_rate": 2.0672830407537925e-07, + "loss": 70.4371, + "step": 113520 + }, + { + "epoch": 0.9391570500889275, + "grad_norm": 819.4855346679688, + "learning_rate": 2.0632714221850536e-07, + "loss": 79.6486, + "step": 113530 + }, + { + "epoch": 0.9392397733382968, + "grad_norm": 1092.7366943359375, + "learning_rate": 
2.0592636177218017e-07, + "loss": 80.8918, + "step": 113540 + }, + { + "epoch": 0.939322496587666, + "grad_norm": 1289.69775390625, + "learning_rate": 2.055259627682926e-07, + "loss": 89.2574, + "step": 113550 + }, + { + "epoch": 0.9394052198370352, + "grad_norm": 651.2507934570312, + "learning_rate": 2.051259452387e-07, + "loss": 77.3755, + "step": 113560 + }, + { + "epoch": 0.9394879430864045, + "grad_norm": 588.5090942382812, + "learning_rate": 2.0472630921523185e-07, + "loss": 90.6757, + "step": 113570 + }, + { + "epoch": 0.9395706663357737, + "grad_norm": 806.9398193359375, + "learning_rate": 2.043270547296833e-07, + "loss": 84.2983, + "step": 113580 + }, + { + "epoch": 0.9396533895851429, + "grad_norm": 961.4072265625, + "learning_rate": 2.0392818181382168e-07, + "loss": 69.7272, + "step": 113590 + }, + { + "epoch": 0.9397361128345122, + "grad_norm": 765.0098876953125, + "learning_rate": 2.0352969049938332e-07, + "loss": 92.7051, + "step": 113600 + }, + { + "epoch": 0.9398188360838814, + "grad_norm": 1120.79833984375, + "learning_rate": 2.0313158081807504e-07, + "loss": 84.8201, + "step": 113610 + }, + { + "epoch": 0.9399015593332506, + "grad_norm": 834.6403198242188, + "learning_rate": 2.027338528015721e-07, + "loss": 77.9415, + "step": 113620 + }, + { + "epoch": 0.9399842825826198, + "grad_norm": 1245.4927978515625, + "learning_rate": 2.0233650648152026e-07, + "loss": 106.3016, + "step": 113630 + }, + { + "epoch": 0.9400670058319891, + "grad_norm": 606.9644165039062, + "learning_rate": 2.0193954188953425e-07, + "loss": 116.0985, + "step": 113640 + }, + { + "epoch": 0.9401497290813583, + "grad_norm": 788.4069213867188, + "learning_rate": 2.015429590571988e-07, + "loss": 84.2174, + "step": 113650 + }, + { + "epoch": 0.9402324523307275, + "grad_norm": 560.9490356445312, + "learning_rate": 2.0114675801606754e-07, + "loss": 73.4865, + "step": 113660 + }, + { + "epoch": 0.9403151755800968, + "grad_norm": 1249.0743408203125, + "learning_rate": 2.0075093879766584e-07, + "loss": 97.4712, + "step": 113670 + }, + { + "epoch": 0.940397898829466, + "grad_norm": 814.6300048828125, + "learning_rate": 2.003555014334857e-07, + "loss": 74.9081, + "step": 113680 + }, + { + "epoch": 0.9404806220788352, + "grad_norm": 929.8789672851562, + "learning_rate": 1.9996044595499142e-07, + "loss": 101.6634, + "step": 113690 + }, + { + "epoch": 0.9405633453282045, + "grad_norm": 495.4698486328125, + "learning_rate": 1.9956577239361507e-07, + "loss": 79.1617, + "step": 113700 + }, + { + "epoch": 0.9406460685775737, + "grad_norm": 496.3013916015625, + "learning_rate": 1.9917148078075876e-07, + "loss": 68.7562, + "step": 113710 + }, + { + "epoch": 0.9407287918269429, + "grad_norm": 884.4699096679688, + "learning_rate": 1.9877757114779517e-07, + "loss": 103.0708, + "step": 113720 + }, + { + "epoch": 0.9408115150763122, + "grad_norm": 874.1282348632812, + "learning_rate": 1.983840435260659e-07, + "loss": 84.1153, + "step": 113730 + }, + { + "epoch": 0.9408942383256814, + "grad_norm": 956.4913330078125, + "learning_rate": 1.9799089794688197e-07, + "loss": 78.011, + "step": 113740 + }, + { + "epoch": 0.9409769615750506, + "grad_norm": 651.1774291992188, + "learning_rate": 1.9759813444152342e-07, + "loss": 55.0541, + "step": 113750 + }, + { + "epoch": 0.9410596848244199, + "grad_norm": 874.5100708007812, + "learning_rate": 1.9720575304124135e-07, + "loss": 61.1703, + "step": 113760 + }, + { + "epoch": 0.9411424080737891, + "grad_norm": 646.1497802734375, + "learning_rate": 1.9681375377725631e-07, + "loss": 53.9226, + 
"step": 113770 + }, + { + "epoch": 0.9412251313231583, + "grad_norm": 452.9364318847656, + "learning_rate": 1.9642213668075673e-07, + "loss": 136.8662, + "step": 113780 + }, + { + "epoch": 0.9413078545725276, + "grad_norm": 505.2906494140625, + "learning_rate": 1.9603090178290207e-07, + "loss": 78.0005, + "step": 113790 + }, + { + "epoch": 0.9413905778218968, + "grad_norm": 842.5142211914062, + "learning_rate": 1.9564004911482192e-07, + "loss": 59.6014, + "step": 113800 + }, + { + "epoch": 0.941473301071266, + "grad_norm": 860.8002319335938, + "learning_rate": 1.9524957870761364e-07, + "loss": 103.618, + "step": 113810 + }, + { + "epoch": 0.9415560243206353, + "grad_norm": 587.0081176757812, + "learning_rate": 1.9485949059234567e-07, + "loss": 97.7175, + "step": 113820 + }, + { + "epoch": 0.9416387475700045, + "grad_norm": 842.8802490234375, + "learning_rate": 1.944697848000554e-07, + "loss": 126.7808, + "step": 113830 + }, + { + "epoch": 0.9417214708193737, + "grad_norm": 915.0126342773438, + "learning_rate": 1.9408046136174975e-07, + "loss": 79.9683, + "step": 113840 + }, + { + "epoch": 0.941804194068743, + "grad_norm": 609.2335815429688, + "learning_rate": 1.9369152030840553e-07, + "loss": 92.4454, + "step": 113850 + }, + { + "epoch": 0.9418869173181122, + "grad_norm": 582.4105224609375, + "learning_rate": 1.9330296167096972e-07, + "loss": 94.1964, + "step": 113860 + }, + { + "epoch": 0.9419696405674814, + "grad_norm": 1090.697265625, + "learning_rate": 1.9291478548035703e-07, + "loss": 78.0568, + "step": 113870 + }, + { + "epoch": 0.9420523638168508, + "grad_norm": 755.0408325195312, + "learning_rate": 1.9252699176745326e-07, + "loss": 85.2529, + "step": 113880 + }, + { + "epoch": 0.94213508706622, + "grad_norm": 657.3673095703125, + "learning_rate": 1.9213958056311376e-07, + "loss": 65.6693, + "step": 113890 + }, + { + "epoch": 0.9422178103155892, + "grad_norm": 1077.51904296875, + "learning_rate": 1.917525518981622e-07, + "loss": 100.6788, + "step": 113900 + }, + { + "epoch": 0.9423005335649585, + "grad_norm": 780.7044067382812, + "learning_rate": 1.91365905803394e-07, + "loss": 88.6271, + "step": 113910 + }, + { + "epoch": 0.9423832568143277, + "grad_norm": 674.3640747070312, + "learning_rate": 1.9097964230957112e-07, + "loss": 72.1338, + "step": 113920 + }, + { + "epoch": 0.9424659800636969, + "grad_norm": 830.9854125976562, + "learning_rate": 1.9059376144742792e-07, + "loss": 82.4124, + "step": 113930 + }, + { + "epoch": 0.9425487033130662, + "grad_norm": 555.4501342773438, + "learning_rate": 1.9020826324766707e-07, + "loss": 94.9241, + "step": 113940 + }, + { + "epoch": 0.9426314265624354, + "grad_norm": 762.3797607421875, + "learning_rate": 1.8982314774096067e-07, + "loss": 81.5409, + "step": 113950 + }, + { + "epoch": 0.9427141498118046, + "grad_norm": 840.9845581054688, + "learning_rate": 1.894384149579509e-07, + "loss": 81.5919, + "step": 113960 + }, + { + "epoch": 0.9427968730611739, + "grad_norm": 520.0264892578125, + "learning_rate": 1.8905406492924884e-07, + "loss": 82.138, + "step": 113970 + }, + { + "epoch": 0.9428795963105431, + "grad_norm": 1191.1163330078125, + "learning_rate": 1.8867009768543554e-07, + "loss": 92.1979, + "step": 113980 + }, + { + "epoch": 0.9429623195599123, + "grad_norm": 850.5126342773438, + "learning_rate": 1.8828651325706159e-07, + "loss": 124.8058, + "step": 113990 + }, + { + "epoch": 0.9430450428092816, + "grad_norm": 1242.6998291015625, + "learning_rate": 1.8790331167464758e-07, + "loss": 74.3944, + "step": 114000 + }, + { + "epoch": 
0.9431277660586508, + "grad_norm": 937.7474975585938, + "learning_rate": 1.875204929686819e-07, + "loss": 78.7968, + "step": 114010 + }, + { + "epoch": 0.94321048930802, + "grad_norm": 811.911376953125, + "learning_rate": 1.8713805716962408e-07, + "loss": 81.0114, + "step": 114020 + }, + { + "epoch": 0.9432932125573893, + "grad_norm": 887.4229736328125, + "learning_rate": 1.867560043079031e-07, + "loss": 91.9161, + "step": 114030 + }, + { + "epoch": 0.9433759358067585, + "grad_norm": 556.4613037109375, + "learning_rate": 1.8637433441391739e-07, + "loss": 81.6592, + "step": 114040 + }, + { + "epoch": 0.9434586590561277, + "grad_norm": 785.0360107421875, + "learning_rate": 1.859930475180338e-07, + "loss": 88.5488, + "step": 114050 + }, + { + "epoch": 0.943541382305497, + "grad_norm": 1435.7166748046875, + "learning_rate": 1.8561214365059033e-07, + "loss": 75.1003, + "step": 114060 + }, + { + "epoch": 0.9436241055548662, + "grad_norm": 584.7539672851562, + "learning_rate": 1.8523162284189377e-07, + "loss": 85.8287, + "step": 114070 + }, + { + "epoch": 0.9437068288042354, + "grad_norm": 553.2955932617188, + "learning_rate": 1.848514851222205e-07, + "loss": 86.0015, + "step": 114080 + }, + { + "epoch": 0.9437895520536047, + "grad_norm": 628.0018310546875, + "learning_rate": 1.8447173052181577e-07, + "loss": 81.3418, + "step": 114090 + }, + { + "epoch": 0.9438722753029739, + "grad_norm": 708.7710571289062, + "learning_rate": 1.8409235907089484e-07, + "loss": 91.275, + "step": 114100 + }, + { + "epoch": 0.9439549985523431, + "grad_norm": 712.0585327148438, + "learning_rate": 1.8371337079964303e-07, + "loss": 99.5175, + "step": 114110 + }, + { + "epoch": 0.9440377218017124, + "grad_norm": 1265.697021484375, + "learning_rate": 1.8333476573821395e-07, + "loss": 103.0046, + "step": 114120 + }, + { + "epoch": 0.9441204450510816, + "grad_norm": 937.801025390625, + "learning_rate": 1.8295654391673245e-07, + "loss": 89.4391, + "step": 114130 + }, + { + "epoch": 0.9442031683004508, + "grad_norm": 484.9341125488281, + "learning_rate": 1.8257870536529167e-07, + "loss": 118.8798, + "step": 114140 + }, + { + "epoch": 0.9442858915498201, + "grad_norm": 604.8045654296875, + "learning_rate": 1.8220125011395419e-07, + "loss": 78.082, + "step": 114150 + }, + { + "epoch": 0.9443686147991893, + "grad_norm": 624.2005615234375, + "learning_rate": 1.8182417819275266e-07, + "loss": 70.6287, + "step": 114160 + }, + { + "epoch": 0.9444513380485585, + "grad_norm": 622.5794067382812, + "learning_rate": 1.8144748963168924e-07, + "loss": 80.9847, + "step": 114170 + }, + { + "epoch": 0.9445340612979278, + "grad_norm": 371.8035888671875, + "learning_rate": 1.8107118446073492e-07, + "loss": 83.7294, + "step": 114180 + }, + { + "epoch": 0.944616784547297, + "grad_norm": 911.8489379882812, + "learning_rate": 1.806952627098296e-07, + "loss": 69.7044, + "step": 114190 + }, + { + "epoch": 0.9446995077966662, + "grad_norm": 649.6644287109375, + "learning_rate": 1.8031972440888556e-07, + "loss": 99.0398, + "step": 114200 + }, + { + "epoch": 0.9447822310460355, + "grad_norm": 1058.6116943359375, + "learning_rate": 1.799445695877805e-07, + "loss": 105.9399, + "step": 114210 + }, + { + "epoch": 0.9448649542954047, + "grad_norm": 1170.114013671875, + "learning_rate": 1.7956979827636556e-07, + "loss": 95.6862, + "step": 114220 + }, + { + "epoch": 0.9449476775447739, + "grad_norm": 1229.6246337890625, + "learning_rate": 1.791954105044591e-07, + "loss": 83.8179, + "step": 114230 + }, + { + "epoch": 0.9450304007941432, + "grad_norm": 
673.4191284179688, + "learning_rate": 1.788214063018495e-07, + "loss": 78.5252, + "step": 114240 + }, + { + "epoch": 0.9451131240435124, + "grad_norm": 1112.168212890625, + "learning_rate": 1.7844778569829412e-07, + "loss": 80.5072, + "step": 114250 + }, + { + "epoch": 0.9451958472928816, + "grad_norm": 1105.3406982421875, + "learning_rate": 1.7807454872352137e-07, + "loss": 76.1349, + "step": 114260 + }, + { + "epoch": 0.945278570542251, + "grad_norm": 727.6226806640625, + "learning_rate": 1.7770169540722638e-07, + "loss": 81.8822, + "step": 114270 + }, + { + "epoch": 0.9453612937916201, + "grad_norm": 932.0345458984375, + "learning_rate": 1.7732922577907595e-07, + "loss": 89.935, + "step": 114280 + }, + { + "epoch": 0.9454440170409893, + "grad_norm": 780.9658813476562, + "learning_rate": 1.769571398687059e-07, + "loss": 95.7571, + "step": 114290 + }, + { + "epoch": 0.9455267402903587, + "grad_norm": 883.4459838867188, + "learning_rate": 1.765854377057219e-07, + "loss": 72.5025, + "step": 114300 + }, + { + "epoch": 0.9456094635397279, + "grad_norm": 925.8482055664062, + "learning_rate": 1.76214119319697e-07, + "loss": 86.9446, + "step": 114310 + }, + { + "epoch": 0.9456921867890971, + "grad_norm": 770.7780151367188, + "learning_rate": 1.758431847401776e-07, + "loss": 78.9522, + "step": 114320 + }, + { + "epoch": 0.9457749100384664, + "grad_norm": 670.9049682617188, + "learning_rate": 1.7547263399667558e-07, + "loss": 84.8401, + "step": 114330 + }, + { + "epoch": 0.9458576332878356, + "grad_norm": 637.699951171875, + "learning_rate": 1.7510246711867572e-07, + "loss": 91.2389, + "step": 114340 + }, + { + "epoch": 0.9459403565372048, + "grad_norm": 908.4950561523438, + "learning_rate": 1.7473268413562837e-07, + "loss": 92.7157, + "step": 114350 + }, + { + "epoch": 0.946023079786574, + "grad_norm": 778.84765625, + "learning_rate": 1.743632850769561e-07, + "loss": 84.7941, + "step": 114360 + }, + { + "epoch": 0.9461058030359433, + "grad_norm": 945.8580322265625, + "learning_rate": 1.739942699720504e-07, + "loss": 124.0557, + "step": 114370 + }, + { + "epoch": 0.9461885262853125, + "grad_norm": 716.9354248046875, + "learning_rate": 1.7362563885027272e-07, + "loss": 101.1986, + "step": 114380 + }, + { + "epoch": 0.9462712495346817, + "grad_norm": 920.9649047851562, + "learning_rate": 1.7325739174095302e-07, + "loss": 72.5662, + "step": 114390 + }, + { + "epoch": 0.946353972784051, + "grad_norm": 918.943115234375, + "learning_rate": 1.728895286733906e-07, + "loss": 63.3512, + "step": 114400 + }, + { + "epoch": 0.9464366960334202, + "grad_norm": 959.2852172851562, + "learning_rate": 1.7252204967685427e-07, + "loss": 78.725, + "step": 114410 + }, + { + "epoch": 0.9465194192827894, + "grad_norm": 519.7755126953125, + "learning_rate": 1.7215495478058397e-07, + "loss": 80.2883, + "step": 114420 + }, + { + "epoch": 0.9466021425321587, + "grad_norm": 1290.6314697265625, + "learning_rate": 1.7178824401378802e-07, + "loss": 74.9698, + "step": 114430 + }, + { + "epoch": 0.9466848657815279, + "grad_norm": 840.70068359375, + "learning_rate": 1.7142191740564196e-07, + "loss": 72.1562, + "step": 114440 + }, + { + "epoch": 0.9467675890308971, + "grad_norm": 853.8070068359375, + "learning_rate": 1.7105597498529358e-07, + "loss": 80.7684, + "step": 114450 + }, + { + "epoch": 0.9468503122802664, + "grad_norm": 1219.2557373046875, + "learning_rate": 1.7069041678186017e-07, + "loss": 85.5803, + "step": 114460 + }, + { + "epoch": 0.9469330355296356, + "grad_norm": 652.1727294921875, + "learning_rate": 
1.7032524282442618e-07, + "loss": 74.4581, + "step": 114470 + }, + { + "epoch": 0.9470157587790048, + "grad_norm": 469.0629577636719, + "learning_rate": 1.6996045314204734e-07, + "loss": 71.7418, + "step": 114480 + }, + { + "epoch": 0.9470984820283741, + "grad_norm": 666.7960815429688, + "learning_rate": 1.6959604776374871e-07, + "loss": 87.4047, + "step": 114490 + }, + { + "epoch": 0.9471812052777433, + "grad_norm": 587.7761840820312, + "learning_rate": 1.6923202671852379e-07, + "loss": 122.6015, + "step": 114500 + }, + { + "epoch": 0.9472639285271125, + "grad_norm": 1696.044677734375, + "learning_rate": 1.688683900353366e-07, + "loss": 96.9965, + "step": 114510 + }, + { + "epoch": 0.9473466517764818, + "grad_norm": 692.3433837890625, + "learning_rate": 1.6850513774311906e-07, + "loss": 80.0998, + "step": 114520 + }, + { + "epoch": 0.947429375025851, + "grad_norm": 755.7346801757812, + "learning_rate": 1.6814226987077464e-07, + "loss": 77.0792, + "step": 114530 + }, + { + "epoch": 0.9475120982752202, + "grad_norm": 481.3829345703125, + "learning_rate": 1.6777978644717474e-07, + "loss": 63.8102, + "step": 114540 + }, + { + "epoch": 0.9475948215245895, + "grad_norm": 654.6975708007812, + "learning_rate": 1.6741768750116017e-07, + "loss": 56.0552, + "step": 114550 + }, + { + "epoch": 0.9476775447739587, + "grad_norm": 745.0943603515625, + "learning_rate": 1.670559730615412e-07, + "loss": 76.7988, + "step": 114560 + }, + { + "epoch": 0.9477602680233279, + "grad_norm": 604.7420654296875, + "learning_rate": 1.6669464315709872e-07, + "loss": 75.703, + "step": 114570 + }, + { + "epoch": 0.9478429912726972, + "grad_norm": 1340.7713623046875, + "learning_rate": 1.6633369781658137e-07, + "loss": 78.7956, + "step": 114580 + }, + { + "epoch": 0.9479257145220664, + "grad_norm": 1017.1603393554688, + "learning_rate": 1.6597313706870842e-07, + "loss": 88.8706, + "step": 114590 + }, + { + "epoch": 0.9480084377714356, + "grad_norm": 465.18914794921875, + "learning_rate": 1.656129609421675e-07, + "loss": 90.0202, + "step": 114600 + }, + { + "epoch": 0.9480911610208049, + "grad_norm": 746.8944091796875, + "learning_rate": 1.6525316946561675e-07, + "loss": 69.2716, + "step": 114610 + }, + { + "epoch": 0.9481738842701741, + "grad_norm": 557.8377075195312, + "learning_rate": 1.648937626676822e-07, + "loss": 73.0987, + "step": 114620 + }, + { + "epoch": 0.9482566075195433, + "grad_norm": 1114.826171875, + "learning_rate": 1.6453474057696152e-07, + "loss": 73.0719, + "step": 114630 + }, + { + "epoch": 0.9483393307689126, + "grad_norm": 1133.66796875, + "learning_rate": 1.6417610322201904e-07, + "loss": 84.8226, + "step": 114640 + }, + { + "epoch": 0.9484220540182818, + "grad_norm": 638.8233032226562, + "learning_rate": 1.6381785063139144e-07, + "loss": 58.4258, + "step": 114650 + }, + { + "epoch": 0.948504777267651, + "grad_norm": 581.8013916015625, + "learning_rate": 1.6345998283358145e-07, + "loss": 90.1874, + "step": 114660 + }, + { + "epoch": 0.9485875005170203, + "grad_norm": 584.7073974609375, + "learning_rate": 1.631024998570646e-07, + "loss": 108.2388, + "step": 114670 + }, + { + "epoch": 0.9486702237663895, + "grad_norm": 1126.181884765625, + "learning_rate": 1.6274540173028318e-07, + "loss": 92.8387, + "step": 114680 + }, + { + "epoch": 0.9487529470157587, + "grad_norm": 823.510986328125, + "learning_rate": 1.6238868848165056e-07, + "loss": 86.9546, + "step": 114690 + }, + { + "epoch": 0.948835670265128, + "grad_norm": 1618.146728515625, + "learning_rate": 1.6203236013954792e-07, + "loss": 113.7917, + 
"step": 114700 + }, + { + "epoch": 0.9489183935144972, + "grad_norm": 6492.607421875, + "learning_rate": 1.6167641673232703e-07, + "loss": 122.2078, + "step": 114710 + }, + { + "epoch": 0.9490011167638664, + "grad_norm": 1291.8187255859375, + "learning_rate": 1.613208582883091e-07, + "loss": 75.8952, + "step": 114720 + }, + { + "epoch": 0.9490838400132358, + "grad_norm": 712.4785766601562, + "learning_rate": 1.609656848357838e-07, + "loss": 69.3616, + "step": 114730 + }, + { + "epoch": 0.949166563262605, + "grad_norm": 1505.3465576171875, + "learning_rate": 1.6061089640301063e-07, + "loss": 88.8558, + "step": 114740 + }, + { + "epoch": 0.9492492865119742, + "grad_norm": 827.9591064453125, + "learning_rate": 1.6025649301821877e-07, + "loss": 93.01, + "step": 114750 + }, + { + "epoch": 0.9493320097613435, + "grad_norm": 877.7481689453125, + "learning_rate": 1.599024747096062e-07, + "loss": 93.6232, + "step": 114760 + }, + { + "epoch": 0.9494147330107127, + "grad_norm": 1036.861083984375, + "learning_rate": 1.595488415053409e-07, + "loss": 90.6146, + "step": 114770 + }, + { + "epoch": 0.9494974562600819, + "grad_norm": 577.421875, + "learning_rate": 1.591955934335593e-07, + "loss": 68.6489, + "step": 114780 + }, + { + "epoch": 0.9495801795094512, + "grad_norm": 877.7052612304688, + "learning_rate": 1.588427305223683e-07, + "loss": 94.7773, + "step": 114790 + }, + { + "epoch": 0.9496629027588204, + "grad_norm": 977.225341796875, + "learning_rate": 1.584902527998433e-07, + "loss": 80.5525, + "step": 114800 + }, + { + "epoch": 0.9497456260081896, + "grad_norm": 1442.789794921875, + "learning_rate": 1.5813816029402963e-07, + "loss": 83.8474, + "step": 114810 + }, + { + "epoch": 0.9498283492575589, + "grad_norm": 750.039306640625, + "learning_rate": 1.5778645303294094e-07, + "loss": 82.7841, + "step": 114820 + }, + { + "epoch": 0.9499110725069281, + "grad_norm": 716.677490234375, + "learning_rate": 1.5743513104456154e-07, + "loss": 83.8299, + "step": 114830 + }, + { + "epoch": 0.9499937957562973, + "grad_norm": 688.27685546875, + "learning_rate": 1.5708419435684463e-07, + "loss": 93.2964, + "step": 114840 + }, + { + "epoch": 0.9500765190056666, + "grad_norm": 907.2855834960938, + "learning_rate": 1.5673364299771177e-07, + "loss": 100.0956, + "step": 114850 + }, + { + "epoch": 0.9501592422550358, + "grad_norm": 1261.3101806640625, + "learning_rate": 1.5638347699505673e-07, + "loss": 93.222, + "step": 114860 + }, + { + "epoch": 0.950241965504405, + "grad_norm": 672.2671508789062, + "learning_rate": 1.5603369637673727e-07, + "loss": 119.3111, + "step": 114870 + }, + { + "epoch": 0.9503246887537743, + "grad_norm": 998.6853637695312, + "learning_rate": 1.5568430117058718e-07, + "loss": 69.6595, + "step": 114880 + }, + { + "epoch": 0.9504074120031435, + "grad_norm": 602.7067260742188, + "learning_rate": 1.553352914044043e-07, + "loss": 88.5864, + "step": 114890 + }, + { + "epoch": 0.9504901352525127, + "grad_norm": 914.7874145507812, + "learning_rate": 1.5498666710595855e-07, + "loss": 66.891, + "step": 114900 + }, + { + "epoch": 0.950572858501882, + "grad_norm": 843.3950805664062, + "learning_rate": 1.5463842830298782e-07, + "loss": 80.3389, + "step": 114910 + }, + { + "epoch": 0.9506555817512512, + "grad_norm": 720.8048706054688, + "learning_rate": 1.5429057502320045e-07, + "loss": 82.6339, + "step": 114920 + }, + { + "epoch": 0.9507383050006204, + "grad_norm": 1675.1365966796875, + "learning_rate": 1.5394310729427265e-07, + "loss": 100.8808, + "step": 114930 + }, + { + "epoch": 0.9508210282499897, + 
"grad_norm": 751.2517700195312, + "learning_rate": 1.535960251438523e-07, + "loss": 80.3977, + "step": 114940 + }, + { + "epoch": 0.9509037514993589, + "grad_norm": 557.0956420898438, + "learning_rate": 1.53249328599554e-07, + "loss": 75.274, + "step": 114950 + }, + { + "epoch": 0.9509864747487281, + "grad_norm": 1328.832275390625, + "learning_rate": 1.5290301768896287e-07, + "loss": 63.9589, + "step": 114960 + }, + { + "epoch": 0.9510691979980974, + "grad_norm": 811.1527709960938, + "learning_rate": 1.5255709243963246e-07, + "loss": 86.3641, + "step": 114970 + }, + { + "epoch": 0.9511519212474666, + "grad_norm": 1016.0829467773438, + "learning_rate": 1.5221155287908851e-07, + "loss": 80.3929, + "step": 114980 + }, + { + "epoch": 0.9512346444968358, + "grad_norm": 1659.81787109375, + "learning_rate": 1.518663990348229e-07, + "loss": 106.4811, + "step": 114990 + }, + { + "epoch": 0.9513173677462051, + "grad_norm": 682.6334838867188, + "learning_rate": 1.5152163093429762e-07, + "loss": 82.3017, + "step": 115000 + }, + { + "epoch": 0.9514000909955743, + "grad_norm": 1205.90576171875, + "learning_rate": 1.5117724860494509e-07, + "loss": 84.5935, + "step": 115010 + }, + { + "epoch": 0.9514828142449435, + "grad_norm": 691.462890625, + "learning_rate": 1.5083325207416565e-07, + "loss": 72.7825, + "step": 115020 + }, + { + "epoch": 0.9515655374943128, + "grad_norm": 810.3004150390625, + "learning_rate": 1.504896413693302e-07, + "loss": 85.171, + "step": 115030 + }, + { + "epoch": 0.951648260743682, + "grad_norm": 750.6644897460938, + "learning_rate": 1.501464165177774e-07, + "loss": 94.0332, + "step": 115040 + }, + { + "epoch": 0.9517309839930512, + "grad_norm": 463.6679992675781, + "learning_rate": 1.4980357754681595e-07, + "loss": 81.951, + "step": 115050 + }, + { + "epoch": 0.9518137072424205, + "grad_norm": 824.5008544921875, + "learning_rate": 1.4946112448372462e-07, + "loss": 96.1971, + "step": 115060 + }, + { + "epoch": 0.9518964304917897, + "grad_norm": 629.86572265625, + "learning_rate": 1.491190573557505e-07, + "loss": 87.0432, + "step": 115070 + }, + { + "epoch": 0.9519791537411589, + "grad_norm": 1388.0849609375, + "learning_rate": 1.4877737619011067e-07, + "loss": 94.4698, + "step": 115080 + }, + { + "epoch": 0.9520618769905281, + "grad_norm": 826.2018432617188, + "learning_rate": 1.4843608101399065e-07, + "loss": 83.7667, + "step": 115090 + }, + { + "epoch": 0.9521446002398974, + "grad_norm": 675.1743774414062, + "learning_rate": 1.4809517185454646e-07, + "loss": 67.9461, + "step": 115100 + }, + { + "epoch": 0.9522273234892666, + "grad_norm": 530.15380859375, + "learning_rate": 1.4775464873890256e-07, + "loss": 54.279, + "step": 115110 + }, + { + "epoch": 0.9523100467386358, + "grad_norm": 862.5327758789062, + "learning_rate": 1.4741451169415165e-07, + "loss": 95.6716, + "step": 115120 + }, + { + "epoch": 0.9523927699880052, + "grad_norm": 1480.7520751953125, + "learning_rate": 1.4707476074735772e-07, + "loss": 112.3083, + "step": 115130 + }, + { + "epoch": 0.9524754932373743, + "grad_norm": 440.8155517578125, + "learning_rate": 1.4673539592555354e-07, + "loss": 67.7535, + "step": 115140 + }, + { + "epoch": 0.9525582164867435, + "grad_norm": 760.7919311523438, + "learning_rate": 1.4639641725574028e-07, + "loss": 81.5349, + "step": 115150 + }, + { + "epoch": 0.9526409397361129, + "grad_norm": 711.9234008789062, + "learning_rate": 1.460578247648886e-07, + "loss": 81.3426, + "step": 115160 + }, + { + "epoch": 0.9527236629854821, + "grad_norm": 728.8473510742188, + "learning_rate": 
1.4571961847993977e-07, + "loss": 85.5738, + "step": 115170 + }, + { + "epoch": 0.9528063862348513, + "grad_norm": 426.4129943847656, + "learning_rate": 1.453817984278022e-07, + "loss": 90.5035, + "step": 115180 + }, + { + "epoch": 0.9528891094842206, + "grad_norm": 623.3121337890625, + "learning_rate": 1.450443646353561e-07, + "loss": 79.0887, + "step": 115190 + }, + { + "epoch": 0.9529718327335898, + "grad_norm": 748.9600219726562, + "learning_rate": 1.4470731712944885e-07, + "loss": 110.1946, + "step": 115200 + }, + { + "epoch": 0.953054555982959, + "grad_norm": 567.2506713867188, + "learning_rate": 1.443706559368968e-07, + "loss": 76.4982, + "step": 115210 + }, + { + "epoch": 0.9531372792323283, + "grad_norm": 406.186279296875, + "learning_rate": 1.4403438108448742e-07, + "loss": 120.5117, + "step": 115220 + }, + { + "epoch": 0.9532200024816975, + "grad_norm": 800.6393432617188, + "learning_rate": 1.436984925989765e-07, + "loss": 77.8392, + "step": 115230 + }, + { + "epoch": 0.9533027257310667, + "grad_norm": 632.5950927734375, + "learning_rate": 1.4336299050708935e-07, + "loss": 69.4117, + "step": 115240 + }, + { + "epoch": 0.953385448980436, + "grad_norm": 855.23974609375, + "learning_rate": 1.4302787483551962e-07, + "loss": 80.8834, + "step": 115250 + }, + { + "epoch": 0.9534681722298052, + "grad_norm": 749.250732421875, + "learning_rate": 1.426931456109315e-07, + "loss": 87.1514, + "step": 115260 + }, + { + "epoch": 0.9535508954791744, + "grad_norm": 503.9537658691406, + "learning_rate": 1.4235880285995762e-07, + "loss": 58.2232, + "step": 115270 + }, + { + "epoch": 0.9536336187285437, + "grad_norm": 932.1503295898438, + "learning_rate": 1.4202484660920057e-07, + "loss": 87.4663, + "step": 115280 + }, + { + "epoch": 0.9537163419779129, + "grad_norm": 973.0960083007812, + "learning_rate": 1.4169127688523187e-07, + "loss": 86.1386, + "step": 115290 + }, + { + "epoch": 0.9537990652272821, + "grad_norm": 702.0614013671875, + "learning_rate": 1.413580937145914e-07, + "loss": 77.9156, + "step": 115300 + }, + { + "epoch": 0.9538817884766514, + "grad_norm": 870.7654418945312, + "learning_rate": 1.410252971237891e-07, + "loss": 92.6622, + "step": 115310 + }, + { + "epoch": 0.9539645117260206, + "grad_norm": 1024.184326171875, + "learning_rate": 1.406928871393043e-07, + "loss": 81.5768, + "step": 115320 + }, + { + "epoch": 0.9540472349753898, + "grad_norm": 1577.88525390625, + "learning_rate": 1.4036086378758474e-07, + "loss": 82.3757, + "step": 115330 + }, + { + "epoch": 0.9541299582247591, + "grad_norm": 652.8252563476562, + "learning_rate": 1.4002922709504874e-07, + "loss": 72.8365, + "step": 115340 + }, + { + "epoch": 0.9542126814741283, + "grad_norm": 1175.390625, + "learning_rate": 1.3969797708808296e-07, + "loss": 84.131, + "step": 115350 + }, + { + "epoch": 0.9542954047234975, + "grad_norm": 1300.7655029296875, + "learning_rate": 1.39367113793043e-07, + "loss": 82.6223, + "step": 115360 + }, + { + "epoch": 0.9543781279728668, + "grad_norm": 706.9264526367188, + "learning_rate": 1.390366372362556e-07, + "loss": 95.0086, + "step": 115370 + }, + { + "epoch": 0.954460851222236, + "grad_norm": 700.4502563476562, + "learning_rate": 1.3870654744401358e-07, + "loss": 83.8916, + "step": 115380 + }, + { + "epoch": 0.9545435744716052, + "grad_norm": 775.755859375, + "learning_rate": 1.3837684444258092e-07, + "loss": 76.1233, + "step": 115390 + }, + { + "epoch": 0.9546262977209745, + "grad_norm": 778.3601684570312, + "learning_rate": 1.3804752825819113e-07, + "loss": 90.3711, + "step": 115400 + 
}, + { + "epoch": 0.9547090209703437, + "grad_norm": 1078.059326171875, + "learning_rate": 1.3771859891704653e-07, + "loss": 66.5463, + "step": 115410 + }, + { + "epoch": 0.9547917442197129, + "grad_norm": 954.205322265625, + "learning_rate": 1.373900564453179e-07, + "loss": 80.5711, + "step": 115420 + }, + { + "epoch": 0.9548744674690822, + "grad_norm": 1166.7353515625, + "learning_rate": 1.3706190086914595e-07, + "loss": 74.4393, + "step": 115430 + }, + { + "epoch": 0.9549571907184514, + "grad_norm": 567.34423828125, + "learning_rate": 1.3673413221464039e-07, + "loss": 75.5963, + "step": 115440 + }, + { + "epoch": 0.9550399139678206, + "grad_norm": 1016.5427856445312, + "learning_rate": 1.3640675050788088e-07, + "loss": 82.2038, + "step": 115450 + }, + { + "epoch": 0.9551226372171899, + "grad_norm": 744.4939575195312, + "learning_rate": 1.360797557749155e-07, + "loss": 81.3256, + "step": 115460 + }, + { + "epoch": 0.9552053604665591, + "grad_norm": 757.7814331054688, + "learning_rate": 1.3575314804176176e-07, + "loss": 68.7753, + "step": 115470 + }, + { + "epoch": 0.9552880837159283, + "grad_norm": 502.91387939453125, + "learning_rate": 1.3542692733440555e-07, + "loss": 86.8093, + "step": 115480 + }, + { + "epoch": 0.9553708069652976, + "grad_norm": 464.1839904785156, + "learning_rate": 1.3510109367880387e-07, + "loss": 91.0751, + "step": 115490 + }, + { + "epoch": 0.9554535302146668, + "grad_norm": 1076.8289794921875, + "learning_rate": 1.3477564710088097e-07, + "loss": 95.7027, + "step": 115500 + }, + { + "epoch": 0.955536253464036, + "grad_norm": 701.7645874023438, + "learning_rate": 1.3445058762653174e-07, + "loss": 91.7253, + "step": 115510 + }, + { + "epoch": 0.9556189767134053, + "grad_norm": 1526.50439453125, + "learning_rate": 1.3412591528161935e-07, + "loss": 109.7217, + "step": 115520 + }, + { + "epoch": 0.9557016999627745, + "grad_norm": 1529.469482421875, + "learning_rate": 1.338016300919759e-07, + "loss": 80.6137, + "step": 115530 + }, + { + "epoch": 0.9557844232121437, + "grad_norm": 649.20068359375, + "learning_rate": 1.3347773208340464e-07, + "loss": 82.6933, + "step": 115540 + }, + { + "epoch": 0.955867146461513, + "grad_norm": 1042.389404296875, + "learning_rate": 1.3315422128167555e-07, + "loss": 79.4586, + "step": 115550 + }, + { + "epoch": 0.9559498697108822, + "grad_norm": 921.5702514648438, + "learning_rate": 1.3283109771252966e-07, + "loss": 88.0491, + "step": 115560 + }, + { + "epoch": 0.9560325929602514, + "grad_norm": 533.2924194335938, + "learning_rate": 1.3250836140167588e-07, + "loss": 63.4962, + "step": 115570 + }, + { + "epoch": 0.9561153162096208, + "grad_norm": 578.7036743164062, + "learning_rate": 1.3218601237479255e-07, + "loss": 78.6079, + "step": 115580 + }, + { + "epoch": 0.95619803945899, + "grad_norm": 1517.4697265625, + "learning_rate": 1.3186405065752861e-07, + "loss": 79.1212, + "step": 115590 + }, + { + "epoch": 0.9562807627083592, + "grad_norm": 694.7293090820312, + "learning_rate": 1.315424762755002e-07, + "loss": 75.5039, + "step": 115600 + }, + { + "epoch": 0.9563634859577285, + "grad_norm": 866.1787719726562, + "learning_rate": 1.3122128925429356e-07, + "loss": 104.6923, + "step": 115610 + }, + { + "epoch": 0.9564462092070977, + "grad_norm": 821.7919921875, + "learning_rate": 1.3090048961946433e-07, + "loss": 111.5836, + "step": 115620 + }, + { + "epoch": 0.9565289324564669, + "grad_norm": 1143.0037841796875, + "learning_rate": 1.305800773965371e-07, + "loss": 107.7157, + "step": 115630 + }, + { + "epoch": 0.9566116557058362, + 
"grad_norm": 874.7697143554688, + "learning_rate": 1.3026005261100537e-07, + "loss": 58.3183, + "step": 115640 + }, + { + "epoch": 0.9566943789552054, + "grad_norm": 968.6356201171875, + "learning_rate": 1.2994041528833267e-07, + "loss": 73.539, + "step": 115650 + }, + { + "epoch": 0.9567771022045746, + "grad_norm": 657.7074584960938, + "learning_rate": 1.2962116545394977e-07, + "loss": 88.1406, + "step": 115660 + }, + { + "epoch": 0.9568598254539439, + "grad_norm": 964.78662109375, + "learning_rate": 1.2930230313325908e-07, + "loss": 92.0967, + "step": 115670 + }, + { + "epoch": 0.9569425487033131, + "grad_norm": 537.0193481445312, + "learning_rate": 1.2898382835163093e-07, + "loss": 71.7896, + "step": 115680 + }, + { + "epoch": 0.9570252719526823, + "grad_norm": 1111.5057373046875, + "learning_rate": 1.2866574113440444e-07, + "loss": 86.3393, + "step": 115690 + }, + { + "epoch": 0.9571079952020516, + "grad_norm": 862.1448364257812, + "learning_rate": 1.2834804150688828e-07, + "loss": 85.4919, + "step": 115700 + }, + { + "epoch": 0.9571907184514208, + "grad_norm": 816.7103271484375, + "learning_rate": 1.2803072949436058e-07, + "loss": 83.843, + "step": 115710 + }, + { + "epoch": 0.95727344170079, + "grad_norm": 776.513671875, + "learning_rate": 1.277138051220689e-07, + "loss": 98.6719, + "step": 115720 + }, + { + "epoch": 0.9573561649501593, + "grad_norm": 596.2342529296875, + "learning_rate": 1.2739726841522858e-07, + "loss": 82.0256, + "step": 115730 + }, + { + "epoch": 0.9574388881995285, + "grad_norm": 825.4178466796875, + "learning_rate": 1.270811193990257e-07, + "loss": 86.8246, + "step": 115740 + }, + { + "epoch": 0.9575216114488977, + "grad_norm": 740.2637329101562, + "learning_rate": 1.267653580986139e-07, + "loss": 74.5113, + "step": 115750 + }, + { + "epoch": 0.957604334698267, + "grad_norm": 432.4769592285156, + "learning_rate": 1.2644998453911762e-07, + "loss": 73.2089, + "step": 115760 + }, + { + "epoch": 0.9576870579476362, + "grad_norm": 2192.880126953125, + "learning_rate": 1.2613499874563006e-07, + "loss": 113.4138, + "step": 115770 + }, + { + "epoch": 0.9577697811970054, + "grad_norm": 771.948486328125, + "learning_rate": 1.2582040074321177e-07, + "loss": 85.1038, + "step": 115780 + }, + { + "epoch": 0.9578525044463747, + "grad_norm": 656.0718383789062, + "learning_rate": 1.255061905568955e-07, + "loss": 97.9296, + "step": 115790 + }, + { + "epoch": 0.9579352276957439, + "grad_norm": 875.1376953125, + "learning_rate": 1.251923682116807e-07, + "loss": 76.1461, + "step": 115800 + }, + { + "epoch": 0.9580179509451131, + "grad_norm": 622.1165771484375, + "learning_rate": 1.248789337325368e-07, + "loss": 81.3456, + "step": 115810 + }, + { + "epoch": 0.9581006741944823, + "grad_norm": 1635.0670166015625, + "learning_rate": 1.2456588714440167e-07, + "loss": 106.8132, + "step": 115820 + }, + { + "epoch": 0.9581833974438516, + "grad_norm": 610.9708251953125, + "learning_rate": 1.2425322847218368e-07, + "loss": 83.99, + "step": 115830 + }, + { + "epoch": 0.9582661206932208, + "grad_norm": 882.1947631835938, + "learning_rate": 1.239409577407602e-07, + "loss": 74.6342, + "step": 115840 + }, + { + "epoch": 0.95834884394259, + "grad_norm": 1295.30126953125, + "learning_rate": 1.2362907497497633e-07, + "loss": 88.346, + "step": 115850 + }, + { + "epoch": 0.9584315671919593, + "grad_norm": 1216.3671875, + "learning_rate": 1.233175801996478e-07, + "loss": 95.8267, + "step": 115860 + }, + { + "epoch": 0.9585142904413285, + "grad_norm": 1159.141845703125, + "learning_rate": 
1.2300647343955807e-07, + "loss": 98.5586, + "step": 115870 + }, + { + "epoch": 0.9585970136906977, + "grad_norm": 1170.9654541015625, + "learning_rate": 1.2269575471946127e-07, + "loss": 77.9647, + "step": 115880 + }, + { + "epoch": 0.958679736940067, + "grad_norm": 526.090087890625, + "learning_rate": 1.2238542406407984e-07, + "loss": 86.4296, + "step": 115890 + }, + { + "epoch": 0.9587624601894362, + "grad_norm": 970.7127685546875, + "learning_rate": 1.22075481498104e-07, + "loss": 70.3665, + "step": 115900 + }, + { + "epoch": 0.9588451834388054, + "grad_norm": 747.5088500976562, + "learning_rate": 1.2176592704619628e-07, + "loss": 67.7107, + "step": 115910 + }, + { + "epoch": 0.9589279066881747, + "grad_norm": 1059.5880126953125, + "learning_rate": 1.2145676073298473e-07, + "loss": 61.7566, + "step": 115920 + }, + { + "epoch": 0.9590106299375439, + "grad_norm": 982.748046875, + "learning_rate": 1.211479825830697e-07, + "loss": 106.074, + "step": 115930 + }, + { + "epoch": 0.9590933531869131, + "grad_norm": 676.8903198242188, + "learning_rate": 1.2083959262101874e-07, + "loss": 96.3958, + "step": 115940 + }, + { + "epoch": 0.9591760764362824, + "grad_norm": 1083.4862060546875, + "learning_rate": 1.205315908713689e-07, + "loss": 84.409, + "step": 115950 + }, + { + "epoch": 0.9592587996856516, + "grad_norm": 1195.280517578125, + "learning_rate": 1.2022397735862724e-07, + "loss": 94.2063, + "step": 115960 + }, + { + "epoch": 0.9593415229350208, + "grad_norm": 891.7564086914062, + "learning_rate": 1.199167521072686e-07, + "loss": 121.5456, + "step": 115970 + }, + { + "epoch": 0.9594242461843902, + "grad_norm": 679.1651611328125, + "learning_rate": 1.196099151417368e-07, + "loss": 70.1369, + "step": 115980 + }, + { + "epoch": 0.9595069694337593, + "grad_norm": 1112.198486328125, + "learning_rate": 1.1930346648644675e-07, + "loss": 92.8188, + "step": 115990 + }, + { + "epoch": 0.9595896926831285, + "grad_norm": 540.4926147460938, + "learning_rate": 1.1899740616578004e-07, + "loss": 93.057, + "step": 116000 + }, + { + "epoch": 0.9596724159324979, + "grad_norm": 913.5923461914062, + "learning_rate": 1.1869173420408886e-07, + "loss": 83.6311, + "step": 116010 + }, + { + "epoch": 0.9597551391818671, + "grad_norm": 704.4148559570312, + "learning_rate": 1.1838645062569377e-07, + "loss": 82.1583, + "step": 116020 + }, + { + "epoch": 0.9598378624312363, + "grad_norm": 983.6731567382812, + "learning_rate": 1.1808155545488586e-07, + "loss": 71.0212, + "step": 116030 + }, + { + "epoch": 0.9599205856806056, + "grad_norm": 1065.49072265625, + "learning_rate": 1.1777704871592355e-07, + "loss": 97.0757, + "step": 116040 + }, + { + "epoch": 0.9600033089299748, + "grad_norm": 2115.354248046875, + "learning_rate": 1.174729304330352e-07, + "loss": 83.9105, + "step": 116050 + }, + { + "epoch": 0.960086032179344, + "grad_norm": 895.270751953125, + "learning_rate": 1.1716920063041815e-07, + "loss": 94.6967, + "step": 116060 + }, + { + "epoch": 0.9601687554287133, + "grad_norm": 776.3504638671875, + "learning_rate": 1.168658593322386e-07, + "loss": 75.9079, + "step": 116070 + }, + { + "epoch": 0.9602514786780825, + "grad_norm": 933.9031372070312, + "learning_rate": 1.165629065626317e-07, + "loss": 89.017, + "step": 116080 + }, + { + "epoch": 0.9603342019274517, + "grad_norm": 984.0797119140625, + "learning_rate": 1.1626034234570261e-07, + "loss": 112.102, + "step": 116090 + }, + { + "epoch": 0.960416925176821, + "grad_norm": 813.5068969726562, + "learning_rate": 1.1595816670552429e-07, + "loss": 51.8942, + "step": 
116100 + }, + { + "epoch": 0.9604996484261902, + "grad_norm": 717.0478515625, + "learning_rate": 1.1565637966613974e-07, + "loss": 70.7065, + "step": 116110 + }, + { + "epoch": 0.9605823716755594, + "grad_norm": 879.2958984375, + "learning_rate": 1.1535498125156197e-07, + "loss": 81.3121, + "step": 116120 + }, + { + "epoch": 0.9606650949249287, + "grad_norm": 1321.8685302734375, + "learning_rate": 1.1505397148577013e-07, + "loss": 52.1673, + "step": 116130 + }, + { + "epoch": 0.9607478181742979, + "grad_norm": 987.5189208984375, + "learning_rate": 1.1475335039271507e-07, + "loss": 70.81, + "step": 116140 + }, + { + "epoch": 0.9608305414236671, + "grad_norm": 899.2925415039062, + "learning_rate": 1.1445311799631598e-07, + "loss": 92.3291, + "step": 116150 + }, + { + "epoch": 0.9609132646730364, + "grad_norm": 756.8441772460938, + "learning_rate": 1.1415327432046041e-07, + "loss": 112.0039, + "step": 116160 + }, + { + "epoch": 0.9609959879224056, + "grad_norm": 635.4735107421875, + "learning_rate": 1.1385381938900597e-07, + "loss": 71.928, + "step": 116170 + }, + { + "epoch": 0.9610787111717748, + "grad_norm": 4364.9423828125, + "learning_rate": 1.1355475322577858e-07, + "loss": 115.5651, + "step": 116180 + }, + { + "epoch": 0.9611614344211441, + "grad_norm": 619.9732055664062, + "learning_rate": 1.1325607585457366e-07, + "loss": 106.7126, + "step": 116190 + }, + { + "epoch": 0.9612441576705133, + "grad_norm": 897.743408203125, + "learning_rate": 1.1295778729915551e-07, + "loss": 70.6706, + "step": 116200 + }, + { + "epoch": 0.9613268809198825, + "grad_norm": 1066.9715576171875, + "learning_rate": 1.1265988758325742e-07, + "loss": 81.003, + "step": 116210 + }, + { + "epoch": 0.9614096041692518, + "grad_norm": 805.7496337890625, + "learning_rate": 1.1236237673058315e-07, + "loss": 107.8749, + "step": 116220 + }, + { + "epoch": 0.961492327418621, + "grad_norm": 593.794677734375, + "learning_rate": 1.1206525476480323e-07, + "loss": 103.9387, + "step": 116230 + }, + { + "epoch": 0.9615750506679902, + "grad_norm": 1031.0078125, + "learning_rate": 1.1176852170955821e-07, + "loss": 81.512, + "step": 116240 + }, + { + "epoch": 0.9616577739173595, + "grad_norm": 388.48583984375, + "learning_rate": 1.1147217758845752e-07, + "loss": 71.2489, + "step": 116250 + }, + { + "epoch": 0.9617404971667287, + "grad_norm": 1047.6055908203125, + "learning_rate": 1.1117622242508064e-07, + "loss": 84.0228, + "step": 116260 + }, + { + "epoch": 0.9618232204160979, + "grad_norm": 386.8448486328125, + "learning_rate": 1.1088065624297484e-07, + "loss": 51.314, + "step": 116270 + }, + { + "epoch": 0.9619059436654672, + "grad_norm": 1203.3616943359375, + "learning_rate": 1.1058547906565743e-07, + "loss": 67.1805, + "step": 116280 + }, + { + "epoch": 0.9619886669148364, + "grad_norm": 992.1272583007812, + "learning_rate": 1.1029069091661459e-07, + "loss": 103.1852, + "step": 116290 + }, + { + "epoch": 0.9620713901642056, + "grad_norm": 621.4592895507812, + "learning_rate": 1.0999629181929983e-07, + "loss": 73.4462, + "step": 116300 + }, + { + "epoch": 0.9621541134135749, + "grad_norm": 1026.2972412109375, + "learning_rate": 1.0970228179713827e-07, + "loss": 73.2935, + "step": 116310 + }, + { + "epoch": 0.9622368366629441, + "grad_norm": 725.5745239257812, + "learning_rate": 1.0940866087352287e-07, + "loss": 76.0764, + "step": 116320 + }, + { + "epoch": 0.9623195599123133, + "grad_norm": 780.3145141601562, + "learning_rate": 1.0911542907181605e-07, + "loss": 65.7862, + "step": 116330 + }, + { + "epoch": 0.9624022831616826, + 
"grad_norm": 584.7391357421875, + "learning_rate": 1.0882258641534749e-07, + "loss": 66.0809, + "step": 116340 + }, + { + "epoch": 0.9624850064110518, + "grad_norm": 1102.67919921875, + "learning_rate": 1.0853013292741854e-07, + "loss": 74.0244, + "step": 116350 + }, + { + "epoch": 0.962567729660421, + "grad_norm": 282.3551940917969, + "learning_rate": 1.0823806863129838e-07, + "loss": 74.1816, + "step": 116360 + }, + { + "epoch": 0.9626504529097903, + "grad_norm": 876.3226928710938, + "learning_rate": 1.0794639355022507e-07, + "loss": 88.0445, + "step": 116370 + }, + { + "epoch": 0.9627331761591595, + "grad_norm": 719.6063842773438, + "learning_rate": 1.0765510770740506e-07, + "loss": 75.5096, + "step": 116380 + }, + { + "epoch": 0.9628158994085287, + "grad_norm": 1058.7418212890625, + "learning_rate": 1.0736421112601592e-07, + "loss": 88.4987, + "step": 116390 + }, + { + "epoch": 0.962898622657898, + "grad_norm": 544.888671875, + "learning_rate": 1.070737038292019e-07, + "loss": 86.7714, + "step": 116400 + }, + { + "epoch": 0.9629813459072672, + "grad_norm": 1347.0355224609375, + "learning_rate": 1.0678358584007787e-07, + "loss": 100.632, + "step": 116410 + }, + { + "epoch": 0.9630640691566364, + "grad_norm": 557.1162719726562, + "learning_rate": 1.0649385718172756e-07, + "loss": 98.0301, + "step": 116420 + }, + { + "epoch": 0.9631467924060058, + "grad_norm": 601.667236328125, + "learning_rate": 1.0620451787720254e-07, + "loss": 55.4105, + "step": 116430 + }, + { + "epoch": 0.963229515655375, + "grad_norm": 627.3742065429688, + "learning_rate": 1.059155679495244e-07, + "loss": 89.8003, + "step": 116440 + }, + { + "epoch": 0.9633122389047442, + "grad_norm": 612.9856567382812, + "learning_rate": 1.0562700742168364e-07, + "loss": 79.46, + "step": 116450 + }, + { + "epoch": 0.9633949621541135, + "grad_norm": 969.4473876953125, + "learning_rate": 1.0533883631663966e-07, + "loss": 105.5287, + "step": 116460 + }, + { + "epoch": 0.9634776854034827, + "grad_norm": 1106.258544921875, + "learning_rate": 1.0505105465732135e-07, + "loss": 85.9468, + "step": 116470 + }, + { + "epoch": 0.9635604086528519, + "grad_norm": 1332.35546875, + "learning_rate": 1.0476366246662595e-07, + "loss": 84.5516, + "step": 116480 + }, + { + "epoch": 0.9636431319022212, + "grad_norm": 922.864013671875, + "learning_rate": 1.044766597674196e-07, + "loss": 80.9004, + "step": 116490 + }, + { + "epoch": 0.9637258551515904, + "grad_norm": 458.0718688964844, + "learning_rate": 1.0419004658253795e-07, + "loss": 81.3689, + "step": 116500 + }, + { + "epoch": 0.9638085784009596, + "grad_norm": 821.3236083984375, + "learning_rate": 1.0390382293478551e-07, + "loss": 77.9656, + "step": 116510 + }, + { + "epoch": 0.9638913016503289, + "grad_norm": 836.4533081054688, + "learning_rate": 1.036179888469363e-07, + "loss": 127.9579, + "step": 116520 + }, + { + "epoch": 0.9639740248996981, + "grad_norm": 1029.4652099609375, + "learning_rate": 1.0333254434173212e-07, + "loss": 84.6967, + "step": 116530 + }, + { + "epoch": 0.9640567481490673, + "grad_norm": 869.0244750976562, + "learning_rate": 1.0304748944188425e-07, + "loss": 76.1726, + "step": 116540 + }, + { + "epoch": 0.9641394713984365, + "grad_norm": 824.9259033203125, + "learning_rate": 1.0276282417007399e-07, + "loss": 85.9181, + "step": 116550 + }, + { + "epoch": 0.9642221946478058, + "grad_norm": 630.62109375, + "learning_rate": 1.02478548548951e-07, + "loss": 92.7414, + "step": 116560 + }, + { + "epoch": 0.964304917897175, + "grad_norm": 1001.7970581054688, + "learning_rate": 
1.0219466260113276e-07, + "loss": 93.9342, + "step": 116570 + }, + { + "epoch": 0.9643876411465442, + "grad_norm": 711.27392578125, + "learning_rate": 1.0191116634920728e-07, + "loss": 91.1572, + "step": 116580 + }, + { + "epoch": 0.9644703643959135, + "grad_norm": 980.2251586914062, + "learning_rate": 1.0162805981573154e-07, + "loss": 112.6816, + "step": 116590 + }, + { + "epoch": 0.9645530876452827, + "grad_norm": 983.453857421875, + "learning_rate": 1.0134534302323029e-07, + "loss": 99.6782, + "step": 116600 + }, + { + "epoch": 0.9646358108946519, + "grad_norm": 362.87335205078125, + "learning_rate": 1.0106301599419832e-07, + "loss": 80.6802, + "step": 116610 + }, + { + "epoch": 0.9647185341440212, + "grad_norm": 757.7172241210938, + "learning_rate": 1.0078107875109878e-07, + "loss": 75.8858, + "step": 116620 + }, + { + "epoch": 0.9648012573933904, + "grad_norm": 863.5123901367188, + "learning_rate": 1.0049953131636481e-07, + "loss": 93.2976, + "step": 116630 + }, + { + "epoch": 0.9648839806427596, + "grad_norm": 642.6458740234375, + "learning_rate": 1.002183737123974e-07, + "loss": 81.1781, + "step": 116640 + }, + { + "epoch": 0.9649667038921289, + "grad_norm": 626.2711791992188, + "learning_rate": 9.993760596156698e-08, + "loss": 75.3195, + "step": 116650 + }, + { + "epoch": 0.9650494271414981, + "grad_norm": 792.793212890625, + "learning_rate": 9.965722808621403e-08, + "loss": 73.6435, + "step": 116660 + }, + { + "epoch": 0.9651321503908673, + "grad_norm": 803.5825805664062, + "learning_rate": 9.937724010864402e-08, + "loss": 71.446, + "step": 116670 + }, + { + "epoch": 0.9652148736402366, + "grad_norm": 952.573486328125, + "learning_rate": 9.909764205113747e-08, + "loss": 82.1058, + "step": 116680 + }, + { + "epoch": 0.9652975968896058, + "grad_norm": 1239.050048828125, + "learning_rate": 9.881843393593882e-08, + "loss": 83.7428, + "step": 116690 + }, + { + "epoch": 0.965380320138975, + "grad_norm": 1195.249267578125, + "learning_rate": 9.853961578526417e-08, + "loss": 87.2989, + "step": 116700 + }, + { + "epoch": 0.9654630433883443, + "grad_norm": 518.0953369140625, + "learning_rate": 9.826118762129799e-08, + "loss": 81.8561, + "step": 116710 + }, + { + "epoch": 0.9655457666377135, + "grad_norm": 741.3461303710938, + "learning_rate": 9.798314946619258e-08, + "loss": 93.3796, + "step": 116720 + }, + { + "epoch": 0.9656284898870827, + "grad_norm": 578.9274291992188, + "learning_rate": 9.770550134207135e-08, + "loss": 83.2529, + "step": 116730 + }, + { + "epoch": 0.965711213136452, + "grad_norm": 700.8192138671875, + "learning_rate": 9.74282432710244e-08, + "loss": 67.5045, + "step": 116740 + }, + { + "epoch": 0.9657939363858212, + "grad_norm": 965.154541015625, + "learning_rate": 9.715137527511298e-08, + "loss": 94.1983, + "step": 116750 + }, + { + "epoch": 0.9658766596351904, + "grad_norm": 2566.7998046875, + "learning_rate": 9.687489737636502e-08, + "loss": 91.5708, + "step": 116760 + }, + { + "epoch": 0.9659593828845597, + "grad_norm": 775.1097412109375, + "learning_rate": 9.659880959677903e-08, + "loss": 90.2111, + "step": 116770 + }, + { + "epoch": 0.9660421061339289, + "grad_norm": 1181.168212890625, + "learning_rate": 9.632311195832245e-08, + "loss": 104.988, + "step": 116780 + }, + { + "epoch": 0.9661248293832981, + "grad_norm": 812.8969116210938, + "learning_rate": 9.604780448293105e-08, + "loss": 74.2418, + "step": 116790 + }, + { + "epoch": 0.9662075526326674, + "grad_norm": 762.244384765625, + "learning_rate": 9.57728871925101e-08, + "loss": 75.0032, + "step": 116800 + }, 
+ { + "epoch": 0.9662902758820366, + "grad_norm": 1320.201171875, + "learning_rate": 9.549836010893265e-08, + "loss": 107.8506, + "step": 116810 + }, + { + "epoch": 0.9663729991314058, + "grad_norm": 821.8568725585938, + "learning_rate": 9.522422325404234e-08, + "loss": 91.6939, + "step": 116820 + }, + { + "epoch": 0.9664557223807752, + "grad_norm": 1307.53466796875, + "learning_rate": 9.495047664965063e-08, + "loss": 97.0378, + "step": 116830 + }, + { + "epoch": 0.9665384456301443, + "grad_norm": 891.2249145507812, + "learning_rate": 9.467712031753839e-08, + "loss": 80.264, + "step": 116840 + }, + { + "epoch": 0.9666211688795135, + "grad_norm": 5029.44189453125, + "learning_rate": 9.440415427945548e-08, + "loss": 183.4319, + "step": 116850 + }, + { + "epoch": 0.9667038921288829, + "grad_norm": 1075.81103515625, + "learning_rate": 9.413157855712007e-08, + "loss": 81.115, + "step": 116860 + }, + { + "epoch": 0.9667866153782521, + "grad_norm": 3139.591796875, + "learning_rate": 9.385939317221926e-08, + "loss": 154.7868, + "step": 116870 + }, + { + "epoch": 0.9668693386276213, + "grad_norm": 621.9048461914062, + "learning_rate": 9.358759814641127e-08, + "loss": 72.0611, + "step": 116880 + }, + { + "epoch": 0.9669520618769906, + "grad_norm": 1159.2205810546875, + "learning_rate": 9.331619350132049e-08, + "loss": 98.6789, + "step": 116890 + }, + { + "epoch": 0.9670347851263598, + "grad_norm": 1112.920654296875, + "learning_rate": 9.304517925854184e-08, + "loss": 83.0609, + "step": 116900 + }, + { + "epoch": 0.967117508375729, + "grad_norm": 585.3701171875, + "learning_rate": 9.277455543963809e-08, + "loss": 100.5077, + "step": 116910 + }, + { + "epoch": 0.9672002316250983, + "grad_norm": 488.8531494140625, + "learning_rate": 9.250432206614258e-08, + "loss": 84.3317, + "step": 116920 + }, + { + "epoch": 0.9672829548744675, + "grad_norm": 479.1993103027344, + "learning_rate": 9.22344791595553e-08, + "loss": 71.9188, + "step": 116930 + }, + { + "epoch": 0.9673656781238367, + "grad_norm": 1235.1121826171875, + "learning_rate": 9.196502674134689e-08, + "loss": 94.2771, + "step": 116940 + }, + { + "epoch": 0.967448401373206, + "grad_norm": 991.1707153320312, + "learning_rate": 9.169596483295628e-08, + "loss": 105.2712, + "step": 116950 + }, + { + "epoch": 0.9675311246225752, + "grad_norm": 1200.013427734375, + "learning_rate": 9.142729345579193e-08, + "loss": 92.9962, + "step": 116960 + }, + { + "epoch": 0.9676138478719444, + "grad_norm": 757.19580078125, + "learning_rate": 9.115901263123006e-08, + "loss": 83.3054, + "step": 116970 + }, + { + "epoch": 0.9676965711213137, + "grad_norm": 1006.8023071289062, + "learning_rate": 9.089112238061692e-08, + "loss": 100.9199, + "step": 116980 + }, + { + "epoch": 0.9677792943706829, + "grad_norm": 668.6526489257812, + "learning_rate": 9.062362272526825e-08, + "loss": 91.3445, + "step": 116990 + }, + { + "epoch": 0.9678620176200521, + "grad_norm": 1176.7666015625, + "learning_rate": 9.035651368646647e-08, + "loss": 89.7577, + "step": 117000 + }, + { + "epoch": 0.9679447408694214, + "grad_norm": 1044.1488037109375, + "learning_rate": 9.008979528546513e-08, + "loss": 88.0212, + "step": 117010 + }, + { + "epoch": 0.9680274641187906, + "grad_norm": 831.52197265625, + "learning_rate": 8.982346754348503e-08, + "loss": 84.6709, + "step": 117020 + }, + { + "epoch": 0.9681101873681598, + "grad_norm": 926.8240356445312, + "learning_rate": 8.955753048171645e-08, + "loss": 73.5235, + "step": 117030 + }, + { + "epoch": 0.9681929106175291, + "grad_norm": 1101.0938720703125, + 
"learning_rate": 8.929198412131968e-08, + "loss": 92.8208, + "step": 117040 + }, + { + "epoch": 0.9682756338668983, + "grad_norm": 721.4671020507812, + "learning_rate": 8.902682848342282e-08, + "loss": 74.5471, + "step": 117050 + }, + { + "epoch": 0.9683583571162675, + "grad_norm": 922.5578002929688, + "learning_rate": 8.876206358912232e-08, + "loss": 87.3108, + "step": 117060 + }, + { + "epoch": 0.9684410803656368, + "grad_norm": 815.2578735351562, + "learning_rate": 8.849768945948522e-08, + "loss": 89.3768, + "step": 117070 + }, + { + "epoch": 0.968523803615006, + "grad_norm": 905.2554321289062, + "learning_rate": 8.823370611554638e-08, + "loss": 80.9791, + "step": 117080 + }, + { + "epoch": 0.9686065268643752, + "grad_norm": 972.8367919921875, + "learning_rate": 8.797011357830953e-08, + "loss": 137.1645, + "step": 117090 + }, + { + "epoch": 0.9686892501137445, + "grad_norm": 931.77392578125, + "learning_rate": 8.770691186874791e-08, + "loss": 84.7926, + "step": 117100 + }, + { + "epoch": 0.9687719733631137, + "grad_norm": 788.7830200195312, + "learning_rate": 8.744410100780254e-08, + "loss": 58.1295, + "step": 117110 + }, + { + "epoch": 0.9688546966124829, + "grad_norm": 2165.523681640625, + "learning_rate": 8.718168101638446e-08, + "loss": 107.7649, + "step": 117120 + }, + { + "epoch": 0.9689374198618522, + "grad_norm": 1699.0238037109375, + "learning_rate": 8.69196519153731e-08, + "loss": 119.7703, + "step": 117130 + }, + { + "epoch": 0.9690201431112214, + "grad_norm": 1005.6736450195312, + "learning_rate": 8.665801372561677e-08, + "loss": 91.6887, + "step": 117140 + }, + { + "epoch": 0.9691028663605906, + "grad_norm": 997.28857421875, + "learning_rate": 8.639676646793382e-08, + "loss": 78.077, + "step": 117150 + }, + { + "epoch": 0.9691855896099599, + "grad_norm": 868.7511596679688, + "learning_rate": 8.613591016310874e-08, + "loss": 78.0869, + "step": 117160 + }, + { + "epoch": 0.9692683128593291, + "grad_norm": 1064.0950927734375, + "learning_rate": 8.58754448318988e-08, + "loss": 82.3546, + "step": 117170 + }, + { + "epoch": 0.9693510361086983, + "grad_norm": 615.5275268554688, + "learning_rate": 8.561537049502688e-08, + "loss": 108.2288, + "step": 117180 + }, + { + "epoch": 0.9694337593580676, + "grad_norm": 714.61328125, + "learning_rate": 8.535568717318533e-08, + "loss": 79.6565, + "step": 117190 + }, + { + "epoch": 0.9695164826074368, + "grad_norm": 695.4380493164062, + "learning_rate": 8.509639488703703e-08, + "loss": 81.4569, + "step": 117200 + }, + { + "epoch": 0.969599205856806, + "grad_norm": 1146.906005859375, + "learning_rate": 8.483749365721217e-08, + "loss": 87.309, + "step": 117210 + }, + { + "epoch": 0.9696819291061753, + "grad_norm": 1346.7725830078125, + "learning_rate": 8.457898350430982e-08, + "loss": 99.9304, + "step": 117220 + }, + { + "epoch": 0.9697646523555445, + "grad_norm": 692.3303833007812, + "learning_rate": 8.432086444889964e-08, + "loss": 82.1651, + "step": 117230 + }, + { + "epoch": 0.9698473756049137, + "grad_norm": 876.026123046875, + "learning_rate": 8.406313651151799e-08, + "loss": 124.7427, + "step": 117240 + }, + { + "epoch": 0.969930098854283, + "grad_norm": 654.4212646484375, + "learning_rate": 8.380579971267178e-08, + "loss": 60.3869, + "step": 117250 + }, + { + "epoch": 0.9700128221036523, + "grad_norm": 741.6707763671875, + "learning_rate": 8.354885407283574e-08, + "loss": 86.2612, + "step": 117260 + }, + { + "epoch": 0.9700955453530214, + "grad_norm": 688.7070922851562, + "learning_rate": 8.329229961245355e-08, + "loss": 73.4818, + 
"step": 117270 + }, + { + "epoch": 0.9701782686023906, + "grad_norm": 632.3734130859375, + "learning_rate": 8.303613635193886e-08, + "loss": 71.4801, + "step": 117280 + }, + { + "epoch": 0.97026099185176, + "grad_norm": 1043.305419921875, + "learning_rate": 8.278036431167313e-08, + "loss": 108.113, + "step": 117290 + }, + { + "epoch": 0.9703437151011292, + "grad_norm": 680.9979248046875, + "learning_rate": 8.252498351200621e-08, + "loss": 82.1075, + "step": 117300 + }, + { + "epoch": 0.9704264383504984, + "grad_norm": 1170.890380859375, + "learning_rate": 8.226999397325852e-08, + "loss": 78.4652, + "step": 117310 + }, + { + "epoch": 0.9705091615998677, + "grad_norm": 801.5345458984375, + "learning_rate": 8.201539571571826e-08, + "loss": 93.103, + "step": 117320 + }, + { + "epoch": 0.9705918848492369, + "grad_norm": 694.1389770507812, + "learning_rate": 8.176118875964201e-08, + "loss": 105.483, + "step": 117330 + }, + { + "epoch": 0.9706746080986061, + "grad_norm": 702.7589111328125, + "learning_rate": 8.150737312525692e-08, + "loss": 80.5185, + "step": 117340 + }, + { + "epoch": 0.9707573313479754, + "grad_norm": 735.4678955078125, + "learning_rate": 8.125394883275683e-08, + "loss": 95.8936, + "step": 117350 + }, + { + "epoch": 0.9708400545973446, + "grad_norm": 704.5406494140625, + "learning_rate": 8.100091590230618e-08, + "loss": 103.3761, + "step": 117360 + }, + { + "epoch": 0.9709227778467138, + "grad_norm": 966.3074340820312, + "learning_rate": 8.07482743540372e-08, + "loss": 101.2361, + "step": 117370 + }, + { + "epoch": 0.9710055010960831, + "grad_norm": 1027.426513671875, + "learning_rate": 8.049602420805214e-08, + "loss": 84.4538, + "step": 117380 + }, + { + "epoch": 0.9710882243454523, + "grad_norm": 901.7965087890625, + "learning_rate": 8.024416548442104e-08, + "loss": 86.5814, + "step": 117390 + }, + { + "epoch": 0.9711709475948215, + "grad_norm": 1091.1844482421875, + "learning_rate": 7.99926982031829e-08, + "loss": 99.3349, + "step": 117400 + }, + { + "epoch": 0.9712536708441908, + "grad_norm": 709.89697265625, + "learning_rate": 7.974162238434557e-08, + "loss": 76.4967, + "step": 117410 + }, + { + "epoch": 0.97133639409356, + "grad_norm": 859.7228393554688, + "learning_rate": 7.949093804788699e-08, + "loss": 87.3181, + "step": 117420 + }, + { + "epoch": 0.9714191173429292, + "grad_norm": 662.0986328125, + "learning_rate": 7.924064521375174e-08, + "loss": 80.255, + "step": 117430 + }, + { + "epoch": 0.9715018405922985, + "grad_norm": 804.8336791992188, + "learning_rate": 7.899074390185557e-08, + "loss": 82.7631, + "step": 117440 + }, + { + "epoch": 0.9715845638416677, + "grad_norm": 800.1364135742188, + "learning_rate": 7.874123413208145e-08, + "loss": 74.2819, + "step": 117450 + }, + { + "epoch": 0.9716672870910369, + "grad_norm": 465.7279052734375, + "learning_rate": 7.849211592428186e-08, + "loss": 66.4609, + "step": 117460 + }, + { + "epoch": 0.9717500103404062, + "grad_norm": 444.983154296875, + "learning_rate": 7.824338929827813e-08, + "loss": 72.5292, + "step": 117470 + }, + { + "epoch": 0.9718327335897754, + "grad_norm": 253.49969482421875, + "learning_rate": 7.799505427386001e-08, + "loss": 67.4403, + "step": 117480 + }, + { + "epoch": 0.9719154568391446, + "grad_norm": 559.1021118164062, + "learning_rate": 7.774711087078612e-08, + "loss": 71.8914, + "step": 117490 + }, + { + "epoch": 0.9719981800885139, + "grad_norm": 1051.5477294921875, + "learning_rate": 7.749955910878459e-08, + "loss": 80.7977, + "step": 117500 + }, + { + "epoch": 0.9720809033378831, + 
"grad_norm": 739.0164794921875, + "learning_rate": 7.725239900755244e-08, + "loss": 62.2263, + "step": 117510 + }, + { + "epoch": 0.9721636265872523, + "grad_norm": 1051.24560546875, + "learning_rate": 7.700563058675448e-08, + "loss": 72.5346, + "step": 117520 + }, + { + "epoch": 0.9722463498366216, + "grad_norm": 876.2381591796875, + "learning_rate": 7.67592538660239e-08, + "loss": 93.1742, + "step": 117530 + }, + { + "epoch": 0.9723290730859908, + "grad_norm": 892.1935424804688, + "learning_rate": 7.651326886496613e-08, + "loss": 75.8199, + "step": 117540 + }, + { + "epoch": 0.97241179633536, + "grad_norm": 513.9732055664062, + "learning_rate": 7.626767560315107e-08, + "loss": 75.7576, + "step": 117550 + }, + { + "epoch": 0.9724945195847293, + "grad_norm": 822.49267578125, + "learning_rate": 7.602247410012032e-08, + "loss": 82.0519, + "step": 117560 + }, + { + "epoch": 0.9725772428340985, + "grad_norm": 927.3632202148438, + "learning_rate": 7.577766437538325e-08, + "loss": 98.2942, + "step": 117570 + }, + { + "epoch": 0.9726599660834677, + "grad_norm": 772.4122314453125, + "learning_rate": 7.553324644841875e-08, + "loss": 106.7597, + "step": 117580 + }, + { + "epoch": 0.972742689332837, + "grad_norm": 796.9185791015625, + "learning_rate": 7.528922033867347e-08, + "loss": 103.0361, + "step": 117590 + }, + { + "epoch": 0.9728254125822062, + "grad_norm": 881.1489868164062, + "learning_rate": 7.5045586065563e-08, + "loss": 84.0167, + "step": 117600 + }, + { + "epoch": 0.9729081358315754, + "grad_norm": 965.630615234375, + "learning_rate": 7.480234364847349e-08, + "loss": 93.8757, + "step": 117610 + }, + { + "epoch": 0.9729908590809447, + "grad_norm": 820.9173583984375, + "learning_rate": 7.455949310675725e-08, + "loss": 76.266, + "step": 117620 + }, + { + "epoch": 0.9730735823303139, + "grad_norm": 748.765625, + "learning_rate": 7.43170344597377e-08, + "loss": 102.896, + "step": 117630 + }, + { + "epoch": 0.9731563055796831, + "grad_norm": 604.3673095703125, + "learning_rate": 7.407496772670609e-08, + "loss": 62.3851, + "step": 117640 + }, + { + "epoch": 0.9732390288290524, + "grad_norm": 783.4528198242188, + "learning_rate": 7.383329292692198e-08, + "loss": 89.3814, + "step": 117650 + }, + { + "epoch": 0.9733217520784216, + "grad_norm": 1100.453857421875, + "learning_rate": 7.359201007961503e-08, + "loss": 75.2225, + "step": 117660 + }, + { + "epoch": 0.9734044753277908, + "grad_norm": 647.7412109375, + "learning_rate": 7.335111920398263e-08, + "loss": 84.3431, + "step": 117670 + }, + { + "epoch": 0.9734871985771602, + "grad_norm": 796.82421875, + "learning_rate": 7.311062031919114e-08, + "loss": 103.5371, + "step": 117680 + }, + { + "epoch": 0.9735699218265293, + "grad_norm": 915.2636108398438, + "learning_rate": 7.28705134443769e-08, + "loss": 82.3625, + "step": 117690 + }, + { + "epoch": 0.9736526450758985, + "grad_norm": 1407.7706298828125, + "learning_rate": 7.263079859864298e-08, + "loss": 99.5093, + "step": 117700 + }, + { + "epoch": 0.9737353683252679, + "grad_norm": 515.5316162109375, + "learning_rate": 7.239147580106242e-08, + "loss": 85.3771, + "step": 117710 + }, + { + "epoch": 0.9738180915746371, + "grad_norm": 635.7969970703125, + "learning_rate": 7.215254507067782e-08, + "loss": 83.8497, + "step": 117720 + }, + { + "epoch": 0.9739008148240063, + "grad_norm": 915.8248901367188, + "learning_rate": 7.191400642649893e-08, + "loss": 72.568, + "step": 117730 + }, + { + "epoch": 0.9739835380733756, + "grad_norm": 774.489013671875, + "learning_rate": 7.167585988750669e-08, + "loss": 
101.6645, + "step": 117740 + }, + { + "epoch": 0.9740662613227448, + "grad_norm": 866.99560546875, + "learning_rate": 7.143810547264762e-08, + "loss": 61.4445, + "step": 117750 + }, + { + "epoch": 0.974148984572114, + "grad_norm": 642.11474609375, + "learning_rate": 7.120074320083991e-08, + "loss": 64.4058, + "step": 117760 + }, + { + "epoch": 0.9742317078214833, + "grad_norm": 1115.784423828125, + "learning_rate": 7.096377309096846e-08, + "loss": 76.0183, + "step": 117770 + }, + { + "epoch": 0.9743144310708525, + "grad_norm": 734.0995483398438, + "learning_rate": 7.072719516188875e-08, + "loss": 96.2746, + "step": 117780 + }, + { + "epoch": 0.9743971543202217, + "grad_norm": 525.3031616210938, + "learning_rate": 7.049100943242404e-08, + "loss": 76.8062, + "step": 117790 + }, + { + "epoch": 0.974479877569591, + "grad_norm": 831.6923217773438, + "learning_rate": 7.025521592136597e-08, + "loss": 73.2237, + "step": 117800 + }, + { + "epoch": 0.9745626008189602, + "grad_norm": 1876.879150390625, + "learning_rate": 7.001981464747565e-08, + "loss": 87.2098, + "step": 117810 + }, + { + "epoch": 0.9746453240683294, + "grad_norm": 811.3916015625, + "learning_rate": 6.978480562948309e-08, + "loss": 94.3064, + "step": 117820 + }, + { + "epoch": 0.9747280473176987, + "grad_norm": 1149.4193115234375, + "learning_rate": 6.955018888608722e-08, + "loss": 100.6052, + "step": 117830 + }, + { + "epoch": 0.9748107705670679, + "grad_norm": 863.6624145507812, + "learning_rate": 6.931596443595478e-08, + "loss": 109.152, + "step": 117840 + }, + { + "epoch": 0.9748934938164371, + "grad_norm": 771.5863647460938, + "learning_rate": 6.908213229772254e-08, + "loss": 67.1083, + "step": 117850 + }, + { + "epoch": 0.9749762170658064, + "grad_norm": 581.7706298828125, + "learning_rate": 6.884869248999504e-08, + "loss": 92.5001, + "step": 117860 + }, + { + "epoch": 0.9750589403151756, + "grad_norm": 790.7437133789062, + "learning_rate": 6.861564503134688e-08, + "loss": 98.9197, + "step": 117870 + }, + { + "epoch": 0.9751416635645448, + "grad_norm": 710.2706909179688, + "learning_rate": 6.838298994031933e-08, + "loss": 100.5049, + "step": 117880 + }, + { + "epoch": 0.9752243868139141, + "grad_norm": 656.0684814453125, + "learning_rate": 6.815072723542426e-08, + "loss": 85.3911, + "step": 117890 + }, + { + "epoch": 0.9753071100632833, + "grad_norm": 482.3664245605469, + "learning_rate": 6.791885693514134e-08, + "loss": 58.3653, + "step": 117900 + }, + { + "epoch": 0.9753898333126525, + "grad_norm": 1170.74169921875, + "learning_rate": 6.768737905792022e-08, + "loss": 84.8319, + "step": 117910 + }, + { + "epoch": 0.9754725565620218, + "grad_norm": 755.68017578125, + "learning_rate": 6.745629362217731e-08, + "loss": 113.3002, + "step": 117920 + }, + { + "epoch": 0.975555279811391, + "grad_norm": 776.3069458007812, + "learning_rate": 6.722560064630013e-08, + "loss": 65.8851, + "step": 117930 + }, + { + "epoch": 0.9756380030607602, + "grad_norm": 1081.909912109375, + "learning_rate": 6.699530014864397e-08, + "loss": 92.803, + "step": 117940 + }, + { + "epoch": 0.9757207263101295, + "grad_norm": 552.3186645507812, + "learning_rate": 6.676539214753253e-08, + "loss": 92.8013, + "step": 117950 + }, + { + "epoch": 0.9758034495594987, + "grad_norm": 771.9279174804688, + "learning_rate": 6.653587666125782e-08, + "loss": 74.0391, + "step": 117960 + }, + { + "epoch": 0.9758861728088679, + "grad_norm": 670.3401489257812, + "learning_rate": 6.630675370808193e-08, + "loss": 77.2455, + "step": 117970 + }, + { + "epoch": 0.9759688960582372, + 
"grad_norm": 1167.330810546875, + "learning_rate": 6.607802330623525e-08, + "loss": 87.141, + "step": 117980 + }, + { + "epoch": 0.9760516193076064, + "grad_norm": 347.4068908691406, + "learning_rate": 6.584968547391657e-08, + "loss": 72.6703, + "step": 117990 + }, + { + "epoch": 0.9761343425569756, + "grad_norm": 1017.5067749023438, + "learning_rate": 6.562174022929358e-08, + "loss": 84.4651, + "step": 118000 + }, + { + "epoch": 0.9762170658063448, + "grad_norm": 818.614013671875, + "learning_rate": 6.539418759050286e-08, + "loss": 78.0972, + "step": 118010 + }, + { + "epoch": 0.9762997890557141, + "grad_norm": 870.0516357421875, + "learning_rate": 6.516702757564941e-08, + "loss": 60.4278, + "step": 118020 + }, + { + "epoch": 0.9763825123050833, + "grad_norm": 1681.1678466796875, + "learning_rate": 6.494026020280875e-08, + "loss": 94.2777, + "step": 118030 + }, + { + "epoch": 0.9764652355544525, + "grad_norm": 767.0552978515625, + "learning_rate": 6.471388549002255e-08, + "loss": 96.2769, + "step": 118040 + }, + { + "epoch": 0.9765479588038218, + "grad_norm": 904.4722900390625, + "learning_rate": 6.448790345530253e-08, + "loss": 73.3652, + "step": 118050 + }, + { + "epoch": 0.976630682053191, + "grad_norm": 916.374267578125, + "learning_rate": 6.426231411662876e-08, + "loss": 75.4504, + "step": 118060 + }, + { + "epoch": 0.9767134053025602, + "grad_norm": 327.85504150390625, + "learning_rate": 6.403711749195073e-08, + "loss": 55.0961, + "step": 118070 + }, + { + "epoch": 0.9767961285519295, + "grad_norm": 794.025146484375, + "learning_rate": 6.381231359918638e-08, + "loss": 82.1348, + "step": 118080 + }, + { + "epoch": 0.9768788518012987, + "grad_norm": 971.2992553710938, + "learning_rate": 6.358790245622193e-08, + "loss": 65.286, + "step": 118090 + }, + { + "epoch": 0.9769615750506679, + "grad_norm": 382.24456787109375, + "learning_rate": 6.336388408091366e-08, + "loss": 81.6362, + "step": 118100 + }, + { + "epoch": 0.9770442983000373, + "grad_norm": 700.8916625976562, + "learning_rate": 6.314025849108397e-08, + "loss": 66.6585, + "step": 118110 + }, + { + "epoch": 0.9771270215494064, + "grad_norm": 598.8179321289062, + "learning_rate": 6.291702570452806e-08, + "loss": 87.4747, + "step": 118120 + }, + { + "epoch": 0.9772097447987756, + "grad_norm": 832.1001586914062, + "learning_rate": 6.269418573900565e-08, + "loss": 78.9643, + "step": 118130 + }, + { + "epoch": 0.977292468048145, + "grad_norm": 1039.1939697265625, + "learning_rate": 6.247173861224753e-08, + "loss": 125.8844, + "step": 118140 + }, + { + "epoch": 0.9773751912975142, + "grad_norm": 537.9774169921875, + "learning_rate": 6.224968434195289e-08, + "loss": 60.1959, + "step": 118150 + }, + { + "epoch": 0.9774579145468834, + "grad_norm": 1239.988037109375, + "learning_rate": 6.202802294578981e-08, + "loss": 96.3588, + "step": 118160 + }, + { + "epoch": 0.9775406377962527, + "grad_norm": 629.1329345703125, + "learning_rate": 6.180675444139527e-08, + "loss": 78.8181, + "step": 118170 + }, + { + "epoch": 0.9776233610456219, + "grad_norm": 1456.6697998046875, + "learning_rate": 6.158587884637357e-08, + "loss": 83.5854, + "step": 118180 + }, + { + "epoch": 0.9777060842949911, + "grad_norm": 694.644775390625, + "learning_rate": 6.136539617829895e-08, + "loss": 75.1875, + "step": 118190 + }, + { + "epoch": 0.9777888075443604, + "grad_norm": 1134.1533203125, + "learning_rate": 6.114530645471461e-08, + "loss": 95.2716, + "step": 118200 + }, + { + "epoch": 0.9778715307937296, + "grad_norm": 819.7823486328125, + "learning_rate": 
6.09256096931321e-08, + "loss": 93.0044, + "step": 118210 + }, + { + "epoch": 0.9779542540430988, + "grad_norm": 874.4256591796875, + "learning_rate": 6.070630591103188e-08, + "loss": 77.5878, + "step": 118220 + }, + { + "epoch": 0.9780369772924681, + "grad_norm": 941.7909545898438, + "learning_rate": 6.048739512586221e-08, + "loss": 106.6546, + "step": 118230 + }, + { + "epoch": 0.9781197005418373, + "grad_norm": 496.1390075683594, + "learning_rate": 6.026887735504083e-08, + "loss": 70.8411, + "step": 118240 + }, + { + "epoch": 0.9782024237912065, + "grad_norm": 1133.30517578125, + "learning_rate": 6.005075261595495e-08, + "loss": 62.7111, + "step": 118250 + }, + { + "epoch": 0.9782851470405758, + "grad_norm": 1455.4931640625, + "learning_rate": 5.983302092595955e-08, + "loss": 82.0333, + "step": 118260 + }, + { + "epoch": 0.978367870289945, + "grad_norm": 369.7554626464844, + "learning_rate": 5.961568230237858e-08, + "loss": 109.3388, + "step": 118270 + }, + { + "epoch": 0.9784505935393142, + "grad_norm": 983.8111572265625, + "learning_rate": 5.939873676250374e-08, + "loss": 74.7944, + "step": 118280 + }, + { + "epoch": 0.9785333167886835, + "grad_norm": 437.4408874511719, + "learning_rate": 5.91821843235979e-08, + "loss": 82.0863, + "step": 118290 + }, + { + "epoch": 0.9786160400380527, + "grad_norm": 1494.396484375, + "learning_rate": 5.8966025002889505e-08, + "loss": 103.6332, + "step": 118300 + }, + { + "epoch": 0.9786987632874219, + "grad_norm": 637.7553100585938, + "learning_rate": 5.8750258817578676e-08, + "loss": 59.2877, + "step": 118310 + }, + { + "epoch": 0.9787814865367912, + "grad_norm": 802.739990234375, + "learning_rate": 5.85348857848328e-08, + "loss": 55.1885, + "step": 118320 + }, + { + "epoch": 0.9788642097861604, + "grad_norm": 941.2757568359375, + "learning_rate": 5.8319905921787603e-08, + "loss": 69.7701, + "step": 118330 + }, + { + "epoch": 0.9789469330355296, + "grad_norm": 1327.10888671875, + "learning_rate": 5.810531924554774e-08, + "loss": 87.9876, + "step": 118340 + }, + { + "epoch": 0.9790296562848989, + "grad_norm": 1318.0863037109375, + "learning_rate": 5.7891125773187896e-08, + "loss": 139.0294, + "step": 118350 + }, + { + "epoch": 0.9791123795342681, + "grad_norm": 1360.8392333984375, + "learning_rate": 5.7677325521749983e-08, + "loss": 68.9617, + "step": 118360 + }, + { + "epoch": 0.9791951027836373, + "grad_norm": 1109.0506591796875, + "learning_rate": 5.746391850824484e-08, + "loss": 122.1048, + "step": 118370 + }, + { + "epoch": 0.9792778260330066, + "grad_norm": 1080.967041015625, + "learning_rate": 5.725090474965278e-08, + "loss": 67.3166, + "step": 118380 + }, + { + "epoch": 0.9793605492823758, + "grad_norm": 738.1102905273438, + "learning_rate": 5.703828426292191e-08, + "loss": 103.1861, + "step": 118390 + }, + { + "epoch": 0.979443272531745, + "grad_norm": 1060.9788818359375, + "learning_rate": 5.6826057064969244e-08, + "loss": 83.6084, + "step": 118400 + }, + { + "epoch": 0.9795259957811143, + "grad_norm": 731.0538330078125, + "learning_rate": 5.6614223172681836e-08, + "loss": 100.0388, + "step": 118410 + }, + { + "epoch": 0.9796087190304835, + "grad_norm": 1262.67578125, + "learning_rate": 5.640278260291287e-08, + "loss": 83.6562, + "step": 118420 + }, + { + "epoch": 0.9796914422798527, + "grad_norm": 726.7060546875, + "learning_rate": 5.6191735372487235e-08, + "loss": 89.0712, + "step": 118430 + }, + { + "epoch": 0.979774165529222, + "grad_norm": 708.3402099609375, + "learning_rate": 5.5981081498195365e-08, + "loss": 108.3079, + "step": 118440 
+ }, + { + "epoch": 0.9798568887785912, + "grad_norm": 731.2184448242188, + "learning_rate": 5.577082099679942e-08, + "loss": 64.9039, + "step": 118450 + }, + { + "epoch": 0.9799396120279604, + "grad_norm": 673.4403076171875, + "learning_rate": 5.556095388502824e-08, + "loss": 95.9862, + "step": 118460 + }, + { + "epoch": 0.9800223352773297, + "grad_norm": 1423.2120361328125, + "learning_rate": 5.535148017958014e-08, + "loss": 81.7384, + "step": 118470 + }, + { + "epoch": 0.9801050585266989, + "grad_norm": 706.9336547851562, + "learning_rate": 5.514239989712178e-08, + "loss": 82.0284, + "step": 118480 + }, + { + "epoch": 0.9801877817760681, + "grad_norm": 711.587646484375, + "learning_rate": 5.493371305428874e-08, + "loss": 101.2837, + "step": 118490 + }, + { + "epoch": 0.9802705050254374, + "grad_norm": 993.87353515625, + "learning_rate": 5.472541966768552e-08, + "loss": 74.9229, + "step": 118500 + }, + { + "epoch": 0.9803532282748066, + "grad_norm": 746.7269287109375, + "learning_rate": 5.451751975388442e-08, + "loss": 77.2581, + "step": 118510 + }, + { + "epoch": 0.9804359515241758, + "grad_norm": 1050.691162109375, + "learning_rate": 5.4310013329428314e-08, + "loss": 88.0851, + "step": 118520 + }, + { + "epoch": 0.9805186747735452, + "grad_norm": 496.2442321777344, + "learning_rate": 5.410290041082622e-08, + "loss": 141.8692, + "step": 118530 + }, + { + "epoch": 0.9806013980229144, + "grad_norm": 484.4472351074219, + "learning_rate": 5.3896181014557733e-08, + "loss": 62.8719, + "step": 118540 + }, + { + "epoch": 0.9806841212722835, + "grad_norm": 1243.587646484375, + "learning_rate": 5.368985515707137e-08, + "loss": 120.0687, + "step": 118550 + }, + { + "epoch": 0.9807668445216529, + "grad_norm": 533.1880493164062, + "learning_rate": 5.348392285478232e-08, + "loss": 66.4509, + "step": 118560 + }, + { + "epoch": 0.9808495677710221, + "grad_norm": 1625.4766845703125, + "learning_rate": 5.327838412407582e-08, + "loss": 110.9632, + "step": 118570 + }, + { + "epoch": 0.9809322910203913, + "grad_norm": 521.4833374023438, + "learning_rate": 5.3073238981305455e-08, + "loss": 88.1412, + "step": 118580 + }, + { + "epoch": 0.9810150142697606, + "grad_norm": 671.5929565429688, + "learning_rate": 5.2868487442794825e-08, + "loss": 59.3454, + "step": 118590 + }, + { + "epoch": 0.9810977375191298, + "grad_norm": 717.1107177734375, + "learning_rate": 5.266412952483424e-08, + "loss": 101.9609, + "step": 118600 + }, + { + "epoch": 0.981180460768499, + "grad_norm": 657.0799560546875, + "learning_rate": 5.246016524368347e-08, + "loss": 88.4751, + "step": 118610 + }, + { + "epoch": 0.9812631840178683, + "grad_norm": 616.4298706054688, + "learning_rate": 5.225659461557176e-08, + "loss": 111.9666, + "step": 118620 + }, + { + "epoch": 0.9813459072672375, + "grad_norm": 525.951416015625, + "learning_rate": 5.205341765669503e-08, + "loss": 113.7892, + "step": 118630 + }, + { + "epoch": 0.9814286305166067, + "grad_norm": 701.6843872070312, + "learning_rate": 5.185063438322091e-08, + "loss": 59.8481, + "step": 118640 + }, + { + "epoch": 0.981511353765976, + "grad_norm": 768.6402587890625, + "learning_rate": 5.1648244811282054e-08, + "loss": 75.6018, + "step": 118650 + }, + { + "epoch": 0.9815940770153452, + "grad_norm": 1144.209716796875, + "learning_rate": 5.14462489569828e-08, + "loss": 80.7018, + "step": 118660 + }, + { + "epoch": 0.9816768002647144, + "grad_norm": 1187.2999267578125, + "learning_rate": 5.1244646836394187e-08, + "loss": 93.3386, + "step": 118670 + }, + { + "epoch": 0.9817595235140837, + 
"grad_norm": 1293.2803955078125, + "learning_rate": 5.104343846555726e-08, + "loss": 73.9802, + "step": 118680 + }, + { + "epoch": 0.9818422467634529, + "grad_norm": 918.84765625, + "learning_rate": 5.0842623860482e-08, + "loss": 94.4681, + "step": 118690 + }, + { + "epoch": 0.9819249700128221, + "grad_norm": 558.7251586914062, + "learning_rate": 5.064220303714507e-08, + "loss": 82.5352, + "step": 118700 + }, + { + "epoch": 0.9820076932621913, + "grad_norm": 1152.9447021484375, + "learning_rate": 5.044217601149371e-08, + "loss": 65.6448, + "step": 118710 + }, + { + "epoch": 0.9820904165115606, + "grad_norm": 535.541748046875, + "learning_rate": 5.024254279944296e-08, + "loss": 72.6428, + "step": 118720 + }, + { + "epoch": 0.9821731397609298, + "grad_norm": 2769.91845703125, + "learning_rate": 5.004330341687735e-08, + "loss": 102.4574, + "step": 118730 + }, + { + "epoch": 0.982255863010299, + "grad_norm": 680.4102172851562, + "learning_rate": 4.9844457879648086e-08, + "loss": 94.1751, + "step": 118740 + }, + { + "epoch": 0.9823385862596683, + "grad_norm": 941.7730712890625, + "learning_rate": 4.9646006203577515e-08, + "loss": 112.8078, + "step": 118750 + }, + { + "epoch": 0.9824213095090375, + "grad_norm": 1552.2708740234375, + "learning_rate": 4.944794840445521e-08, + "loss": 78.17, + "step": 118760 + }, + { + "epoch": 0.9825040327584067, + "grad_norm": 1019.7816772460938, + "learning_rate": 4.9250284498039146e-08, + "loss": 65.3781, + "step": 118770 + }, + { + "epoch": 0.982586756007776, + "grad_norm": 658.9552001953125, + "learning_rate": 4.905301450005784e-08, + "loss": 74.2217, + "step": 118780 + }, + { + "epoch": 0.9826694792571452, + "grad_norm": 731.14111328125, + "learning_rate": 4.885613842620596e-08, + "loss": 93.1678, + "step": 118790 + }, + { + "epoch": 0.9827522025065144, + "grad_norm": 1084.143310546875, + "learning_rate": 4.865965629214819e-08, + "loss": 86.729, + "step": 118800 + }, + { + "epoch": 0.9828349257558837, + "grad_norm": 621.4204711914062, + "learning_rate": 4.846356811351871e-08, + "loss": 87.1071, + "step": 118810 + }, + { + "epoch": 0.9829176490052529, + "grad_norm": 686.3203735351562, + "learning_rate": 4.826787390591836e-08, + "loss": 66.5393, + "step": 118820 + }, + { + "epoch": 0.9830003722546221, + "grad_norm": 380.79351806640625, + "learning_rate": 4.8072573684918024e-08, + "loss": 60.1358, + "step": 118830 + }, + { + "epoch": 0.9830830955039914, + "grad_norm": 1424.990966796875, + "learning_rate": 4.787766746605638e-08, + "loss": 97.6002, + "step": 118840 + }, + { + "epoch": 0.9831658187533606, + "grad_norm": 489.2065734863281, + "learning_rate": 4.768315526484158e-08, + "loss": 69.9402, + "step": 118850 + }, + { + "epoch": 0.9832485420027298, + "grad_norm": 868.9462280273438, + "learning_rate": 4.7489037096750126e-08, + "loss": 106.7428, + "step": 118860 + }, + { + "epoch": 0.9833312652520991, + "grad_norm": 2591.3154296875, + "learning_rate": 4.7295312977226895e-08, + "loss": 102.9899, + "step": 118870 + }, + { + "epoch": 0.9834139885014683, + "grad_norm": 1134.68212890625, + "learning_rate": 4.710198292168566e-08, + "loss": 78.4484, + "step": 118880 + }, + { + "epoch": 0.9834967117508375, + "grad_norm": 805.161376953125, + "learning_rate": 4.690904694550913e-08, + "loss": 88.5338, + "step": 118890 + }, + { + "epoch": 0.9835794350002068, + "grad_norm": 747.1693115234375, + "learning_rate": 4.671650506404835e-08, + "loss": 83.3935, + "step": 118900 + }, + { + "epoch": 0.983662158249576, + "grad_norm": 807.8037109375, + "learning_rate": 
4.6524357292622724e-08, + "loss": 78.79, + "step": 118910 + }, + { + "epoch": 0.9837448814989452, + "grad_norm": 695.4989013671875, + "learning_rate": 4.633260364652059e-08, + "loss": 82.1724, + "step": 118920 + }, + { + "epoch": 0.9838276047483145, + "grad_norm": 840.2431640625, + "learning_rate": 4.6141244140998634e-08, + "loss": 73.6012, + "step": 118930 + }, + { + "epoch": 0.9839103279976837, + "grad_norm": 857.3699340820312, + "learning_rate": 4.5950278791283e-08, + "loss": 75.8136, + "step": 118940 + }, + { + "epoch": 0.9839930512470529, + "grad_norm": 1018.6058959960938, + "learning_rate": 4.575970761256765e-08, + "loss": 85.1594, + "step": 118950 + }, + { + "epoch": 0.9840757744964223, + "grad_norm": 1265.7125244140625, + "learning_rate": 4.556953062001546e-08, + "loss": 89.4971, + "step": 118960 + }, + { + "epoch": 0.9841584977457914, + "grad_norm": 735.5936279296875, + "learning_rate": 4.5379747828757095e-08, + "loss": 81.9222, + "step": 118970 + }, + { + "epoch": 0.9842412209951606, + "grad_norm": 881.5132446289062, + "learning_rate": 4.5190359253894925e-08, + "loss": 88.397, + "step": 118980 + }, + { + "epoch": 0.98432394424453, + "grad_norm": 353.5128479003906, + "learning_rate": 4.500136491049578e-08, + "loss": 83.2312, + "step": 118990 + }, + { + "epoch": 0.9844066674938992, + "grad_norm": 1424.359130859375, + "learning_rate": 4.481276481359764e-08, + "loss": 86.6353, + "step": 119000 + }, + { + "epoch": 0.9844893907432684, + "grad_norm": 887.9814453125, + "learning_rate": 4.462455897820628e-08, + "loss": 76.8451, + "step": 119010 + }, + { + "epoch": 0.9845721139926377, + "grad_norm": 806.23193359375, + "learning_rate": 4.443674741929693e-08, + "loss": 66.0883, + "step": 119020 + }, + { + "epoch": 0.9846548372420069, + "grad_norm": 573.1680297851562, + "learning_rate": 4.424933015181265e-08, + "loss": 72.9488, + "step": 119030 + }, + { + "epoch": 0.9847375604913761, + "grad_norm": 976.209716796875, + "learning_rate": 4.40623071906654e-08, + "loss": 84.5281, + "step": 119040 + }, + { + "epoch": 0.9848202837407454, + "grad_norm": 947.809814453125, + "learning_rate": 4.387567855073604e-08, + "loss": 89.569, + "step": 119050 + }, + { + "epoch": 0.9849030069901146, + "grad_norm": 1052.1142578125, + "learning_rate": 4.368944424687271e-08, + "loss": 69.5182, + "step": 119060 + }, + { + "epoch": 0.9849857302394838, + "grad_norm": 498.6680603027344, + "learning_rate": 4.350360429389411e-08, + "loss": 92.5021, + "step": 119070 + }, + { + "epoch": 0.9850684534888531, + "grad_norm": 878.2149047851562, + "learning_rate": 4.3318158706586734e-08, + "loss": 60.1033, + "step": 119080 + }, + { + "epoch": 0.9851511767382223, + "grad_norm": 618.4829711914062, + "learning_rate": 4.3133107499704894e-08, + "loss": 80.3329, + "step": 119090 + }, + { + "epoch": 0.9852338999875915, + "grad_norm": 544.2587280273438, + "learning_rate": 4.294845068797349e-08, + "loss": 79.2608, + "step": 119100 + }, + { + "epoch": 0.9853166232369608, + "grad_norm": 989.1095581054688, + "learning_rate": 4.276418828608353e-08, + "loss": 69.4792, + "step": 119110 + }, + { + "epoch": 0.98539934648633, + "grad_norm": 1151.3955078125, + "learning_rate": 4.258032030869608e-08, + "loss": 83.3888, + "step": 119120 + }, + { + "epoch": 0.9854820697356992, + "grad_norm": 628.578857421875, + "learning_rate": 4.2396846770441644e-08, + "loss": 103.2333, + "step": 119130 + }, + { + "epoch": 0.9855647929850685, + "grad_norm": 830.7070922851562, + "learning_rate": 4.221376768591801e-08, + "loss": 83.7114, + "step": 119140 + }, + { + 
"epoch": 0.9856475162344377, + "grad_norm": 343.4464111328125, + "learning_rate": 4.203108306969128e-08, + "loss": 86.0502, + "step": 119150 + }, + { + "epoch": 0.9857302394838069, + "grad_norm": 621.4478759765625, + "learning_rate": 4.1848792936297064e-08, + "loss": 83.2397, + "step": 119160 + }, + { + "epoch": 0.9858129627331762, + "grad_norm": 964.1356811523438, + "learning_rate": 4.166689730023987e-08, + "loss": 77.1152, + "step": 119170 + }, + { + "epoch": 0.9858956859825454, + "grad_norm": 818.7540893554688, + "learning_rate": 4.148539617599201e-08, + "loss": 92.1264, + "step": 119180 + }, + { + "epoch": 0.9859784092319146, + "grad_norm": 1049.2435302734375, + "learning_rate": 4.13042895779947e-08, + "loss": 92.8093, + "step": 119190 + }, + { + "epoch": 0.9860611324812839, + "grad_norm": 622.9645385742188, + "learning_rate": 4.112357752065754e-08, + "loss": 73.6431, + "step": 119200 + }, + { + "epoch": 0.9861438557306531, + "grad_norm": 934.980224609375, + "learning_rate": 4.0943260018359024e-08, + "loss": 102.3326, + "step": 119210 + }, + { + "epoch": 0.9862265789800223, + "grad_norm": 775.8395385742188, + "learning_rate": 4.076333708544655e-08, + "loss": 99.6095, + "step": 119220 + }, + { + "epoch": 0.9863093022293916, + "grad_norm": 561.11181640625, + "learning_rate": 4.058380873623591e-08, + "loss": 60.4265, + "step": 119230 + }, + { + "epoch": 0.9863920254787608, + "grad_norm": 1175.651123046875, + "learning_rate": 4.040467498501011e-08, + "loss": 102.5078, + "step": 119240 + }, + { + "epoch": 0.98647474872813, + "grad_norm": 1031.730712890625, + "learning_rate": 4.02259358460233e-08, + "loss": 107.7175, + "step": 119250 + }, + { + "epoch": 0.9865574719774993, + "grad_norm": 1173.889404296875, + "learning_rate": 4.00475913334969e-08, + "loss": 112.8835, + "step": 119260 + }, + { + "epoch": 0.9866401952268685, + "grad_norm": 747.98486328125, + "learning_rate": 3.986964146162009e-08, + "loss": 72.1821, + "step": 119270 + }, + { + "epoch": 0.9867229184762377, + "grad_norm": 904.0946044921875, + "learning_rate": 3.969208624455212e-08, + "loss": 78.2023, + "step": 119280 + }, + { + "epoch": 0.986805641725607, + "grad_norm": 761.3492431640625, + "learning_rate": 3.951492569642001e-08, + "loss": 83.0522, + "step": 119290 + }, + { + "epoch": 0.9868883649749762, + "grad_norm": 1574.1885986328125, + "learning_rate": 3.9338159831319724e-08, + "loss": 108.7734, + "step": 119300 + }, + { + "epoch": 0.9869710882243454, + "grad_norm": 774.9620971679688, + "learning_rate": 3.9161788663315546e-08, + "loss": 79.6705, + "step": 119310 + }, + { + "epoch": 0.9870538114737147, + "grad_norm": 1015.9965209960938, + "learning_rate": 3.898581220644071e-08, + "loss": 126.5208, + "step": 119320 + }, + { + "epoch": 0.9871365347230839, + "grad_norm": 1341.8759765625, + "learning_rate": 3.881023047469679e-08, + "loss": 90.6255, + "step": 119330 + }, + { + "epoch": 0.9872192579724531, + "grad_norm": 511.98223876953125, + "learning_rate": 3.863504348205427e-08, + "loss": 85.9824, + "step": 119340 + }, + { + "epoch": 0.9873019812218224, + "grad_norm": 1165.170654296875, + "learning_rate": 3.8460251242451454e-08, + "loss": 123.9411, + "step": 119350 + }, + { + "epoch": 0.9873847044711916, + "grad_norm": 1152.4227294921875, + "learning_rate": 3.828585376979666e-08, + "loss": 91.7785, + "step": 119360 + }, + { + "epoch": 0.9874674277205608, + "grad_norm": 684.0379638671875, + "learning_rate": 3.811185107796489e-08, + "loss": 69.7867, + "step": 119370 + }, + { + "epoch": 0.9875501509699302, + "grad_norm": 
664.3668823242188, + "learning_rate": 3.793824318080064e-08, + "loss": 74.003, + "step": 119380 + }, + { + "epoch": 0.9876328742192994, + "grad_norm": 800.0819091796875, + "learning_rate": 3.7765030092118404e-08, + "loss": 138.9721, + "step": 119390 + }, + { + "epoch": 0.9877155974686685, + "grad_norm": 845.6797485351562, + "learning_rate": 3.7592211825698835e-08, + "loss": 104.952, + "step": 119400 + }, + { + "epoch": 0.9877983207180379, + "grad_norm": 942.4180297851562, + "learning_rate": 3.741978839529259e-08, + "loss": 112.8675, + "step": 119410 + }, + { + "epoch": 0.9878810439674071, + "grad_norm": 1062.473876953125, + "learning_rate": 3.72477598146187e-08, + "loss": 90.5084, + "step": 119420 + }, + { + "epoch": 0.9879637672167763, + "grad_norm": 636.2952270507812, + "learning_rate": 3.7076126097363997e-08, + "loss": 70.5935, + "step": 119430 + }, + { + "epoch": 0.9880464904661455, + "grad_norm": 446.5930480957031, + "learning_rate": 3.690488725718588e-08, + "loss": 109.0157, + "step": 119440 + }, + { + "epoch": 0.9881292137155148, + "grad_norm": 1208.2958984375, + "learning_rate": 3.673404330770847e-08, + "loss": 64.4161, + "step": 119450 + }, + { + "epoch": 0.988211936964884, + "grad_norm": 501.6045227050781, + "learning_rate": 3.65635942625242e-08, + "loss": 64.2057, + "step": 119460 + }, + { + "epoch": 0.9882946602142532, + "grad_norm": 1004.0560913085938, + "learning_rate": 3.639354013519614e-08, + "loss": 118.3625, + "step": 119470 + }, + { + "epoch": 0.9883773834636225, + "grad_norm": 1431.545166015625, + "learning_rate": 3.6223880939254e-08, + "loss": 111.376, + "step": 119480 + }, + { + "epoch": 0.9884601067129917, + "grad_norm": 717.1210327148438, + "learning_rate": 3.605461668819754e-08, + "loss": 81.6542, + "step": 119490 + }, + { + "epoch": 0.9885428299623609, + "grad_norm": 844.7835083007812, + "learning_rate": 3.588574739549322e-08, + "loss": 92.5829, + "step": 119500 + }, + { + "epoch": 0.9886255532117302, + "grad_norm": 910.1040649414062, + "learning_rate": 3.571727307457806e-08, + "loss": 111.628, + "step": 119510 + }, + { + "epoch": 0.9887082764610994, + "grad_norm": 649.8385620117188, + "learning_rate": 3.5549193738856346e-08, + "loss": 86.6366, + "step": 119520 + }, + { + "epoch": 0.9887909997104686, + "grad_norm": 784.6031494140625, + "learning_rate": 3.5381509401701264e-08, + "loss": 72.6148, + "step": 119530 + }, + { + "epoch": 0.9888737229598379, + "grad_norm": 1095.903564453125, + "learning_rate": 3.5214220076455474e-08, + "loss": 120.5914, + "step": 119540 + }, + { + "epoch": 0.9889564462092071, + "grad_norm": 954.7218017578125, + "learning_rate": 3.5047325776428884e-08, + "loss": 101.9791, + "step": 119550 + }, + { + "epoch": 0.9890391694585763, + "grad_norm": 1238.466796875, + "learning_rate": 3.488082651490032e-08, + "loss": 100.0169, + "step": 119560 + }, + { + "epoch": 0.9891218927079456, + "grad_norm": 688.223388671875, + "learning_rate": 3.471472230511752e-08, + "loss": 96.7697, + "step": 119570 + }, + { + "epoch": 0.9892046159573148, + "grad_norm": 788.05859375, + "learning_rate": 3.454901316029657e-08, + "loss": 100.5506, + "step": 119580 + }, + { + "epoch": 0.989287339206684, + "grad_norm": 1479.240234375, + "learning_rate": 3.438369909362249e-08, + "loss": 109.9174, + "step": 119590 + }, + { + "epoch": 0.9893700624560533, + "grad_norm": 831.3067016601562, + "learning_rate": 3.421878011824864e-08, + "loss": 110.4286, + "step": 119600 + }, + { + "epoch": 0.9894527857054225, + "grad_norm": 924.3592529296875, + "learning_rate": 3.405425624729619e-08, 
+ "loss": 91.2705, + "step": 119610 + }, + { + "epoch": 0.9895355089547917, + "grad_norm": 674.0458374023438, + "learning_rate": 3.389012749385578e-08, + "loss": 74.0681, + "step": 119620 + }, + { + "epoch": 0.989618232204161, + "grad_norm": 523.977294921875, + "learning_rate": 3.3726393870986976e-08, + "loss": 97.7908, + "step": 119630 + }, + { + "epoch": 0.9897009554535302, + "grad_norm": 619.9107666015625, + "learning_rate": 3.356305539171656e-08, + "loss": 99.3192, + "step": 119640 + }, + { + "epoch": 0.9897836787028994, + "grad_norm": 644.1179809570312, + "learning_rate": 3.340011206904137e-08, + "loss": 80.9895, + "step": 119650 + }, + { + "epoch": 0.9898664019522687, + "grad_norm": 646.9247436523438, + "learning_rate": 3.323756391592548e-08, + "loss": 75.6963, + "step": 119660 + }, + { + "epoch": 0.9899491252016379, + "grad_norm": 613.2579956054688, + "learning_rate": 3.307541094530242e-08, + "loss": 106.8036, + "step": 119670 + }, + { + "epoch": 0.9900318484510071, + "grad_norm": 903.300048828125, + "learning_rate": 3.291365317007355e-08, + "loss": 66.5273, + "step": 119680 + }, + { + "epoch": 0.9901145717003764, + "grad_norm": 749.8668212890625, + "learning_rate": 3.2752290603109694e-08, + "loss": 81.0457, + "step": 119690 + }, + { + "epoch": 0.9901972949497456, + "grad_norm": 855.69677734375, + "learning_rate": 3.25913232572489e-08, + "loss": 103.7646, + "step": 119700 + }, + { + "epoch": 0.9902800181991148, + "grad_norm": 640.4257202148438, + "learning_rate": 3.243075114529981e-08, + "loss": 88.8553, + "step": 119710 + }, + { + "epoch": 0.9903627414484841, + "grad_norm": 642.8728637695312, + "learning_rate": 3.2270574280037213e-08, + "loss": 89.717, + "step": 119720 + }, + { + "epoch": 0.9904454646978533, + "grad_norm": 1653.7088623046875, + "learning_rate": 3.211079267420647e-08, + "loss": 106.5534, + "step": 119730 + }, + { + "epoch": 0.9905281879472225, + "grad_norm": 1032.344970703125, + "learning_rate": 3.195140634052074e-08, + "loss": 83.4833, + "step": 119740 + }, + { + "epoch": 0.9906109111965918, + "grad_norm": 886.9328002929688, + "learning_rate": 3.179241529166099e-08, + "loss": 82.0412, + "step": 119750 + }, + { + "epoch": 0.990693634445961, + "grad_norm": 930.1018676757812, + "learning_rate": 3.163381954027822e-08, + "loss": 77.0731, + "step": 119760 + }, + { + "epoch": 0.9907763576953302, + "grad_norm": 1377.98388671875, + "learning_rate": 3.147561909899066e-08, + "loss": 89.5693, + "step": 119770 + }, + { + "epoch": 0.9908590809446995, + "grad_norm": 943.3343505859375, + "learning_rate": 3.131781398038547e-08, + "loss": 82.712, + "step": 119780 + }, + { + "epoch": 0.9909418041940687, + "grad_norm": 901.633056640625, + "learning_rate": 3.1160404197018155e-08, + "loss": 70.4035, + "step": 119790 + }, + { + "epoch": 0.9910245274434379, + "grad_norm": 1146.6190185546875, + "learning_rate": 3.100338976141426e-08, + "loss": 68.581, + "step": 119800 + }, + { + "epoch": 0.9911072506928073, + "grad_norm": 1056.26904296875, + "learning_rate": 3.084677068606545e-08, + "loss": 76.8607, + "step": 119810 + }, + { + "epoch": 0.9911899739421764, + "grad_norm": 531.5973510742188, + "learning_rate": 3.0690546983433986e-08, + "loss": 101.3226, + "step": 119820 + }, + { + "epoch": 0.9912726971915456, + "grad_norm": 1205.59521484375, + "learning_rate": 3.053471866594993e-08, + "loss": 120.3366, + "step": 119830 + }, + { + "epoch": 0.991355420440915, + "grad_norm": 904.2116088867188, + "learning_rate": 3.0379285746011125e-08, + "loss": 75.5825, + "step": 119840 + }, + { + "epoch": 
0.9914381436902842, + "grad_norm": 891.4710083007812, + "learning_rate": 3.022424823598546e-08, + "loss": 89.1019, + "step": 119850 + }, + { + "epoch": 0.9915208669396534, + "grad_norm": 869.971923828125, + "learning_rate": 3.0069606148208085e-08, + "loss": 88.6072, + "step": 119860 + }, + { + "epoch": 0.9916035901890227, + "grad_norm": 634.763916015625, + "learning_rate": 2.991535949498303e-08, + "loss": 87.1923, + "step": 119870 + }, + { + "epoch": 0.9916863134383919, + "grad_norm": 789.7999877929688, + "learning_rate": 2.9761508288583262e-08, + "loss": 72.2758, + "step": 119880 + }, + { + "epoch": 0.9917690366877611, + "grad_norm": 535.9653930664062, + "learning_rate": 2.96080525412501e-08, + "loss": 92.1307, + "step": 119890 + }, + { + "epoch": 0.9918517599371304, + "grad_norm": 1039.1324462890625, + "learning_rate": 2.9454992265193216e-08, + "loss": 54.078, + "step": 119900 + }, + { + "epoch": 0.9919344831864996, + "grad_norm": 616.3204345703125, + "learning_rate": 2.9302327472590653e-08, + "loss": 85.4111, + "step": 119910 + }, + { + "epoch": 0.9920172064358688, + "grad_norm": 799.8880615234375, + "learning_rate": 2.9150058175589356e-08, + "loss": 82.8982, + "step": 119920 + }, + { + "epoch": 0.9920999296852381, + "grad_norm": 1155.7872314453125, + "learning_rate": 2.8998184386305196e-08, + "loss": 100.5487, + "step": 119930 + }, + { + "epoch": 0.9921826529346073, + "grad_norm": 1084.304443359375, + "learning_rate": 2.8846706116821834e-08, + "loss": 90.8323, + "step": 119940 + }, + { + "epoch": 0.9922653761839765, + "grad_norm": 1023.7007446289062, + "learning_rate": 2.8695623379191296e-08, + "loss": 71.3728, + "step": 119950 + }, + { + "epoch": 0.9923480994333458, + "grad_norm": 681.5350952148438, + "learning_rate": 2.8544936185434525e-08, + "loss": 76.9926, + "step": 119960 + }, + { + "epoch": 0.992430822682715, + "grad_norm": 1248.9072265625, + "learning_rate": 2.8394644547541373e-08, + "loss": 89.5176, + "step": 119970 + }, + { + "epoch": 0.9925135459320842, + "grad_norm": 609.6895751953125, + "learning_rate": 2.8244748477470052e-08, + "loss": 81.6998, + "step": 119980 + }, + { + "epoch": 0.9925962691814535, + "grad_norm": 660.6539306640625, + "learning_rate": 2.809524798714658e-08, + "loss": 99.3655, + "step": 119990 + }, + { + "epoch": 0.9926789924308227, + "grad_norm": 712.8468017578125, + "learning_rate": 2.7946143088466437e-08, + "loss": 69.3642, + "step": 120000 + }, + { + "epoch": 0.9927617156801919, + "grad_norm": 1156.4256591796875, + "learning_rate": 2.7797433793292915e-08, + "loss": 94.3238, + "step": 120010 + }, + { + "epoch": 0.9928444389295612, + "grad_norm": 956.7645874023438, + "learning_rate": 2.7649120113458217e-08, + "loss": 95.6845, + "step": 120020 + }, + { + "epoch": 0.9929271621789304, + "grad_norm": 726.518310546875, + "learning_rate": 2.7501202060763454e-08, + "loss": 83.2681, + "step": 120030 + }, + { + "epoch": 0.9930098854282996, + "grad_norm": 1263.5406494140625, + "learning_rate": 2.7353679646976995e-08, + "loss": 91.9831, + "step": 120040 + }, + { + "epoch": 0.9930926086776689, + "grad_norm": 1196.4813232421875, + "learning_rate": 2.7206552883836667e-08, + "loss": 97.7018, + "step": 120050 + }, + { + "epoch": 0.9931753319270381, + "grad_norm": 481.29241943359375, + "learning_rate": 2.705982178304922e-08, + "loss": 94.1881, + "step": 120060 + }, + { + "epoch": 0.9932580551764073, + "grad_norm": 627.6724853515625, + "learning_rate": 2.691348635628921e-08, + "loss": 86.9981, + "step": 120070 + }, + { + "epoch": 0.9933407784257766, + "grad_norm": 
792.4179077148438, + "learning_rate": 2.676754661519898e-08, + "loss": 85.3965, + "step": 120080 + }, + { + "epoch": 0.9934235016751458, + "grad_norm": 1112.1466064453125, + "learning_rate": 2.662200257139147e-08, + "loss": 107.8607, + "step": 120090 + }, + { + "epoch": 0.993506224924515, + "grad_norm": 851.7528076171875, + "learning_rate": 2.6476854236446858e-08, + "loss": 81.0411, + "step": 120100 + }, + { + "epoch": 0.9935889481738843, + "grad_norm": 679.0064697265625, + "learning_rate": 2.6332101621913133e-08, + "loss": 100.3239, + "step": 120110 + }, + { + "epoch": 0.9936716714232535, + "grad_norm": 970.4327392578125, + "learning_rate": 2.6187744739308297e-08, + "loss": 93.8976, + "step": 120120 + }, + { + "epoch": 0.9937543946726227, + "grad_norm": 783.4990234375, + "learning_rate": 2.604378360011761e-08, + "loss": 81.577, + "step": 120130 + }, + { + "epoch": 0.993837117921992, + "grad_norm": 464.9545593261719, + "learning_rate": 2.5900218215795802e-08, + "loss": 64.9535, + "step": 120140 + }, + { + "epoch": 0.9939198411713612, + "grad_norm": 1092.5909423828125, + "learning_rate": 2.57570485977654e-08, + "loss": 81.3496, + "step": 120150 + }, + { + "epoch": 0.9940025644207304, + "grad_norm": 300.0126647949219, + "learning_rate": 2.5614274757417846e-08, + "loss": 73.0116, + "step": 120160 + }, + { + "epoch": 0.9940852876700996, + "grad_norm": 1496.8515625, + "learning_rate": 2.5471896706113497e-08, + "loss": 90.5071, + "step": 120170 + }, + { + "epoch": 0.9941680109194689, + "grad_norm": 827.8729248046875, + "learning_rate": 2.5329914455180516e-08, + "loss": 68.6131, + "step": 120180 + }, + { + "epoch": 0.9942507341688381, + "grad_norm": 1299.62744140625, + "learning_rate": 2.5188328015914865e-08, + "loss": 96.4743, + "step": 120190 + }, + { + "epoch": 0.9943334574182073, + "grad_norm": 1085.9984130859375, + "learning_rate": 2.5047137399583088e-08, + "loss": 90.0169, + "step": 120200 + }, + { + "epoch": 0.9944161806675766, + "grad_norm": 712.6924438476562, + "learning_rate": 2.4906342617418976e-08, + "loss": 122.7976, + "step": 120210 + }, + { + "epoch": 0.9944989039169458, + "grad_norm": 1067.037109375, + "learning_rate": 2.4765943680624126e-08, + "loss": 105.9706, + "step": 120220 + }, + { + "epoch": 0.994581627166315, + "grad_norm": 1210.1380615234375, + "learning_rate": 2.4625940600369603e-08, + "loss": 77.7456, + "step": 120230 + }, + { + "epoch": 0.9946643504156844, + "grad_norm": 626.2311401367188, + "learning_rate": 2.4486333387795935e-08, + "loss": 73.6353, + "step": 120240 + }, + { + "epoch": 0.9947470736650535, + "grad_norm": 1163.8887939453125, + "learning_rate": 2.434712205400924e-08, + "loss": 72.8398, + "step": 120250 + }, + { + "epoch": 0.9948297969144227, + "grad_norm": 703.2742919921875, + "learning_rate": 2.4208306610087884e-08, + "loss": 88.9687, + "step": 120260 + }, + { + "epoch": 0.9949125201637921, + "grad_norm": 1137.736572265625, + "learning_rate": 2.406988706707525e-08, + "loss": 121.851, + "step": 120270 + }, + { + "epoch": 0.9949952434131613, + "grad_norm": 653.0780029296875, + "learning_rate": 2.3931863435985303e-08, + "loss": 79.4951, + "step": 120280 + }, + { + "epoch": 0.9950779666625305, + "grad_norm": 1000.9141845703125, + "learning_rate": 2.379423572779982e-08, + "loss": 72.5086, + "step": 120290 + }, + { + "epoch": 0.9951606899118998, + "grad_norm": 633.1144409179688, + "learning_rate": 2.3657003953468926e-08, + "loss": 94.2369, + "step": 120300 + }, + { + "epoch": 0.995243413161269, + "grad_norm": 827.4677734375, + "learning_rate": 
2.352016812391278e-08, + "loss": 79.3089, + "step": 120310 + }, + { + "epoch": 0.9953261364106382, + "grad_norm": 759.8338012695312, + "learning_rate": 2.3383728250017112e-08, + "loss": 57.8192, + "step": 120320 + }, + { + "epoch": 0.9954088596600075, + "grad_norm": 735.8635864257812, + "learning_rate": 2.3247684342639355e-08, + "loss": 92.8142, + "step": 120330 + }, + { + "epoch": 0.9954915829093767, + "grad_norm": 739.5372314453125, + "learning_rate": 2.311203641260251e-08, + "loss": 82.3384, + "step": 120340 + }, + { + "epoch": 0.9955743061587459, + "grad_norm": 755.5853881835938, + "learning_rate": 2.2976784470700174e-08, + "loss": 117.4208, + "step": 120350 + }, + { + "epoch": 0.9956570294081152, + "grad_norm": 1694.4178466796875, + "learning_rate": 2.284192852769429e-08, + "loss": 125.9178, + "step": 120360 + }, + { + "epoch": 0.9957397526574844, + "grad_norm": 635.702880859375, + "learning_rate": 2.2707468594313497e-08, + "loss": 72.2283, + "step": 120370 + }, + { + "epoch": 0.9958224759068536, + "grad_norm": 1230.975341796875, + "learning_rate": 2.2573404681256463e-08, + "loss": 123.6782, + "step": 120380 + }, + { + "epoch": 0.9959051991562229, + "grad_norm": 678.0013427734375, + "learning_rate": 2.243973679919076e-08, + "loss": 80.5772, + "step": 120390 + }, + { + "epoch": 0.9959879224055921, + "grad_norm": 758.3480224609375, + "learning_rate": 2.2306464958751217e-08, + "loss": 67.9236, + "step": 120400 + }, + { + "epoch": 0.9960706456549613, + "grad_norm": 788.4033203125, + "learning_rate": 2.2173589170541576e-08, + "loss": 79.5632, + "step": 120410 + }, + { + "epoch": 0.9961533689043306, + "grad_norm": 529.8211059570312, + "learning_rate": 2.204110944513449e-08, + "loss": 113.7574, + "step": 120420 + }, + { + "epoch": 0.9962360921536998, + "grad_norm": 686.330078125, + "learning_rate": 2.1909025793070416e-08, + "loss": 67.1674, + "step": 120430 + }, + { + "epoch": 0.996318815403069, + "grad_norm": 659.1135864257812, + "learning_rate": 2.1777338224859278e-08, + "loss": 113.0164, + "step": 120440 + }, + { + "epoch": 0.9964015386524383, + "grad_norm": 969.425048828125, + "learning_rate": 2.1646046750978255e-08, + "loss": 99.7389, + "step": 120450 + }, + { + "epoch": 0.9964842619018075, + "grad_norm": 861.7916870117188, + "learning_rate": 2.1515151381873435e-08, + "loss": 84.4122, + "step": 120460 + }, + { + "epoch": 0.9965669851511767, + "grad_norm": 389.3373718261719, + "learning_rate": 2.1384652127959816e-08, + "loss": 83.2005, + "step": 120470 + }, + { + "epoch": 0.996649708400546, + "grad_norm": 1106.470458984375, + "learning_rate": 2.125454899962076e-08, + "loss": 96.3944, + "step": 120480 + }, + { + "epoch": 0.9967324316499152, + "grad_norm": 1094.505859375, + "learning_rate": 2.112484200720799e-08, + "loss": 83.0741, + "step": 120490 + }, + { + "epoch": 0.9968151548992844, + "grad_norm": 439.3631896972656, + "learning_rate": 2.0995531161041028e-08, + "loss": 97.843, + "step": 120500 + }, + { + "epoch": 0.9968978781486537, + "grad_norm": 987.277099609375, + "learning_rate": 2.0866616471409974e-08, + "loss": 105.7594, + "step": 120510 + }, + { + "epoch": 0.9969806013980229, + "grad_norm": 647.2058715820312, + "learning_rate": 2.0738097948570514e-08, + "loss": 68.505, + "step": 120520 + }, + { + "epoch": 0.9970633246473921, + "grad_norm": 1555.4075927734375, + "learning_rate": 2.0609975602749465e-08, + "loss": 75.6344, + "step": 120530 + }, + { + "epoch": 0.9971460478967614, + "grad_norm": 1339.879638671875, + "learning_rate": 2.048224944413979e-08, + "loss": 79.3426, + "step": 
120540 + }, + { + "epoch": 0.9972287711461306, + "grad_norm": 446.56878662109375, + "learning_rate": 2.035491948290502e-08, + "loss": 111.5274, + "step": 120550 + }, + { + "epoch": 0.9973114943954998, + "grad_norm": 896.1454467773438, + "learning_rate": 2.0227985729175393e-08, + "loss": 93.6553, + "step": 120560 + }, + { + "epoch": 0.9973942176448691, + "grad_norm": 909.8489990234375, + "learning_rate": 2.0101448193051153e-08, + "loss": 81.4754, + "step": 120570 + }, + { + "epoch": 0.9974769408942383, + "grad_norm": 680.5419921875, + "learning_rate": 1.997530688459981e-08, + "loss": 64.9978, + "step": 120580 + }, + { + "epoch": 0.9975596641436075, + "grad_norm": 970.2647705078125, + "learning_rate": 1.984956181385833e-08, + "loss": 98.7731, + "step": 120590 + }, + { + "epoch": 0.9976423873929768, + "grad_norm": 1299.3114013671875, + "learning_rate": 1.9724212990830938e-08, + "loss": 73.416, + "step": 120600 + }, + { + "epoch": 0.997725110642346, + "grad_norm": 1053.347900390625, + "learning_rate": 1.9599260425491873e-08, + "loss": 82.9828, + "step": 120610 + }, + { + "epoch": 0.9978078338917152, + "grad_norm": 1263.4722900390625, + "learning_rate": 1.9474704127783184e-08, + "loss": 76.9199, + "step": 120620 + }, + { + "epoch": 0.9978905571410845, + "grad_norm": 490.1828918457031, + "learning_rate": 1.9350544107614165e-08, + "loss": 77.5371, + "step": 120630 + }, + { + "epoch": 0.9979732803904537, + "grad_norm": 1117.427734375, + "learning_rate": 1.9226780374864695e-08, + "loss": 103.023, + "step": 120640 + }, + { + "epoch": 0.9980560036398229, + "grad_norm": 476.96221923828125, + "learning_rate": 1.9103412939381338e-08, + "loss": 69.9926, + "step": 120650 + }, + { + "epoch": 0.9981387268891923, + "grad_norm": 722.0250854492188, + "learning_rate": 1.898044181098013e-08, + "loss": 101.3328, + "step": 120660 + }, + { + "epoch": 0.9982214501385615, + "grad_norm": 736.2235107421875, + "learning_rate": 1.885786699944492e-08, + "loss": 79.6206, + "step": 120670 + }, + { + "epoch": 0.9983041733879306, + "grad_norm": 1309.5517578125, + "learning_rate": 1.873568851452956e-08, + "loss": 78.2351, + "step": 120680 + }, + { + "epoch": 0.9983868966373, + "grad_norm": 752.5504150390625, + "learning_rate": 1.8613906365954616e-08, + "loss": 87.0025, + "step": 120690 + }, + { + "epoch": 0.9984696198866692, + "grad_norm": 815.6478881835938, + "learning_rate": 1.8492520563409555e-08, + "loss": 64.3488, + "step": 120700 + }, + { + "epoch": 0.9985523431360384, + "grad_norm": 664.5618286132812, + "learning_rate": 1.837153111655221e-08, + "loss": 70.5869, + "step": 120710 + }, + { + "epoch": 0.9986350663854077, + "grad_norm": 1548.3511962890625, + "learning_rate": 1.825093803500988e-08, + "loss": 103.4076, + "step": 120720 + }, + { + "epoch": 0.9987177896347769, + "grad_norm": 667.9990234375, + "learning_rate": 1.813074132837711e-08, + "loss": 93.5051, + "step": 120730 + }, + { + "epoch": 0.9988005128841461, + "grad_norm": 587.8815307617188, + "learning_rate": 1.801094100621792e-08, + "loss": 84.6205, + "step": 120740 + }, + { + "epoch": 0.9988832361335154, + "grad_norm": 895.0107421875, + "learning_rate": 1.789153707806357e-08, + "loss": 76.1649, + "step": 120750 + }, + { + "epoch": 0.9989659593828846, + "grad_norm": 657.9972534179688, + "learning_rate": 1.7772529553414798e-08, + "loss": 74.9104, + "step": 120760 + }, + { + "epoch": 0.9990486826322538, + "grad_norm": 1787.9937744140625, + "learning_rate": 1.7653918441740693e-08, + "loss": 109.789, + "step": 120770 + }, + { + "epoch": 0.9991314058816231, + 
"grad_norm": 562.6156616210938, + "learning_rate": 1.753570375247815e-08, + "loss": 76.8233, + "step": 120780 + }, + { + "epoch": 0.9992141291309923, + "grad_norm": 759.29150390625, + "learning_rate": 1.7417885495033537e-08, + "loss": 86.3301, + "step": 120790 + }, + { + "epoch": 0.9992968523803615, + "grad_norm": 533.1914672851562, + "learning_rate": 1.730046367878102e-08, + "loss": 123.4786, + "step": 120800 + }, + { + "epoch": 0.9993795756297308, + "grad_norm": 1021.0844116210938, + "learning_rate": 1.7183438313062573e-08, + "loss": 79.5246, + "step": 120810 + }, + { + "epoch": 0.9994622988791, + "grad_norm": 579.9783935546875, + "learning_rate": 1.7066809407190187e-08, + "loss": 84.2175, + "step": 120820 + }, + { + "epoch": 0.9995450221284692, + "grad_norm": 669.8778076171875, + "learning_rate": 1.695057697044311e-08, + "loss": 100.9156, + "step": 120830 + }, + { + "epoch": 0.9996277453778385, + "grad_norm": 494.1153259277344, + "learning_rate": 1.68347410120695e-08, + "loss": 78.8109, + "step": 120840 + }, + { + "epoch": 0.9997104686272077, + "grad_norm": 1579.7835693359375, + "learning_rate": 1.671930154128587e-08, + "loss": 84.4901, + "step": 120850 + }, + { + "epoch": 0.9997931918765769, + "grad_norm": 1268.185546875, + "learning_rate": 1.6604258567277652e-08, + "loss": 77.1438, + "step": 120860 + }, + { + "epoch": 0.9998759151259462, + "grad_norm": 618.141845703125, + "learning_rate": 1.6489612099197527e-08, + "loss": 88.3295, + "step": 120870 + }, + { + "epoch": 0.9999586383753154, + "grad_norm": 1028.1591796875, + "learning_rate": 1.637536214616764e-08, + "loss": 69.9013, + "step": 120880 + }, + { + "epoch": 1.0, + "eval_loss": 76.04621887207031, + "eval_runtime": 214.3291, + "eval_samples_per_second": 45.579, + "eval_steps_per_second": 5.702, + "step": 120885 + }, + { + "epoch": 1.0000413616246846, + "grad_norm": 835.1196899414062, + "learning_rate": 1.6261508717278497e-08, + "loss": 103.3933, + "step": 120890 + }, + { + "epoch": 1.0001240848740538, + "grad_norm": 625.5361938476562, + "learning_rate": 1.614805182158896e-08, + "loss": 81.46, + "step": 120900 + }, + { + "epoch": 1.000206808123423, + "grad_norm": 715.9172973632812, + "learning_rate": 1.603499146812626e-08, + "loss": 85.437, + "step": 120910 + }, + { + "epoch": 1.0002895313727924, + "grad_norm": 975.20361328125, + "learning_rate": 1.5922327665885416e-08, + "loss": 100.9134, + "step": 120920 + }, + { + "epoch": 1.0003722546221616, + "grad_norm": 1666.02734375, + "learning_rate": 1.5810060423831487e-08, + "loss": 88.4254, + "step": 120930 + }, + { + "epoch": 1.0004549778715308, + "grad_norm": 772.5983276367188, + "learning_rate": 1.5698189750896762e-08, + "loss": 97.352, + "step": 120940 + }, + { + "epoch": 1.0005377011209, + "grad_norm": 1283.052490234375, + "learning_rate": 1.5586715655982463e-08, + "loss": 107.4238, + "step": 120950 + }, + { + "epoch": 1.0006204243702692, + "grad_norm": 646.5191650390625, + "learning_rate": 1.5475638147957607e-08, + "loss": 72.1843, + "step": 120960 + }, + { + "epoch": 1.0007031476196384, + "grad_norm": 777.9674072265625, + "learning_rate": 1.5364957235660115e-08, + "loss": 90.0959, + "step": 120970 + }, + { + "epoch": 1.0007858708690078, + "grad_norm": 364.2453308105469, + "learning_rate": 1.525467292789684e-08, + "loss": 74.3597, + "step": 120980 + }, + { + "epoch": 1.000868594118377, + "grad_norm": 626.4849853515625, + "learning_rate": 1.5144785233442428e-08, + "loss": 70.714, + "step": 120990 + }, + { + "epoch": 1.0009513173677462, + "grad_norm": 418.33367919921875, + 
"learning_rate": 1.5035294161039882e-08, + "loss": 81.4252, + "step": 121000 + }, + { + "epoch": 1.0010340406171154, + "grad_norm": 1237.5550537109375, + "learning_rate": 1.4926199719401124e-08, + "loss": 93.9281, + "step": 121010 + }, + { + "epoch": 1.0011167638664846, + "grad_norm": 523.1364135742188, + "learning_rate": 1.4817501917205879e-08, + "loss": 67.6458, + "step": 121020 + }, + { + "epoch": 1.0011994871158538, + "grad_norm": 531.9727172851562, + "learning_rate": 1.4709200763103892e-08, + "loss": 67.4297, + "step": 121030 + }, + { + "epoch": 1.0012822103652232, + "grad_norm": 2390.410888671875, + "learning_rate": 1.460129626571105e-08, + "loss": 97.8159, + "step": 121040 + }, + { + "epoch": 1.0013649336145924, + "grad_norm": 923.2665405273438, + "learning_rate": 1.449378843361271e-08, + "loss": 96.3494, + "step": 121050 + }, + { + "epoch": 1.0014476568639616, + "grad_norm": 484.8533630371094, + "learning_rate": 1.4386677275363692e-08, + "loss": 62.1874, + "step": 121060 + }, + { + "epoch": 1.0015303801133308, + "grad_norm": 816.00146484375, + "learning_rate": 1.4279962799486069e-08, + "loss": 85.6988, + "step": 121070 + }, + { + "epoch": 1.0016131033627, + "grad_norm": 547.6641235351562, + "learning_rate": 1.4173645014470272e-08, + "loss": 101.0872, + "step": 121080 + }, + { + "epoch": 1.0016958266120692, + "grad_norm": 648.3076782226562, + "learning_rate": 1.4067723928775645e-08, + "loss": 74.9175, + "step": 121090 + }, + { + "epoch": 1.0017785498614387, + "grad_norm": 828.8029174804688, + "learning_rate": 1.3962199550829892e-08, + "loss": 83.7772, + "step": 121100 + }, + { + "epoch": 1.0018612731108079, + "grad_norm": 672.8508911132812, + "learning_rate": 1.3857071889029073e-08, + "loss": 99.7323, + "step": 121110 + }, + { + "epoch": 1.001943996360177, + "grad_norm": 847.6339111328125, + "learning_rate": 1.3752340951737609e-08, + "loss": 83.5709, + "step": 121120 + }, + { + "epoch": 1.0020267196095463, + "grad_norm": 1562.5223388671875, + "learning_rate": 1.3648006747288833e-08, + "loss": 85.9273, + "step": 121130 + }, + { + "epoch": 1.0021094428589155, + "grad_norm": 1429.5631103515625, + "learning_rate": 1.354406928398333e-08, + "loss": 100.3321, + "step": 121140 + }, + { + "epoch": 1.0021921661082847, + "grad_norm": 858.3600463867188, + "learning_rate": 1.3440528570092259e-08, + "loss": 88.2199, + "step": 121150 + }, + { + "epoch": 1.002274889357654, + "grad_norm": 965.1406860351562, + "learning_rate": 1.3337384613852922e-08, + "loss": 71.9796, + "step": 121160 + }, + { + "epoch": 1.0023576126070233, + "grad_norm": 1044.383056640625, + "learning_rate": 1.3234637423472085e-08, + "loss": 76.3436, + "step": 121170 + }, + { + "epoch": 1.0024403358563925, + "grad_norm": 696.2551879882812, + "learning_rate": 1.3132287007124877e-08, + "loss": 70.0724, + "step": 121180 + }, + { + "epoch": 1.0025230591057617, + "grad_norm": 814.8746948242188, + "learning_rate": 1.3030333372954784e-08, + "loss": 69.551, + "step": 121190 + }, + { + "epoch": 1.0026057823551309, + "grad_norm": 665.35009765625, + "learning_rate": 1.2928776529074205e-08, + "loss": 96.7717, + "step": 121200 + }, + { + "epoch": 1.0026885056045, + "grad_norm": 1074.0308837890625, + "learning_rate": 1.2827616483563343e-08, + "loss": 147.5443, + "step": 121210 + }, + { + "epoch": 1.0027712288538695, + "grad_norm": 541.9178466796875, + "learning_rate": 1.2726853244471316e-08, + "loss": 80.1292, + "step": 121220 + }, + { + "epoch": 1.0028539521032387, + "grad_norm": 1141.502685546875, + "learning_rate": 1.2626486819814488e-08, + 
"loss": 112.4385, + "step": 121230 + }, + { + "epoch": 1.0029366753526079, + "grad_norm": 396.3456115722656, + "learning_rate": 1.2526517217579248e-08, + "loss": 74.7738, + "step": 121240 + }, + { + "epoch": 1.003019398601977, + "grad_norm": 824.7189331054688, + "learning_rate": 1.2426944445719791e-08, + "loss": 81.8229, + "step": 121250 + }, + { + "epoch": 1.0031021218513463, + "grad_norm": 322.1405334472656, + "learning_rate": 1.2327768512158667e-08, + "loss": 62.011, + "step": 121260 + }, + { + "epoch": 1.0031848451007155, + "grad_norm": 780.4888916015625, + "learning_rate": 1.2228989424786786e-08, + "loss": 79.2352, + "step": 121270 + }, + { + "epoch": 1.0032675683500847, + "grad_norm": 664.0239868164062, + "learning_rate": 1.2130607191462863e-08, + "loss": 78.1969, + "step": 121280 + }, + { + "epoch": 1.003350291599454, + "grad_norm": 737.7160034179688, + "learning_rate": 1.2032621820015633e-08, + "loss": 89.1368, + "step": 121290 + }, + { + "epoch": 1.0034330148488233, + "grad_norm": 988.8171997070312, + "learning_rate": 1.1935033318241084e-08, + "loss": 84.0071, + "step": 121300 + }, + { + "epoch": 1.0035157380981925, + "grad_norm": 708.0661010742188, + "learning_rate": 1.1837841693904118e-08, + "loss": 72.5567, + "step": 121310 + }, + { + "epoch": 1.0035984613475617, + "grad_norm": 664.5126342773438, + "learning_rate": 1.174104695473688e-08, + "loss": 67.6821, + "step": 121320 + }, + { + "epoch": 1.003681184596931, + "grad_norm": 548.2943725585938, + "learning_rate": 1.1644649108441542e-08, + "loss": 95.8433, + "step": 121330 + }, + { + "epoch": 1.0037639078463, + "grad_norm": 988.5643310546875, + "learning_rate": 1.1548648162688081e-08, + "loss": 87.8513, + "step": 121340 + }, + { + "epoch": 1.0038466310956695, + "grad_norm": 895.0706176757812, + "learning_rate": 1.1453044125114832e-08, + "loss": 89.9198, + "step": 121350 + }, + { + "epoch": 1.0039293543450387, + "grad_norm": 858.029296875, + "learning_rate": 1.1357837003329042e-08, + "loss": 67.4552, + "step": 121360 + }, + { + "epoch": 1.004012077594408, + "grad_norm": 605.3368530273438, + "learning_rate": 1.1263026804904653e-08, + "loss": 54.8068, + "step": 121370 + }, + { + "epoch": 1.0040948008437771, + "grad_norm": 557.933349609375, + "learning_rate": 1.1168613537386186e-08, + "loss": 79.3044, + "step": 121380 + }, + { + "epoch": 1.0041775240931463, + "grad_norm": 797.0153198242188, + "learning_rate": 1.1074597208285965e-08, + "loss": 85.258, + "step": 121390 + }, + { + "epoch": 1.0042602473425155, + "grad_norm": 749.7955932617188, + "learning_rate": 1.0980977825083561e-08, + "loss": 67.815, + "step": 121400 + }, + { + "epoch": 1.004342970591885, + "grad_norm": 1231.0704345703125, + "learning_rate": 1.0887755395228018e-08, + "loss": 87.1247, + "step": 121410 + }, + { + "epoch": 1.0044256938412541, + "grad_norm": 730.1659545898438, + "learning_rate": 1.0794929926137287e-08, + "loss": 87.4435, + "step": 121420 + }, + { + "epoch": 1.0045084170906233, + "grad_norm": 1234.9593505859375, + "learning_rate": 1.0702501425196576e-08, + "loss": 78.5555, + "step": 121430 + }, + { + "epoch": 1.0045911403399925, + "grad_norm": 728.4467163085938, + "learning_rate": 1.0610469899760001e-08, + "loss": 91.6618, + "step": 121440 + }, + { + "epoch": 1.0046738635893617, + "grad_norm": 939.027099609375, + "learning_rate": 1.0518835357150036e-08, + "loss": 77.4593, + "step": 121450 + }, + { + "epoch": 1.004756586838731, + "grad_norm": 785.4007568359375, + "learning_rate": 1.0427597804657518e-08, + "loss": 100.9078, + "step": 121460 + }, + { + 
"epoch": 1.0048393100881003, + "grad_norm": 932.05224609375, + "learning_rate": 1.0336757249542195e-08, + "loss": 77.0113, + "step": 121470 + }, + { + "epoch": 1.0049220333374695, + "grad_norm": 836.0494995117188, + "learning_rate": 1.0246313699031618e-08, + "loss": 100.5597, + "step": 121480 + }, + { + "epoch": 1.0050047565868387, + "grad_norm": 1092.8907470703125, + "learning_rate": 1.0156267160322253e-08, + "loss": 95.8895, + "step": 121490 + }, + { + "epoch": 1.005087479836208, + "grad_norm": 1151.7713623046875, + "learning_rate": 1.006661764057837e-08, + "loss": 119.7996, + "step": 121500 + }, + { + "epoch": 1.0051702030855771, + "grad_norm": 1046.9149169921875, + "learning_rate": 9.977365146932595e-09, + "loss": 54.5647, + "step": 121510 + }, + { + "epoch": 1.0052529263349463, + "grad_norm": 482.7346496582031, + "learning_rate": 9.888509686487025e-09, + "loss": 66.3425, + "step": 121520 + }, + { + "epoch": 1.0053356495843158, + "grad_norm": 771.5325927734375, + "learning_rate": 9.800051266311006e-09, + "loss": 84.2417, + "step": 121530 + }, + { + "epoch": 1.005418372833685, + "grad_norm": 931.7367553710938, + "learning_rate": 9.711989893443353e-09, + "loss": 84.3377, + "step": 121540 + }, + { + "epoch": 1.0055010960830542, + "grad_norm": 843.2858276367188, + "learning_rate": 9.624325574890125e-09, + "loss": 74.5406, + "step": 121550 + }, + { + "epoch": 1.0055838193324234, + "grad_norm": 802.0359497070312, + "learning_rate": 9.537058317626857e-09, + "loss": 72.5852, + "step": 121560 + }, + { + "epoch": 1.0056665425817926, + "grad_norm": 631.546875, + "learning_rate": 9.450188128596328e-09, + "loss": 87.635, + "step": 121570 + }, + { + "epoch": 1.0057492658311618, + "grad_norm": 947.8718872070312, + "learning_rate": 9.363715014710784e-09, + "loss": 92.5211, + "step": 121580 + }, + { + "epoch": 1.0058319890805312, + "grad_norm": 304.4026184082031, + "learning_rate": 9.277638982850835e-09, + "loss": 99.9868, + "step": 121590 + }, + { + "epoch": 1.0059147123299004, + "grad_norm": 926.6283569335938, + "learning_rate": 9.191960039864334e-09, + "loss": 78.2546, + "step": 121600 + }, + { + "epoch": 1.0059974355792696, + "grad_norm": 812.9754028320312, + "learning_rate": 9.106678192569718e-09, + "loss": 71.0503, + "step": 121610 + }, + { + "epoch": 1.0060801588286388, + "grad_norm": 1050.507080078125, + "learning_rate": 9.021793447750448e-09, + "loss": 83.0579, + "step": 121620 + }, + { + "epoch": 1.006162882078008, + "grad_norm": 738.1742553710938, + "learning_rate": 8.937305812162234e-09, + "loss": 83.2435, + "step": 121630 + }, + { + "epoch": 1.0062456053273772, + "grad_norm": 946.2736206054688, + "learning_rate": 8.853215292526917e-09, + "loss": 77.3213, + "step": 121640 + }, + { + "epoch": 1.0063283285767466, + "grad_norm": 717.0030517578125, + "learning_rate": 8.769521895534705e-09, + "loss": 60.9286, + "step": 121650 + }, + { + "epoch": 1.0064110518261158, + "grad_norm": 722.735107421875, + "learning_rate": 8.686225627845268e-09, + "loss": 60.4515, + "step": 121660 + }, + { + "epoch": 1.006493775075485, + "grad_norm": 477.0111083984375, + "learning_rate": 8.603326496085529e-09, + "loss": 103.6363, + "step": 121670 + }, + { + "epoch": 1.0065764983248542, + "grad_norm": 554.1845092773438, + "learning_rate": 8.520824506851877e-09, + "loss": 88.1589, + "step": 121680 + }, + { + "epoch": 1.0066592215742234, + "grad_norm": 795.9306640625, + "learning_rate": 8.438719666707951e-09, + "loss": 70.1685, + "step": 121690 + }, + { + "epoch": 1.0067419448235926, + "grad_norm": 959.230224609375, + 
"learning_rate": 8.357011982187412e-09, + "loss": 82.1214, + "step": 121700 + }, + { + "epoch": 1.006824668072962, + "grad_norm": 684.617919921875, + "learning_rate": 8.27570145979062e-09, + "loss": 75.703, + "step": 121710 + }, + { + "epoch": 1.0069073913223312, + "grad_norm": 424.90740966796875, + "learning_rate": 8.194788105987395e-09, + "loss": 74.4388, + "step": 121720 + }, + { + "epoch": 1.0069901145717004, + "grad_norm": 777.08251953125, + "learning_rate": 8.114271927215923e-09, + "loss": 97.2741, + "step": 121730 + }, + { + "epoch": 1.0070728378210696, + "grad_norm": 970.0604858398438, + "learning_rate": 8.034152929881633e-09, + "loss": 78.7951, + "step": 121740 + }, + { + "epoch": 1.0071555610704388, + "grad_norm": 800.564697265625, + "learning_rate": 7.954431120359985e-09, + "loss": 73.0719, + "step": 121750 + }, + { + "epoch": 1.007238284319808, + "grad_norm": 970.1133422851562, + "learning_rate": 7.875106504994234e-09, + "loss": 84.3109, + "step": 121760 + }, + { + "epoch": 1.0073210075691774, + "grad_norm": 535.0064086914062, + "learning_rate": 7.796179090094891e-09, + "loss": 114.3683, + "step": 121770 + }, + { + "epoch": 1.0074037308185466, + "grad_norm": 1022.1173095703125, + "learning_rate": 7.71764888194304e-09, + "loss": 74.4927, + "step": 121780 + }, + { + "epoch": 1.0074864540679158, + "grad_norm": 387.936767578125, + "learning_rate": 7.63951588678591e-09, + "loss": 93.5733, + "step": 121790 + }, + { + "epoch": 1.007569177317285, + "grad_norm": 637.2479248046875, + "learning_rate": 7.561780110840744e-09, + "loss": 64.9509, + "step": 121800 + }, + { + "epoch": 1.0076519005666542, + "grad_norm": 817.9259033203125, + "learning_rate": 7.484441560292599e-09, + "loss": 107.0969, + "step": 121810 + }, + { + "epoch": 1.0077346238160234, + "grad_norm": 1081.365234375, + "learning_rate": 7.407500241294885e-09, + "loss": 68.617, + "step": 121820 + }, + { + "epoch": 1.0078173470653928, + "grad_norm": 1211.7698974609375, + "learning_rate": 7.3309561599693715e-09, + "loss": 83.9199, + "step": 121830 + }, + { + "epoch": 1.007900070314762, + "grad_norm": 497.444091796875, + "learning_rate": 7.254809322406742e-09, + "loss": 69.8575, + "step": 121840 + }, + { + "epoch": 1.0079827935641312, + "grad_norm": 490.0628967285156, + "learning_rate": 7.1790597346649286e-09, + "loss": 70.6277, + "step": 121850 + }, + { + "epoch": 1.0080655168135004, + "grad_norm": 1028.51953125, + "learning_rate": 7.103707402771887e-09, + "loss": 88.6616, + "step": 121860 + }, + { + "epoch": 1.0081482400628696, + "grad_norm": 2735.880126953125, + "learning_rate": 7.028752332722266e-09, + "loss": 106.7273, + "step": 121870 + }, + { + "epoch": 1.0082309633122388, + "grad_norm": 659.5089111328125, + "learning_rate": 6.954194530480185e-09, + "loss": 65.3164, + "step": 121880 + }, + { + "epoch": 1.0083136865616082, + "grad_norm": 610.6782836914062, + "learning_rate": 6.880034001977565e-09, + "loss": 66.6304, + "step": 121890 + }, + { + "epoch": 1.0083964098109774, + "grad_norm": 908.3348999023438, + "learning_rate": 6.806270753115796e-09, + "loss": 91.2019, + "step": 121900 + }, + { + "epoch": 1.0084791330603466, + "grad_norm": 1129.9967041015625, + "learning_rate": 6.732904789762962e-09, + "loss": 87.364, + "step": 121910 + }, + { + "epoch": 1.0085618563097158, + "grad_norm": 1457.5179443359375, + "learning_rate": 6.659936117757171e-09, + "loss": 100.0551, + "step": 121920 + }, + { + "epoch": 1.008644579559085, + "grad_norm": 696.4898681640625, + "learning_rate": 6.587364742903779e-09, + "loss": 98.5237, + "step": 
121930 + }, + { + "epoch": 1.0087273028084542, + "grad_norm": 702.8273315429688, + "learning_rate": 6.515190670977057e-09, + "loss": 54.427, + "step": 121940 + }, + { + "epoch": 1.0088100260578237, + "grad_norm": 1061.605224609375, + "learning_rate": 6.4434139077201865e-09, + "loss": 81.1102, + "step": 121950 + }, + { + "epoch": 1.0088927493071929, + "grad_norm": 623.2269287109375, + "learning_rate": 6.3720344588430464e-09, + "loss": 117.4438, + "step": 121960 + }, + { + "epoch": 1.008975472556562, + "grad_norm": 1200.9871826171875, + "learning_rate": 6.301052330025537e-09, + "loss": 97.576, + "step": 121970 + }, + { + "epoch": 1.0090581958059313, + "grad_norm": 605.901611328125, + "learning_rate": 6.230467526915362e-09, + "loss": 63.3802, + "step": 121980 + }, + { + "epoch": 1.0091409190553005, + "grad_norm": 609.5484619140625, + "learning_rate": 6.160280055128032e-09, + "loss": 70.7963, + "step": 121990 + }, + { + "epoch": 1.0092236423046697, + "grad_norm": 692.6685180664062, + "learning_rate": 6.090489920249076e-09, + "loss": 90.4518, + "step": 122000 + }, + { + "epoch": 1.0093063655540389, + "grad_norm": 900.5830688476562, + "learning_rate": 6.021097127831277e-09, + "loss": 80.2815, + "step": 122010 + }, + { + "epoch": 1.0093890888034083, + "grad_norm": 1926.3387451171875, + "learning_rate": 5.952101683394662e-09, + "loss": 118.7339, + "step": 122020 + }, + { + "epoch": 1.0094718120527775, + "grad_norm": 563.1376953125, + "learning_rate": 5.8835035924303955e-09, + "loss": 93.353, + "step": 122030 + }, + { + "epoch": 1.0095545353021467, + "grad_norm": 601.802978515625, + "learning_rate": 5.815302860395778e-09, + "loss": 94.3208, + "step": 122040 + }, + { + "epoch": 1.0096372585515159, + "grad_norm": 828.7846069335938, + "learning_rate": 5.7474994927170235e-09, + "loss": 67.016, + "step": 122050 + }, + { + "epoch": 1.009719981800885, + "grad_norm": 468.670654296875, + "learning_rate": 5.680093494789263e-09, + "loss": 67.1952, + "step": 122060 + }, + { + "epoch": 1.0098027050502543, + "grad_norm": 799.6488037109375, + "learning_rate": 5.613084871975982e-09, + "loss": 76.9322, + "step": 122070 + }, + { + "epoch": 1.0098854282996237, + "grad_norm": 481.51190185546875, + "learning_rate": 5.546473629607918e-09, + "loss": 68.0958, + "step": 122080 + }, + { + "epoch": 1.0099681515489929, + "grad_norm": 2192.124755859375, + "learning_rate": 5.48025977298583e-09, + "loss": 99.5286, + "step": 122090 + }, + { + "epoch": 1.010050874798362, + "grad_norm": 411.526123046875, + "learning_rate": 5.414443307377171e-09, + "loss": 96.2247, + "step": 122100 + }, + { + "epoch": 1.0101335980477313, + "grad_norm": 194.7018280029297, + "learning_rate": 5.349024238019973e-09, + "loss": 63.8754, + "step": 122110 + }, + { + "epoch": 1.0102163212971005, + "grad_norm": 1099.83154296875, + "learning_rate": 5.284002570117852e-09, + "loss": 108.386, + "step": 122120 + }, + { + "epoch": 1.0102990445464697, + "grad_norm": 622.5518798828125, + "learning_rate": 5.219378308845558e-09, + "loss": 81.0414, + "step": 122130 + }, + { + "epoch": 1.010381767795839, + "grad_norm": 436.0816955566406, + "learning_rate": 5.155151459343977e-09, + "loss": 105.7425, + "step": 122140 + }, + { + "epoch": 1.0104644910452083, + "grad_norm": 1281.2032470703125, + "learning_rate": 5.091322026724022e-09, + "loss": 83.9981, + "step": 122150 + }, + { + "epoch": 1.0105472142945775, + "grad_norm": 1101.347412109375, + "learning_rate": 5.027890016064408e-09, + "loss": 86.2431, + "step": 122160 + }, + { + "epoch": 1.0106299375439467, + "grad_norm": 
490.4996032714844, + "learning_rate": 4.964855432411097e-09, + "loss": 70.88, + "step": 122170 + }, + { + "epoch": 1.010712660793316, + "grad_norm": 584.7899780273438, + "learning_rate": 4.90221828078008e-09, + "loss": 67.8227, + "step": 122180 + }, + { + "epoch": 1.010795384042685, + "grad_norm": 886.4894409179688, + "learning_rate": 4.8399785661557005e-09, + "loss": 79.1609, + "step": 122190 + }, + { + "epoch": 1.0108781072920545, + "grad_norm": 748.0830078125, + "learning_rate": 4.7781362934889995e-09, + "loss": 83.5357, + "step": 122200 + }, + { + "epoch": 1.0109608305414237, + "grad_norm": 582.5003662109375, + "learning_rate": 4.716691467701595e-09, + "loss": 70.7697, + "step": 122210 + }, + { + "epoch": 1.011043553790793, + "grad_norm": 1061.063720703125, + "learning_rate": 4.655644093681244e-09, + "loss": 86.0885, + "step": 122220 + }, + { + "epoch": 1.0111262770401621, + "grad_norm": 800.2383422851562, + "learning_rate": 4.5949941762862826e-09, + "loss": 67.8206, + "step": 122230 + }, + { + "epoch": 1.0112090002895313, + "grad_norm": 889.217041015625, + "learning_rate": 4.5347417203411845e-09, + "loss": 91.7229, + "step": 122240 + }, + { + "epoch": 1.0112917235389005, + "grad_norm": 465.7398681640625, + "learning_rate": 4.474886730641004e-09, + "loss": 76.3832, + "step": 122250 + }, + { + "epoch": 1.01137444678827, + "grad_norm": 638.1264038085938, + "learning_rate": 4.415429211948041e-09, + "loss": 64.9447, + "step": 122260 + }, + { + "epoch": 1.0114571700376391, + "grad_norm": 1501.3109130859375, + "learning_rate": 4.356369168992402e-09, + "loss": 76.8261, + "step": 122270 + }, + { + "epoch": 1.0115398932870083, + "grad_norm": 1041.6864013671875, + "learning_rate": 4.297706606473107e-09, + "loss": 101.5634, + "step": 122280 + }, + { + "epoch": 1.0116226165363775, + "grad_norm": 1001.2974243164062, + "learning_rate": 4.239441529058641e-09, + "loss": 85.1877, + "step": 122290 + }, + { + "epoch": 1.0117053397857467, + "grad_norm": 405.4379577636719, + "learning_rate": 4.181573941384187e-09, + "loss": 76.1107, + "step": 122300 + }, + { + "epoch": 1.011788063035116, + "grad_norm": 670.6312866210938, + "learning_rate": 4.1241038480543945e-09, + "loss": 63.3756, + "step": 122310 + }, + { + "epoch": 1.0118707862844853, + "grad_norm": 1032.002685546875, + "learning_rate": 4.067031253641162e-09, + "loss": 92.0514, + "step": 122320 + }, + { + "epoch": 1.0119535095338545, + "grad_norm": 894.6094970703125, + "learning_rate": 4.010356162686413e-09, + "loss": 166.9267, + "step": 122330 + }, + { + "epoch": 1.0120362327832237, + "grad_norm": 840.5252685546875, + "learning_rate": 3.9540785796993165e-09, + "loss": 72.8894, + "step": 122340 + }, + { + "epoch": 1.012118956032593, + "grad_norm": 867.8668823242188, + "learning_rate": 3.898198509157402e-09, + "loss": 55.9863, + "step": 122350 + }, + { + "epoch": 1.0122016792819621, + "grad_norm": 802.0833129882812, + "learning_rate": 3.842715955506559e-09, + "loss": 79.1176, + "step": 122360 + }, + { + "epoch": 1.0122844025313313, + "grad_norm": 1034.685302734375, + "learning_rate": 3.787630923161589e-09, + "loss": 77.4822, + "step": 122370 + }, + { + "epoch": 1.0123671257807008, + "grad_norm": 506.0362548828125, + "learning_rate": 3.7329434165050975e-09, + "loss": 66.0188, + "step": 122380 + }, + { + "epoch": 1.01244984903007, + "grad_norm": 1035.119384765625, + "learning_rate": 3.6786534398891573e-09, + "loss": 72.05, + "step": 122390 + }, + { + "epoch": 1.0125325722794392, + "grad_norm": 623.9898071289062, + "learning_rate": 3.6247609976319818e-09, + 
"loss": 56.7796, + "step": 122400 + }, + { + "epoch": 1.0126152955288084, + "grad_norm": 1366.751708984375, + "learning_rate": 3.5712660940229184e-09, + "loss": 81.9944, + "step": 122410 + }, + { + "epoch": 1.0126980187781776, + "grad_norm": 689.130615234375, + "learning_rate": 3.518168733317451e-09, + "loss": 130.3806, + "step": 122420 + }, + { + "epoch": 1.0127807420275468, + "grad_norm": 1009.6278686523438, + "learning_rate": 3.4654689197405335e-09, + "loss": 81.2598, + "step": 122430 + }, + { + "epoch": 1.0128634652769162, + "grad_norm": 437.2408142089844, + "learning_rate": 3.413166657485478e-09, + "loss": 80.2506, + "step": 122440 + }, + { + "epoch": 1.0129461885262854, + "grad_norm": 862.9107666015625, + "learning_rate": 3.3612619507134013e-09, + "loss": 128.401, + "step": 122450 + }, + { + "epoch": 1.0130289117756546, + "grad_norm": 486.2399597167969, + "learning_rate": 3.3097548035537776e-09, + "loss": 61.5472, + "step": 122460 + }, + { + "epoch": 1.0131116350250238, + "grad_norm": 731.587890625, + "learning_rate": 3.25864522010555e-09, + "loss": 79.2592, + "step": 122470 + }, + { + "epoch": 1.013194358274393, + "grad_norm": 1046.9647216796875, + "learning_rate": 3.207933204435465e-09, + "loss": 70.5895, + "step": 122480 + }, + { + "epoch": 1.0132770815237622, + "grad_norm": 621.4727172851562, + "learning_rate": 3.157618760577519e-09, + "loss": 69.9926, + "step": 122490 + }, + { + "epoch": 1.0133598047731316, + "grad_norm": 774.1251220703125, + "learning_rate": 3.1077018925351753e-09, + "loss": 124.7276, + "step": 122500 + }, + { + "epoch": 1.0134425280225008, + "grad_norm": 1431.8812255859375, + "learning_rate": 3.0581826042808126e-09, + "loss": 103.817, + "step": 122510 + }, + { + "epoch": 1.01352525127187, + "grad_norm": 441.38287353515625, + "learning_rate": 3.009060899754057e-09, + "loss": 70.7602, + "step": 122520 + }, + { + "epoch": 1.0136079745212392, + "grad_norm": 560.4646606445312, + "learning_rate": 2.960336782862894e-09, + "loss": 81.647, + "step": 122530 + }, + { + "epoch": 1.0136906977706084, + "grad_norm": 600.3382568359375, + "learning_rate": 2.9120102574842212e-09, + "loss": 81.0002, + "step": 122540 + }, + { + "epoch": 1.0137734210199776, + "grad_norm": 1430.2890625, + "learning_rate": 2.8640813274638525e-09, + "loss": 108.3468, + "step": 122550 + }, + { + "epoch": 1.013856144269347, + "grad_norm": 565.4375, + "learning_rate": 2.816549996614293e-09, + "loss": 111.723, + "step": 122560 + }, + { + "epoch": 1.0139388675187162, + "grad_norm": 706.692626953125, + "learning_rate": 2.7694162687186276e-09, + "loss": 75.623, + "step": 122570 + }, + { + "epoch": 1.0140215907680854, + "grad_norm": 2190.73681640625, + "learning_rate": 2.7226801475255248e-09, + "loss": 116.6049, + "step": 122580 + }, + { + "epoch": 1.0141043140174546, + "grad_norm": 573.954345703125, + "learning_rate": 2.6763416367547866e-09, + "loss": 77.4606, + "step": 122590 + }, + { + "epoch": 1.0141870372668238, + "grad_norm": 855.6474609375, + "learning_rate": 2.630400740092909e-09, + "loss": 81.674, + "step": 122600 + }, + { + "epoch": 1.014269760516193, + "grad_norm": 641.304443359375, + "learning_rate": 2.5848574611953003e-09, + "loss": 93.9946, + "step": 122610 + }, + { + "epoch": 1.0143524837655624, + "grad_norm": 899.0760498046875, + "learning_rate": 2.5397118036851743e-09, + "loss": 101.6424, + "step": 122620 + }, + { + "epoch": 1.0144352070149316, + "grad_norm": 1727.291259765625, + "learning_rate": 2.494963771155212e-09, + "loss": 74.2995, + "step": 122630 + }, + { + "epoch": 
1.0145179302643008, + "grad_norm": 804.0676879882812, + "learning_rate": 2.4506133671653443e-09, + "loss": 93.2673, + "step": 122640 + }, + { + "epoch": 1.01460065351367, + "grad_norm": 1078.8939208984375, + "learning_rate": 2.4066605952444144e-09, + "loss": 78.2374, + "step": 122650 + }, + { + "epoch": 1.0146833767630392, + "grad_norm": 1028.9287109375, + "learning_rate": 2.3631054588901802e-09, + "loss": 89.6077, + "step": 122660 + }, + { + "epoch": 1.0147661000124084, + "grad_norm": 654.36328125, + "learning_rate": 2.319947961567093e-09, + "loss": 84.3641, + "step": 122670 + }, + { + "epoch": 1.0148488232617778, + "grad_norm": 532.8497314453125, + "learning_rate": 2.277188106709627e-09, + "loss": 120.6354, + "step": 122680 + }, + { + "epoch": 1.014931546511147, + "grad_norm": 970.8217163085938, + "learning_rate": 2.234825897720061e-09, + "loss": 66.7476, + "step": 122690 + }, + { + "epoch": 1.0150142697605162, + "grad_norm": 934.7479858398438, + "learning_rate": 2.192861337968477e-09, + "loss": 95.2176, + "step": 122700 + }, + { + "epoch": 1.0150969930098854, + "grad_norm": 740.1159057617188, + "learning_rate": 2.151294430794426e-09, + "loss": 86.6372, + "step": 122710 + }, + { + "epoch": 1.0151797162592546, + "grad_norm": 928.073974609375, + "learning_rate": 2.110125179504152e-09, + "loss": 88.6049, + "step": 122720 + }, + { + "epoch": 1.0152624395086238, + "grad_norm": 741.3151245117188, + "learning_rate": 2.0693535873744784e-09, + "loss": 87.0215, + "step": 122730 + }, + { + "epoch": 1.015345162757993, + "grad_norm": 805.8557739257812, + "learning_rate": 2.028979657648922e-09, + "loss": 51.4993, + "step": 122740 + }, + { + "epoch": 1.0154278860073624, + "grad_norm": 620.05224609375, + "learning_rate": 1.989003393539912e-09, + "loss": 97.4243, + "step": 122750 + }, + { + "epoch": 1.0155106092567316, + "grad_norm": 693.1014404296875, + "learning_rate": 1.9494247982282386e-09, + "loss": 131.4482, + "step": 122760 + }, + { + "epoch": 1.0155933325061008, + "grad_norm": 651.7940673828125, + "learning_rate": 1.9102438748624943e-09, + "loss": 74.7952, + "step": 122770 + }, + { + "epoch": 1.01567605575547, + "grad_norm": 810.8271484375, + "learning_rate": 1.8714606265607395e-09, + "loss": 77.6325, + "step": 122780 + }, + { + "epoch": 1.0157587790048392, + "grad_norm": 2490.756591796875, + "learning_rate": 1.8330750564088396e-09, + "loss": 81.8933, + "step": 122790 + }, + { + "epoch": 1.0158415022542084, + "grad_norm": 928.2571411132812, + "learning_rate": 1.795087167459908e-09, + "loss": 67.8226, + "step": 122800 + }, + { + "epoch": 1.0159242255035779, + "grad_norm": 970.4078979492188, + "learning_rate": 1.757496962738192e-09, + "loss": 105.3837, + "step": 122810 + }, + { + "epoch": 1.016006948752947, + "grad_norm": 399.0166320800781, + "learning_rate": 1.7203044452329676e-09, + "loss": 68.2508, + "step": 122820 + }, + { + "epoch": 1.0160896720023163, + "grad_norm": 846.7438354492188, + "learning_rate": 1.6835096179040888e-09, + "loss": 113.6935, + "step": 122830 + }, + { + "epoch": 1.0161723952516855, + "grad_norm": 1102.63623046875, + "learning_rate": 1.6471124836792142e-09, + "loss": 99.2099, + "step": 122840 + }, + { + "epoch": 1.0162551185010547, + "grad_norm": 1199.7076416015625, + "learning_rate": 1.61111304545436e-09, + "loss": 79.1822, + "step": 122850 + }, + { + "epoch": 1.0163378417504239, + "grad_norm": 708.465576171875, + "learning_rate": 1.575511306093902e-09, + "loss": 74.9537, + "step": 122860 + }, + { + "epoch": 1.0164205649997933, + "grad_norm": 665.5509643554688, + 
"learning_rate": 1.5403072684300191e-09, + "loss": 79.0717, + "step": 122870 + }, + { + "epoch": 1.0165032882491625, + "grad_norm": 616.6489868164062, + "learning_rate": 1.505500935264359e-09, + "loss": 127.0564, + "step": 122880 + }, + { + "epoch": 1.0165860114985317, + "grad_norm": 849.0372924804688, + "learning_rate": 1.471092309365818e-09, + "loss": 70.2291, + "step": 122890 + }, + { + "epoch": 1.0166687347479009, + "grad_norm": 669.7859497070312, + "learning_rate": 1.4370813934722062e-09, + "loss": 122.7297, + "step": 122900 + }, + { + "epoch": 1.01675145799727, + "grad_norm": 1538.0843505859375, + "learning_rate": 1.403468190290247e-09, + "loss": 75.6392, + "step": 122910 + }, + { + "epoch": 1.0168341812466393, + "grad_norm": 595.5289916992188, + "learning_rate": 1.3702527024933576e-09, + "loss": 91.8487, + "step": 122920 + }, + { + "epoch": 1.0169169044960087, + "grad_norm": 982.86083984375, + "learning_rate": 1.337434932724979e-09, + "loss": 80.7263, + "step": 122930 + }, + { + "epoch": 1.0169996277453779, + "grad_norm": 984.17333984375, + "learning_rate": 1.305014883595801e-09, + "loss": 79.123, + "step": 122940 + }, + { + "epoch": 1.017082350994747, + "grad_norm": 805.4910888671875, + "learning_rate": 1.2729925576859815e-09, + "loss": 83.601, + "step": 122950 + }, + { + "epoch": 1.0171650742441163, + "grad_norm": 1756.73193359375, + "learning_rate": 1.2413679575434823e-09, + "loss": 102.3782, + "step": 122960 + }, + { + "epoch": 1.0172477974934855, + "grad_norm": 719.05615234375, + "learning_rate": 1.210141085683514e-09, + "loss": 85.2048, + "step": 122970 + }, + { + "epoch": 1.0173305207428547, + "grad_norm": 581.4542846679688, + "learning_rate": 1.1793119445918656e-09, + "loss": 51.8009, + "step": 122980 + }, + { + "epoch": 1.017413243992224, + "grad_norm": 1233.638916015625, + "learning_rate": 1.1488805367204648e-09, + "loss": 70.137, + "step": 122990 + }, + { + "epoch": 1.0174959672415933, + "grad_norm": 595.7598876953125, + "learning_rate": 1.118846864490708e-09, + "loss": 119.1018, + "step": 123000 + }, + { + "epoch": 1.0175786904909625, + "grad_norm": 652.7130126953125, + "learning_rate": 1.0892109302929055e-09, + "loss": 81.6739, + "step": 123010 + }, + { + "epoch": 1.0176614137403317, + "grad_norm": 612.19482421875, + "learning_rate": 1.0599727364851708e-09, + "loss": 69.6915, + "step": 123020 + }, + { + "epoch": 1.017744136989701, + "grad_norm": 1230.201171875, + "learning_rate": 1.0311322853928662e-09, + "loss": 80.762, + "step": 123030 + }, + { + "epoch": 1.01782686023907, + "grad_norm": 1171.8270263671875, + "learning_rate": 1.0026895793108227e-09, + "loss": 116.272, + "step": 123040 + }, + { + "epoch": 1.0179095834884395, + "grad_norm": 644.4696044921875, + "learning_rate": 9.74644620502785e-10, + "loss": 89.4497, + "step": 123050 + }, + { + "epoch": 1.0179923067378087, + "grad_norm": 625.5791015625, + "learning_rate": 9.46997411200301e-10, + "loss": 84.8642, + "step": 123060 + }, + { + "epoch": 1.018075029987178, + "grad_norm": 844.577880859375, + "learning_rate": 9.197479536021681e-10, + "loss": 96.1589, + "step": 123070 + }, + { + "epoch": 1.0181577532365471, + "grad_norm": 844.6200561523438, + "learning_rate": 8.928962498772065e-10, + "loss": 93.7864, + "step": 123080 + }, + { + "epoch": 1.0182404764859163, + "grad_norm": 1154.9962158203125, + "learning_rate": 8.664423021614854e-10, + "loss": 72.7706, + "step": 123090 + }, + { + "epoch": 1.0183231997352855, + "grad_norm": 1171.821044921875, + "learning_rate": 8.403861125599877e-10, + "loss": 87.6492, + "step": 
123100 + }, + { + "epoch": 1.018405922984655, + "grad_norm": 697.9021606445312, + "learning_rate": 8.147276831460548e-10, + "loss": 80.5435, + "step": 123110 + }, + { + "epoch": 1.0184886462340241, + "grad_norm": 1245.968017578125, + "learning_rate": 7.894670159613871e-10, + "loss": 83.4186, + "step": 123120 + }, + { + "epoch": 1.0185713694833933, + "grad_norm": 1226.178466796875, + "learning_rate": 7.646041130149329e-10, + "loss": 89.3538, + "step": 123130 + }, + { + "epoch": 1.0186540927327625, + "grad_norm": 1140.58984375, + "learning_rate": 7.401389762862199e-10, + "loss": 99.5532, + "step": 123140 + }, + { + "epoch": 1.0187368159821317, + "grad_norm": 732.798828125, + "learning_rate": 7.16071607720914e-10, + "loss": 95.5324, + "step": 123150 + }, + { + "epoch": 1.018819539231501, + "grad_norm": 850.7224731445312, + "learning_rate": 6.924020092335948e-10, + "loss": 65.989, + "step": 123160 + }, + { + "epoch": 1.0189022624808703, + "grad_norm": 660.1827392578125, + "learning_rate": 6.691301827088659e-10, + "loss": 83.6253, + "step": 123170 + }, + { + "epoch": 1.0189849857302395, + "grad_norm": 558.4231567382812, + "learning_rate": 6.46256129997469e-10, + "loss": 86.4292, + "step": 123180 + }, + { + "epoch": 1.0190677089796087, + "grad_norm": 1053.580078125, + "learning_rate": 6.237798529190597e-10, + "loss": 88.6869, + "step": 123190 + }, + { + "epoch": 1.019150432228978, + "grad_norm": 1924.813232421875, + "learning_rate": 6.017013532627625e-10, + "loss": 88.4377, + "step": 123200 + }, + { + "epoch": 1.0192331554783471, + "grad_norm": 1369.2806396484375, + "learning_rate": 5.800206327855051e-10, + "loss": 77.994, + "step": 123210 + }, + { + "epoch": 1.0193158787277163, + "grad_norm": 745.227294921875, + "learning_rate": 5.58737693210909e-10, + "loss": 89.5734, + "step": 123220 + }, + { + "epoch": 1.0193986019770858, + "grad_norm": 391.84759521484375, + "learning_rate": 5.378525362337294e-10, + "loss": 81.0701, + "step": 123230 + }, + { + "epoch": 1.019481325226455, + "grad_norm": 504.9894104003906, + "learning_rate": 5.173651635148602e-10, + "loss": 66.15, + "step": 123240 + }, + { + "epoch": 1.0195640484758242, + "grad_norm": 955.138671875, + "learning_rate": 4.972755766846637e-10, + "loss": 55.0407, + "step": 123250 + }, + { + "epoch": 1.0196467717251934, + "grad_norm": 1267.6429443359375, + "learning_rate": 4.775837773418612e-10, + "loss": 92.8494, + "step": 123260 + }, + { + "epoch": 1.0197294949745626, + "grad_norm": 495.1304016113281, + "learning_rate": 4.5828976705297735e-10, + "loss": 78.7691, + "step": 123270 + }, + { + "epoch": 1.0198122182239318, + "grad_norm": 913.0636596679688, + "learning_rate": 4.3939354735345053e-10, + "loss": 95.1808, + "step": 123280 + }, + { + "epoch": 1.0198949414733012, + "grad_norm": 1205.1334228515625, + "learning_rate": 4.2089511974596765e-10, + "loss": 88.0171, + "step": 123290 + }, + { + "epoch": 1.0199776647226704, + "grad_norm": 1519.8599853515625, + "learning_rate": 4.027944857032395e-10, + "loss": 74.2102, + "step": 123300 + }, + { + "epoch": 1.0200603879720396, + "grad_norm": 845.7083129882812, + "learning_rate": 3.8509164666522546e-10, + "loss": 81.2621, + "step": 123310 + }, + { + "epoch": 1.0201431112214088, + "grad_norm": 1035.19189453125, + "learning_rate": 3.677866040402434e-10, + "loss": 77.1908, + "step": 123320 + }, + { + "epoch": 1.020225834470778, + "grad_norm": 679.1898193359375, + "learning_rate": 3.5087935920496975e-10, + "loss": 78.4861, + "step": 123330 + }, + { + "epoch": 1.0203085577201472, + "grad_norm": 
987.113525390625, + "learning_rate": 3.343699135049949e-10, + "loss": 106.0358, + "step": 123340 + }, + { + "epoch": 1.0203912809695166, + "grad_norm": 1130.323486328125, + "learning_rate": 3.182582682542679e-10, + "loss": 104.6267, + "step": 123350 + }, + { + "epoch": 1.0204740042188858, + "grad_norm": 952.3067016601562, + "learning_rate": 3.0254442473398594e-10, + "loss": 91.154, + "step": 123360 + }, + { + "epoch": 1.020556727468255, + "grad_norm": 777.0213012695312, + "learning_rate": 2.8722838419481535e-10, + "loss": 84.8183, + "step": 123370 + }, + { + "epoch": 1.0206394507176242, + "grad_norm": 873.121337890625, + "learning_rate": 2.723101478546708e-10, + "loss": 75.0147, + "step": 123380 + }, + { + "epoch": 1.0207221739669934, + "grad_norm": 1494.041015625, + "learning_rate": 2.57789716902046e-10, + "loss": 92.4943, + "step": 123390 + }, + { + "epoch": 1.0208048972163626, + "grad_norm": 729.3314819335938, + "learning_rate": 2.436670924910178e-10, + "loss": 89.5984, + "step": 123400 + }, + { + "epoch": 1.0208876204657318, + "grad_norm": 744.1867065429688, + "learning_rate": 2.2994227574568705e-10, + "loss": 103.0427, + "step": 123410 + }, + { + "epoch": 1.0209703437151012, + "grad_norm": 762.7804565429688, + "learning_rate": 2.1661526775795804e-10, + "loss": 75.6005, + "step": 123420 + }, + { + "epoch": 1.0210530669644704, + "grad_norm": 758.2909545898438, + "learning_rate": 2.0368606958809377e-10, + "loss": 85.8044, + "step": 123430 + }, + { + "epoch": 1.0211357902138396, + "grad_norm": 943.9282836914062, + "learning_rate": 1.9115468226527101e-10, + "loss": 90.3115, + "step": 123440 + }, + { + "epoch": 1.0212185134632088, + "grad_norm": 1385.7298583984375, + "learning_rate": 1.790211067859149e-10, + "loss": 82.1778, + "step": 123450 + }, + { + "epoch": 1.021301236712578, + "grad_norm": 875.9342041015625, + "learning_rate": 1.6728534411591946e-10, + "loss": 93.4806, + "step": 123460 + }, + { + "epoch": 1.0213839599619472, + "grad_norm": 732.7958374023438, + "learning_rate": 1.5594739518898227e-10, + "loss": 65.2388, + "step": 123470 + }, + { + "epoch": 1.0214666832113166, + "grad_norm": 978.7460327148438, + "learning_rate": 1.4500726090715956e-10, + "loss": 85.1013, + "step": 123480 + }, + { + "epoch": 1.0215494064606858, + "grad_norm": 1050.985595703125, + "learning_rate": 1.3446494214031102e-10, + "loss": 90.1326, + "step": 123490 + }, + { + "epoch": 1.021632129710055, + "grad_norm": 823.0833740234375, + "learning_rate": 1.2432043972832042e-10, + "loss": 71.4136, + "step": 123500 + }, + { + "epoch": 1.0217148529594242, + "grad_norm": 735.2103881835938, + "learning_rate": 1.1457375447776476e-10, + "loss": 74.5606, + "step": 123510 + }, + { + "epoch": 1.0217975762087934, + "grad_norm": 836.6810302734375, + "learning_rate": 1.0522488716413481e-10, + "loss": 59.5058, + "step": 123520 + }, + { + "epoch": 1.0218802994581626, + "grad_norm": 490.60614013671875, + "learning_rate": 9.627383853128003e-11, + "loss": 74.4532, + "step": 123530 + }, + { + "epoch": 1.021963022707532, + "grad_norm": 616.2906494140625, + "learning_rate": 8.772060929196357e-11, + "loss": 62.0144, + "step": 123540 + }, + { + "epoch": 1.0220457459569012, + "grad_norm": 775.4456787109375, + "learning_rate": 7.956520012564195e-11, + "loss": 84.0153, + "step": 123550 + }, + { + "epoch": 1.0221284692062704, + "grad_norm": 1081.5496826171875, + "learning_rate": 7.180761168179563e-11, + "loss": 79.5349, + "step": 123560 + }, + { + "epoch": 1.0222111924556396, + "grad_norm": 855.0829467773438, + "learning_rate": 
6.444784457770858e-11, + "loss": 90.5716, + "step": 123570 + }, + { + "epoch": 1.0222939157050088, + "grad_norm": 1062.8492431640625, + "learning_rate": 5.748589939902349e-11, + "loss": 111.6922, + "step": 123580 + }, + { + "epoch": 1.022376638954378, + "grad_norm": 1228.608154296875, + "learning_rate": 5.0921776699741634e-11, + "loss": 90.2285, + "step": 123590 + }, + { + "epoch": 1.0224593622037474, + "grad_norm": 1081.4580078125, + "learning_rate": 4.4755477001667824e-11, + "loss": 100.085, + "step": 123600 + }, + { + "epoch": 1.0225420854531166, + "grad_norm": 735.9952392578125, + "learning_rate": 3.898700079607576e-11, + "loss": 85.3372, + "step": 123610 + }, + { + "epoch": 1.0226248087024858, + "grad_norm": 562.4883422851562, + "learning_rate": 3.361634854093243e-11, + "loss": 114.2243, + "step": 123620 + }, + { + "epoch": 1.022707531951855, + "grad_norm": 543.7153930664062, + "learning_rate": 2.864352066478393e-11, + "loss": 100.8226, + "step": 123630 + }, + { + "epoch": 1.0227902552012242, + "grad_norm": 463.9744873046875, + "learning_rate": 2.406851756231454e-11, + "loss": 62.1017, + "step": 123640 + }, + { + "epoch": 1.0228729784505934, + "grad_norm": 1143.583740234375, + "learning_rate": 1.9891339597677415e-11, + "loss": 97.4913, + "step": 123650 + }, + { + "epoch": 1.0229557016999629, + "grad_norm": 592.5479736328125, + "learning_rate": 1.6111987103939464e-11, + "loss": 69.8716, + "step": 123660 + }, + { + "epoch": 1.023038424949332, + "grad_norm": 738.0408935546875, + "learning_rate": 1.273046038141601e-11, + "loss": 81.3839, + "step": 123670 + }, + { + "epoch": 1.0231211481987013, + "grad_norm": 5198.33935546875, + "learning_rate": 9.746759698781027e-12, + "loss": 182.0562, + "step": 123680 + }, + { + "epoch": 1.0232038714480705, + "grad_norm": 724.8328857421875, + "learning_rate": 7.160885294177355e-12, + "loss": 113.7217, + "step": 123690 + }, + { + "epoch": 1.0232865946974397, + "grad_norm": 1005.2647705078125, + "learning_rate": 4.9728373724411416e-12, + "loss": 77.6447, + "step": 123700 + }, + { + "epoch": 1.0233693179468089, + "grad_norm": 1014.6140747070312, + "learning_rate": 3.1826161084325124e-12, + "loss": 78.813, + "step": 123710 + }, + { + "epoch": 1.0234520411961783, + "grad_norm": 925.9212036132812, + "learning_rate": 1.7902216442600152e-12, + "loss": 70.0796, + "step": 123720 + }, + { + "epoch": 1.0235347644455475, + "grad_norm": 570.4528198242188, + "learning_rate": 7.956540903908405e-13, + "loss": 79.1544, + "step": 123730 + }, + { + "epoch": 1.0236174876949167, + "grad_norm": 898.171142578125, + "learning_rate": 1.9891352676104647e-13, + "loss": 58.6368, + "step": 123740 + }, + { + "epoch": 1.0237002109442859, + "grad_norm": 748.3834838867188, + "learning_rate": 0.0, + "loss": 101.8211, + "step": 123750 + } + ], + "logging_steps": 10, + "max_steps": 123750, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 2000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}
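
For readers who want to inspect this log rather than eyeball the raw JSON, below is a minimal sketch (not part of the training run itself) that loads a saved copy of this trainer state and plots the logged loss and learning-rate schedule. The file path, the use of matplotlib, and the log-scaled loss axis are illustrative assumptions; only the keys (`log_history`, `loss`, `learning_rate`, `step`) come from the data above.

```python
# Sketch: summarize the loss curve and LR schedule from a Trainer state file.
# Assumes the JSON above is saved locally as "trainer_state.json" (path is an
# assumption) and that matplotlib is installed; neither is part of the log itself.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only entries that actually carry a training loss (eval entries may not).
history = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

print(f"logged points: {len(steps)}, final step: {steps[-1]}, final loss: {losses[-1]}")

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("loss")
ax_loss.set_yscale("log")  # losses in this run span several orders of magnitude
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning_rate")
ax_lr.set_xlabel("step")
plt.tight_layout()
plt.show()
```

As a sanity check against the entries above, the learning rate decays to 0.0 exactly at the final logged step (123750, matching `max_steps`), which is consistent with a linear decay schedule finishing at the end of training.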