|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 13550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007380073800738007, |
|
"grad_norm": 100.91170501708984, |
|
"learning_rate": 7.380073800738008e-07, |
|
"loss": 15.7032, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0014760147601476014, |
|
"grad_norm": 53.92641830444336, |
|
"learning_rate": 1.4760147601476015e-06, |
|
"loss": 12.3986, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002214022140221402, |
|
"grad_norm": 22.292367935180664, |
|
"learning_rate": 2.2140221402214023e-06, |
|
"loss": 10.8644, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002952029520295203, |
|
"grad_norm": 9.494361877441406, |
|
"learning_rate": 2.952029520295203e-06, |
|
"loss": 10.0464, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0036900369003690036, |
|
"grad_norm": 5.657174110412598, |
|
"learning_rate": 3.690036900369004e-06, |
|
"loss": 9.8267, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004428044280442804, |
|
"grad_norm": 11.480326652526855, |
|
"learning_rate": 4.428044280442805e-06, |
|
"loss": 9.6732, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0051660516605166054, |
|
"grad_norm": 32.78021240234375, |
|
"learning_rate": 5.166051660516605e-06, |
|
"loss": 9.6283, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005904059040590406, |
|
"grad_norm": 26.172266006469727, |
|
"learning_rate": 5.904059040590406e-06, |
|
"loss": 9.4751, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006642066420664207, |
|
"grad_norm": 33.70742416381836, |
|
"learning_rate": 6.642066420664207e-06, |
|
"loss": 9.3959, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007380073800738007, |
|
"grad_norm": 63.11279296875, |
|
"learning_rate": 7.380073800738008e-06, |
|
"loss": 9.3828, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008118081180811807, |
|
"grad_norm": 15.8975191116333, |
|
"learning_rate": 8.118081180811808e-06, |
|
"loss": 9.2352, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008856088560885609, |
|
"grad_norm": 12.312295913696289, |
|
"learning_rate": 8.85608856088561e-06, |
|
"loss": 9.1436, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.00959409594095941, |
|
"grad_norm": 10.606693267822266, |
|
"learning_rate": 9.59409594095941e-06, |
|
"loss": 8.8854, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.010332103321033211, |
|
"grad_norm": 16.000524520874023, |
|
"learning_rate": 1.033210332103321e-05, |
|
"loss": 8.703, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01107011070110701, |
|
"grad_norm": 14.297750473022461, |
|
"learning_rate": 1.1070110701107012e-05, |
|
"loss": 8.5243, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.011808118081180811, |
|
"grad_norm": 11.472665786743164, |
|
"learning_rate": 1.1808118081180812e-05, |
|
"loss": 8.232, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.012546125461254613, |
|
"grad_norm": 7.633975028991699, |
|
"learning_rate": 1.2546125461254612e-05, |
|
"loss": 8.0453, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.013284132841328414, |
|
"grad_norm": 7.606258869171143, |
|
"learning_rate": 1.3284132841328414e-05, |
|
"loss": 7.9445, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.014022140221402213, |
|
"grad_norm": 13.680715560913086, |
|
"learning_rate": 1.4022140221402214e-05, |
|
"loss": 7.9335, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.014760147601476014, |
|
"grad_norm": 10.28775405883789, |
|
"learning_rate": 1.4760147601476015e-05, |
|
"loss": 7.7923, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.015498154981549815, |
|
"grad_norm": 7.461697101593018, |
|
"learning_rate": 1.5498154981549817e-05, |
|
"loss": 7.763, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.016236162361623615, |
|
"grad_norm": 4.384743690490723, |
|
"learning_rate": 1.6236162361623615e-05, |
|
"loss": 7.7702, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.016974169741697416, |
|
"grad_norm": 5.806989669799805, |
|
"learning_rate": 1.6974169741697417e-05, |
|
"loss": 7.76, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.017712177121771217, |
|
"grad_norm": 5.75732421875, |
|
"learning_rate": 1.771217712177122e-05, |
|
"loss": 7.6101, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01845018450184502, |
|
"grad_norm": 3.3278968334198, |
|
"learning_rate": 1.845018450184502e-05, |
|
"loss": 7.5669, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01918819188191882, |
|
"grad_norm": 5.252697467803955, |
|
"learning_rate": 1.918819188191882e-05, |
|
"loss": 7.3905, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01992619926199262, |
|
"grad_norm": 3.135658025741577, |
|
"learning_rate": 1.992619926199262e-05, |
|
"loss": 7.3472, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.020664206642066422, |
|
"grad_norm": 5.030785083770752, |
|
"learning_rate": 2.066420664206642e-05, |
|
"loss": 7.2426, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.021402214022140223, |
|
"grad_norm": 4.882932186126709, |
|
"learning_rate": 2.140221402214022e-05, |
|
"loss": 7.0541, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02214022140221402, |
|
"grad_norm": 2.2638933658599854, |
|
"learning_rate": 2.2140221402214025e-05, |
|
"loss": 7.0113, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.022878228782287822, |
|
"grad_norm": 4.782796859741211, |
|
"learning_rate": 2.2878228782287826e-05, |
|
"loss": 6.8661, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.023616236162361623, |
|
"grad_norm": 1.9799453020095825, |
|
"learning_rate": 2.3616236162361624e-05, |
|
"loss": 7.0323, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.024354243542435424, |
|
"grad_norm": 4.8417558670043945, |
|
"learning_rate": 2.4354243542435426e-05, |
|
"loss": 6.8865, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.025092250922509225, |
|
"grad_norm": 4.531852722167969, |
|
"learning_rate": 2.5092250922509224e-05, |
|
"loss": 6.7982, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.025830258302583026, |
|
"grad_norm": 3.0997111797332764, |
|
"learning_rate": 2.5830258302583026e-05, |
|
"loss": 6.79, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.026568265682656828, |
|
"grad_norm": 3.2981128692626953, |
|
"learning_rate": 2.6568265682656828e-05, |
|
"loss": 6.7459, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02730627306273063, |
|
"grad_norm": 3.313589572906494, |
|
"learning_rate": 2.730627306273063e-05, |
|
"loss": 6.637, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.028044280442804426, |
|
"grad_norm": 2.1940433979034424, |
|
"learning_rate": 2.8044280442804427e-05, |
|
"loss": 6.5645, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.028782287822878228, |
|
"grad_norm": 3.6912360191345215, |
|
"learning_rate": 2.878228782287823e-05, |
|
"loss": 6.4398, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.02952029520295203, |
|
"grad_norm": 3.37406325340271, |
|
"learning_rate": 2.952029520295203e-05, |
|
"loss": 6.4774, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03025830258302583, |
|
"grad_norm": 3.22963285446167, |
|
"learning_rate": 3.0258302583025832e-05, |
|
"loss": 6.3106, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03099630996309963, |
|
"grad_norm": 2.419431686401367, |
|
"learning_rate": 3.0996309963099634e-05, |
|
"loss": 6.3172, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03173431734317343, |
|
"grad_norm": 2.677661895751953, |
|
"learning_rate": 3.173431734317343e-05, |
|
"loss": 6.1359, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.03247232472324723, |
|
"grad_norm": 2.795398712158203, |
|
"learning_rate": 3.247232472324723e-05, |
|
"loss": 6.2412, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.033210332103321034, |
|
"grad_norm": 2.9979288578033447, |
|
"learning_rate": 3.3210332103321035e-05, |
|
"loss": 6.2192, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.03394833948339483, |
|
"grad_norm": 3.352975845336914, |
|
"learning_rate": 3.3948339483394833e-05, |
|
"loss": 6.1654, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03468634686346864, |
|
"grad_norm": 2.6526570320129395, |
|
"learning_rate": 3.468634686346864e-05, |
|
"loss": 6.0669, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.035424354243542434, |
|
"grad_norm": 2.950063467025757, |
|
"learning_rate": 3.542435424354244e-05, |
|
"loss": 6.0332, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03616236162361624, |
|
"grad_norm": 4.221488952636719, |
|
"learning_rate": 3.6162361623616235e-05, |
|
"loss": 5.9708, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.03690036900369004, |
|
"grad_norm": 2.6405985355377197, |
|
"learning_rate": 3.690036900369004e-05, |
|
"loss": 5.76, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.037638376383763834, |
|
"grad_norm": 4.019585132598877, |
|
"learning_rate": 3.763837638376384e-05, |
|
"loss": 5.9036, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.03837638376383764, |
|
"grad_norm": 2.687580108642578, |
|
"learning_rate": 3.837638376383764e-05, |
|
"loss": 5.808, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03911439114391144, |
|
"grad_norm": 3.339268207550049, |
|
"learning_rate": 3.911439114391144e-05, |
|
"loss": 5.7391, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03985239852398524, |
|
"grad_norm": 3.0441882610321045, |
|
"learning_rate": 3.985239852398524e-05, |
|
"loss": 5.7045, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04059040590405904, |
|
"grad_norm": 2.8957650661468506, |
|
"learning_rate": 4.0590405904059045e-05, |
|
"loss": 5.6956, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.041328413284132844, |
|
"grad_norm": 3.6834869384765625, |
|
"learning_rate": 4.132841328413284e-05, |
|
"loss": 5.6103, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04206642066420664, |
|
"grad_norm": 3.4573564529418945, |
|
"learning_rate": 4.206642066420665e-05, |
|
"loss": 5.4107, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.042804428044280446, |
|
"grad_norm": 3.2341487407684326, |
|
"learning_rate": 4.280442804428044e-05, |
|
"loss": 5.4208, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.043542435424354244, |
|
"grad_norm": 3.6147806644439697, |
|
"learning_rate": 4.3542435424354244e-05, |
|
"loss": 5.4217, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04428044280442804, |
|
"grad_norm": 3.6139488220214844, |
|
"learning_rate": 4.428044280442805e-05, |
|
"loss": 5.3994, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.045018450184501846, |
|
"grad_norm": 3.277580499649048, |
|
"learning_rate": 4.501845018450185e-05, |
|
"loss": 5.3605, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.045756457564575644, |
|
"grad_norm": 2.5641043186187744, |
|
"learning_rate": 4.575645756457565e-05, |
|
"loss": 5.1682, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04649446494464945, |
|
"grad_norm": 2.422578811645508, |
|
"learning_rate": 4.6494464944649444e-05, |
|
"loss": 5.1585, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.047232472324723246, |
|
"grad_norm": 4.027858257293701, |
|
"learning_rate": 4.723247232472325e-05, |
|
"loss": 5.2147, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.04797047970479705, |
|
"grad_norm": 2.401747226715088, |
|
"learning_rate": 4.797047970479705e-05, |
|
"loss": 5.0839, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.04870848708487085, |
|
"grad_norm": 2.9220759868621826, |
|
"learning_rate": 4.870848708487085e-05, |
|
"loss": 5.1216, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.04944649446494465, |
|
"grad_norm": 2.4891719818115234, |
|
"learning_rate": 4.944649446494466e-05, |
|
"loss": 4.9789, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05018450184501845, |
|
"grad_norm": 2.279683828353882, |
|
"learning_rate": 5.018450184501845e-05, |
|
"loss": 4.9611, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05092250922509225, |
|
"grad_norm": 2.045536518096924, |
|
"learning_rate": 5.0922509225092254e-05, |
|
"loss": 4.8993, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05166051660516605, |
|
"grad_norm": 1.9132373332977295, |
|
"learning_rate": 5.166051660516605e-05, |
|
"loss": 4.8083, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05239852398523985, |
|
"grad_norm": 2.304215669631958, |
|
"learning_rate": 5.239852398523986e-05, |
|
"loss": 4.828, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.053136531365313655, |
|
"grad_norm": 2.2891597747802734, |
|
"learning_rate": 5.3136531365313655e-05, |
|
"loss": 4.7838, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05387453874538745, |
|
"grad_norm": 2.411600351333618, |
|
"learning_rate": 5.387453874538746e-05, |
|
"loss": 4.6931, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.05461254612546126, |
|
"grad_norm": 1.6772541999816895, |
|
"learning_rate": 5.461254612546126e-05, |
|
"loss": 4.7027, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.055350553505535055, |
|
"grad_norm": 1.7979137897491455, |
|
"learning_rate": 5.535055350553506e-05, |
|
"loss": 4.7452, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05608856088560885, |
|
"grad_norm": 2.3298912048339844, |
|
"learning_rate": 5.6088560885608855e-05, |
|
"loss": 4.7062, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.05682656826568266, |
|
"grad_norm": 1.986875295639038, |
|
"learning_rate": 5.682656826568265e-05, |
|
"loss": 4.6142, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.057564575645756455, |
|
"grad_norm": 1.8501532077789307, |
|
"learning_rate": 5.756457564575646e-05, |
|
"loss": 4.4524, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.05830258302583026, |
|
"grad_norm": 1.5959872007369995, |
|
"learning_rate": 5.830258302583026e-05, |
|
"loss": 4.5299, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.05904059040590406, |
|
"grad_norm": 2.339456796646118, |
|
"learning_rate": 5.904059040590406e-05, |
|
"loss": 4.4703, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.05977859778597786, |
|
"grad_norm": 1.6436880826950073, |
|
"learning_rate": 5.9778597785977866e-05, |
|
"loss": 4.463, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06051660516605166, |
|
"grad_norm": 1.7336505651474, |
|
"learning_rate": 6.0516605166051664e-05, |
|
"loss": 4.4363, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.061254612546125464, |
|
"grad_norm": 1.691726565361023, |
|
"learning_rate": 6.125461254612547e-05, |
|
"loss": 4.4099, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06199261992619926, |
|
"grad_norm": 1.5019862651824951, |
|
"learning_rate": 6.199261992619927e-05, |
|
"loss": 4.4153, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06273062730627306, |
|
"grad_norm": 1.4851793050765991, |
|
"learning_rate": 6.273062730627307e-05, |
|
"loss": 4.4721, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.06346863468634686, |
|
"grad_norm": 1.4793798923492432, |
|
"learning_rate": 6.346863468634686e-05, |
|
"loss": 4.3287, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.06420664206642067, |
|
"grad_norm": 1.5791796445846558, |
|
"learning_rate": 6.420664206642066e-05, |
|
"loss": 4.3766, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.06494464944649446, |
|
"grad_norm": 1.5449219942092896, |
|
"learning_rate": 6.494464944649446e-05, |
|
"loss": 4.2412, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06568265682656826, |
|
"grad_norm": 1.229464054107666, |
|
"learning_rate": 6.568265682656827e-05, |
|
"loss": 4.1558, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.06642066420664207, |
|
"grad_norm": 1.5863291025161743, |
|
"learning_rate": 6.642066420664207e-05, |
|
"loss": 4.2074, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06715867158671587, |
|
"grad_norm": 1.319446086883545, |
|
"learning_rate": 6.715867158671587e-05, |
|
"loss": 4.258, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.06789667896678966, |
|
"grad_norm": 1.3132025003433228, |
|
"learning_rate": 6.789667896678967e-05, |
|
"loss": 4.1444, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.06863468634686347, |
|
"grad_norm": 1.5694645643234253, |
|
"learning_rate": 6.863468634686348e-05, |
|
"loss": 4.1201, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.06937269372693727, |
|
"grad_norm": 1.4163988828659058, |
|
"learning_rate": 6.937269372693728e-05, |
|
"loss": 4.1113, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07011070110701106, |
|
"grad_norm": 1.487798810005188, |
|
"learning_rate": 7.011070110701108e-05, |
|
"loss": 4.0805, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07084870848708487, |
|
"grad_norm": 1.2213908433914185, |
|
"learning_rate": 7.084870848708487e-05, |
|
"loss": 4.0324, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07158671586715867, |
|
"grad_norm": 1.332588791847229, |
|
"learning_rate": 7.158671586715867e-05, |
|
"loss": 4.0455, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07232472324723248, |
|
"grad_norm": 1.212963342666626, |
|
"learning_rate": 7.232472324723247e-05, |
|
"loss": 4.0044, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07306273062730627, |
|
"grad_norm": 1.0928430557250977, |
|
"learning_rate": 7.306273062730628e-05, |
|
"loss": 4.0215, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.07380073800738007, |
|
"grad_norm": 1.1430400609970093, |
|
"learning_rate": 7.380073800738008e-05, |
|
"loss": 4.0744, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07453874538745388, |
|
"grad_norm": 0.9975944757461548, |
|
"learning_rate": 7.453874538745388e-05, |
|
"loss": 3.9955, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.07527675276752767, |
|
"grad_norm": 1.1288777589797974, |
|
"learning_rate": 7.527675276752768e-05, |
|
"loss": 3.9916, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.07601476014760147, |
|
"grad_norm": 1.0064688920974731, |
|
"learning_rate": 7.601476014760149e-05, |
|
"loss": 3.9025, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.07675276752767528, |
|
"grad_norm": 1.329229474067688, |
|
"learning_rate": 7.675276752767529e-05, |
|
"loss": 3.9822, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.07749077490774908, |
|
"grad_norm": 1.022760033607483, |
|
"learning_rate": 7.749077490774908e-05, |
|
"loss": 3.8762, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.07822878228782287, |
|
"grad_norm": 1.0934398174285889, |
|
"learning_rate": 7.822878228782288e-05, |
|
"loss": 3.7911, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.07896678966789668, |
|
"grad_norm": 1.008171796798706, |
|
"learning_rate": 7.896678966789668e-05, |
|
"loss": 3.8972, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.07970479704797048, |
|
"grad_norm": 1.1563254594802856, |
|
"learning_rate": 7.970479704797048e-05, |
|
"loss": 3.7901, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08044280442804429, |
|
"grad_norm": 1.06783926486969, |
|
"learning_rate": 8.044280442804428e-05, |
|
"loss": 3.9768, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08118081180811808, |
|
"grad_norm": 1.0809143781661987, |
|
"learning_rate": 8.118081180811809e-05, |
|
"loss": 3.7534, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08191881918819188, |
|
"grad_norm": 1.040157675743103, |
|
"learning_rate": 8.191881918819189e-05, |
|
"loss": 3.7609, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.08265682656826569, |
|
"grad_norm": 1.0198458433151245, |
|
"learning_rate": 8.265682656826569e-05, |
|
"loss": 3.8031, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08339483394833948, |
|
"grad_norm": 1.3017419576644897, |
|
"learning_rate": 8.339483394833948e-05, |
|
"loss": 3.8526, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.08413284132841328, |
|
"grad_norm": 0.9285693168640137, |
|
"learning_rate": 8.41328413284133e-05, |
|
"loss": 3.7551, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08487084870848709, |
|
"grad_norm": 0.9603882431983948, |
|
"learning_rate": 8.48708487084871e-05, |
|
"loss": 3.834, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08560885608856089, |
|
"grad_norm": 0.9195291996002197, |
|
"learning_rate": 8.560885608856088e-05, |
|
"loss": 3.7804, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.08634686346863468, |
|
"grad_norm": 1.0526838302612305, |
|
"learning_rate": 8.634686346863469e-05, |
|
"loss": 3.8757, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.08708487084870849, |
|
"grad_norm": 0.8891322612762451, |
|
"learning_rate": 8.708487084870849e-05, |
|
"loss": 3.7513, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.08782287822878229, |
|
"grad_norm": 0.9467900395393372, |
|
"learning_rate": 8.782287822878229e-05, |
|
"loss": 3.7555, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.08856088560885608, |
|
"grad_norm": 1.0294831991195679, |
|
"learning_rate": 8.85608856088561e-05, |
|
"loss": 3.834, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.08929889298892989, |
|
"grad_norm": 1.0832924842834473, |
|
"learning_rate": 8.92988929889299e-05, |
|
"loss": 3.6299, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09003690036900369, |
|
"grad_norm": 0.9595062732696533, |
|
"learning_rate": 9.00369003690037e-05, |
|
"loss": 3.7695, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.0907749077490775, |
|
"grad_norm": 0.8714928030967712, |
|
"learning_rate": 9.077490774907749e-05, |
|
"loss": 3.7346, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.09151291512915129, |
|
"grad_norm": 0.9189225435256958, |
|
"learning_rate": 9.15129151291513e-05, |
|
"loss": 3.7646, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.09225092250922509, |
|
"grad_norm": 1.0212230682373047, |
|
"learning_rate": 9.22509225092251e-05, |
|
"loss": 3.6518, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.0929889298892989, |
|
"grad_norm": 0.8631012439727783, |
|
"learning_rate": 9.298892988929889e-05, |
|
"loss": 3.6702, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09372693726937269, |
|
"grad_norm": 0.76339191198349, |
|
"learning_rate": 9.37269372693727e-05, |
|
"loss": 3.6757, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.09446494464944649, |
|
"grad_norm": 0.8459323048591614, |
|
"learning_rate": 9.44649446494465e-05, |
|
"loss": 3.8173, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.0952029520295203, |
|
"grad_norm": 0.7884580492973328, |
|
"learning_rate": 9.52029520295203e-05, |
|
"loss": 3.6719, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.0959409594095941, |
|
"grad_norm": 0.9069279432296753, |
|
"learning_rate": 9.59409594095941e-05, |
|
"loss": 3.6447, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09667896678966789, |
|
"grad_norm": 0.8386545181274414, |
|
"learning_rate": 9.66789667896679e-05, |
|
"loss": 3.6681, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.0974169741697417, |
|
"grad_norm": 0.8082497119903564, |
|
"learning_rate": 9.74169741697417e-05, |
|
"loss": 3.6526, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.0981549815498155, |
|
"grad_norm": 0.7619675993919373, |
|
"learning_rate": 9.81549815498155e-05, |
|
"loss": 3.6717, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.0988929889298893, |
|
"grad_norm": 0.803425133228302, |
|
"learning_rate": 9.889298892988931e-05, |
|
"loss": 3.5892, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.0996309963099631, |
|
"grad_norm": 0.8372170925140381, |
|
"learning_rate": 9.963099630996311e-05, |
|
"loss": 3.6615, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1003690036900369, |
|
"grad_norm": 0.8343318700790405, |
|
"learning_rate": 9.999995852216369e-05, |
|
"loss": 3.5785, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1011070110701107, |
|
"grad_norm": 0.8367707133293152, |
|
"learning_rate": 9.999962669988607e-05, |
|
"loss": 3.625, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.1018450184501845, |
|
"grad_norm": 0.8662716150283813, |
|
"learning_rate": 9.999896305753297e-05, |
|
"loss": 3.6656, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.1025830258302583, |
|
"grad_norm": 0.747052788734436, |
|
"learning_rate": 9.999796759950864e-05, |
|
"loss": 3.5761, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.1033210332103321, |
|
"grad_norm": 0.7763943672180176, |
|
"learning_rate": 9.999664033241933e-05, |
|
"loss": 3.5234, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.10405904059040591, |
|
"grad_norm": 0.7435528039932251, |
|
"learning_rate": 9.99949812650734e-05, |
|
"loss": 3.5132, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.1047970479704797, |
|
"grad_norm": 0.8303211331367493, |
|
"learning_rate": 9.999299040848121e-05, |
|
"loss": 3.5173, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.1055350553505535, |
|
"grad_norm": 0.8359752297401428, |
|
"learning_rate": 9.999066777585495e-05, |
|
"loss": 3.5605, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.10627306273062731, |
|
"grad_norm": 0.909545361995697, |
|
"learning_rate": 9.998801338260865e-05, |
|
"loss": 3.5839, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.1070110701107011, |
|
"grad_norm": 0.845916748046875, |
|
"learning_rate": 9.99850272463581e-05, |
|
"loss": 3.5685, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.1077490774907749, |
|
"grad_norm": 0.834235429763794, |
|
"learning_rate": 9.99817093869206e-05, |
|
"loss": 3.5476, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.10848708487084871, |
|
"grad_norm": 0.7273171544075012, |
|
"learning_rate": 9.997805982631499e-05, |
|
"loss": 3.4777, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.10922509225092251, |
|
"grad_norm": 0.839796245098114, |
|
"learning_rate": 9.99740785887614e-05, |
|
"loss": 3.5084, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.1099630996309963, |
|
"grad_norm": 0.7638348340988159, |
|
"learning_rate": 9.99697657006811e-05, |
|
"loss": 3.5741, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.11070110701107011, |
|
"grad_norm": 0.7195069193840027, |
|
"learning_rate": 9.996512119069636e-05, |
|
"loss": 3.5083, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11143911439114391, |
|
"grad_norm": 0.7351711392402649, |
|
"learning_rate": 9.996014508963028e-05, |
|
"loss": 3.365, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.1121771217712177, |
|
"grad_norm": 0.7192705869674683, |
|
"learning_rate": 9.995483743050648e-05, |
|
"loss": 3.5233, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.11291512915129151, |
|
"grad_norm": 0.7362285256385803, |
|
"learning_rate": 9.994919824854898e-05, |
|
"loss": 3.5548, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.11365313653136531, |
|
"grad_norm": 0.6908057928085327, |
|
"learning_rate": 9.994322758118196e-05, |
|
"loss": 3.4293, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.11439114391143912, |
|
"grad_norm": 0.7892534136772156, |
|
"learning_rate": 9.993692546802941e-05, |
|
"loss": 3.4583, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.11512915129151291, |
|
"grad_norm": 0.7085639834403992, |
|
"learning_rate": 9.993029195091505e-05, |
|
"loss": 3.4349, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.11586715867158671, |
|
"grad_norm": 0.7825974225997925, |
|
"learning_rate": 9.992332707386188e-05, |
|
"loss": 3.4496, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.11660516605166052, |
|
"grad_norm": 0.7284643054008484, |
|
"learning_rate": 9.991603088309194e-05, |
|
"loss": 3.517, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.11734317343173432, |
|
"grad_norm": 0.7682483792304993, |
|
"learning_rate": 9.990840342702606e-05, |
|
"loss": 3.4505, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.11808118081180811, |
|
"grad_norm": 0.8391796350479126, |
|
"learning_rate": 9.990044475628347e-05, |
|
"loss": 3.5077, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11881918819188192, |
|
"grad_norm": 0.7043576836585999, |
|
"learning_rate": 9.989215492368151e-05, |
|
"loss": 3.4272, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.11955719557195572, |
|
"grad_norm": 0.72553551197052, |
|
"learning_rate": 9.988353398423527e-05, |
|
"loss": 3.3559, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.12029520295202951, |
|
"grad_norm": 0.7156850099563599, |
|
"learning_rate": 9.987458199515713e-05, |
|
"loss": 3.4108, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.12103321033210332, |
|
"grad_norm": 0.6410751342773438, |
|
"learning_rate": 9.98652990158566e-05, |
|
"loss": 3.4688, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.12177121771217712, |
|
"grad_norm": 0.8124927282333374, |
|
"learning_rate": 9.985568510793967e-05, |
|
"loss": 3.4611, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.12250922509225093, |
|
"grad_norm": 0.7403334379196167, |
|
"learning_rate": 9.984574033520857e-05, |
|
"loss": 3.4669, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.12324723247232472, |
|
"grad_norm": 0.662948727607727, |
|
"learning_rate": 9.983546476366132e-05, |
|
"loss": 3.4798, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.12398523985239852, |
|
"grad_norm": 0.6987183690071106, |
|
"learning_rate": 9.982485846149125e-05, |
|
"loss": 3.3932, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.12472324723247233, |
|
"grad_norm": 0.650486171245575, |
|
"learning_rate": 9.981392149908652e-05, |
|
"loss": 3.3856, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.12546125461254612, |
|
"grad_norm": 0.6416191458702087, |
|
"learning_rate": 9.98026539490298e-05, |
|
"loss": 3.455, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.12619926199261994, |
|
"grad_norm": 0.6319407820701599, |
|
"learning_rate": 9.979105588609762e-05, |
|
"loss": 3.4001, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.12693726937269373, |
|
"grad_norm": 0.6667493581771851, |
|
"learning_rate": 9.977912738725994e-05, |
|
"loss": 3.4277, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.12767527675276752, |
|
"grad_norm": 0.6686265468597412, |
|
"learning_rate": 9.976686853167967e-05, |
|
"loss": 3.4075, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.12841328413284134, |
|
"grad_norm": 0.731555700302124, |
|
"learning_rate": 9.975427940071211e-05, |
|
"loss": 3.4226, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.12915129151291513, |
|
"grad_norm": 0.6553905606269836, |
|
"learning_rate": 9.97413600779044e-05, |
|
"loss": 3.4306, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.12988929889298892, |
|
"grad_norm": 0.7509811520576477, |
|
"learning_rate": 9.9728110648995e-05, |
|
"loss": 3.3937, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.13062730627306274, |
|
"grad_norm": 0.7052728533744812, |
|
"learning_rate": 9.971453120191309e-05, |
|
"loss": 3.3822, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.13136531365313653, |
|
"grad_norm": 0.6742541790008545, |
|
"learning_rate": 9.970062182677801e-05, |
|
"loss": 3.3824, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.13210332103321032, |
|
"grad_norm": 0.6257262825965881, |
|
"learning_rate": 9.968638261589866e-05, |
|
"loss": 3.4047, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.13284132841328414, |
|
"grad_norm": 0.6546107530593872, |
|
"learning_rate": 9.967181366377285e-05, |
|
"loss": 3.3903, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.13357933579335793, |
|
"grad_norm": 0.8019782304763794, |
|
"learning_rate": 9.965691506708672e-05, |
|
"loss": 3.3911, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.13431734317343175, |
|
"grad_norm": 0.6207643151283264, |
|
"learning_rate": 9.964168692471408e-05, |
|
"loss": 3.3861, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.13505535055350554, |
|
"grad_norm": 0.6750718355178833, |
|
"learning_rate": 9.962612933771576e-05, |
|
"loss": 3.4424, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.13579335793357933, |
|
"grad_norm": 0.9330940246582031, |
|
"learning_rate": 9.961024240933892e-05, |
|
"loss": 3.3459, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.13653136531365315, |
|
"grad_norm": 0.7058202028274536, |
|
"learning_rate": 9.959402624501636e-05, |
|
"loss": 3.3327, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.13726937269372694, |
|
"grad_norm": 0.779712438583374, |
|
"learning_rate": 9.957748095236589e-05, |
|
"loss": 3.4398, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.13800738007380073, |
|
"grad_norm": 0.663960337638855, |
|
"learning_rate": 9.956060664118951e-05, |
|
"loss": 3.3513, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.13874538745387455, |
|
"grad_norm": 0.756618082523346, |
|
"learning_rate": 9.954340342347279e-05, |
|
"loss": 3.304, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.13948339483394834, |
|
"grad_norm": 0.7523687481880188, |
|
"learning_rate": 9.952587141338403e-05, |
|
"loss": 3.3155, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.14022140221402213, |
|
"grad_norm": 0.6524930596351624, |
|
"learning_rate": 9.950801072727356e-05, |
|
"loss": 3.3803, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14095940959409595, |
|
"grad_norm": 0.7161090970039368, |
|
"learning_rate": 9.948982148367292e-05, |
|
"loss": 3.4219, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.14169741697416974, |
|
"grad_norm": 0.7181054949760437, |
|
"learning_rate": 9.947130380329418e-05, |
|
"loss": 3.301, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.14243542435424356, |
|
"grad_norm": 0.6185216903686523, |
|
"learning_rate": 9.945245780902899e-05, |
|
"loss": 3.3666, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.14317343173431735, |
|
"grad_norm": 0.6279731392860413, |
|
"learning_rate": 9.943328362594788e-05, |
|
"loss": 3.2862, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.14391143911439114, |
|
"grad_norm": 0.6401661038398743, |
|
"learning_rate": 9.941378138129938e-05, |
|
"loss": 3.3112, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.14464944649446496, |
|
"grad_norm": 0.6105781197547913, |
|
"learning_rate": 9.939395120450916e-05, |
|
"loss": 3.3539, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.14538745387453875, |
|
"grad_norm": 0.6660001873970032, |
|
"learning_rate": 9.937379322717924e-05, |
|
"loss": 3.3722, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.14612546125461254, |
|
"grad_norm": 0.6415931582450867, |
|
"learning_rate": 9.935330758308705e-05, |
|
"loss": 3.3329, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.14686346863468636, |
|
"grad_norm": 0.6147580742835999, |
|
"learning_rate": 9.933249440818455e-05, |
|
"loss": 3.2807, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.14760147601476015, |
|
"grad_norm": 0.694519579410553, |
|
"learning_rate": 9.931135384059736e-05, |
|
"loss": 3.2662, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14833948339483394, |
|
"grad_norm": 0.6452217102050781, |
|
"learning_rate": 9.928988602062384e-05, |
|
"loss": 3.2942, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.14907749077490776, |
|
"grad_norm": 0.6983804106712341, |
|
"learning_rate": 9.926809109073412e-05, |
|
"loss": 3.2639, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.14981549815498155, |
|
"grad_norm": 0.6302483677864075, |
|
"learning_rate": 9.924596919556917e-05, |
|
"loss": 3.3648, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.15055350553505534, |
|
"grad_norm": 0.6506009697914124, |
|
"learning_rate": 9.922352048193986e-05, |
|
"loss": 3.3417, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.15129151291512916, |
|
"grad_norm": 0.6232055425643921, |
|
"learning_rate": 9.920074509882602e-05, |
|
"loss": 3.3304, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.15202952029520295, |
|
"grad_norm": 0.6454508900642395, |
|
"learning_rate": 9.917764319737533e-05, |
|
"loss": 3.2585, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.15276752767527677, |
|
"grad_norm": 0.6281662583351135, |
|
"learning_rate": 9.915421493090243e-05, |
|
"loss": 3.2753, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.15350553505535056, |
|
"grad_norm": 0.7222394943237305, |
|
"learning_rate": 9.913046045488786e-05, |
|
"loss": 3.2683, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.15424354243542435, |
|
"grad_norm": 0.6333222389221191, |
|
"learning_rate": 9.910637992697707e-05, |
|
"loss": 3.2676, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.15498154981549817, |
|
"grad_norm": 0.6758008003234863, |
|
"learning_rate": 9.908197350697926e-05, |
|
"loss": 3.2941, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15571955719557196, |
|
"grad_norm": 0.5930529832839966, |
|
"learning_rate": 9.905724135686648e-05, |
|
"loss": 3.3365, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.15645756457564575, |
|
"grad_norm": 0.7024756669998169, |
|
"learning_rate": 9.903218364077243e-05, |
|
"loss": 3.2594, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.15719557195571957, |
|
"grad_norm": 0.6018502712249756, |
|
"learning_rate": 9.900680052499138e-05, |
|
"loss": 3.3316, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.15793357933579336, |
|
"grad_norm": 0.6856579184532166, |
|
"learning_rate": 9.898109217797717e-05, |
|
"loss": 3.3196, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.15867158671586715, |
|
"grad_norm": 0.6864190101623535, |
|
"learning_rate": 9.895505877034198e-05, |
|
"loss": 3.3116, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.15940959409594097, |
|
"grad_norm": 0.57015061378479, |
|
"learning_rate": 9.892870047485526e-05, |
|
"loss": 3.3119, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.16014760147601476, |
|
"grad_norm": 0.5812332630157471, |
|
"learning_rate": 9.89020174664425e-05, |
|
"loss": 3.2727, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.16088560885608857, |
|
"grad_norm": 0.6356363296508789, |
|
"learning_rate": 9.887500992218421e-05, |
|
"loss": 3.3661, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.16162361623616237, |
|
"grad_norm": 0.672024130821228, |
|
"learning_rate": 9.884767802131465e-05, |
|
"loss": 3.3215, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.16236162361623616, |
|
"grad_norm": 0.6531562805175781, |
|
"learning_rate": 9.882002194522064e-05, |
|
"loss": 3.2374, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16309963099630997, |
|
"grad_norm": 0.6039624214172363, |
|
"learning_rate": 9.879204187744036e-05, |
|
"loss": 3.2342, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.16383763837638377, |
|
"grad_norm": 0.5702035427093506, |
|
"learning_rate": 9.876373800366215e-05, |
|
"loss": 3.3181, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.16457564575645756, |
|
"grad_norm": 0.6860033273696899, |
|
"learning_rate": 9.87351105117233e-05, |
|
"loss": 3.3758, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.16531365313653137, |
|
"grad_norm": 0.6462620496749878, |
|
"learning_rate": 9.870615959160875e-05, |
|
"loss": 3.3542, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.16605166051660517, |
|
"grad_norm": 0.6575970649719238, |
|
"learning_rate": 9.867688543544988e-05, |
|
"loss": 3.2135, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.16678966789667896, |
|
"grad_norm": 0.6185761094093323, |
|
"learning_rate": 9.86472882375232e-05, |
|
"loss": 3.294, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.16752767527675277, |
|
"grad_norm": 0.6141475439071655, |
|
"learning_rate": 9.861736819424902e-05, |
|
"loss": 3.1992, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.16826568265682657, |
|
"grad_norm": 0.6172120571136475, |
|
"learning_rate": 9.85871255041903e-05, |
|
"loss": 3.2167, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.16900369003690036, |
|
"grad_norm": 0.5904815196990967, |
|
"learning_rate": 9.855656036805114e-05, |
|
"loss": 3.2945, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.16974169741697417, |
|
"grad_norm": 0.6383630633354187, |
|
"learning_rate": 9.852567298867557e-05, |
|
"loss": 3.2865, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17047970479704797, |
|
"grad_norm": 0.60262531042099, |
|
"learning_rate": 9.84944635710462e-05, |
|
"loss": 3.2188, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.17121771217712178, |
|
"grad_norm": 0.5909958481788635, |
|
"learning_rate": 9.846293232228274e-05, |
|
"loss": 3.2896, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.17195571955719557, |
|
"grad_norm": 0.5554500818252563, |
|
"learning_rate": 9.843107945164086e-05, |
|
"loss": 3.1705, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.17269372693726937, |
|
"grad_norm": 0.620606005191803, |
|
"learning_rate": 9.83989051705105e-05, |
|
"loss": 3.2288, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.17343173431734318, |
|
"grad_norm": 0.6841108202934265, |
|
"learning_rate": 9.836640969241475e-05, |
|
"loss": 3.2441, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.17416974169741697, |
|
"grad_norm": 0.6839698553085327, |
|
"learning_rate": 9.833359323300826e-05, |
|
"loss": 3.2246, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.17490774907749077, |
|
"grad_norm": 0.7128744721412659, |
|
"learning_rate": 9.830045601007584e-05, |
|
"loss": 3.2008, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.17564575645756458, |
|
"grad_norm": 0.65251624584198, |
|
"learning_rate": 9.826699824353106e-05, |
|
"loss": 3.3275, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.17638376383763837, |
|
"grad_norm": 0.5380867123603821, |
|
"learning_rate": 9.823322015541474e-05, |
|
"loss": 3.2064, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.17712177121771217, |
|
"grad_norm": 0.5963719487190247, |
|
"learning_rate": 9.819912196989351e-05, |
|
"loss": 3.1643, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17785977859778598, |
|
"grad_norm": 0.8703069090843201, |
|
"learning_rate": 9.816470391325832e-05, |
|
"loss": 3.1848, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.17859778597785977, |
|
"grad_norm": 0.608935534954071, |
|
"learning_rate": 9.81299662139229e-05, |
|
"loss": 3.2719, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.1793357933579336, |
|
"grad_norm": 0.6425730586051941, |
|
"learning_rate": 9.809490910242229e-05, |
|
"loss": 3.2619, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.18007380073800738, |
|
"grad_norm": 0.5790001749992371, |
|
"learning_rate": 9.805953281141131e-05, |
|
"loss": 3.243, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.18081180811808117, |
|
"grad_norm": 0.6436141133308411, |
|
"learning_rate": 9.802383757566301e-05, |
|
"loss": 3.2284, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.181549815498155, |
|
"grad_norm": 0.5458927154541016, |
|
"learning_rate": 9.798782363206702e-05, |
|
"loss": 3.2043, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.18228782287822878, |
|
"grad_norm": 0.6296219229698181, |
|
"learning_rate": 9.795149121962815e-05, |
|
"loss": 3.2683, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.18302583025830257, |
|
"grad_norm": 0.6964813470840454, |
|
"learning_rate": 9.791484057946465e-05, |
|
"loss": 3.1977, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.1837638376383764, |
|
"grad_norm": 0.5911018252372742, |
|
"learning_rate": 9.787787195480672e-05, |
|
"loss": 3.2263, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.18450184501845018, |
|
"grad_norm": 0.5431626439094543, |
|
"learning_rate": 9.784058559099483e-05, |
|
"loss": 3.1628, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18523985239852397, |
|
"grad_norm": 0.6068975329399109, |
|
"learning_rate": 9.78029817354781e-05, |
|
"loss": 3.1828, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.1859778597785978, |
|
"grad_norm": 0.580287516117096, |
|
"learning_rate": 9.776506063781269e-05, |
|
"loss": 3.2248, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.18671586715867158, |
|
"grad_norm": 0.6136944890022278, |
|
"learning_rate": 9.772682254966008e-05, |
|
"loss": 3.2495, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.18745387453874537, |
|
"grad_norm": 0.6076098680496216, |
|
"learning_rate": 9.76882677247855e-05, |
|
"loss": 3.1979, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.1881918819188192, |
|
"grad_norm": 0.5682818293571472, |
|
"learning_rate": 9.764939641905615e-05, |
|
"loss": 3.1714, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.18892988929889298, |
|
"grad_norm": 0.5991480350494385, |
|
"learning_rate": 9.761020889043954e-05, |
|
"loss": 3.154, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.1896678966789668, |
|
"grad_norm": 0.6232896447181702, |
|
"learning_rate": 9.75707053990018e-05, |
|
"loss": 3.2036, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.1904059040590406, |
|
"grad_norm": 0.5560643672943115, |
|
"learning_rate": 9.75308862069059e-05, |
|
"loss": 3.2392, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.19114391143911438, |
|
"grad_norm": 0.5718569755554199, |
|
"learning_rate": 9.749075157840996e-05, |
|
"loss": 3.2528, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.1918819188191882, |
|
"grad_norm": 0.5662999749183655, |
|
"learning_rate": 9.74503017798655e-05, |
|
"loss": 3.2256, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.192619926199262, |
|
"grad_norm": 0.6026265621185303, |
|
"learning_rate": 9.74095370797156e-05, |
|
"loss": 3.2183, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.19335793357933578, |
|
"grad_norm": 0.6032066941261292, |
|
"learning_rate": 9.736845774849321e-05, |
|
"loss": 3.2418, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.1940959409594096, |
|
"grad_norm": 0.5830618143081665, |
|
"learning_rate": 9.732706405881931e-05, |
|
"loss": 3.191, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.1948339483394834, |
|
"grad_norm": 0.5695509314537048, |
|
"learning_rate": 9.728535628540109e-05, |
|
"loss": 3.1968, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.19557195571955718, |
|
"grad_norm": 0.5905478000640869, |
|
"learning_rate": 9.724333470503013e-05, |
|
"loss": 3.2596, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.196309963099631, |
|
"grad_norm": 0.5251249670982361, |
|
"learning_rate": 9.720099959658062e-05, |
|
"loss": 3.1729, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.1970479704797048, |
|
"grad_norm": 0.6502349972724915, |
|
"learning_rate": 9.715835124100742e-05, |
|
"loss": 3.2604, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.1977859778597786, |
|
"grad_norm": 0.6250560283660889, |
|
"learning_rate": 9.711538992134426e-05, |
|
"loss": 3.2194, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.1985239852398524, |
|
"grad_norm": 0.5793785452842712, |
|
"learning_rate": 9.707211592270183e-05, |
|
"loss": 3.1994, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.1992619926199262, |
|
"grad_norm": 0.6495150327682495, |
|
"learning_rate": 9.70285295322659e-05, |
|
"loss": 3.1919, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5875915288925171, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 3.2464, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.2007380073800738, |
|
"grad_norm": 0.5518725514411926, |
|
"learning_rate": 9.69404207351206e-05, |
|
"loss": 3.2042, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.2014760147601476, |
|
"grad_norm": 0.5390283465385437, |
|
"learning_rate": 9.689589891314094e-05, |
|
"loss": 3.2012, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.2022140221402214, |
|
"grad_norm": 0.5596645474433899, |
|
"learning_rate": 9.685106586882336e-05, |
|
"loss": 3.2053, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.2029520295202952, |
|
"grad_norm": 0.5377479195594788, |
|
"learning_rate": 9.680592189970015e-05, |
|
"loss": 3.177, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.203690036900369, |
|
"grad_norm": 0.5858853459358215, |
|
"learning_rate": 9.676046730536704e-05, |
|
"loss": 3.2039, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.2044280442804428, |
|
"grad_norm": 0.5771840810775757, |
|
"learning_rate": 9.671470238748124e-05, |
|
"loss": 3.1654, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.2051660516605166, |
|
"grad_norm": 0.5626157522201538, |
|
"learning_rate": 9.666862744975938e-05, |
|
"loss": 3.1978, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.2059040590405904, |
|
"grad_norm": 0.5536968111991882, |
|
"learning_rate": 9.662224279797552e-05, |
|
"loss": 3.2152, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.2066420664206642, |
|
"grad_norm": 0.5982388854026794, |
|
"learning_rate": 9.657554873995913e-05, |
|
"loss": 3.1699, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.207380073800738, |
|
"grad_norm": 0.5761833190917969, |
|
"learning_rate": 9.652854558559308e-05, |
|
"loss": 3.1766, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.20811808118081182, |
|
"grad_norm": 0.5907506346702576, |
|
"learning_rate": 9.648123364681145e-05, |
|
"loss": 3.0935, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.2088560885608856, |
|
"grad_norm": 0.5584788918495178, |
|
"learning_rate": 9.643361323759763e-05, |
|
"loss": 3.1111, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.2095940959409594, |
|
"grad_norm": 0.5568063855171204, |
|
"learning_rate": 9.638568467398215e-05, |
|
"loss": 3.1739, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.21033210332103322, |
|
"grad_norm": 0.5453604459762573, |
|
"learning_rate": 9.633744827404055e-05, |
|
"loss": 3.2064, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.211070110701107, |
|
"grad_norm": 0.6171849966049194, |
|
"learning_rate": 9.628890435789135e-05, |
|
"loss": 3.2281, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.2118081180811808, |
|
"grad_norm": 0.5285280346870422, |
|
"learning_rate": 9.624005324769388e-05, |
|
"loss": 3.113, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.21254612546125462, |
|
"grad_norm": 0.5632630586624146, |
|
"learning_rate": 9.619089526764614e-05, |
|
"loss": 3.1592, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.2132841328413284, |
|
"grad_norm": 0.6024160385131836, |
|
"learning_rate": 9.614143074398264e-05, |
|
"loss": 3.1904, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.2140221402214022, |
|
"grad_norm": 0.5437342524528503, |
|
"learning_rate": 9.609166000497229e-05, |
|
"loss": 3.1156, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.21476014760147602, |
|
"grad_norm": 0.5884766578674316, |
|
"learning_rate": 9.604158338091615e-05, |
|
"loss": 3.1888, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.2154981549815498, |
|
"grad_norm": 0.547242283821106, |
|
"learning_rate": 9.599120120414531e-05, |
|
"loss": 3.1079, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.21623616236162363, |
|
"grad_norm": 0.5443885326385498, |
|
"learning_rate": 9.594051380901859e-05, |
|
"loss": 3.1147, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.21697416974169742, |
|
"grad_norm": 0.5350677371025085, |
|
"learning_rate": 9.588952153192041e-05, |
|
"loss": 3.1061, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.2177121771217712, |
|
"grad_norm": 0.5434796214103699, |
|
"learning_rate": 9.583822471125854e-05, |
|
"loss": 3.1172, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.21845018450184503, |
|
"grad_norm": 0.5185326933860779, |
|
"learning_rate": 9.578662368746182e-05, |
|
"loss": 3.2186, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.21918819188191882, |
|
"grad_norm": 0.5394032001495361, |
|
"learning_rate": 9.57347188029779e-05, |
|
"loss": 3.1628, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.2199261992619926, |
|
"grad_norm": 0.5857832431793213, |
|
"learning_rate": 9.568251040227101e-05, |
|
"loss": 3.1291, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.22066420664206643, |
|
"grad_norm": 0.6189760565757751, |
|
"learning_rate": 9.562999883181967e-05, |
|
"loss": 3.1305, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.22140221402214022, |
|
"grad_norm": 0.5518510937690735, |
|
"learning_rate": 9.557718444011431e-05, |
|
"loss": 3.2148, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.222140221402214, |
|
"grad_norm": 0.5947515964508057, |
|
"learning_rate": 9.552406757765509e-05, |
|
"loss": 3.1322, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.22287822878228783, |
|
"grad_norm": 0.5554746985435486, |
|
"learning_rate": 9.547064859694943e-05, |
|
"loss": 3.1822, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.22361623616236162, |
|
"grad_norm": 0.5308244824409485, |
|
"learning_rate": 9.541692785250981e-05, |
|
"loss": 3.1371, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.2243542435424354, |
|
"grad_norm": 0.5285702347755432, |
|
"learning_rate": 9.536290570085131e-05, |
|
"loss": 3.1329, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.22509225092250923, |
|
"grad_norm": 0.5468854904174805, |
|
"learning_rate": 9.530858250048932e-05, |
|
"loss": 3.2538, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.22583025830258302, |
|
"grad_norm": 0.5449059009552002, |
|
"learning_rate": 9.525395861193707e-05, |
|
"loss": 3.2139, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.22656826568265684, |
|
"grad_norm": 0.5692685842514038, |
|
"learning_rate": 9.519903439770332e-05, |
|
"loss": 3.1138, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.22730627306273063, |
|
"grad_norm": 0.5263866782188416, |
|
"learning_rate": 9.514381022228997e-05, |
|
"loss": 3.0872, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.22804428044280442, |
|
"grad_norm": 0.5696788430213928, |
|
"learning_rate": 9.50882864521895e-05, |
|
"loss": 3.167, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.22878228782287824, |
|
"grad_norm": 0.5760169625282288, |
|
"learning_rate": 9.503246345588274e-05, |
|
"loss": 3.15, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.22952029520295203, |
|
"grad_norm": 0.5390339493751526, |
|
"learning_rate": 9.497634160383626e-05, |
|
"loss": 3.1367, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.23025830258302582, |
|
"grad_norm": 0.5490269660949707, |
|
"learning_rate": 9.491992126849997e-05, |
|
"loss": 3.1779, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.23099630996309964, |
|
"grad_norm": 0.5177121758460999, |
|
"learning_rate": 9.486320282430468e-05, |
|
"loss": 3.0789, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.23173431734317343, |
|
"grad_norm": 0.5448027849197388, |
|
"learning_rate": 9.480618664765955e-05, |
|
"loss": 3.1866, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.23247232472324722, |
|
"grad_norm": 0.5371176600456238, |
|
"learning_rate": 9.474887311694968e-05, |
|
"loss": 3.2089, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.23321033210332104, |
|
"grad_norm": 0.6013469099998474, |
|
"learning_rate": 9.469126261253348e-05, |
|
"loss": 3.1159, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.23394833948339483, |
|
"grad_norm": 0.5597007274627686, |
|
"learning_rate": 9.463335551674025e-05, |
|
"loss": 3.124, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.23468634686346865, |
|
"grad_norm": 0.5460641384124756, |
|
"learning_rate": 9.45751522138676e-05, |
|
"loss": 3.103, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.23542435424354244, |
|
"grad_norm": 0.5389031767845154, |
|
"learning_rate": 9.45166530901789e-05, |
|
"loss": 3.1502, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.23616236162361623, |
|
"grad_norm": 0.5293789505958557, |
|
"learning_rate": 9.445785853390073e-05, |
|
"loss": 3.0856, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23690036900369005, |
|
"grad_norm": 0.677259087562561, |
|
"learning_rate": 9.439876893522028e-05, |
|
"loss": 3.1143, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.23763837638376384, |
|
"grad_norm": 0.5259451866149902, |
|
"learning_rate": 9.433938468628277e-05, |
|
"loss": 3.1628, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.23837638376383763, |
|
"grad_norm": 0.5321341156959534, |
|
"learning_rate": 9.427970618118888e-05, |
|
"loss": 3.1164, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.23911439114391145, |
|
"grad_norm": 0.5752614140510559, |
|
"learning_rate": 9.421973381599208e-05, |
|
"loss": 3.0361, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.23985239852398524, |
|
"grad_norm": 0.5552977323532104, |
|
"learning_rate": 9.415946798869602e-05, |
|
"loss": 3.1452, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.24059040590405903, |
|
"grad_norm": 0.5862517952919006, |
|
"learning_rate": 9.409890909925193e-05, |
|
"loss": 3.1493, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.24132841328413285, |
|
"grad_norm": 0.5374996066093445, |
|
"learning_rate": 9.40380575495559e-05, |
|
"loss": 3.1315, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.24206642066420664, |
|
"grad_norm": 0.5315213203430176, |
|
"learning_rate": 9.39769137434463e-05, |
|
"loss": 3.1218, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.24280442804428043, |
|
"grad_norm": 0.5306174159049988, |
|
"learning_rate": 9.391547808670096e-05, |
|
"loss": 3.0916, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.24354243542435425, |
|
"grad_norm": 0.5105913281440735, |
|
"learning_rate": 9.385375098703465e-05, |
|
"loss": 3.0469, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.24428044280442804, |
|
"grad_norm": 0.5171898603439331, |
|
"learning_rate": 9.379173285409621e-05, |
|
"loss": 3.068, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.24501845018450186, |
|
"grad_norm": 0.5028154253959656, |
|
"learning_rate": 9.372942409946596e-05, |
|
"loss": 3.1542, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.24575645756457565, |
|
"grad_norm": 0.5281797647476196, |
|
"learning_rate": 9.366682513665293e-05, |
|
"loss": 3.1484, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.24649446494464944, |
|
"grad_norm": 0.5240592956542969, |
|
"learning_rate": 9.360393638109201e-05, |
|
"loss": 3.103, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.24723247232472326, |
|
"grad_norm": 0.5516790747642517, |
|
"learning_rate": 9.354075825014139e-05, |
|
"loss": 3.0701, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.24797047970479705, |
|
"grad_norm": 0.6081251502037048, |
|
"learning_rate": 9.347729116307964e-05, |
|
"loss": 3.1434, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.24870848708487084, |
|
"grad_norm": 0.5216418504714966, |
|
"learning_rate": 9.341353554110297e-05, |
|
"loss": 3.1567, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.24944649446494466, |
|
"grad_norm": 0.5264909863471985, |
|
"learning_rate": 9.334949180732245e-05, |
|
"loss": 3.162, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.25018450184501845, |
|
"grad_norm": 0.4942391812801361, |
|
"learning_rate": 9.328516038676119e-05, |
|
"loss": 3.1532, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.25092250922509224, |
|
"grad_norm": 0.5401615500450134, |
|
"learning_rate": 9.322054170635149e-05, |
|
"loss": 3.1, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.25166051660516603, |
|
"grad_norm": 0.5021462440490723, |
|
"learning_rate": 9.315563619493209e-05, |
|
"loss": 3.0438, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.2523985239852399, |
|
"grad_norm": 0.5627569556236267, |
|
"learning_rate": 9.309044428324522e-05, |
|
"loss": 3.2005, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.25313653136531367, |
|
"grad_norm": 0.514385461807251, |
|
"learning_rate": 9.302496640393382e-05, |
|
"loss": 3.1035, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.25387453874538746, |
|
"grad_norm": 0.5261507630348206, |
|
"learning_rate": 9.295920299153863e-05, |
|
"loss": 3.1706, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.25461254612546125, |
|
"grad_norm": 0.5069513916969299, |
|
"learning_rate": 9.289315448249531e-05, |
|
"loss": 3.1218, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.25535055350553504, |
|
"grad_norm": 0.49072757363319397, |
|
"learning_rate": 9.282682131513157e-05, |
|
"loss": 3.1231, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.25608856088560883, |
|
"grad_norm": 0.6358250379562378, |
|
"learning_rate": 9.276020392966422e-05, |
|
"loss": 3.1082, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.2568265682656827, |
|
"grad_norm": 0.5456467270851135, |
|
"learning_rate": 9.26933027681963e-05, |
|
"loss": 3.1454, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.25756457564575647, |
|
"grad_norm": 0.5754953026771545, |
|
"learning_rate": 9.262611827471406e-05, |
|
"loss": 3.1334, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.25830258302583026, |
|
"grad_norm": 0.5355437397956848, |
|
"learning_rate": 9.25586508950841e-05, |
|
"loss": 3.0149, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25904059040590405, |
|
"grad_norm": 0.5386449694633484, |
|
"learning_rate": 9.249090107705044e-05, |
|
"loss": 3.1859, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.25977859778597784, |
|
"grad_norm": 0.5665399432182312, |
|
"learning_rate": 9.242286927023136e-05, |
|
"loss": 3.171, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.2605166051660517, |
|
"grad_norm": 0.5453583002090454, |
|
"learning_rate": 9.235455592611665e-05, |
|
"loss": 3.1198, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.2612546125461255, |
|
"grad_norm": 0.5409013032913208, |
|
"learning_rate": 9.22859614980645e-05, |
|
"loss": 3.0841, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.26199261992619927, |
|
"grad_norm": 0.5243815779685974, |
|
"learning_rate": 9.221708644129843e-05, |
|
"loss": 3.13, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.26273062730627306, |
|
"grad_norm": 0.562589168548584, |
|
"learning_rate": 9.214793121290442e-05, |
|
"loss": 3.0718, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.26346863468634685, |
|
"grad_norm": 0.5075133442878723, |
|
"learning_rate": 9.207849627182772e-05, |
|
"loss": 3.1159, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.26420664206642064, |
|
"grad_norm": 0.5348154902458191, |
|
"learning_rate": 9.200878207886993e-05, |
|
"loss": 3.1932, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.2649446494464945, |
|
"grad_norm": 0.5550357103347778, |
|
"learning_rate": 9.19387890966859e-05, |
|
"loss": 3.0973, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.2656826568265683, |
|
"grad_norm": 0.534482479095459, |
|
"learning_rate": 9.186851778978062e-05, |
|
"loss": 3.1466, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.26642066420664207, |
|
"grad_norm": 0.521537184715271, |
|
"learning_rate": 9.179796862450618e-05, |
|
"loss": 3.0424, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.26715867158671586, |
|
"grad_norm": 0.5350748896598816, |
|
"learning_rate": 9.172714206905866e-05, |
|
"loss": 3.0505, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.26789667896678965, |
|
"grad_norm": 0.5348935127258301, |
|
"learning_rate": 9.165603859347502e-05, |
|
"loss": 3.1561, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.2686346863468635, |
|
"grad_norm": 0.5182725191116333, |
|
"learning_rate": 9.158465866963002e-05, |
|
"loss": 3.0778, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2693726937269373, |
|
"grad_norm": 0.5188565850257874, |
|
"learning_rate": 9.151300277123301e-05, |
|
"loss": 3.0517, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.2701107011070111, |
|
"grad_norm": 0.5163888931274414, |
|
"learning_rate": 9.144107137382484e-05, |
|
"loss": 2.979, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.27084870848708487, |
|
"grad_norm": 0.5174587965011597, |
|
"learning_rate": 9.136886495477475e-05, |
|
"loss": 3.0661, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.27158671586715866, |
|
"grad_norm": 0.5590752363204956, |
|
"learning_rate": 9.129638399327706e-05, |
|
"loss": 3.0624, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.27232472324723245, |
|
"grad_norm": 0.48960742354393005, |
|
"learning_rate": 9.122362897034817e-05, |
|
"loss": 3.0344, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.2730627306273063, |
|
"grad_norm": 0.5071660876274109, |
|
"learning_rate": 9.115060036882318e-05, |
|
"loss": 3.0374, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2738007380073801, |
|
"grad_norm": 0.5058993697166443, |
|
"learning_rate": 9.107729867335288e-05, |
|
"loss": 3.0823, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.2745387453874539, |
|
"grad_norm": 0.5252380967140198, |
|
"learning_rate": 9.100372437040034e-05, |
|
"loss": 3.0558, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.27527675276752767, |
|
"grad_norm": 0.49785932898521423, |
|
"learning_rate": 9.092987794823786e-05, |
|
"loss": 3.0836, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.27601476014760146, |
|
"grad_norm": 0.5140420794487, |
|
"learning_rate": 9.085575989694357e-05, |
|
"loss": 3.1079, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.2767527675276753, |
|
"grad_norm": 0.5329453945159912, |
|
"learning_rate": 9.078137070839832e-05, |
|
"loss": 3.0775, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2774907749077491, |
|
"grad_norm": 0.4971647560596466, |
|
"learning_rate": 9.070671087628229e-05, |
|
"loss": 3.0756, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.2782287822878229, |
|
"grad_norm": 0.5552874803543091, |
|
"learning_rate": 9.063178089607183e-05, |
|
"loss": 3.0615, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.2789667896678967, |
|
"grad_norm": 0.525969922542572, |
|
"learning_rate": 9.055658126503605e-05, |
|
"loss": 3.0594, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.27970479704797047, |
|
"grad_norm": 0.5235247611999512, |
|
"learning_rate": 9.048111248223368e-05, |
|
"loss": 3.097, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.28044280442804426, |
|
"grad_norm": 0.5573784112930298, |
|
"learning_rate": 9.040537504850954e-05, |
|
"loss": 3.0303, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.2811808118081181, |
|
"grad_norm": 0.5464443564414978, |
|
"learning_rate": 9.032936946649144e-05, |
|
"loss": 3.063, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.2819188191881919, |
|
"grad_norm": 0.5378391146659851, |
|
"learning_rate": 9.02530962405867e-05, |
|
"loss": 3.0853, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.2826568265682657, |
|
"grad_norm": 0.5274621844291687, |
|
"learning_rate": 9.017655587697885e-05, |
|
"loss": 3.1374, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.2833948339483395, |
|
"grad_norm": 0.5044965744018555, |
|
"learning_rate": 9.009974888362424e-05, |
|
"loss": 3.064, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.28413284132841327, |
|
"grad_norm": 0.5318046808242798, |
|
"learning_rate": 9.002267577024876e-05, |
|
"loss": 3.0662, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.2848708487084871, |
|
"grad_norm": 0.5438222289085388, |
|
"learning_rate": 8.994533704834435e-05, |
|
"loss": 3.0999, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.2856088560885609, |
|
"grad_norm": 0.5226894021034241, |
|
"learning_rate": 8.986773323116563e-05, |
|
"loss": 3.0496, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.2863468634686347, |
|
"grad_norm": 1.9248789548873901, |
|
"learning_rate": 8.978986483372655e-05, |
|
"loss": 3.0549, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.2870848708487085, |
|
"grad_norm": 0.49465620517730713, |
|
"learning_rate": 8.971173237279692e-05, |
|
"loss": 3.085, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.2878228782287823, |
|
"grad_norm": 0.5317748785018921, |
|
"learning_rate": 8.963333636689898e-05, |
|
"loss": 3.0659, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.28856088560885607, |
|
"grad_norm": 0.5400087833404541, |
|
"learning_rate": 8.9554677336304e-05, |
|
"loss": 3.0963, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.2892988929889299, |
|
"grad_norm": 0.5060845613479614, |
|
"learning_rate": 8.947575580302878e-05, |
|
"loss": 3.0503, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.2900369003690037, |
|
"grad_norm": 0.5168414115905762, |
|
"learning_rate": 8.939657229083222e-05, |
|
"loss": 3.1322, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.2907749077490775, |
|
"grad_norm": 0.5268558263778687, |
|
"learning_rate": 8.931712732521183e-05, |
|
"loss": 3.0947, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.2915129151291513, |
|
"grad_norm": 0.5113683938980103, |
|
"learning_rate": 8.92374214334002e-05, |
|
"loss": 3.0379, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.2922509225092251, |
|
"grad_norm": 0.5602664947509766, |
|
"learning_rate": 8.915745514436161e-05, |
|
"loss": 3.0636, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.29298892988929887, |
|
"grad_norm": 0.507926344871521, |
|
"learning_rate": 8.907722898878844e-05, |
|
"loss": 3.0737, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.2937269372693727, |
|
"grad_norm": 0.5805441737174988, |
|
"learning_rate": 8.899674349909759e-05, |
|
"loss": 3.0743, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.2944649446494465, |
|
"grad_norm": 0.5141892433166504, |
|
"learning_rate": 8.891599920942713e-05, |
|
"loss": 3.0711, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.2952029520295203, |
|
"grad_norm": 0.5769287347793579, |
|
"learning_rate": 8.883499665563253e-05, |
|
"loss": 3.0302, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2959409594095941, |
|
"grad_norm": 0.5248669981956482, |
|
"learning_rate": 8.875373637528335e-05, |
|
"loss": 3.0871, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.2966789667896679, |
|
"grad_norm": 0.5001204609870911, |
|
"learning_rate": 8.867221890765938e-05, |
|
"loss": 3.0342, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.2974169741697417, |
|
"grad_norm": 0.5176003575325012, |
|
"learning_rate": 8.859044479374736e-05, |
|
"loss": 3.1404, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.2981549815498155, |
|
"grad_norm": 0.5125160217285156, |
|
"learning_rate": 8.850841457623719e-05, |
|
"loss": 3.0399, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.2988929889298893, |
|
"grad_norm": 0.49271440505981445, |
|
"learning_rate": 8.842612879951837e-05, |
|
"loss": 3.0082, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.2996309963099631, |
|
"grad_norm": 0.5456764698028564, |
|
"learning_rate": 8.834358800967645e-05, |
|
"loss": 3.0537, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.3003690036900369, |
|
"grad_norm": 0.5039022564888, |
|
"learning_rate": 8.826079275448933e-05, |
|
"loss": 3.0508, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.3011070110701107, |
|
"grad_norm": 0.48597994446754456, |
|
"learning_rate": 8.817774358342367e-05, |
|
"loss": 3.0806, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.3018450184501845, |
|
"grad_norm": 0.5243167877197266, |
|
"learning_rate": 8.809444104763122e-05, |
|
"loss": 3.1176, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.3025830258302583, |
|
"grad_norm": 0.5244473218917847, |
|
"learning_rate": 8.801088569994522e-05, |
|
"loss": 3.0985, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3033210332103321, |
|
"grad_norm": 0.4856514632701874, |
|
"learning_rate": 8.792707809487661e-05, |
|
"loss": 3.0546, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.3040590405904059, |
|
"grad_norm": 0.48701879382133484, |
|
"learning_rate": 8.784301878861047e-05, |
|
"loss": 3.083, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.3047970479704797, |
|
"grad_norm": 0.5364317297935486, |
|
"learning_rate": 8.775870833900226e-05, |
|
"loss": 3.0672, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.30553505535055353, |
|
"grad_norm": 0.5016632676124573, |
|
"learning_rate": 8.767414730557418e-05, |
|
"loss": 2.9692, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.3062730627306273, |
|
"grad_norm": 0.5020787715911865, |
|
"learning_rate": 8.758933624951135e-05, |
|
"loss": 3.0618, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.3070110701107011, |
|
"grad_norm": 0.5041311383247375, |
|
"learning_rate": 8.750427573365824e-05, |
|
"loss": 3.0193, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.3077490774907749, |
|
"grad_norm": 0.5102233290672302, |
|
"learning_rate": 8.741896632251476e-05, |
|
"loss": 3.0837, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.3084870848708487, |
|
"grad_norm": 0.5173757672309875, |
|
"learning_rate": 8.733340858223268e-05, |
|
"loss": 2.9969, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.3092250922509225, |
|
"grad_norm": 0.47782695293426514, |
|
"learning_rate": 8.724760308061172e-05, |
|
"loss": 2.9934, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.30996309963099633, |
|
"grad_norm": 0.4984055161476135, |
|
"learning_rate": 8.71615503870959e-05, |
|
"loss": 3.0055, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3107011070110701, |
|
"grad_norm": 0.535744845867157, |
|
"learning_rate": 8.707525107276971e-05, |
|
"loss": 3.1124, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.3114391143911439, |
|
"grad_norm": 0.5163019895553589, |
|
"learning_rate": 8.698870571035435e-05, |
|
"loss": 3.0904, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.3121771217712177, |
|
"grad_norm": 0.5297439694404602, |
|
"learning_rate": 8.690191487420385e-05, |
|
"loss": 3.039, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.3129151291512915, |
|
"grad_norm": 0.5315809845924377, |
|
"learning_rate": 8.681487914030137e-05, |
|
"loss": 3.1418, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.31365313653136534, |
|
"grad_norm": 0.5038068890571594, |
|
"learning_rate": 8.672759908625528e-05, |
|
"loss": 3.105, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.31439114391143913, |
|
"grad_norm": 0.5104600787162781, |
|
"learning_rate": 8.664007529129539e-05, |
|
"loss": 3.0253, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.3151291512915129, |
|
"grad_norm": 0.5337395668029785, |
|
"learning_rate": 8.655230833626908e-05, |
|
"loss": 3.0637, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.3158671586715867, |
|
"grad_norm": 0.5203779935836792, |
|
"learning_rate": 8.646429880363746e-05, |
|
"loss": 3.0862, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.3166051660516605, |
|
"grad_norm": 0.510831356048584, |
|
"learning_rate": 8.637604727747149e-05, |
|
"loss": 2.9944, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.3173431734317343, |
|
"grad_norm": 0.5363606214523315, |
|
"learning_rate": 8.62875543434481e-05, |
|
"loss": 3.1227, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.31808118081180814, |
|
"grad_norm": 0.5156981945037842, |
|
"learning_rate": 8.61988205888463e-05, |
|
"loss": 3.046, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.31881918819188193, |
|
"grad_norm": 0.530002772808075, |
|
"learning_rate": 8.610984660254333e-05, |
|
"loss": 3.037, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.3195571955719557, |
|
"grad_norm": 0.5514121651649475, |
|
"learning_rate": 8.602063297501068e-05, |
|
"loss": 3.0828, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.3202952029520295, |
|
"grad_norm": 0.49961575865745544, |
|
"learning_rate": 8.593118029831025e-05, |
|
"loss": 3.0404, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.3210332103321033, |
|
"grad_norm": 0.4883437752723694, |
|
"learning_rate": 8.584148916609032e-05, |
|
"loss": 3.0681, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.32177121771217715, |
|
"grad_norm": 0.5226607918739319, |
|
"learning_rate": 8.575156017358171e-05, |
|
"loss": 3.0631, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.32250922509225094, |
|
"grad_norm": 0.5821093320846558, |
|
"learning_rate": 8.566139391759378e-05, |
|
"loss": 3.0793, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.32324723247232473, |
|
"grad_norm": 0.5188676118850708, |
|
"learning_rate": 8.557099099651047e-05, |
|
"loss": 3.086, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.3239852398523985, |
|
"grad_norm": 0.5117591023445129, |
|
"learning_rate": 8.548035201028636e-05, |
|
"loss": 3.1174, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.3247232472324723, |
|
"grad_norm": 0.48335784673690796, |
|
"learning_rate": 8.538947756044261e-05, |
|
"loss": 2.9864, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.3254612546125461, |
|
"grad_norm": 0.5281744599342346, |
|
"learning_rate": 8.52983682500631e-05, |
|
"loss": 3.0942, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.32619926199261995, |
|
"grad_norm": 0.4935998022556305, |
|
"learning_rate": 8.520702468379028e-05, |
|
"loss": 3.0716, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.32693726937269374, |
|
"grad_norm": 0.4817652404308319, |
|
"learning_rate": 8.511544746782125e-05, |
|
"loss": 3.0314, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.32767527675276753, |
|
"grad_norm": 0.49610570073127747, |
|
"learning_rate": 8.502363720990374e-05, |
|
"loss": 2.9699, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.3284132841328413, |
|
"grad_norm": 0.5101500749588013, |
|
"learning_rate": 8.493159451933203e-05, |
|
"loss": 2.9248, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.3291512915129151, |
|
"grad_norm": 0.48433801531791687, |
|
"learning_rate": 8.483932000694295e-05, |
|
"loss": 3.0812, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.3298892988929889, |
|
"grad_norm": 0.4775218665599823, |
|
"learning_rate": 8.474681428511177e-05, |
|
"loss": 2.986, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.33062730627306275, |
|
"grad_norm": 0.49710339307785034, |
|
"learning_rate": 8.465407796774816e-05, |
|
"loss": 3.0331, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.33136531365313654, |
|
"grad_norm": 0.5008261799812317, |
|
"learning_rate": 8.456111167029219e-05, |
|
"loss": 3.0763, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.33210332103321033, |
|
"grad_norm": 0.5350390672683716, |
|
"learning_rate": 8.446791600971012e-05, |
|
"loss": 3.0238, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3328413284132841, |
|
"grad_norm": 0.5100720524787903, |
|
"learning_rate": 8.43744916044904e-05, |
|
"loss": 3.1137, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.3335793357933579, |
|
"grad_norm": 0.5103323459625244, |
|
"learning_rate": 8.428083907463951e-05, |
|
"loss": 3.0862, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.33431734317343176, |
|
"grad_norm": 0.563750147819519, |
|
"learning_rate": 8.418695904167788e-05, |
|
"loss": 3.0551, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.33505535055350555, |
|
"grad_norm": 0.4909681975841522, |
|
"learning_rate": 8.40928521286358e-05, |
|
"loss": 2.9769, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.33579335793357934, |
|
"grad_norm": 0.5330002903938293, |
|
"learning_rate": 8.399851896004913e-05, |
|
"loss": 3.046, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.33653136531365313, |
|
"grad_norm": 0.49845483899116516, |
|
"learning_rate": 8.390396016195537e-05, |
|
"loss": 3.0318, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.3372693726937269, |
|
"grad_norm": 0.4647519290447235, |
|
"learning_rate": 8.380917636188934e-05, |
|
"loss": 3.0097, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.3380073800738007, |
|
"grad_norm": 0.4947097599506378, |
|
"learning_rate": 8.371416818887908e-05, |
|
"loss": 3.0244, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.33874538745387456, |
|
"grad_norm": 0.514033854007721, |
|
"learning_rate": 8.361893627344168e-05, |
|
"loss": 3.0259, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.33948339483394835, |
|
"grad_norm": 0.5403528213500977, |
|
"learning_rate": 8.35234812475791e-05, |
|
"loss": 3.0071, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.34022140221402214, |
|
"grad_norm": 0.495109498500824, |
|
"learning_rate": 8.342780374477396e-05, |
|
"loss": 3.058, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.34095940959409593, |
|
"grad_norm": 0.48301902413368225, |
|
"learning_rate": 8.33319043999853e-05, |
|
"loss": 3.0686, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.3416974169741697, |
|
"grad_norm": 0.4977583885192871, |
|
"learning_rate": 8.323578384964444e-05, |
|
"loss": 2.9218, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.34243542435424357, |
|
"grad_norm": 0.4929274022579193, |
|
"learning_rate": 8.313944273165069e-05, |
|
"loss": 3.0489, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.34317343173431736, |
|
"grad_norm": 0.5092618465423584, |
|
"learning_rate": 8.304288168536718e-05, |
|
"loss": 2.9915, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.34391143911439115, |
|
"grad_norm": 0.48645535111427307, |
|
"learning_rate": 8.294610135161658e-05, |
|
"loss": 2.9596, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.34464944649446494, |
|
"grad_norm": 0.5053686499595642, |
|
"learning_rate": 8.284910237267682e-05, |
|
"loss": 3.0022, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.34538745387453873, |
|
"grad_norm": 0.5074572563171387, |
|
"learning_rate": 8.275188539227686e-05, |
|
"loss": 3.0701, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.3461254612546125, |
|
"grad_norm": 0.5153145790100098, |
|
"learning_rate": 8.265445105559247e-05, |
|
"loss": 2.9951, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.34686346863468637, |
|
"grad_norm": 0.5247951745986938, |
|
"learning_rate": 8.255680000924184e-05, |
|
"loss": 3.0631, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.34760147601476016, |
|
"grad_norm": 0.4750431180000305, |
|
"learning_rate": 8.245893290128136e-05, |
|
"loss": 3.0917, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.34833948339483395, |
|
"grad_norm": 0.4787590503692627, |
|
"learning_rate": 8.236085038120129e-05, |
|
"loss": 3.0494, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.34907749077490774, |
|
"grad_norm": 0.49496400356292725, |
|
"learning_rate": 8.22625530999215e-05, |
|
"loss": 3.0276, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.34981549815498153, |
|
"grad_norm": 0.517461359500885, |
|
"learning_rate": 8.216404170978707e-05, |
|
"loss": 2.9682, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.3505535055350554, |
|
"grad_norm": 0.4839133024215698, |
|
"learning_rate": 8.206531686456403e-05, |
|
"loss": 3.0396, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.35129151291512917, |
|
"grad_norm": 0.5224480628967285, |
|
"learning_rate": 8.196637921943496e-05, |
|
"loss": 3.048, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.35202952029520296, |
|
"grad_norm": 0.5209102034568787, |
|
"learning_rate": 8.186722943099472e-05, |
|
"loss": 3.0128, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.35276752767527675, |
|
"grad_norm": 0.480421781539917, |
|
"learning_rate": 8.176786815724601e-05, |
|
"loss": 3.0139, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.35350553505535054, |
|
"grad_norm": 0.4676721692085266, |
|
"learning_rate": 8.166829605759507e-05, |
|
"loss": 2.8988, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.35424354243542433, |
|
"grad_norm": 0.5178680419921875, |
|
"learning_rate": 8.156851379284729e-05, |
|
"loss": 3.0074, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3549815498154982, |
|
"grad_norm": 0.5426033735275269, |
|
"learning_rate": 8.146852202520277e-05, |
|
"loss": 2.9998, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.35571955719557197, |
|
"grad_norm": 0.4766799807548523, |
|
"learning_rate": 8.136832141825196e-05, |
|
"loss": 3.0129, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.35645756457564576, |
|
"grad_norm": 0.49461451172828674, |
|
"learning_rate": 8.12679126369713e-05, |
|
"loss": 3.0726, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.35719557195571955, |
|
"grad_norm": 0.4843361973762512, |
|
"learning_rate": 8.116729634771876e-05, |
|
"loss": 2.9953, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.35793357933579334, |
|
"grad_norm": 0.5127764344215393, |
|
"learning_rate": 8.106647321822943e-05, |
|
"loss": 3.0525, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.3586715867158672, |
|
"grad_norm": 0.4938580393791199, |
|
"learning_rate": 8.096544391761103e-05, |
|
"loss": 2.975, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.359409594095941, |
|
"grad_norm": 0.4944118559360504, |
|
"learning_rate": 8.08642091163396e-05, |
|
"loss": 3.0102, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.36014760147601477, |
|
"grad_norm": 0.4949988126754761, |
|
"learning_rate": 8.076276948625494e-05, |
|
"loss": 2.9756, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.36088560885608856, |
|
"grad_norm": 0.5549206733703613, |
|
"learning_rate": 8.066112570055621e-05, |
|
"loss": 3.0896, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.36162361623616235, |
|
"grad_norm": 0.4933255910873413, |
|
"learning_rate": 8.055927843379738e-05, |
|
"loss": 3.036, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.36236162361623614, |
|
"grad_norm": 0.5120234489440918, |
|
"learning_rate": 8.04572283618829e-05, |
|
"loss": 3.0661, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.36309963099631, |
|
"grad_norm": 0.47579410672187805, |
|
"learning_rate": 8.035497616206302e-05, |
|
"loss": 2.9517, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.3638376383763838, |
|
"grad_norm": 0.47006312012672424, |
|
"learning_rate": 8.025252251292949e-05, |
|
"loss": 2.9931, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.36457564575645757, |
|
"grad_norm": 0.498418927192688, |
|
"learning_rate": 8.014986809441094e-05, |
|
"loss": 2.9749, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.36531365313653136, |
|
"grad_norm": 0.4772182106971741, |
|
"learning_rate": 8.00470135877684e-05, |
|
"loss": 2.9708, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.36605166051660515, |
|
"grad_norm": 0.47467556595802307, |
|
"learning_rate": 7.994395967559076e-05, |
|
"loss": 2.9898, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.36678966789667894, |
|
"grad_norm": 0.509661078453064, |
|
"learning_rate": 7.984070704179026e-05, |
|
"loss": 3.0238, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.3675276752767528, |
|
"grad_norm": 0.47225892543792725, |
|
"learning_rate": 7.973725637159794e-05, |
|
"loss": 3.0066, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.3682656826568266, |
|
"grad_norm": 0.5211546421051025, |
|
"learning_rate": 7.963360835155915e-05, |
|
"loss": 3.0896, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.36900369003690037, |
|
"grad_norm": 0.4817075729370117, |
|
"learning_rate": 7.952976366952888e-05, |
|
"loss": 3.0348, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36974169741697416, |
|
"grad_norm": 0.4747537672519684, |
|
"learning_rate": 7.942572301466727e-05, |
|
"loss": 3.0146, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.37047970479704795, |
|
"grad_norm": 0.5026445984840393, |
|
"learning_rate": 7.932148707743503e-05, |
|
"loss": 2.9681, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.3712177121771218, |
|
"grad_norm": 0.47187340259552, |
|
"learning_rate": 7.921705654958886e-05, |
|
"loss": 3.0161, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.3719557195571956, |
|
"grad_norm": 0.5039234161376953, |
|
"learning_rate": 7.911243212417687e-05, |
|
"loss": 3.0002, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.3726937269372694, |
|
"grad_norm": 0.481448233127594, |
|
"learning_rate": 7.900761449553394e-05, |
|
"loss": 2.9907, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.37343173431734317, |
|
"grad_norm": 0.4844491481781006, |
|
"learning_rate": 7.890260435927708e-05, |
|
"loss": 3.0501, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.37416974169741696, |
|
"grad_norm": 0.502325177192688, |
|
"learning_rate": 7.879740241230098e-05, |
|
"loss": 2.9843, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.37490774907749075, |
|
"grad_norm": 0.49289822578430176, |
|
"learning_rate": 7.869200935277317e-05, |
|
"loss": 2.9808, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.3756457564575646, |
|
"grad_norm": 0.4960924983024597, |
|
"learning_rate": 7.858642588012957e-05, |
|
"loss": 3.0367, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.3763837638376384, |
|
"grad_norm": 0.4961390495300293, |
|
"learning_rate": 7.848065269506968e-05, |
|
"loss": 3.0371, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.3771217712177122, |
|
"grad_norm": 0.5095449090003967, |
|
"learning_rate": 7.837469049955211e-05, |
|
"loss": 2.9584, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.37785977859778597, |
|
"grad_norm": 0.5364798307418823, |
|
"learning_rate": 7.826853999678979e-05, |
|
"loss": 3.0194, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.37859778597785976, |
|
"grad_norm": 0.47735193371772766, |
|
"learning_rate": 7.816220189124526e-05, |
|
"loss": 2.9603, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.3793357933579336, |
|
"grad_norm": 0.47760894894599915, |
|
"learning_rate": 7.805567688862626e-05, |
|
"loss": 3.0335, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.3800738007380074, |
|
"grad_norm": 0.4874935448169708, |
|
"learning_rate": 7.794896569588066e-05, |
|
"loss": 3.0274, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.3808118081180812, |
|
"grad_norm": 0.48565617203712463, |
|
"learning_rate": 7.784206902119213e-05, |
|
"loss": 3.0081, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.381549815498155, |
|
"grad_norm": 0.513862133026123, |
|
"learning_rate": 7.773498757397522e-05, |
|
"loss": 2.9605, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.38228782287822877, |
|
"grad_norm": 0.4750123918056488, |
|
"learning_rate": 7.762772206487066e-05, |
|
"loss": 3.0109, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.38302583025830256, |
|
"grad_norm": 0.4761565327644348, |
|
"learning_rate": 7.75202732057408e-05, |
|
"loss": 3.0137, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.3837638376383764, |
|
"grad_norm": 0.5001286864280701, |
|
"learning_rate": 7.741264170966472e-05, |
|
"loss": 3.0493, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.3845018450184502, |
|
"grad_norm": 0.48891499638557434, |
|
"learning_rate": 7.730482829093358e-05, |
|
"loss": 3.0333, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.385239852398524, |
|
"grad_norm": 0.4714498221874237, |
|
"learning_rate": 7.719683366504586e-05, |
|
"loss": 2.9868, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.3859778597785978, |
|
"grad_norm": 0.4761471748352051, |
|
"learning_rate": 7.708865854870258e-05, |
|
"loss": 3.0351, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.38671586715867157, |
|
"grad_norm": 0.47278621792793274, |
|
"learning_rate": 7.698030365980265e-05, |
|
"loss": 3.0056, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.3874538745387454, |
|
"grad_norm": 0.502041220664978, |
|
"learning_rate": 7.687176971743796e-05, |
|
"loss": 3.013, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.3881918819188192, |
|
"grad_norm": 0.4808847904205322, |
|
"learning_rate": 7.676305744188871e-05, |
|
"loss": 3.0363, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.388929889298893, |
|
"grad_norm": 0.4782809615135193, |
|
"learning_rate": 7.665416755461859e-05, |
|
"loss": 2.9693, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.3896678966789668, |
|
"grad_norm": 0.4984862804412842, |
|
"learning_rate": 7.654510077827003e-05, |
|
"loss": 2.9882, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.3904059040590406, |
|
"grad_norm": 0.48033297061920166, |
|
"learning_rate": 7.643585783665931e-05, |
|
"loss": 2.9822, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.39114391143911437, |
|
"grad_norm": 0.5328406691551208, |
|
"learning_rate": 7.632643945477193e-05, |
|
"loss": 2.9835, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.3918819188191882, |
|
"grad_norm": 0.4741387963294983, |
|
"learning_rate": 7.621684635875756e-05, |
|
"loss": 3.0095, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.392619926199262, |
|
"grad_norm": 0.8941669464111328, |
|
"learning_rate": 7.610707927592549e-05, |
|
"loss": 2.9642, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.3933579335793358, |
|
"grad_norm": 0.501148521900177, |
|
"learning_rate": 7.59971389347395e-05, |
|
"loss": 2.9973, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.3940959409594096, |
|
"grad_norm": 0.4852311611175537, |
|
"learning_rate": 7.588702606481337e-05, |
|
"loss": 3.019, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.3948339483394834, |
|
"grad_norm": 0.44878798723220825, |
|
"learning_rate": 7.577674139690572e-05, |
|
"loss": 2.9582, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.3955719557195572, |
|
"grad_norm": 0.4837028384208679, |
|
"learning_rate": 7.566628566291536e-05, |
|
"loss": 2.9865, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.396309963099631, |
|
"grad_norm": 0.5781135559082031, |
|
"learning_rate": 7.555565959587638e-05, |
|
"loss": 2.9709, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.3970479704797048, |
|
"grad_norm": 0.4646313786506653, |
|
"learning_rate": 7.544486392995324e-05, |
|
"loss": 3.0123, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.3977859778597786, |
|
"grad_norm": 0.45897990465164185, |
|
"learning_rate": 7.533389940043598e-05, |
|
"loss": 2.9744, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.3985239852398524, |
|
"grad_norm": 0.47609013319015503, |
|
"learning_rate": 7.522276674373525e-05, |
|
"loss": 2.9654, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.3992619926199262, |
|
"grad_norm": 0.48847806453704834, |
|
"learning_rate": 7.51114666973775e-05, |
|
"loss": 3.0279, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.5017388463020325, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 2.9632, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.4007380073800738, |
|
"grad_norm": 0.49840694665908813, |
|
"learning_rate": 7.488836739134608e-05, |
|
"loss": 3.0054, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.4014760147601476, |
|
"grad_norm": 0.48498594760894775, |
|
"learning_rate": 7.477656961226007e-05, |
|
"loss": 2.9744, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.4022140221402214, |
|
"grad_norm": 0.49641212821006775, |
|
"learning_rate": 7.466460740468245e-05, |
|
"loss": 3.0054, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.4029520295202952, |
|
"grad_norm": 0.47951868176460266, |
|
"learning_rate": 7.455248151164493e-05, |
|
"loss": 2.9506, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.40369003690036903, |
|
"grad_norm": 0.5073153972625732, |
|
"learning_rate": 7.444019267726553e-05, |
|
"loss": 2.9172, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.4044280442804428, |
|
"grad_norm": 0.48473188281059265, |
|
"learning_rate": 7.432774164674359e-05, |
|
"loss": 2.9388, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.4051660516605166, |
|
"grad_norm": 0.4775610566139221, |
|
"learning_rate": 7.421512916635485e-05, |
|
"loss": 3.0088, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.4059040590405904, |
|
"grad_norm": 0.5261042714118958, |
|
"learning_rate": 7.410235598344657e-05, |
|
"loss": 2.9721, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4066420664206642, |
|
"grad_norm": 0.45107316970825195, |
|
"learning_rate": 7.398942284643241e-05, |
|
"loss": 2.9521, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.407380073800738, |
|
"grad_norm": 0.46772444248199463, |
|
"learning_rate": 7.387633050478766e-05, |
|
"loss": 2.9259, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.40811808118081183, |
|
"grad_norm": 0.4604153633117676, |
|
"learning_rate": 7.376307970904408e-05, |
|
"loss": 3.082, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.4088560885608856, |
|
"grad_norm": 0.47096291184425354, |
|
"learning_rate": 7.364967121078502e-05, |
|
"loss": 2.9186, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.4095940959409594, |
|
"grad_norm": 0.4761073589324951, |
|
"learning_rate": 7.353610576264045e-05, |
|
"loss": 3.028, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.4103321033210332, |
|
"grad_norm": 0.5043940544128418, |
|
"learning_rate": 7.34223841182819e-05, |
|
"loss": 2.9259, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.411070110701107, |
|
"grad_norm": 0.48511525988578796, |
|
"learning_rate": 7.33085070324175e-05, |
|
"loss": 2.9453, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.4118081180811808, |
|
"grad_norm": 0.4717444181442261, |
|
"learning_rate": 7.319447526078696e-05, |
|
"loss": 3.0091, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.41254612546125463, |
|
"grad_norm": 0.44939619302749634, |
|
"learning_rate": 7.308028956015653e-05, |
|
"loss": 2.9809, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.4132841328413284, |
|
"grad_norm": 0.46631982922554016, |
|
"learning_rate": 7.296595068831406e-05, |
|
"loss": 2.9969, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.4140221402214022, |
|
"grad_norm": 0.4884931743144989, |
|
"learning_rate": 7.285145940406386e-05, |
|
"loss": 2.9521, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.414760147601476, |
|
"grad_norm": 0.4892655611038208, |
|
"learning_rate": 7.273681646722173e-05, |
|
"loss": 2.9666, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.4154981549815498, |
|
"grad_norm": 0.4869326651096344, |
|
"learning_rate": 7.262202263860988e-05, |
|
"loss": 2.9618, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.41623616236162364, |
|
"grad_norm": 0.48076122999191284, |
|
"learning_rate": 7.2507078680052e-05, |
|
"loss": 2.9113, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.41697416974169743, |
|
"grad_norm": 0.46369293332099915, |
|
"learning_rate": 7.239198535436801e-05, |
|
"loss": 2.9309, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.4177121771217712, |
|
"grad_norm": 0.49062806367874146, |
|
"learning_rate": 7.227674342536913e-05, |
|
"loss": 3.0057, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.418450184501845, |
|
"grad_norm": 0.4727836847305298, |
|
"learning_rate": 7.216135365785279e-05, |
|
"loss": 3.0034, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.4191881918819188, |
|
"grad_norm": 0.5185651779174805, |
|
"learning_rate": 7.20458168175975e-05, |
|
"loss": 2.9296, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.4199261992619926, |
|
"grad_norm": 0.4758572280406952, |
|
"learning_rate": 7.193013367135792e-05, |
|
"loss": 2.9805, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.42066420664206644, |
|
"grad_norm": 0.507834255695343, |
|
"learning_rate": 7.181430498685954e-05, |
|
"loss": 2.9829, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.42140221402214023, |
|
"grad_norm": 0.48527729511260986, |
|
"learning_rate": 7.169833153279375e-05, |
|
"loss": 2.9951, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.422140221402214, |
|
"grad_norm": 0.5018925070762634, |
|
"learning_rate": 7.158221407881272e-05, |
|
"loss": 3.0251, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4228782287822878, |
|
"grad_norm": 0.5182327032089233, |
|
"learning_rate": 7.146595339552422e-05, |
|
"loss": 2.9954, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.4236162361623616, |
|
"grad_norm": 0.5015000104904175, |
|
"learning_rate": 7.134955025448663e-05, |
|
"loss": 2.9285, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.42435424354243545, |
|
"grad_norm": 0.47007137537002563, |
|
"learning_rate": 7.123300542820366e-05, |
|
"loss": 2.923, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.42509225092250924, |
|
"grad_norm": 0.4987011253833771, |
|
"learning_rate": 7.111631969011938e-05, |
|
"loss": 2.9555, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.42583025830258303, |
|
"grad_norm": 0.4811478853225708, |
|
"learning_rate": 7.099949381461296e-05, |
|
"loss": 2.9797, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.4265682656826568, |
|
"grad_norm": 0.4753568470478058, |
|
"learning_rate": 7.08825285769936e-05, |
|
"loss": 2.9137, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.4273062730627306, |
|
"grad_norm": 0.46175628900527954, |
|
"learning_rate": 7.076542475349537e-05, |
|
"loss": 2.9291, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.4280442804428044, |
|
"grad_norm": 0.5033062696456909, |
|
"learning_rate": 7.06481831212721e-05, |
|
"loss": 2.9927, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.42878228782287825, |
|
"grad_norm": 0.4942483603954315, |
|
"learning_rate": 7.05308044583921e-05, |
|
"loss": 2.8999, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.42952029520295204, |
|
"grad_norm": 0.46212270855903625, |
|
"learning_rate": 7.041328954383316e-05, |
|
"loss": 2.9618, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.43025830258302583, |
|
"grad_norm": 0.4895878732204437, |
|
"learning_rate": 7.029563915747722e-05, |
|
"loss": 3.0415, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.4309963099630996, |
|
"grad_norm": 0.48732495307922363, |
|
"learning_rate": 7.017785408010533e-05, |
|
"loss": 2.9275, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.4317343173431734, |
|
"grad_norm": 0.49087876081466675, |
|
"learning_rate": 7.005993509339241e-05, |
|
"loss": 2.981, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.43247232472324726, |
|
"grad_norm": 0.5266060829162598, |
|
"learning_rate": 6.9941882979902e-05, |
|
"loss": 2.8859, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.43321033210332105, |
|
"grad_norm": 0.45862722396850586, |
|
"learning_rate": 6.982369852308124e-05, |
|
"loss": 2.9225, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.43394833948339484, |
|
"grad_norm": 0.5097654461860657, |
|
"learning_rate": 6.97053825072554e-05, |
|
"loss": 2.9179, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.43468634686346863, |
|
"grad_norm": 0.5156700611114502, |
|
"learning_rate": 6.958693571762301e-05, |
|
"loss": 3.0092, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.4354243542435424, |
|
"grad_norm": 0.4698309898376465, |
|
"learning_rate": 6.946835894025037e-05, |
|
"loss": 2.8776, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.4361623616236162, |
|
"grad_norm": 0.4787076711654663, |
|
"learning_rate": 6.934965296206645e-05, |
|
"loss": 2.9759, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.43690036900369006, |
|
"grad_norm": 0.4753543734550476, |
|
"learning_rate": 6.923081857085766e-05, |
|
"loss": 3.0012, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.43763837638376385, |
|
"grad_norm": 0.4781608283519745, |
|
"learning_rate": 6.911185655526263e-05, |
|
"loss": 2.9636, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.43837638376383764, |
|
"grad_norm": 0.46679866313934326, |
|
"learning_rate": 6.899276770476695e-05, |
|
"loss": 2.9666, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.43911439114391143, |
|
"grad_norm": 0.4817095100879669, |
|
"learning_rate": 6.887355280969796e-05, |
|
"loss": 2.9268, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.4398523985239852, |
|
"grad_norm": 0.46391561627388, |
|
"learning_rate": 6.875421266121946e-05, |
|
"loss": 2.9796, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.44059040590405907, |
|
"grad_norm": 0.4704035222530365, |
|
"learning_rate": 6.86347480513265e-05, |
|
"loss": 2.93, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.44132841328413286, |
|
"grad_norm": 0.5005739331245422, |
|
"learning_rate": 6.851515977284013e-05, |
|
"loss": 2.9329, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.44206642066420665, |
|
"grad_norm": 0.5069407224655151, |
|
"learning_rate": 6.839544861940214e-05, |
|
"loss": 3.0269, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.44280442804428044, |
|
"grad_norm": 0.4672479033470154, |
|
"learning_rate": 6.827561538546967e-05, |
|
"loss": 2.9522, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.44354243542435423, |
|
"grad_norm": 0.4877452850341797, |
|
"learning_rate": 6.815566086631016e-05, |
|
"loss": 2.9381, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.444280442804428, |
|
"grad_norm": 0.4852764308452606, |
|
"learning_rate": 6.80355858579959e-05, |
|
"loss": 2.9431, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.44501845018450187, |
|
"grad_norm": 0.4775632321834564, |
|
"learning_rate": 6.791539115739879e-05, |
|
"loss": 2.9923, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.44575645756457566, |
|
"grad_norm": 0.48804882168769836, |
|
"learning_rate": 6.779507756218509e-05, |
|
"loss": 3.0321, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.44649446494464945, |
|
"grad_norm": 0.4770827293395996, |
|
"learning_rate": 6.76746458708101e-05, |
|
"loss": 3.0004, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.44723247232472324, |
|
"grad_norm": 0.47312870621681213, |
|
"learning_rate": 6.75540968825128e-05, |
|
"loss": 2.9975, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.44797047970479703, |
|
"grad_norm": 0.48013314604759216, |
|
"learning_rate": 6.74334313973107e-05, |
|
"loss": 2.9666, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.4487084870848708, |
|
"grad_norm": 0.4521431624889374, |
|
"learning_rate": 6.731265021599436e-05, |
|
"loss": 2.8592, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.44944649446494467, |
|
"grad_norm": 0.4653100073337555, |
|
"learning_rate": 6.719175414012219e-05, |
|
"loss": 2.9367, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.45018450184501846, |
|
"grad_norm": 0.5198903679847717, |
|
"learning_rate": 6.707074397201508e-05, |
|
"loss": 3.014, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.45092250922509225, |
|
"grad_norm": 0.4655381441116333, |
|
"learning_rate": 6.694962051475107e-05, |
|
"loss": 2.9422, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.45166051660516604, |
|
"grad_norm": 0.4614551067352295, |
|
"learning_rate": 6.682838457216009e-05, |
|
"loss": 2.9474, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.45239852398523983, |
|
"grad_norm": 0.4937768876552582, |
|
"learning_rate": 6.67070369488185e-05, |
|
"loss": 2.8953, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.4531365313653137, |
|
"grad_norm": 0.4759802222251892, |
|
"learning_rate": 6.65855784500439e-05, |
|
"loss": 2.9553, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.45387453874538747, |
|
"grad_norm": 0.519924521446228, |
|
"learning_rate": 6.646400988188964e-05, |
|
"loss": 2.8839, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.45461254612546126, |
|
"grad_norm": 0.46175694465637207, |
|
"learning_rate": 6.63423320511396e-05, |
|
"loss": 2.9878, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.45535055350553505, |
|
"grad_norm": 0.48847445845603943, |
|
"learning_rate": 6.622054576530274e-05, |
|
"loss": 2.9601, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.45608856088560884, |
|
"grad_norm": 0.46752119064331055, |
|
"learning_rate": 6.609865183260778e-05, |
|
"loss": 2.9375, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.45682656826568263, |
|
"grad_norm": 0.48789575695991516, |
|
"learning_rate": 6.597665106199783e-05, |
|
"loss": 2.9675, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.4575645756457565, |
|
"grad_norm": 0.46002650260925293, |
|
"learning_rate": 6.585454426312506e-05, |
|
"loss": 2.9194, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.45830258302583027, |
|
"grad_norm": 0.4882054924964905, |
|
"learning_rate": 6.573233224634524e-05, |
|
"loss": 2.931, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.45904059040590406, |
|
"grad_norm": 0.4962427318096161, |
|
"learning_rate": 6.561001582271245e-05, |
|
"loss": 2.9639, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.45977859778597785, |
|
"grad_norm": 0.47860512137413025, |
|
"learning_rate": 6.548759580397363e-05, |
|
"loss": 2.9726, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.46051660516605164, |
|
"grad_norm": 0.4823954701423645, |
|
"learning_rate": 6.536507300256327e-05, |
|
"loss": 2.9363, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.4612546125461255, |
|
"grad_norm": 0.46530622243881226, |
|
"learning_rate": 6.524244823159794e-05, |
|
"loss": 2.9696, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.4619926199261993, |
|
"grad_norm": 0.4861395061016083, |
|
"learning_rate": 6.511972230487091e-05, |
|
"loss": 2.9816, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.46273062730627307, |
|
"grad_norm": 0.47099757194519043, |
|
"learning_rate": 6.499689603684682e-05, |
|
"loss": 2.8812, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.46346863468634686, |
|
"grad_norm": 0.47105422616004944, |
|
"learning_rate": 6.487397024265616e-05, |
|
"loss": 2.8715, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.46420664206642065, |
|
"grad_norm": 0.4647127091884613, |
|
"learning_rate": 6.475094573808993e-05, |
|
"loss": 2.972, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.46494464944649444, |
|
"grad_norm": 0.4713263213634491, |
|
"learning_rate": 6.462782333959429e-05, |
|
"loss": 2.9297, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.4656826568265683, |
|
"grad_norm": 0.4704754650592804, |
|
"learning_rate": 6.450460386426495e-05, |
|
"loss": 2.9489, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.4664206642066421, |
|
"grad_norm": 0.49764499068260193, |
|
"learning_rate": 6.438128812984199e-05, |
|
"loss": 2.8814, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.46715867158671587, |
|
"grad_norm": 0.46612176299095154, |
|
"learning_rate": 6.425787695470419e-05, |
|
"loss": 2.9663, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.46789667896678966, |
|
"grad_norm": 0.46676209568977356, |
|
"learning_rate": 6.41343711578638e-05, |
|
"loss": 2.9843, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.46863468634686345, |
|
"grad_norm": 0.45879995822906494, |
|
"learning_rate": 6.401077155896099e-05, |
|
"loss": 2.8991, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.4693726937269373, |
|
"grad_norm": 0.4595896303653717, |
|
"learning_rate": 6.388707897825846e-05, |
|
"loss": 2.9603, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.4701107011070111, |
|
"grad_norm": 0.47197359800338745, |
|
"learning_rate": 6.376329423663596e-05, |
|
"loss": 3.0058, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.4708487084870849, |
|
"grad_norm": 0.4487576186656952, |
|
"learning_rate": 6.363941815558484e-05, |
|
"loss": 2.9126, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.47158671586715867, |
|
"grad_norm": 0.45560458302497864, |
|
"learning_rate": 6.35154515572027e-05, |
|
"loss": 2.9979, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.47232472324723246, |
|
"grad_norm": 0.4601997435092926, |
|
"learning_rate": 6.339139526418778e-05, |
|
"loss": 2.8166, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.47306273062730625, |
|
"grad_norm": 0.48877766728401184, |
|
"learning_rate": 6.32672500998336e-05, |
|
"loss": 2.8798, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.4738007380073801, |
|
"grad_norm": 0.4835923910140991, |
|
"learning_rate": 6.314301688802347e-05, |
|
"loss": 2.9273, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.4745387453874539, |
|
"grad_norm": 0.465264230966568, |
|
"learning_rate": 6.301869645322498e-05, |
|
"loss": 2.9399, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.4752767527675277, |
|
"grad_norm": 0.49252355098724365, |
|
"learning_rate": 6.289428962048467e-05, |
|
"loss": 2.9608, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.47601476014760147, |
|
"grad_norm": 0.48788875341415405, |
|
"learning_rate": 6.276979721542239e-05, |
|
"loss": 2.9896, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.47675276752767526, |
|
"grad_norm": 0.4745902121067047, |
|
"learning_rate": 6.264522006422586e-05, |
|
"loss": 2.9076, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.4774907749077491, |
|
"grad_norm": 0.47580885887145996, |
|
"learning_rate": 6.252055899364525e-05, |
|
"loss": 2.899, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.4782287822878229, |
|
"grad_norm": 0.47672221064567566, |
|
"learning_rate": 6.239581483098766e-05, |
|
"loss": 2.9338, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.4789667896678967, |
|
"grad_norm": 0.46901679039001465, |
|
"learning_rate": 6.227098840411166e-05, |
|
"loss": 2.9081, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.4797047970479705, |
|
"grad_norm": 0.45821747183799744, |
|
"learning_rate": 6.214608054142167e-05, |
|
"loss": 2.9717, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.48044280442804427, |
|
"grad_norm": 0.457815945148468, |
|
"learning_rate": 6.202109207186263e-05, |
|
"loss": 2.9594, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.48118081180811806, |
|
"grad_norm": 0.45802658796310425, |
|
"learning_rate": 6.189602382491439e-05, |
|
"loss": 2.958, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.4819188191881919, |
|
"grad_norm": 0.47702470421791077, |
|
"learning_rate": 6.177087663058626e-05, |
|
"loss": 2.9481, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.4826568265682657, |
|
"grad_norm": 0.4765585660934448, |
|
"learning_rate": 6.164565131941147e-05, |
|
"loss": 2.9139, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.4833948339483395, |
|
"grad_norm": 0.49875739216804504, |
|
"learning_rate": 6.152034872244166e-05, |
|
"loss": 2.9726, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.4841328413284133, |
|
"grad_norm": 0.46083393692970276, |
|
"learning_rate": 6.13949696712414e-05, |
|
"loss": 2.9462, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.48487084870848707, |
|
"grad_norm": 0.4647446274757385, |
|
"learning_rate": 6.126951499788261e-05, |
|
"loss": 2.9349, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.48560885608856086, |
|
"grad_norm": 0.4930126667022705, |
|
"learning_rate": 6.114398553493908e-05, |
|
"loss": 2.9763, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.4863468634686347, |
|
"grad_norm": 0.4873722791671753, |
|
"learning_rate": 6.1018382115480985e-05, |
|
"loss": 2.9322, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.4870848708487085, |
|
"grad_norm": 0.4486652910709381, |
|
"learning_rate": 6.089270557306923e-05, |
|
"loss": 2.8796, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.4878228782287823, |
|
"grad_norm": 0.482166588306427, |
|
"learning_rate": 6.076695674175007e-05, |
|
"loss": 2.9542, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.4885608856088561, |
|
"grad_norm": 0.4913167953491211, |
|
"learning_rate": 6.0641136456049454e-05, |
|
"loss": 3.0476, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.48929889298892987, |
|
"grad_norm": 0.4978322982788086, |
|
"learning_rate": 6.051524555096754e-05, |
|
"loss": 2.8936, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.4900369003690037, |
|
"grad_norm": 0.4421325922012329, |
|
"learning_rate": 6.038928486197316e-05, |
|
"loss": 2.9131, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.4907749077490775, |
|
"grad_norm": 0.4662306308746338, |
|
"learning_rate": 6.02632552249983e-05, |
|
"loss": 2.8394, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.4915129151291513, |
|
"grad_norm": 0.5267830491065979, |
|
"learning_rate": 6.0137157476432424e-05, |
|
"loss": 2.8703, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.4922509225092251, |
|
"grad_norm": 0.509088397026062, |
|
"learning_rate": 6.001099245311711e-05, |
|
"loss": 2.9691, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.4929889298892989, |
|
"grad_norm": 0.46723711490631104, |
|
"learning_rate": 5.988476099234033e-05, |
|
"loss": 2.9496, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.49372693726937267, |
|
"grad_norm": 0.4566686153411865, |
|
"learning_rate": 5.975846393183101e-05, |
|
"loss": 2.8571, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.4944649446494465, |
|
"grad_norm": 0.4769027829170227, |
|
"learning_rate": 5.963210210975343e-05, |
|
"loss": 2.898, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.4952029520295203, |
|
"grad_norm": 0.4787648320198059, |
|
"learning_rate": 5.95056763647016e-05, |
|
"loss": 2.9649, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.4959409594095941, |
|
"grad_norm": 0.45179930329322815, |
|
"learning_rate": 5.9379187535693804e-05, |
|
"loss": 2.9201, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.4966789667896679, |
|
"grad_norm": 0.4381027817726135, |
|
"learning_rate": 5.925263646216697e-05, |
|
"loss": 2.9402, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.4974169741697417, |
|
"grad_norm": 0.49445804953575134, |
|
"learning_rate": 5.912602398397111e-05, |
|
"loss": 2.9305, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.4981549815498155, |
|
"grad_norm": 0.4826495349407196, |
|
"learning_rate": 5.8999350941363726e-05, |
|
"loss": 2.9346, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.4988929889298893, |
|
"grad_norm": 0.4974125921726227, |
|
"learning_rate": 5.887261817500427e-05, |
|
"loss": 2.9743, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.4996309963099631, |
|
"grad_norm": 0.47447288036346436, |
|
"learning_rate": 5.874582652594854e-05, |
|
"loss": 2.9399, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.5003690036900369, |
|
"grad_norm": 0.48605871200561523, |
|
"learning_rate": 5.861897683564312e-05, |
|
"loss": 2.9667, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.5011070110701107, |
|
"grad_norm": 0.4562762379646301, |
|
"learning_rate": 5.849206994591976e-05, |
|
"loss": 2.9355, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.5018450184501845, |
|
"grad_norm": 0.4724028706550598, |
|
"learning_rate": 5.8365106698989834e-05, |
|
"loss": 2.8938, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5025830258302583, |
|
"grad_norm": 0.4404136538505554, |
|
"learning_rate": 5.82380879374387e-05, |
|
"loss": 2.8332, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.5033210332103321, |
|
"grad_norm": 0.4685560464859009, |
|
"learning_rate": 5.8111014504220165e-05, |
|
"loss": 2.9792, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.5040590405904058, |
|
"grad_norm": 0.47112590074539185, |
|
"learning_rate": 5.7983887242650846e-05, |
|
"loss": 2.9933, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.5047970479704798, |
|
"grad_norm": 0.46272197365760803, |
|
"learning_rate": 5.78567069964046e-05, |
|
"loss": 2.9916, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.5055350553505535, |
|
"grad_norm": 0.47110989689826965, |
|
"learning_rate": 5.772947460950688e-05, |
|
"loss": 2.8869, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.5062730627306273, |
|
"grad_norm": 0.47916916012763977, |
|
"learning_rate": 5.760219092632924e-05, |
|
"loss": 2.9576, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.5070110701107011, |
|
"grad_norm": 0.47247427701950073, |
|
"learning_rate": 5.7474856791583576e-05, |
|
"loss": 2.9433, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.5077490774907749, |
|
"grad_norm": 0.4856591820716858, |
|
"learning_rate": 5.7347473050316636e-05, |
|
"loss": 2.983, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.5084870848708487, |
|
"grad_norm": 0.4498710036277771, |
|
"learning_rate": 5.722004054790442e-05, |
|
"loss": 2.95, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.5092250922509225, |
|
"grad_norm": 0.4407157003879547, |
|
"learning_rate": 5.7092560130046466e-05, |
|
"loss": 2.9004, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5099630996309963, |
|
"grad_norm": 0.4676019847393036, |
|
"learning_rate": 5.696503264276035e-05, |
|
"loss": 2.8584, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.5107011070110701, |
|
"grad_norm": 0.44521570205688477, |
|
"learning_rate": 5.683745893237597e-05, |
|
"loss": 2.9214, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.5114391143911439, |
|
"grad_norm": 0.4693831503391266, |
|
"learning_rate": 5.670983984553003e-05, |
|
"loss": 2.9721, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.5121771217712177, |
|
"grad_norm": 0.43683314323425293, |
|
"learning_rate": 5.6582176229160355e-05, |
|
"loss": 2.8837, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.5129151291512916, |
|
"grad_norm": 0.4462457299232483, |
|
"learning_rate": 5.645446893050029e-05, |
|
"loss": 2.8014, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.5136531365313654, |
|
"grad_norm": 0.46673473715782166, |
|
"learning_rate": 5.632671879707307e-05, |
|
"loss": 2.8542, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.5143911439114391, |
|
"grad_norm": 0.5018209218978882, |
|
"learning_rate": 5.619892667668618e-05, |
|
"loss": 2.9344, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.5151291512915129, |
|
"grad_norm": 0.4942212700843811, |
|
"learning_rate": 5.607109341742579e-05, |
|
"loss": 2.9002, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.5158671586715867, |
|
"grad_norm": 0.4789501428604126, |
|
"learning_rate": 5.5943219867651086e-05, |
|
"loss": 2.8955, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.5166051660516605, |
|
"grad_norm": 0.44573846459388733, |
|
"learning_rate": 5.58153068759886e-05, |
|
"loss": 2.9184, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5173431734317343, |
|
"grad_norm": 0.4906388819217682, |
|
"learning_rate": 5.568735529132665e-05, |
|
"loss": 2.9369, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.5180811808118081, |
|
"grad_norm": 0.44844797253608704, |
|
"learning_rate": 5.555936596280966e-05, |
|
"loss": 2.9435, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.5188191881918819, |
|
"grad_norm": 0.46517252922058105, |
|
"learning_rate": 5.5431339739832545e-05, |
|
"loss": 2.9933, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.5195571955719557, |
|
"grad_norm": 0.4549432396888733, |
|
"learning_rate": 5.530327747203506e-05, |
|
"loss": 2.8739, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5202952029520295, |
|
"grad_norm": 0.47701096534729004, |
|
"learning_rate": 5.51751800092962e-05, |
|
"loss": 2.9088, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.5210332103321034, |
|
"grad_norm": 0.489654541015625, |
|
"learning_rate": 5.50470482017285e-05, |
|
"loss": 2.9574, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5217712177121772, |
|
"grad_norm": 0.4661862850189209, |
|
"learning_rate": 5.491888289967241e-05, |
|
"loss": 2.9482, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.522509225092251, |
|
"grad_norm": 0.446463406085968, |
|
"learning_rate": 5.4790684953690706e-05, |
|
"loss": 2.9176, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.5232472324723247, |
|
"grad_norm": 0.4751204550266266, |
|
"learning_rate": 5.466245521456278e-05, |
|
"loss": 2.924, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.5239852398523985, |
|
"grad_norm": 0.5041395425796509, |
|
"learning_rate": 5.4534194533279e-05, |
|
"loss": 2.8624, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5247232472324723, |
|
"grad_norm": 0.4631516635417938, |
|
"learning_rate": 5.4405903761035124e-05, |
|
"loss": 2.9072, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.5254612546125461, |
|
"grad_norm": 0.45753976702690125, |
|
"learning_rate": 5.427758374922658e-05, |
|
"loss": 2.9332, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5261992619926199, |
|
"grad_norm": 0.4684479236602783, |
|
"learning_rate": 5.414923534944283e-05, |
|
"loss": 2.9017, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.5269372693726937, |
|
"grad_norm": 0.46777448058128357, |
|
"learning_rate": 5.4020859413461756e-05, |
|
"loss": 2.9231, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5276752767527675, |
|
"grad_norm": 0.47089943289756775, |
|
"learning_rate": 5.389245679324398e-05, |
|
"loss": 2.9215, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.5284132841328413, |
|
"grad_norm": 0.44447311758995056, |
|
"learning_rate": 5.376402834092721e-05, |
|
"loss": 2.9281, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.5291512915129152, |
|
"grad_norm": 0.47463953495025635, |
|
"learning_rate": 5.363557490882057e-05, |
|
"loss": 2.947, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.529889298892989, |
|
"grad_norm": 0.47504737973213196, |
|
"learning_rate": 5.350709734939897e-05, |
|
"loss": 3.0103, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5306273062730628, |
|
"grad_norm": 0.472151517868042, |
|
"learning_rate": 5.337859651529746e-05, |
|
"loss": 2.966, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.5313653136531366, |
|
"grad_norm": 0.44552987813949585, |
|
"learning_rate": 5.325007325930554e-05, |
|
"loss": 2.8962, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5321033210332103, |
|
"grad_norm": 0.487582266330719, |
|
"learning_rate": 5.3121528434361524e-05, |
|
"loss": 2.9548, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.5328413284132841, |
|
"grad_norm": 0.47288230061531067, |
|
"learning_rate": 5.299296289354681e-05, |
|
"loss": 2.8969, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.5335793357933579, |
|
"grad_norm": 0.4963250756263733, |
|
"learning_rate": 5.2864377490080306e-05, |
|
"loss": 2.9785, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.5343173431734317, |
|
"grad_norm": 0.4519381821155548, |
|
"learning_rate": 5.2735773077312814e-05, |
|
"loss": 2.9112, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.5350553505535055, |
|
"grad_norm": 0.47766226530075073, |
|
"learning_rate": 5.2607150508721195e-05, |
|
"loss": 2.8749, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.5357933579335793, |
|
"grad_norm": 0.4712168872356415, |
|
"learning_rate": 5.24785106379028e-05, |
|
"loss": 2.9148, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.5365313653136531, |
|
"grad_norm": 0.44543230533599854, |
|
"learning_rate": 5.234985431856988e-05, |
|
"loss": 2.9281, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.537269372693727, |
|
"grad_norm": 0.46235865354537964, |
|
"learning_rate": 5.2221182404543754e-05, |
|
"loss": 2.9294, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5380073800738008, |
|
"grad_norm": 0.4579477608203888, |
|
"learning_rate": 5.2092495749749346e-05, |
|
"loss": 2.9286, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.5387453874538746, |
|
"grad_norm": 0.4533149302005768, |
|
"learning_rate": 5.196379520820929e-05, |
|
"loss": 2.9063, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5394833948339484, |
|
"grad_norm": 0.48128604888916016, |
|
"learning_rate": 5.183508163403845e-05, |
|
"loss": 2.8985, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.5402214022140222, |
|
"grad_norm": 0.46598076820373535, |
|
"learning_rate": 5.170635588143816e-05, |
|
"loss": 2.9074, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.5409594095940959, |
|
"grad_norm": 0.4706079363822937, |
|
"learning_rate": 5.157761880469058e-05, |
|
"loss": 2.9216, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.5416974169741697, |
|
"grad_norm": 0.45854324102401733, |
|
"learning_rate": 5.144887125815301e-05, |
|
"loss": 2.9771, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.5424354243542435, |
|
"grad_norm": 0.4575222134590149, |
|
"learning_rate": 5.132011409625224e-05, |
|
"loss": 2.878, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.5431734317343173, |
|
"grad_norm": 0.45603683590888977, |
|
"learning_rate": 5.1191348173478884e-05, |
|
"loss": 2.9328, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.5439114391143911, |
|
"grad_norm": 0.47662872076034546, |
|
"learning_rate": 5.1062574344381686e-05, |
|
"loss": 2.9483, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.5446494464944649, |
|
"grad_norm": 0.4564341604709625, |
|
"learning_rate": 5.093379346356185e-05, |
|
"loss": 2.8084, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.5453874538745388, |
|
"grad_norm": 0.4610985219478607, |
|
"learning_rate": 5.080500638566741e-05, |
|
"loss": 2.9255, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.5461254612546126, |
|
"grad_norm": 0.46059536933898926, |
|
"learning_rate": 5.0676213965387475e-05, |
|
"loss": 2.851, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.5468634686346864, |
|
"grad_norm": 0.482048362493515, |
|
"learning_rate": 5.0547417057446665e-05, |
|
"loss": 2.9626, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.5476014760147602, |
|
"grad_norm": 0.4469466209411621, |
|
"learning_rate": 5.0418616516599346e-05, |
|
"loss": 2.8261, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.548339483394834, |
|
"grad_norm": 0.4489482343196869, |
|
"learning_rate": 5.028981319762399e-05, |
|
"loss": 2.9388, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.5490774907749078, |
|
"grad_norm": 0.4895458221435547, |
|
"learning_rate": 5.016100795531754e-05, |
|
"loss": 2.9598, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.5498154981549815, |
|
"grad_norm": 0.45136043429374695, |
|
"learning_rate": 5.003220164448967e-05, |
|
"loss": 2.8466, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.5505535055350553, |
|
"grad_norm": 0.4319990873336792, |
|
"learning_rate": 4.990339511995718e-05, |
|
"loss": 2.8589, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.5512915129151291, |
|
"grad_norm": 0.4822845458984375, |
|
"learning_rate": 4.977458923653823e-05, |
|
"loss": 2.8766, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.5520295202952029, |
|
"grad_norm": 0.4683190882205963, |
|
"learning_rate": 4.9645784849046786e-05, |
|
"loss": 2.9471, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.5527675276752767, |
|
"grad_norm": 0.4755018353462219, |
|
"learning_rate": 4.9516982812286854e-05, |
|
"loss": 2.9336, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.5535055350553506, |
|
"grad_norm": 0.4847009778022766, |
|
"learning_rate": 4.938818398104685e-05, |
|
"loss": 2.8928, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5542435424354244, |
|
"grad_norm": 0.49205484986305237, |
|
"learning_rate": 4.92593892100939e-05, |
|
"loss": 2.9413, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.5549815498154982, |
|
"grad_norm": 0.4603287875652313, |
|
"learning_rate": 4.913059935416822e-05, |
|
"loss": 2.8814, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.555719557195572, |
|
"grad_norm": 0.4724648594856262, |
|
"learning_rate": 4.900181526797737e-05, |
|
"loss": 2.9493, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.5564575645756458, |
|
"grad_norm": 0.6270569562911987, |
|
"learning_rate": 4.887303780619066e-05, |
|
"loss": 2.9201, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.5571955719557196, |
|
"grad_norm": 0.4619079828262329, |
|
"learning_rate": 4.874426782343338e-05, |
|
"loss": 2.915, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.5579335793357934, |
|
"grad_norm": 0.45699045062065125, |
|
"learning_rate": 4.861550617428122e-05, |
|
"loss": 2.914, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.5586715867158671, |
|
"grad_norm": 0.46511203050613403, |
|
"learning_rate": 4.8486753713254586e-05, |
|
"loss": 2.8837, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.5594095940959409, |
|
"grad_norm": 0.4465058147907257, |
|
"learning_rate": 4.835801129481287e-05, |
|
"loss": 2.9087, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.5601476014760147, |
|
"grad_norm": 0.4666641652584076, |
|
"learning_rate": 4.8229279773348845e-05, |
|
"loss": 2.9486, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.5608856088560885, |
|
"grad_norm": 0.4582604765892029, |
|
"learning_rate": 4.810056000318293e-05, |
|
"loss": 2.9275, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.5616236162361624, |
|
"grad_norm": 0.4589548408985138, |
|
"learning_rate": 4.7971852838557565e-05, |
|
"loss": 2.8683, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.5623616236162362, |
|
"grad_norm": 0.4380606412887573, |
|
"learning_rate": 4.78431591336316e-05, |
|
"loss": 2.8368, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.56309963099631, |
|
"grad_norm": 0.44517070055007935, |
|
"learning_rate": 4.771447974247449e-05, |
|
"loss": 2.8804, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.5638376383763838, |
|
"grad_norm": 0.46472036838531494, |
|
"learning_rate": 4.7585815519060694e-05, |
|
"loss": 2.8983, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.5645756457564576, |
|
"grad_norm": 0.47114098072052, |
|
"learning_rate": 4.7457167317264064e-05, |
|
"loss": 2.9284, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.5653136531365314, |
|
"grad_norm": 0.4522678256034851, |
|
"learning_rate": 4.732853599085207e-05, |
|
"loss": 2.8971, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.5660516605166052, |
|
"grad_norm": 0.46045982837677, |
|
"learning_rate": 4.719992239348024e-05, |
|
"loss": 2.844, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.566789667896679, |
|
"grad_norm": 0.4543171525001526, |
|
"learning_rate": 4.7071327378686386e-05, |
|
"loss": 2.9121, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.5675276752767527, |
|
"grad_norm": 0.48567166924476624, |
|
"learning_rate": 4.6942751799885054e-05, |
|
"loss": 2.9274, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.5682656826568265, |
|
"grad_norm": 0.4700704514980316, |
|
"learning_rate": 4.681419651036177e-05, |
|
"loss": 2.9872, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.5690036900369003, |
|
"grad_norm": 0.44953039288520813, |
|
"learning_rate": 4.6685662363267415e-05, |
|
"loss": 2.873, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.5697416974169742, |
|
"grad_norm": 0.46205776929855347, |
|
"learning_rate": 4.655715021161258e-05, |
|
"loss": 2.8282, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.570479704797048, |
|
"grad_norm": 0.4394710063934326, |
|
"learning_rate": 4.6428660908261864e-05, |
|
"loss": 2.8753, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.5712177121771218, |
|
"grad_norm": 0.43995216488838196, |
|
"learning_rate": 4.6300195305928243e-05, |
|
"loss": 2.7643, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.5719557195571956, |
|
"grad_norm": 0.4612707495689392, |
|
"learning_rate": 4.617175425716741e-05, |
|
"loss": 2.8683, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.5726937269372694, |
|
"grad_norm": 0.4660702347755432, |
|
"learning_rate": 4.604333861437207e-05, |
|
"loss": 2.9493, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.5734317343173432, |
|
"grad_norm": 0.47154900431632996, |
|
"learning_rate": 4.591494922976637e-05, |
|
"loss": 2.9493, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.574169741697417, |
|
"grad_norm": 0.4602459967136383, |
|
"learning_rate": 4.578658695540018e-05, |
|
"loss": 2.9144, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.5749077490774908, |
|
"grad_norm": 0.4484480917453766, |
|
"learning_rate": 4.5658252643143435e-05, |
|
"loss": 2.9145, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.5756457564575646, |
|
"grad_norm": 0.469936341047287, |
|
"learning_rate": 4.552994714468055e-05, |
|
"loss": 2.8947, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5763837638376383, |
|
"grad_norm": 0.48601603507995605, |
|
"learning_rate": 4.5401671311504616e-05, |
|
"loss": 2.9164, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.5771217712177121, |
|
"grad_norm": 0.46561533212661743, |
|
"learning_rate": 4.5273425994912e-05, |
|
"loss": 2.8656, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.5778597785977859, |
|
"grad_norm": 0.48168033361434937, |
|
"learning_rate": 4.5145212045996446e-05, |
|
"loss": 2.8667, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.5785977859778598, |
|
"grad_norm": 0.45122450590133667, |
|
"learning_rate": 4.5017030315643536e-05, |
|
"loss": 2.9668, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.5793357933579336, |
|
"grad_norm": 0.4591752290725708, |
|
"learning_rate": 4.4888881654525057e-05, |
|
"loss": 2.8924, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.5800738007380074, |
|
"grad_norm": 0.4341951906681061, |
|
"learning_rate": 4.4760766913093325e-05, |
|
"loss": 2.8232, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.5808118081180812, |
|
"grad_norm": 0.46191418170928955, |
|
"learning_rate": 4.463268694157556e-05, |
|
"loss": 2.9198, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.581549815498155, |
|
"grad_norm": 0.43734246492385864, |
|
"learning_rate": 4.450464258996822e-05, |
|
"loss": 2.8755, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.5822878228782288, |
|
"grad_norm": 0.4456181228160858, |
|
"learning_rate": 4.437663470803137e-05, |
|
"loss": 2.8545, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.5830258302583026, |
|
"grad_norm": 0.46855318546295166, |
|
"learning_rate": 4.4248664145283054e-05, |
|
"loss": 2.8658, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5837638376383764, |
|
"grad_norm": 0.4666096568107605, |
|
"learning_rate": 4.4120731750993645e-05, |
|
"loss": 2.9317, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.5845018450184502, |
|
"grad_norm": 0.46038341522216797, |
|
"learning_rate": 4.3992838374180234e-05, |
|
"loss": 2.9288, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.5852398523985239, |
|
"grad_norm": 0.47123417258262634, |
|
"learning_rate": 4.386498486360094e-05, |
|
"loss": 2.9348, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.5859778597785977, |
|
"grad_norm": 0.43836262822151184, |
|
"learning_rate": 4.373717206774935e-05, |
|
"loss": 2.8594, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.5867158671586716, |
|
"grad_norm": 0.46412384510040283, |
|
"learning_rate": 4.360940083484881e-05, |
|
"loss": 2.9131, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.5874538745387454, |
|
"grad_norm": 0.43723878264427185, |
|
"learning_rate": 4.3481672012846865e-05, |
|
"loss": 2.9116, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.5881918819188192, |
|
"grad_norm": 0.46796315908432007, |
|
"learning_rate": 4.335398644940957e-05, |
|
"loss": 2.9236, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.588929889298893, |
|
"grad_norm": 0.4761864244937897, |
|
"learning_rate": 4.322634499191594e-05, |
|
"loss": 2.8988, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.5896678966789668, |
|
"grad_norm": 0.4379028081893921, |
|
"learning_rate": 4.309874848745225e-05, |
|
"loss": 2.851, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.5904059040590406, |
|
"grad_norm": 0.4515070617198944, |
|
"learning_rate": 4.297119778280645e-05, |
|
"loss": 2.8823, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5911439114391144, |
|
"grad_norm": 0.456480473279953, |
|
"learning_rate": 4.2843693724462555e-05, |
|
"loss": 2.9163, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.5918819188191882, |
|
"grad_norm": 0.4556421935558319, |
|
"learning_rate": 4.271623715859501e-05, |
|
"loss": 2.8997, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.592619926199262, |
|
"grad_norm": 0.4618515372276306, |
|
"learning_rate": 4.2588828931063086e-05, |
|
"loss": 2.9223, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.5933579335793358, |
|
"grad_norm": 0.4617830812931061, |
|
"learning_rate": 4.246146988740525e-05, |
|
"loss": 2.8476, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.5940959409594095, |
|
"grad_norm": 0.43721622228622437, |
|
"learning_rate": 4.233416087283354e-05, |
|
"loss": 2.9253, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.5948339483394834, |
|
"grad_norm": 0.43407517671585083, |
|
"learning_rate": 4.2206902732228015e-05, |
|
"loss": 2.9307, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.5955719557195572, |
|
"grad_norm": 0.4590218663215637, |
|
"learning_rate": 4.207969631013109e-05, |
|
"loss": 2.9194, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.596309963099631, |
|
"grad_norm": 0.45232662558555603, |
|
"learning_rate": 4.195254245074196e-05, |
|
"loss": 2.814, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.5970479704797048, |
|
"grad_norm": 0.47659075260162354, |
|
"learning_rate": 4.1825441997911016e-05, |
|
"loss": 2.8991, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.5977859778597786, |
|
"grad_norm": 0.4390777349472046, |
|
"learning_rate": 4.169839579513415e-05, |
|
"loss": 2.8377, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.5985239852398524, |
|
"grad_norm": 0.44624418020248413, |
|
"learning_rate": 4.1571404685547265e-05, |
|
"loss": 2.9126, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.5992619926199262, |
|
"grad_norm": 0.4411090314388275, |
|
"learning_rate": 4.14444695119207e-05, |
|
"loss": 2.8661, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.45906946063041687, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 2.8862, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.6007380073800738, |
|
"grad_norm": 0.450738787651062, |
|
"learning_rate": 4.1190770341767884e-05, |
|
"loss": 2.8788, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.6014760147601476, |
|
"grad_norm": 0.4635327458381653, |
|
"learning_rate": 4.1064008028903766e-05, |
|
"loss": 2.8856, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.6022140221402214, |
|
"grad_norm": 0.46390798687934875, |
|
"learning_rate": 4.093730501931301e-05, |
|
"loss": 2.8435, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.6029520295202953, |
|
"grad_norm": 0.46583694219589233, |
|
"learning_rate": 4.0810662153853955e-05, |
|
"loss": 2.9068, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.603690036900369, |
|
"grad_norm": 0.441485732793808, |
|
"learning_rate": 4.068408027298576e-05, |
|
"loss": 2.9141, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.6044280442804428, |
|
"grad_norm": 0.43635720014572144, |
|
"learning_rate": 4.0557560216762884e-05, |
|
"loss": 2.8165, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.6051660516605166, |
|
"grad_norm": 0.45056867599487305, |
|
"learning_rate": 4.0431102824829495e-05, |
|
"loss": 2.8923, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6059040590405904, |
|
"grad_norm": 0.47618359327316284, |
|
"learning_rate": 4.030470893641387e-05, |
|
"loss": 2.8337, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.6066420664206642, |
|
"grad_norm": 0.46678489446640015, |
|
"learning_rate": 4.0178379390322896e-05, |
|
"loss": 2.9041, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.607380073800738, |
|
"grad_norm": 0.45858731865882874, |
|
"learning_rate": 4.0052115024936396e-05, |
|
"loss": 2.8919, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.6081180811808118, |
|
"grad_norm": 0.46500325202941895, |
|
"learning_rate": 3.9925916678201656e-05, |
|
"loss": 2.7873, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.6088560885608856, |
|
"grad_norm": 0.4576093256473541, |
|
"learning_rate": 3.9799785187627844e-05, |
|
"loss": 2.9581, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.6095940959409594, |
|
"grad_norm": 0.4603584408760071, |
|
"learning_rate": 3.96737213902804e-05, |
|
"loss": 2.932, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.6103321033210332, |
|
"grad_norm": 0.4474504888057709, |
|
"learning_rate": 3.954772612277556e-05, |
|
"loss": 2.8907, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.6110701107011071, |
|
"grad_norm": 0.4676888585090637, |
|
"learning_rate": 3.942180022127475e-05, |
|
"loss": 2.9279, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.6118081180811809, |
|
"grad_norm": 0.4762161374092102, |
|
"learning_rate": 3.929594452147903e-05, |
|
"loss": 2.8668, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.6125461254612546, |
|
"grad_norm": 0.45031213760375977, |
|
"learning_rate": 3.917015985862364e-05, |
|
"loss": 3.0203, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6132841328413284, |
|
"grad_norm": 0.4627397656440735, |
|
"learning_rate": 3.904444706747227e-05, |
|
"loss": 2.8669, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.6140221402214022, |
|
"grad_norm": 0.4964381456375122, |
|
"learning_rate": 3.891880698231176e-05, |
|
"loss": 2.8888, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.614760147601476, |
|
"grad_norm": 0.4690164029598236, |
|
"learning_rate": 3.879324043694639e-05, |
|
"loss": 2.8772, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.6154981549815498, |
|
"grad_norm": 0.46316999197006226, |
|
"learning_rate": 3.8667748264692355e-05, |
|
"loss": 2.9203, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.6162361623616236, |
|
"grad_norm": 0.46457648277282715, |
|
"learning_rate": 3.854233129837233e-05, |
|
"loss": 2.8959, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.6169741697416974, |
|
"grad_norm": 0.46210619807243347, |
|
"learning_rate": 3.841699037030989e-05, |
|
"loss": 2.9754, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.6177121771217712, |
|
"grad_norm": 0.4708150029182434, |
|
"learning_rate": 3.829172631232395e-05, |
|
"loss": 2.8779, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.618450184501845, |
|
"grad_norm": 0.4539421498775482, |
|
"learning_rate": 3.8166539955723315e-05, |
|
"loss": 2.7857, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.6191881918819189, |
|
"grad_norm": 0.4383450150489807, |
|
"learning_rate": 3.80414321313011e-05, |
|
"loss": 2.9466, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.6199261992619927, |
|
"grad_norm": 0.47667232155799866, |
|
"learning_rate": 3.791640366932926e-05, |
|
"loss": 2.8896, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.6206642066420665, |
|
"grad_norm": 0.47078999876976013, |
|
"learning_rate": 3.7791455399553054e-05, |
|
"loss": 2.8787, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.6214022140221402, |
|
"grad_norm": 0.4621264934539795, |
|
"learning_rate": 3.7666588151185586e-05, |
|
"loss": 2.9516, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.622140221402214, |
|
"grad_norm": 0.4561121165752411, |
|
"learning_rate": 3.754180275290222e-05, |
|
"loss": 2.8712, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.6228782287822878, |
|
"grad_norm": 0.4745158851146698, |
|
"learning_rate": 3.741710003283515e-05, |
|
"loss": 2.9942, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.6236162361623616, |
|
"grad_norm": 0.4506776034832001, |
|
"learning_rate": 3.729248081856788e-05, |
|
"loss": 2.8662, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.6243542435424354, |
|
"grad_norm": 0.4925256073474884, |
|
"learning_rate": 3.716794593712973e-05, |
|
"loss": 2.9148, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6250922509225092, |
|
"grad_norm": 0.4477274715900421, |
|
"learning_rate": 3.704349621499032e-05, |
|
"loss": 2.8946, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.625830258302583, |
|
"grad_norm": 0.45974335074424744, |
|
"learning_rate": 3.691913247805415e-05, |
|
"loss": 2.8444, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6265682656826568, |
|
"grad_norm": 0.4468931555747986, |
|
"learning_rate": 3.6794855551655095e-05, |
|
"loss": 2.8183, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.6273062730627307, |
|
"grad_norm": 0.45352327823638916, |
|
"learning_rate": 3.6670666260550866e-05, |
|
"loss": 2.8385, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6280442804428045, |
|
"grad_norm": 0.48543328046798706, |
|
"learning_rate": 3.654656542891762e-05, |
|
"loss": 2.8982, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.6287822878228783, |
|
"grad_norm": 0.47315549850463867, |
|
"learning_rate": 3.642255388034448e-05, |
|
"loss": 2.8477, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.629520295202952, |
|
"grad_norm": 0.4466278851032257, |
|
"learning_rate": 3.629863243782799e-05, |
|
"loss": 2.9499, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.6302583025830258, |
|
"grad_norm": 0.4634998142719269, |
|
"learning_rate": 3.617480192376676e-05, |
|
"loss": 2.9209, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.6309963099630996, |
|
"grad_norm": 0.4444449841976166, |
|
"learning_rate": 3.6051063159955914e-05, |
|
"loss": 2.8547, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.6317343173431734, |
|
"grad_norm": 0.4805346727371216, |
|
"learning_rate": 3.592741696758171e-05, |
|
"loss": 2.9504, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.6324723247232472, |
|
"grad_norm": 0.4576335549354553, |
|
"learning_rate": 3.580386416721605e-05, |
|
"loss": 2.8166, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.633210332103321, |
|
"grad_norm": 0.48051634430885315, |
|
"learning_rate": 3.568040557881106e-05, |
|
"loss": 2.8457, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.6339483394833948, |
|
"grad_norm": 0.45053961873054504, |
|
"learning_rate": 3.55570420216936e-05, |
|
"loss": 2.8554, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.6346863468634686, |
|
"grad_norm": 0.4763762652873993, |
|
"learning_rate": 3.543377431455991e-05, |
|
"loss": 2.9245, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.6354243542435425, |
|
"grad_norm": 0.466516375541687, |
|
"learning_rate": 3.531060327547003e-05, |
|
"loss": 2.8784, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.6361623616236163, |
|
"grad_norm": 0.4508006274700165, |
|
"learning_rate": 3.51875297218426e-05, |
|
"loss": 2.8572, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.6369003690036901, |
|
"grad_norm": 0.43419796228408813, |
|
"learning_rate": 3.506455447044923e-05, |
|
"loss": 2.9553, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.6376383763837639, |
|
"grad_norm": 0.4657207131385803, |
|
"learning_rate": 3.494167833740912e-05, |
|
"loss": 2.9388, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.6383763837638377, |
|
"grad_norm": 0.47769656777381897, |
|
"learning_rate": 3.481890213818374e-05, |
|
"loss": 2.889, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.6391143911439114, |
|
"grad_norm": 0.452332466840744, |
|
"learning_rate": 3.469622668757132e-05, |
|
"loss": 2.8618, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.6398523985239852, |
|
"grad_norm": 0.44228044152259827, |
|
"learning_rate": 3.457365279970147e-05, |
|
"loss": 2.858, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.640590405904059, |
|
"grad_norm": 0.45381829142570496, |
|
"learning_rate": 3.4451181288029835e-05, |
|
"loss": 2.9324, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.6413284132841328, |
|
"grad_norm": 0.45243462920188904, |
|
"learning_rate": 3.4328812965332566e-05, |
|
"loss": 2.8569, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.6420664206642066, |
|
"grad_norm": 0.44624003767967224, |
|
"learning_rate": 3.420654864370107e-05, |
|
"loss": 2.8305, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6428044280442804, |
|
"grad_norm": 0.45331937074661255, |
|
"learning_rate": 3.408438913453652e-05, |
|
"loss": 2.9233, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.6435424354243543, |
|
"grad_norm": 0.46031826734542847, |
|
"learning_rate": 3.396233524854453e-05, |
|
"loss": 2.8136, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.6442804428044281, |
|
"grad_norm": 0.4405251443386078, |
|
"learning_rate": 3.384038779572975e-05, |
|
"loss": 2.8196, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.6450184501845019, |
|
"grad_norm": 0.433918297290802, |
|
"learning_rate": 3.371854758539047e-05, |
|
"loss": 2.828, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.6457564575645757, |
|
"grad_norm": 0.437752366065979, |
|
"learning_rate": 3.3596815426113285e-05, |
|
"loss": 2.9084, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.6464944649446495, |
|
"grad_norm": 0.4461667537689209, |
|
"learning_rate": 3.3475192125767715e-05, |
|
"loss": 2.9163, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.6472324723247233, |
|
"grad_norm": 0.44983482360839844, |
|
"learning_rate": 3.335367849150084e-05, |
|
"loss": 2.8624, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.647970479704797, |
|
"grad_norm": 0.444402813911438, |
|
"learning_rate": 3.323227532973193e-05, |
|
"loss": 2.8645, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.6487084870848708, |
|
"grad_norm": 0.47475096583366394, |
|
"learning_rate": 3.311098344614715e-05, |
|
"loss": 2.8599, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.6494464944649446, |
|
"grad_norm": 0.42691770195961, |
|
"learning_rate": 3.298980364569413e-05, |
|
"loss": 2.9367, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6501845018450184, |
|
"grad_norm": 0.43761834502220154, |
|
"learning_rate": 3.2868736732576696e-05, |
|
"loss": 2.8071, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.6509225092250922, |
|
"grad_norm": 0.4337967336177826, |
|
"learning_rate": 3.274778351024949e-05, |
|
"loss": 2.7961, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.6516605166051661, |
|
"grad_norm": 0.4518975615501404, |
|
"learning_rate": 3.262694478141265e-05, |
|
"loss": 2.8445, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.6523985239852399, |
|
"grad_norm": 0.44520917534828186, |
|
"learning_rate": 3.250622134800651e-05, |
|
"loss": 2.8298, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.6531365313653137, |
|
"grad_norm": 0.47246819734573364, |
|
"learning_rate": 3.238561401120619e-05, |
|
"loss": 2.8721, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.6538745387453875, |
|
"grad_norm": 0.46341249346733093, |
|
"learning_rate": 3.226512357141639e-05, |
|
"loss": 2.8465, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.6546125461254613, |
|
"grad_norm": 0.4418579339981079, |
|
"learning_rate": 3.214475082826602e-05, |
|
"loss": 2.7495, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.6553505535055351, |
|
"grad_norm": 0.4572698771953583, |
|
"learning_rate": 3.2024496580602895e-05, |
|
"loss": 2.8405, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.6560885608856089, |
|
"grad_norm": 0.4518590569496155, |
|
"learning_rate": 3.1904361626488464e-05, |
|
"loss": 2.8698, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.6568265682656826, |
|
"grad_norm": 0.49694785475730896, |
|
"learning_rate": 3.178434676319243e-05, |
|
"loss": 2.9178, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6575645756457564, |
|
"grad_norm": 0.44036176800727844, |
|
"learning_rate": 3.166445278718758e-05, |
|
"loss": 2.9042, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.6583025830258302, |
|
"grad_norm": 0.4740366041660309, |
|
"learning_rate": 3.154468049414444e-05, |
|
"loss": 2.791, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.659040590405904, |
|
"grad_norm": 0.44894149899482727, |
|
"learning_rate": 3.1425030678925944e-05, |
|
"loss": 2.8882, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.6597785977859778, |
|
"grad_norm": 0.45504188537597656, |
|
"learning_rate": 3.1305504135582244e-05, |
|
"loss": 2.82, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.6605166051660517, |
|
"grad_norm": 0.45306116342544556, |
|
"learning_rate": 3.118610165734539e-05, |
|
"loss": 2.8076, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.6612546125461255, |
|
"grad_norm": 0.4355803430080414, |
|
"learning_rate": 3.106682403662409e-05, |
|
"loss": 2.8458, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.6619926199261993, |
|
"grad_norm": 0.45864707231521606, |
|
"learning_rate": 3.094767206499844e-05, |
|
"loss": 2.7888, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.6627306273062731, |
|
"grad_norm": 0.4467925727367401, |
|
"learning_rate": 3.082864653321466e-05, |
|
"loss": 2.8862, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.6634686346863469, |
|
"grad_norm": 0.4361802935600281, |
|
"learning_rate": 3.0709748231179855e-05, |
|
"loss": 2.8405, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.6642066420664207, |
|
"grad_norm": 0.4502997398376465, |
|
"learning_rate": 3.059097794795681e-05, |
|
"loss": 2.8651, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6649446494464945, |
|
"grad_norm": 0.446232408285141, |
|
"learning_rate": 3.0472336471758678e-05, |
|
"loss": 2.9009, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.6656826568265682, |
|
"grad_norm": 0.4600978493690491, |
|
"learning_rate": 3.0353824589943834e-05, |
|
"loss": 2.8842, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.666420664206642, |
|
"grad_norm": 0.45147082209587097, |
|
"learning_rate": 3.0235443089010562e-05, |
|
"loss": 2.842, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.6671586715867158, |
|
"grad_norm": 0.470324844121933, |
|
"learning_rate": 3.0117192754591893e-05, |
|
"loss": 2.9098, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.6678966789667896, |
|
"grad_norm": 0.4519864320755005, |
|
"learning_rate": 2.999907437145042e-05, |
|
"loss": 2.917, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.6686346863468635, |
|
"grad_norm": 0.44655749201774597, |
|
"learning_rate": 2.9881088723472966e-05, |
|
"loss": 2.9205, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.6693726937269373, |
|
"grad_norm": 0.45969992876052856, |
|
"learning_rate": 2.9763236593665533e-05, |
|
"loss": 2.8726, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.6701107011070111, |
|
"grad_norm": 0.45693284273147583, |
|
"learning_rate": 2.9645518764148007e-05, |
|
"loss": 2.8753, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.6708487084870849, |
|
"grad_norm": 0.442354291677475, |
|
"learning_rate": 2.9527936016149006e-05, |
|
"loss": 2.8377, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.6715867158671587, |
|
"grad_norm": 0.4796278476715088, |
|
"learning_rate": 2.9410489130000684e-05, |
|
"loss": 2.8303, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.6723247232472325, |
|
"grad_norm": 0.4597807824611664, |
|
"learning_rate": 2.9293178885133525e-05, |
|
"loss": 2.8325, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.6730627306273063, |
|
"grad_norm": 0.47112002968788147, |
|
"learning_rate": 2.917600606007127e-05, |
|
"loss": 2.8479, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.67380073800738, |
|
"grad_norm": 0.4425598978996277, |
|
"learning_rate": 2.905897143242562e-05, |
|
"loss": 2.8416, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.6745387453874538, |
|
"grad_norm": 0.4444707930088043, |
|
"learning_rate": 2.8942075778891153e-05, |
|
"loss": 2.9409, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.6752767527675276, |
|
"grad_norm": 0.4575837254524231, |
|
"learning_rate": 2.882531987524017e-05, |
|
"loss": 2.8615, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.6760147601476014, |
|
"grad_norm": 0.4663306176662445, |
|
"learning_rate": 2.8708704496317474e-05, |
|
"loss": 2.8184, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.6767527675276753, |
|
"grad_norm": 0.441550076007843, |
|
"learning_rate": 2.8592230416035335e-05, |
|
"loss": 2.8981, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.6774907749077491, |
|
"grad_norm": 0.47013741731643677, |
|
"learning_rate": 2.8475898407368296e-05, |
|
"loss": 2.9034, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.6782287822878229, |
|
"grad_norm": 0.47934868931770325, |
|
"learning_rate": 2.8359709242348032e-05, |
|
"loss": 2.9483, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.6789667896678967, |
|
"grad_norm": 0.44904670119285583, |
|
"learning_rate": 2.824366369205825e-05, |
|
"loss": 2.9038, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.6797047970479705, |
|
"grad_norm": 0.4706343710422516, |
|
"learning_rate": 2.8127762526629553e-05, |
|
"loss": 2.8976, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.6804428044280443, |
|
"grad_norm": 0.4544294774532318, |
|
"learning_rate": 2.801200651523438e-05, |
|
"loss": 2.8875, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.6811808118081181, |
|
"grad_norm": 0.4476546347141266, |
|
"learning_rate": 2.7896396426081844e-05, |
|
"loss": 2.8378, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.6819188191881919, |
|
"grad_norm": 0.4503355920314789, |
|
"learning_rate": 2.7780933026412602e-05, |
|
"loss": 2.8917, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.6826568265682657, |
|
"grad_norm": 0.4393197298049927, |
|
"learning_rate": 2.766561708249387e-05, |
|
"loss": 2.7785, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.6833948339483394, |
|
"grad_norm": 0.45384228229522705, |
|
"learning_rate": 2.7550449359614272e-05, |
|
"loss": 2.8712, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.6841328413284132, |
|
"grad_norm": 0.462931752204895, |
|
"learning_rate": 2.743543062207876e-05, |
|
"loss": 2.9299, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.6848708487084871, |
|
"grad_norm": 0.4446216821670532, |
|
"learning_rate": 2.7320561633203566e-05, |
|
"loss": 2.93, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.6856088560885609, |
|
"grad_norm": 0.4498085677623749, |
|
"learning_rate": 2.7205843155311094e-05, |
|
"loss": 2.8614, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.6863468634686347, |
|
"grad_norm": 0.44905975461006165, |
|
"learning_rate": 2.7091275949724926e-05, |
|
"loss": 2.8681, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6870848708487085, |
|
"grad_norm": 0.4424300491809845, |
|
"learning_rate": 2.6976860776764713e-05, |
|
"loss": 2.8048, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.6878228782287823, |
|
"grad_norm": 0.46064937114715576, |
|
"learning_rate": 2.6862598395741136e-05, |
|
"loss": 2.8376, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.6885608856088561, |
|
"grad_norm": 0.45401063561439514, |
|
"learning_rate": 2.6748489564950908e-05, |
|
"loss": 2.8168, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.6892988929889299, |
|
"grad_norm": 0.4572742283344269, |
|
"learning_rate": 2.6634535041671693e-05, |
|
"loss": 2.8182, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.6900369003690037, |
|
"grad_norm": 0.4515658915042877, |
|
"learning_rate": 2.652073558215711e-05, |
|
"loss": 2.8569, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.6907749077490775, |
|
"grad_norm": 0.44633907079696655, |
|
"learning_rate": 2.64070919416317e-05, |
|
"loss": 2.8684, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.6915129151291513, |
|
"grad_norm": 0.4616515636444092, |
|
"learning_rate": 2.6293604874285927e-05, |
|
"loss": 2.8791, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.692250922509225, |
|
"grad_norm": 0.4603336751461029, |
|
"learning_rate": 2.618027513327116e-05, |
|
"loss": 2.8685, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.6929889298892989, |
|
"grad_norm": 0.4635460376739502, |
|
"learning_rate": 2.6067103470694672e-05, |
|
"loss": 2.8819, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.6937269372693727, |
|
"grad_norm": 0.446821004152298, |
|
"learning_rate": 2.5954090637614658e-05, |
|
"loss": 2.8775, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6944649446494465, |
|
"grad_norm": 0.45208224654197693, |
|
"learning_rate": 2.5841237384035265e-05, |
|
"loss": 2.9185, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.6952029520295203, |
|
"grad_norm": 0.43966442346572876, |
|
"learning_rate": 2.5728544458901593e-05, |
|
"loss": 2.844, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.6959409594095941, |
|
"grad_norm": 0.4660171866416931, |
|
"learning_rate": 2.5616012610094704e-05, |
|
"loss": 2.8533, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.6966789667896679, |
|
"grad_norm": 0.4844834804534912, |
|
"learning_rate": 2.5503642584426712e-05, |
|
"loss": 2.9139, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.6974169741697417, |
|
"grad_norm": 0.4675824046134949, |
|
"learning_rate": 2.5391435127635805e-05, |
|
"loss": 2.857, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.6981549815498155, |
|
"grad_norm": 0.4488329291343689, |
|
"learning_rate": 2.5279390984381264e-05, |
|
"loss": 2.8484, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.6988929889298893, |
|
"grad_norm": 0.4558933675289154, |
|
"learning_rate": 2.5167510898238566e-05, |
|
"loss": 2.8784, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.6996309963099631, |
|
"grad_norm": 0.45454517006874084, |
|
"learning_rate": 2.5055795611694433e-05, |
|
"loss": 2.8075, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.7003690036900369, |
|
"grad_norm": 0.4401450455188751, |
|
"learning_rate": 2.4944245866141886e-05, |
|
"loss": 2.8661, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.7011070110701108, |
|
"grad_norm": 0.42718032002449036, |
|
"learning_rate": 2.4832862401875378e-05, |
|
"loss": 2.8306, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7018450184501845, |
|
"grad_norm": 0.4444067180156708, |
|
"learning_rate": 2.472164595808576e-05, |
|
"loss": 2.887, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.7025830258302583, |
|
"grad_norm": 0.4388265311717987, |
|
"learning_rate": 2.461059727285558e-05, |
|
"loss": 2.9248, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.7033210332103321, |
|
"grad_norm": 0.4537127614021301, |
|
"learning_rate": 2.449971708315397e-05, |
|
"loss": 2.866, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.7040590405904059, |
|
"grad_norm": 0.4571674168109894, |
|
"learning_rate": 2.4389006124831893e-05, |
|
"loss": 2.8524, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.7047970479704797, |
|
"grad_norm": 0.475065678358078, |
|
"learning_rate": 2.4278465132617207e-05, |
|
"loss": 2.9086, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.7055350553505535, |
|
"grad_norm": 0.4491478204727173, |
|
"learning_rate": 2.4168094840109785e-05, |
|
"loss": 2.8496, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.7062730627306273, |
|
"grad_norm": 0.4396122694015503, |
|
"learning_rate": 2.4057895979776683e-05, |
|
"loss": 2.8542, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.7070110701107011, |
|
"grad_norm": 0.45730844140052795, |
|
"learning_rate": 2.394786928294726e-05, |
|
"loss": 2.8448, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.7077490774907749, |
|
"grad_norm": 11.993217468261719, |
|
"learning_rate": 2.3838015479808263e-05, |
|
"loss": 2.8686, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.7084870848708487, |
|
"grad_norm": 0.4676622450351715, |
|
"learning_rate": 2.3728335299399106e-05, |
|
"loss": 2.8195, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7092250922509226, |
|
"grad_norm": 0.4665907621383667, |
|
"learning_rate": 2.3618829469606912e-05, |
|
"loss": 2.8851, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.7099630996309964, |
|
"grad_norm": 0.4478704631328583, |
|
"learning_rate": 2.3509498717161804e-05, |
|
"loss": 2.8631, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.7107011070110701, |
|
"grad_norm": 0.4518534541130066, |
|
"learning_rate": 2.3400343767631944e-05, |
|
"loss": 2.8542, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.7114391143911439, |
|
"grad_norm": 0.45083850622177124, |
|
"learning_rate": 2.329136534541882e-05, |
|
"loss": 2.8447, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.7121771217712177, |
|
"grad_norm": 0.44704335927963257, |
|
"learning_rate": 2.3182564173752396e-05, |
|
"loss": 2.8001, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.7129151291512915, |
|
"grad_norm": 0.459086149930954, |
|
"learning_rate": 2.3073940974686337e-05, |
|
"loss": 2.8562, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.7136531365313653, |
|
"grad_norm": 0.4504683017730713, |
|
"learning_rate": 2.296549646909315e-05, |
|
"loss": 2.8153, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.7143911439114391, |
|
"grad_norm": 0.4484894275665283, |
|
"learning_rate": 2.2857231376659516e-05, |
|
"loss": 2.8652, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.7151291512915129, |
|
"grad_norm": 0.44552645087242126, |
|
"learning_rate": 2.274914641588141e-05, |
|
"loss": 2.8544, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.7158671586715867, |
|
"grad_norm": 0.44962164759635925, |
|
"learning_rate": 2.2641242304059394e-05, |
|
"loss": 2.809, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.7166051660516605, |
|
"grad_norm": 0.4649772047996521, |
|
"learning_rate": 2.2533519757293803e-05, |
|
"loss": 2.9047, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.7173431734317344, |
|
"grad_norm": 0.44465893507003784, |
|
"learning_rate": 2.242597949048008e-05, |
|
"loss": 2.9289, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.7180811808118082, |
|
"grad_norm": 0.4587944746017456, |
|
"learning_rate": 2.2318622217303935e-05, |
|
"loss": 2.9381, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.718819188191882, |
|
"grad_norm": 0.45747023820877075, |
|
"learning_rate": 2.221144865023666e-05, |
|
"loss": 2.8596, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.7195571955719557, |
|
"grad_norm": 0.4500565528869629, |
|
"learning_rate": 2.2104459500530362e-05, |
|
"loss": 2.8122, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.7202952029520295, |
|
"grad_norm": 0.44870901107788086, |
|
"learning_rate": 2.1997655478213313e-05, |
|
"loss": 2.8318, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.7210332103321033, |
|
"grad_norm": 0.46823742985725403, |
|
"learning_rate": 2.1891037292085175e-05, |
|
"loss": 2.7682, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.7217712177121771, |
|
"grad_norm": 0.4822959899902344, |
|
"learning_rate": 2.1784605649712324e-05, |
|
"loss": 2.8845, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.7225092250922509, |
|
"grad_norm": 0.4569961726665497, |
|
"learning_rate": 2.167836125742315e-05, |
|
"loss": 2.8073, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.7232472324723247, |
|
"grad_norm": 0.5003052949905396, |
|
"learning_rate": 2.1572304820303363e-05, |
|
"loss": 2.966, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.7239852398523985, |
|
"grad_norm": 0.4504786431789398, |
|
"learning_rate": 2.1466437042191297e-05, |
|
"loss": 2.8226, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.7247232472324723, |
|
"grad_norm": 0.4485565423965454, |
|
"learning_rate": 2.1360758625673327e-05, |
|
"loss": 2.8301, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.7254612546125462, |
|
"grad_norm": 0.46124571561813354, |
|
"learning_rate": 2.1255270272079042e-05, |
|
"loss": 2.8485, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.72619926199262, |
|
"grad_norm": 0.4612502455711365, |
|
"learning_rate": 2.1149972681476765e-05, |
|
"loss": 2.8276, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.7269372693726938, |
|
"grad_norm": 0.45740193128585815, |
|
"learning_rate": 2.104486655266879e-05, |
|
"loss": 2.8669, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.7276752767527676, |
|
"grad_norm": 0.47378960251808167, |
|
"learning_rate": 2.0939952583186807e-05, |
|
"loss": 2.8149, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.7284132841328413, |
|
"grad_norm": 0.45929577946662903, |
|
"learning_rate": 2.0835231469287232e-05, |
|
"loss": 2.8346, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.7291512915129151, |
|
"grad_norm": 0.45453017950057983, |
|
"learning_rate": 2.0730703905946612e-05, |
|
"loss": 2.8851, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.7298892988929889, |
|
"grad_norm": 0.4465833604335785, |
|
"learning_rate": 2.0626370586857007e-05, |
|
"loss": 2.8381, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.7306273062730627, |
|
"grad_norm": 0.46699321269989014, |
|
"learning_rate": 2.052223220442139e-05, |
|
"loss": 2.8394, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7313653136531365, |
|
"grad_norm": 0.4374259412288666, |
|
"learning_rate": 2.0418289449749027e-05, |
|
"loss": 2.8501, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.7321033210332103, |
|
"grad_norm": 0.4604252576828003, |
|
"learning_rate": 2.0314543012650933e-05, |
|
"loss": 2.8711, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.7328413284132841, |
|
"grad_norm": 0.45612022280693054, |
|
"learning_rate": 2.0210993581635256e-05, |
|
"loss": 2.844, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.7335793357933579, |
|
"grad_norm": 0.43427881598472595, |
|
"learning_rate": 2.0107641843902726e-05, |
|
"loss": 2.8084, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.7343173431734318, |
|
"grad_norm": 0.4502193331718445, |
|
"learning_rate": 2.0004488485342088e-05, |
|
"loss": 2.909, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.7350553505535056, |
|
"grad_norm": 0.44448336958885193, |
|
"learning_rate": 1.9901534190525566e-05, |
|
"loss": 2.8662, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.7357933579335794, |
|
"grad_norm": 0.4308652877807617, |
|
"learning_rate": 1.9798779642704297e-05, |
|
"loss": 2.7882, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.7365313653136532, |
|
"grad_norm": 0.4563472867012024, |
|
"learning_rate": 1.96962255238038e-05, |
|
"loss": 2.8956, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.7372693726937269, |
|
"grad_norm": 0.4397279620170593, |
|
"learning_rate": 1.9593872514419476e-05, |
|
"loss": 2.7707, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.7380073800738007, |
|
"grad_norm": 0.47456085681915283, |
|
"learning_rate": 1.9491721293812076e-05, |
|
"loss": 2.9205, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7387453874538745, |
|
"grad_norm": 0.43729913234710693, |
|
"learning_rate": 1.9389772539903122e-05, |
|
"loss": 2.8423, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.7394833948339483, |
|
"grad_norm": 0.4417737126350403, |
|
"learning_rate": 1.9288026929270587e-05, |
|
"loss": 2.832, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.7402214022140221, |
|
"grad_norm": 0.44813665747642517, |
|
"learning_rate": 1.9186485137144218e-05, |
|
"loss": 2.8494, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.7409594095940959, |
|
"grad_norm": 0.45640864968299866, |
|
"learning_rate": 1.908514783740114e-05, |
|
"loss": 2.8784, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.7416974169741697, |
|
"grad_norm": 0.4336318373680115, |
|
"learning_rate": 1.8984015702561393e-05, |
|
"loss": 2.8372, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.7424354243542436, |
|
"grad_norm": 0.4504336714744568, |
|
"learning_rate": 1.8883089403783434e-05, |
|
"loss": 2.7967, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.7431734317343174, |
|
"grad_norm": 0.46149566769599915, |
|
"learning_rate": 1.8782369610859708e-05, |
|
"loss": 2.8191, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.7439114391143912, |
|
"grad_norm": 0.4522392451763153, |
|
"learning_rate": 1.868185699221221e-05, |
|
"loss": 2.8794, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.744649446494465, |
|
"grad_norm": 0.4411635994911194, |
|
"learning_rate": 1.8581552214887977e-05, |
|
"loss": 2.8404, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.7453874538745388, |
|
"grad_norm": 0.46107056736946106, |
|
"learning_rate": 1.848145594455477e-05, |
|
"loss": 2.846, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7461254612546125, |
|
"grad_norm": 0.45308247208595276, |
|
"learning_rate": 1.8381568845496578e-05, |
|
"loss": 2.807, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.7468634686346863, |
|
"grad_norm": 0.44377437233924866, |
|
"learning_rate": 1.828189158060927e-05, |
|
"loss": 2.9005, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.7476014760147601, |
|
"grad_norm": 0.45314696431159973, |
|
"learning_rate": 1.8182424811396133e-05, |
|
"loss": 2.8626, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.7483394833948339, |
|
"grad_norm": 0.4458778202533722, |
|
"learning_rate": 1.80831691979635e-05, |
|
"loss": 2.7985, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.7490774907749077, |
|
"grad_norm": 0.464269757270813, |
|
"learning_rate": 1.7984125399016392e-05, |
|
"loss": 2.9386, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.7498154981549815, |
|
"grad_norm": 0.4448395371437073, |
|
"learning_rate": 1.7885294071854157e-05, |
|
"loss": 2.833, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.7505535055350554, |
|
"grad_norm": 0.4455287754535675, |
|
"learning_rate": 1.7786675872366028e-05, |
|
"loss": 2.8184, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.7512915129151292, |
|
"grad_norm": 0.4467598497867584, |
|
"learning_rate": 1.7688271455026867e-05, |
|
"loss": 2.8357, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.752029520295203, |
|
"grad_norm": 0.4642166197299957, |
|
"learning_rate": 1.7590081472892776e-05, |
|
"loss": 2.9219, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.7527675276752768, |
|
"grad_norm": 0.44453924894332886, |
|
"learning_rate": 1.7492106577596772e-05, |
|
"loss": 2.8822, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7535055350553506, |
|
"grad_norm": 0.4599774479866028, |
|
"learning_rate": 1.7394347419344432e-05, |
|
"loss": 2.8336, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.7542435424354244, |
|
"grad_norm": 0.4477832317352295, |
|
"learning_rate": 1.7296804646909654e-05, |
|
"loss": 2.785, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.7549815498154981, |
|
"grad_norm": 0.45672887563705444, |
|
"learning_rate": 1.7199478907630267e-05, |
|
"loss": 2.8166, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.7557195571955719, |
|
"grad_norm": 0.4571615159511566, |
|
"learning_rate": 1.710237084740378e-05, |
|
"loss": 2.9199, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.7564575645756457, |
|
"grad_norm": 0.4618014991283417, |
|
"learning_rate": 1.7005481110683062e-05, |
|
"loss": 2.907, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.7571955719557195, |
|
"grad_norm": 0.44089266657829285, |
|
"learning_rate": 1.690881034047212e-05, |
|
"loss": 2.854, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.7579335793357933, |
|
"grad_norm": 0.4468059837818146, |
|
"learning_rate": 1.6812359178321784e-05, |
|
"loss": 2.8511, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.7586715867158672, |
|
"grad_norm": 0.4517216682434082, |
|
"learning_rate": 1.6716128264325475e-05, |
|
"loss": 2.8117, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.759409594095941, |
|
"grad_norm": 0.4576111137866974, |
|
"learning_rate": 1.662011823711495e-05, |
|
"loss": 2.838, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.7601476014760148, |
|
"grad_norm": 0.4355645179748535, |
|
"learning_rate": 1.6524329733856047e-05, |
|
"loss": 2.8054, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.7608856088560886, |
|
"grad_norm": 0.4544225037097931, |
|
"learning_rate": 1.642876339024446e-05, |
|
"loss": 2.8703, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.7616236162361624, |
|
"grad_norm": 0.4510670006275177, |
|
"learning_rate": 1.633341984050162e-05, |
|
"loss": 2.8265, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.7623616236162362, |
|
"grad_norm": 0.444296658039093, |
|
"learning_rate": 1.6238299717370252e-05, |
|
"loss": 2.9467, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 0.76309963099631, |
|
"grad_norm": 0.44352987408638, |
|
"learning_rate": 1.614340365211044e-05, |
|
"loss": 2.8385, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.7638376383763837, |
|
"grad_norm": 0.4408433139324188, |
|
"learning_rate": 1.6048732274495255e-05, |
|
"loss": 2.7828, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.7645756457564575, |
|
"grad_norm": 0.4516165554523468, |
|
"learning_rate": 1.595428621280668e-05, |
|
"loss": 2.8448, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.7653136531365313, |
|
"grad_norm": 0.4665060341358185, |
|
"learning_rate": 1.5860066093831367e-05, |
|
"loss": 2.8067, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 0.7660516605166051, |
|
"grad_norm": 0.44463926553726196, |
|
"learning_rate": 1.5766072542856526e-05, |
|
"loss": 2.8421, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.766789667896679, |
|
"grad_norm": 0.426488995552063, |
|
"learning_rate": 1.5672306183665764e-05, |
|
"loss": 2.8121, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 0.7675276752767528, |
|
"grad_norm": 0.44521549344062805, |
|
"learning_rate": 1.557876763853493e-05, |
|
"loss": 2.7992, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.7682656826568266, |
|
"grad_norm": 0.45438680052757263, |
|
"learning_rate": 1.5485457528228003e-05, |
|
"loss": 2.8034, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 0.7690036900369004, |
|
"grad_norm": 0.4456971287727356, |
|
"learning_rate": 1.5392376471992965e-05, |
|
"loss": 2.8191, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.7697416974169742, |
|
"grad_norm": 0.4459834694862366, |
|
"learning_rate": 1.529952508755768e-05, |
|
"loss": 2.8668, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 0.770479704797048, |
|
"grad_norm": 0.4495449960231781, |
|
"learning_rate": 1.5206903991125832e-05, |
|
"loss": 2.8433, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.7712177121771218, |
|
"grad_norm": 0.4536389708518982, |
|
"learning_rate": 1.511451379737278e-05, |
|
"loss": 2.8522, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.7719557195571956, |
|
"grad_norm": 0.44112926721572876, |
|
"learning_rate": 1.502235511944154e-05, |
|
"loss": 2.872, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.7726937269372693, |
|
"grad_norm": 0.43305230140686035, |
|
"learning_rate": 1.4930428568938648e-05, |
|
"loss": 2.901, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 0.7734317343173431, |
|
"grad_norm": 0.4792589247226715, |
|
"learning_rate": 1.4838734755930167e-05, |
|
"loss": 2.7635, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.7741697416974169, |
|
"grad_norm": 0.4358636438846588, |
|
"learning_rate": 1.4747274288937596e-05, |
|
"loss": 2.8276, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 0.7749077490774908, |
|
"grad_norm": 0.44949012994766235, |
|
"learning_rate": 1.4656047774933874e-05, |
|
"loss": 2.8624, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7756457564575646, |
|
"grad_norm": 0.4440300762653351, |
|
"learning_rate": 1.4565055819339235e-05, |
|
"loss": 2.8239, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 0.7763837638376384, |
|
"grad_norm": 0.4554462730884552, |
|
"learning_rate": 1.447429902601739e-05, |
|
"loss": 2.7734, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.7771217712177122, |
|
"grad_norm": 0.4523858428001404, |
|
"learning_rate": 1.4383777997271347e-05, |
|
"loss": 2.8976, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 0.777859778597786, |
|
"grad_norm": 0.46444228291511536, |
|
"learning_rate": 1.429349333383948e-05, |
|
"loss": 2.8756, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.7785977859778598, |
|
"grad_norm": 0.4419015347957611, |
|
"learning_rate": 1.4203445634891538e-05, |
|
"loss": 2.8626, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.7793357933579336, |
|
"grad_norm": 0.44527843594551086, |
|
"learning_rate": 1.4113635498024664e-05, |
|
"loss": 2.8063, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.7800738007380074, |
|
"grad_norm": 0.4554080665111542, |
|
"learning_rate": 1.4024063519259439e-05, |
|
"loss": 2.7555, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 0.7808118081180812, |
|
"grad_norm": 0.4289720952510834, |
|
"learning_rate": 1.3934730293035936e-05, |
|
"loss": 2.8304, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.7815498154981549, |
|
"grad_norm": 0.4606097936630249, |
|
"learning_rate": 1.38456364122097e-05, |
|
"loss": 2.8415, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 0.7822878228782287, |
|
"grad_norm": 0.4606861174106598, |
|
"learning_rate": 1.3756782468047936e-05, |
|
"loss": 2.889, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.7830258302583026, |
|
"grad_norm": 0.425731897354126, |
|
"learning_rate": 1.3668169050225472e-05, |
|
"loss": 2.8573, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 0.7837638376383764, |
|
"grad_norm": 0.4634413421154022, |
|
"learning_rate": 1.357979674682095e-05, |
|
"loss": 2.8677, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.7845018450184502, |
|
"grad_norm": 0.45793548226356506, |
|
"learning_rate": 1.349166614431282e-05, |
|
"loss": 2.9207, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 0.785239852398524, |
|
"grad_norm": 0.4642331898212433, |
|
"learning_rate": 1.3403777827575514e-05, |
|
"loss": 2.887, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.7859778597785978, |
|
"grad_norm": 0.4591294825077057, |
|
"learning_rate": 1.3316132379875551e-05, |
|
"loss": 2.8502, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.7867158671586716, |
|
"grad_norm": 0.4461764395236969, |
|
"learning_rate": 1.322873038286766e-05, |
|
"loss": 2.8357, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.7874538745387454, |
|
"grad_norm": 0.4518667757511139, |
|
"learning_rate": 1.3141572416590891e-05, |
|
"loss": 2.9274, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 0.7881918819188192, |
|
"grad_norm": 0.435041606426239, |
|
"learning_rate": 1.3054659059464835e-05, |
|
"loss": 2.7578, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.788929889298893, |
|
"grad_norm": 0.45000597834587097, |
|
"learning_rate": 1.2967990888285737e-05, |
|
"loss": 2.8792, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 0.7896678966789668, |
|
"grad_norm": 0.4507540464401245, |
|
"learning_rate": 1.2881568478222672e-05, |
|
"loss": 2.9286, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.7904059040590405, |
|
"grad_norm": 0.44547247886657715, |
|
"learning_rate": 1.2795392402813715e-05, |
|
"loss": 2.7792, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 0.7911439114391144, |
|
"grad_norm": 0.4526568353176117, |
|
"learning_rate": 1.2709463233962204e-05, |
|
"loss": 2.8923, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.7918819188191882, |
|
"grad_norm": 0.4650912284851074, |
|
"learning_rate": 1.262378154193285e-05, |
|
"loss": 2.7767, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 0.792619926199262, |
|
"grad_norm": 0.4619973301887512, |
|
"learning_rate": 1.2538347895348013e-05, |
|
"loss": 2.7074, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.7933579335793358, |
|
"grad_norm": 0.4545031487941742, |
|
"learning_rate": 1.2453162861183909e-05, |
|
"loss": 2.832, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.7940959409594096, |
|
"grad_norm": 0.45016980171203613, |
|
"learning_rate": 1.236822700476683e-05, |
|
"loss": 2.8709, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.7948339483394834, |
|
"grad_norm": 0.41397857666015625, |
|
"learning_rate": 1.2283540889769445e-05, |
|
"loss": 2.7864, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 0.7955719557195572, |
|
"grad_norm": 0.47167348861694336, |
|
"learning_rate": 1.2199105078207001e-05, |
|
"loss": 2.7768, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.796309963099631, |
|
"grad_norm": 0.46366357803344727, |
|
"learning_rate": 1.2114920130433644e-05, |
|
"loss": 2.8994, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 0.7970479704797048, |
|
"grad_norm": 0.4539276957511902, |
|
"learning_rate": 1.2030986605138644e-05, |
|
"loss": 2.8526, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.7977859778597786, |
|
"grad_norm": 0.430576354265213, |
|
"learning_rate": 1.1947305059342729e-05, |
|
"loss": 2.7993, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 0.7985239852398524, |
|
"grad_norm": 0.4400356113910675, |
|
"learning_rate": 1.1863876048394407e-05, |
|
"loss": 2.9068, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.7992619926199263, |
|
"grad_norm": 0.44879478216171265, |
|
"learning_rate": 1.1780700125966233e-05, |
|
"loss": 2.8591, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.44169095158576965, |
|
"learning_rate": 1.1697777844051105e-05, |
|
"loss": 2.793, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.8007380073800738, |
|
"grad_norm": 0.45461106300354004, |
|
"learning_rate": 1.1615109752958713e-05, |
|
"loss": 2.9182, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.8014760147601476, |
|
"grad_norm": 0.4425186812877655, |
|
"learning_rate": 1.1532696401311787e-05, |
|
"loss": 2.8754, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.8022140221402214, |
|
"grad_norm": 0.4334977865219116, |
|
"learning_rate": 1.1450538336042516e-05, |
|
"loss": 2.8037, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 0.8029520295202952, |
|
"grad_norm": 0.43513453006744385, |
|
"learning_rate": 1.1368636102388868e-05, |
|
"loss": 2.8548, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.803690036900369, |
|
"grad_norm": 0.4428231716156006, |
|
"learning_rate": 1.1286990243891011e-05, |
|
"loss": 2.8673, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 0.8044280442804428, |
|
"grad_norm": 0.4509079158306122, |
|
"learning_rate": 1.1205601302387692e-05, |
|
"loss": 2.9012, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8051660516605166, |
|
"grad_norm": 0.44838449358940125, |
|
"learning_rate": 1.1124469818012635e-05, |
|
"loss": 2.8056, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 0.8059040590405904, |
|
"grad_norm": 0.4536844491958618, |
|
"learning_rate": 1.1043596329190964e-05, |
|
"loss": 2.883, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.8066420664206642, |
|
"grad_norm": 0.44634494185447693, |
|
"learning_rate": 1.0962981372635628e-05, |
|
"loss": 2.8049, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 0.8073800738007381, |
|
"grad_norm": 0.4615216553211212, |
|
"learning_rate": 1.0882625483343845e-05, |
|
"loss": 2.9058, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.8081180811808119, |
|
"grad_norm": 0.4436852037906647, |
|
"learning_rate": 1.0802529194593547e-05, |
|
"loss": 2.8492, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.8088560885608856, |
|
"grad_norm": 0.4358108341693878, |
|
"learning_rate": 1.0722693037939818e-05, |
|
"loss": 2.8513, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.8095940959409594, |
|
"grad_norm": 0.45849135518074036, |
|
"learning_rate": 1.0643117543211422e-05, |
|
"loss": 2.8141, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 0.8103321033210332, |
|
"grad_norm": 0.4694216251373291, |
|
"learning_rate": 1.0563803238507219e-05, |
|
"loss": 2.8304, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.811070110701107, |
|
"grad_norm": 0.4531688094139099, |
|
"learning_rate": 1.0484750650192726e-05, |
|
"loss": 2.9128, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 0.8118081180811808, |
|
"grad_norm": 0.4585440754890442, |
|
"learning_rate": 1.0405960302896562e-05, |
|
"loss": 2.8299, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.8125461254612546, |
|
"grad_norm": 0.4274667799472809, |
|
"learning_rate": 1.0327432719507019e-05, |
|
"loss": 2.7979, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 0.8132841328413284, |
|
"grad_norm": 0.43614691495895386, |
|
"learning_rate": 1.0249168421168558e-05, |
|
"loss": 2.8119, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.8140221402214022, |
|
"grad_norm": 0.45556968450546265, |
|
"learning_rate": 1.0171167927278368e-05, |
|
"loss": 2.9038, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 0.814760147601476, |
|
"grad_norm": 0.44112008810043335, |
|
"learning_rate": 1.0093431755482908e-05, |
|
"loss": 2.9019, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.8154981549815498, |
|
"grad_norm": 0.444204181432724, |
|
"learning_rate": 1.001596042167447e-05, |
|
"loss": 2.7909, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.8162361623616237, |
|
"grad_norm": 0.427478551864624, |
|
"learning_rate": 9.93875443998778e-06, |
|
"loss": 2.8195, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.8169741697416975, |
|
"grad_norm": 0.4325047433376312, |
|
"learning_rate": 9.861814322796553e-06, |
|
"loss": 2.8227, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 0.8177121771217712, |
|
"grad_norm": 0.4463500380516052, |
|
"learning_rate": 9.785140580710107e-06, |
|
"loss": 2.8502, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.818450184501845, |
|
"grad_norm": 0.44314101338386536, |
|
"learning_rate": 9.708733722569996e-06, |
|
"loss": 2.8617, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 0.8191881918819188, |
|
"grad_norm": 0.43770846724510193, |
|
"learning_rate": 9.632594255446565e-06, |
|
"loss": 2.815, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.8199261992619926, |
|
"grad_norm": 0.48664426803588867, |
|
"learning_rate": 9.556722684635667e-06, |
|
"loss": 2.8386, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 0.8206642066420664, |
|
"grad_norm": 0.42718470096588135, |
|
"learning_rate": 9.48111951365529e-06, |
|
"loss": 2.7743, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.8214022140221402, |
|
"grad_norm": 0.4534224569797516, |
|
"learning_rate": 9.405785244242165e-06, |
|
"loss": 2.885, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 0.822140221402214, |
|
"grad_norm": 0.4469706118106842, |
|
"learning_rate": 9.330720376348483e-06, |
|
"loss": 2.7431, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.8228782287822878, |
|
"grad_norm": 0.4499460756778717, |
|
"learning_rate": 9.25592540813857e-06, |
|
"loss": 2.8604, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.8236162361623616, |
|
"grad_norm": 0.4386638104915619, |
|
"learning_rate": 9.18140083598557e-06, |
|
"loss": 2.797, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.8243542435424355, |
|
"grad_norm": 0.4377821683883667, |
|
"learning_rate": 9.10714715446817e-06, |
|
"loss": 2.8071, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 0.8250922509225093, |
|
"grad_norm": 0.4503236413002014, |
|
"learning_rate": 9.03316485636727e-06, |
|
"loss": 2.8215, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.825830258302583, |
|
"grad_norm": 0.4537326693534851, |
|
"learning_rate": 8.959454432662778e-06, |
|
"loss": 2.7938, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 0.8265682656826568, |
|
"grad_norm": 0.4477526843547821, |
|
"learning_rate": 8.88601637253032e-06, |
|
"loss": 2.7778, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.8273062730627306, |
|
"grad_norm": 0.45014604926109314, |
|
"learning_rate": 8.812851163337975e-06, |
|
"loss": 2.792, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 0.8280442804428044, |
|
"grad_norm": 0.44553130865097046, |
|
"learning_rate": 8.739959290643097e-06, |
|
"loss": 2.8268, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.8287822878228782, |
|
"grad_norm": 0.45030757784843445, |
|
"learning_rate": 8.667341238189009e-06, |
|
"loss": 2.8332, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 0.829520295202952, |
|
"grad_norm": 0.44522371888160706, |
|
"learning_rate": 8.594997487901879e-06, |
|
"loss": 2.8526, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.8302583025830258, |
|
"grad_norm": 0.46951159834861755, |
|
"learning_rate": 8.522928519887463e-06, |
|
"loss": 2.8052, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.8309963099630996, |
|
"grad_norm": 0.45531222224235535, |
|
"learning_rate": 8.451134812427925e-06, |
|
"loss": 2.8108, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.8317343173431734, |
|
"grad_norm": 0.4519606828689575, |
|
"learning_rate": 8.379616841978699e-06, |
|
"loss": 2.8302, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 0.8324723247232473, |
|
"grad_norm": 0.45735597610473633, |
|
"learning_rate": 8.308375083165298e-06, |
|
"loss": 2.9323, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.8332103321033211, |
|
"grad_norm": 0.4518982172012329, |
|
"learning_rate": 8.237410008780161e-06, |
|
"loss": 2.796, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 0.8339483394833949, |
|
"grad_norm": 0.4294179379940033, |
|
"learning_rate": 8.166722089779539e-06, |
|
"loss": 2.8383, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8346863468634687, |
|
"grad_norm": 0.43325817584991455, |
|
"learning_rate": 8.096311795280331e-06, |
|
"loss": 2.7896, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 0.8354243542435424, |
|
"grad_norm": 0.4492734670639038, |
|
"learning_rate": 8.026179592557037e-06, |
|
"loss": 2.8272, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.8361623616236162, |
|
"grad_norm": 0.4338243007659912, |
|
"learning_rate": 7.956325947038584e-06, |
|
"loss": 2.8173, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 0.83690036900369, |
|
"grad_norm": 0.4449402987957001, |
|
"learning_rate": 7.886751322305247e-06, |
|
"loss": 2.8244, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.8376383763837638, |
|
"grad_norm": 0.44180235266685486, |
|
"learning_rate": 7.817456180085636e-06, |
|
"loss": 2.8902, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.8383763837638376, |
|
"grad_norm": 0.45504215359687805, |
|
"learning_rate": 7.748440980253562e-06, |
|
"loss": 2.8344, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.8391143911439114, |
|
"grad_norm": 0.4654461443424225, |
|
"learning_rate": 7.67970618082503e-06, |
|
"loss": 2.8335, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 0.8398523985239852, |
|
"grad_norm": 0.47360721230506897, |
|
"learning_rate": 7.611252237955169e-06, |
|
"loss": 2.8943, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.8405904059040591, |
|
"grad_norm": 0.4570152461528778, |
|
"learning_rate": 7.543079605935221e-06, |
|
"loss": 2.8674, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 0.8413284132841329, |
|
"grad_norm": 0.41285139322280884, |
|
"learning_rate": 7.47518873718952e-06, |
|
"loss": 2.8292, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.8420664206642067, |
|
"grad_norm": 0.45135176181793213, |
|
"learning_rate": 7.407580082272492e-06, |
|
"loss": 2.7573, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 0.8428044280442805, |
|
"grad_norm": 0.4763992726802826, |
|
"learning_rate": 7.340254089865672e-06, |
|
"loss": 2.8902, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.8435424354243543, |
|
"grad_norm": 0.480816513299942, |
|
"learning_rate": 7.27321120677471e-06, |
|
"loss": 2.9058, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 0.844280442804428, |
|
"grad_norm": 0.4476820230484009, |
|
"learning_rate": 7.206451877926418e-06, |
|
"loss": 2.8191, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.8450184501845018, |
|
"grad_norm": 0.4477422833442688, |
|
"learning_rate": 7.139976546365817e-06, |
|
"loss": 2.8023, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.8457564575645756, |
|
"grad_norm": 0.4407312572002411, |
|
"learning_rate": 7.0737856532531895e-06, |
|
"loss": 2.8368, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.8464944649446494, |
|
"grad_norm": 0.45549750328063965, |
|
"learning_rate": 7.007879637861159e-06, |
|
"loss": 2.8561, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.8472324723247232, |
|
"grad_norm": 0.4288015067577362, |
|
"learning_rate": 6.942258937571772e-06, |
|
"loss": 2.7234, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.847970479704797, |
|
"grad_norm": 0.4370770752429962, |
|
"learning_rate": 6.87692398787359e-06, |
|
"loss": 2.8607, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 0.8487084870848709, |
|
"grad_norm": 0.44784659147262573, |
|
"learning_rate": 6.81187522235881e-06, |
|
"loss": 2.78, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8494464944649447, |
|
"grad_norm": 0.43501320481300354, |
|
"learning_rate": 6.747113072720385e-06, |
|
"loss": 2.8121, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 0.8501845018450185, |
|
"grad_norm": 0.4419308006763458, |
|
"learning_rate": 6.6826379687491505e-06, |
|
"loss": 2.8502, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.8509225092250923, |
|
"grad_norm": 0.4417872130870819, |
|
"learning_rate": 6.6184503383309784e-06, |
|
"loss": 2.8042, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 0.8516605166051661, |
|
"grad_norm": 0.4433625638484955, |
|
"learning_rate": 6.5545506074439325e-06, |
|
"loss": 2.7962, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.8523985239852399, |
|
"grad_norm": 0.44587311148643494, |
|
"learning_rate": 6.490939200155449e-06, |
|
"loss": 2.841, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.8531365313653136, |
|
"grad_norm": 0.4439995288848877, |
|
"learning_rate": 6.427616538619524e-06, |
|
"loss": 2.8195, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.8538745387453874, |
|
"grad_norm": 0.4364805519580841, |
|
"learning_rate": 6.3645830430739015e-06, |
|
"loss": 2.7775, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 0.8546125461254612, |
|
"grad_norm": 0.4607424736022949, |
|
"learning_rate": 6.301839131837284e-06, |
|
"loss": 2.907, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.855350553505535, |
|
"grad_norm": 0.45834723114967346, |
|
"learning_rate": 6.239385221306587e-06, |
|
"loss": 2.8708, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 0.8560885608856088, |
|
"grad_norm": 0.43934082984924316, |
|
"learning_rate": 6.177221725954102e-06, |
|
"loss": 2.8159, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.8568265682656827, |
|
"grad_norm": 0.4437257945537567, |
|
"learning_rate": 6.1153490583248265e-06, |
|
"loss": 2.8734, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 0.8575645756457565, |
|
"grad_norm": 0.43929627537727356, |
|
"learning_rate": 6.053767629033713e-06, |
|
"loss": 2.874, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.8583025830258303, |
|
"grad_norm": 0.4439617097377777, |
|
"learning_rate": 5.992477846762895e-06, |
|
"loss": 2.8252, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 0.8590405904059041, |
|
"grad_norm": 0.4464716613292694, |
|
"learning_rate": 5.931480118259003e-06, |
|
"loss": 2.78, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.8597785977859779, |
|
"grad_norm": 0.43279653787612915, |
|
"learning_rate": 5.870774848330485e-06, |
|
"loss": 2.749, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.8605166051660517, |
|
"grad_norm": 0.4490513503551483, |
|
"learning_rate": 5.810362439844896e-06, |
|
"loss": 2.841, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.8612546125461255, |
|
"grad_norm": 0.4711556136608124, |
|
"learning_rate": 5.750243293726226e-06, |
|
"loss": 2.7801, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 0.8619926199261992, |
|
"grad_norm": 0.4525899887084961, |
|
"learning_rate": 5.690417808952242e-06, |
|
"loss": 2.8942, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.862730627306273, |
|
"grad_norm": 0.44727823138237, |
|
"learning_rate": 5.6308863825518425e-06, |
|
"loss": 2.8095, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 0.8634686346863468, |
|
"grad_norm": 0.43965160846710205, |
|
"learning_rate": 5.571649409602436e-06, |
|
"loss": 2.8073, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.8642066420664206, |
|
"grad_norm": 0.45212361216545105, |
|
"learning_rate": 5.512707283227275e-06, |
|
"loss": 2.8849, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 0.8649446494464945, |
|
"grad_norm": 0.4664202332496643, |
|
"learning_rate": 5.454060394592919e-06, |
|
"loss": 2.8199, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.8656826568265683, |
|
"grad_norm": 0.4387909471988678, |
|
"learning_rate": 5.395709132906568e-06, |
|
"loss": 2.8372, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 0.8664206642066421, |
|
"grad_norm": 0.4543474018573761, |
|
"learning_rate": 5.337653885413513e-06, |
|
"loss": 2.8331, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.8671586715867159, |
|
"grad_norm": 0.45128577947616577, |
|
"learning_rate": 5.279895037394566e-06, |
|
"loss": 2.8062, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.8678966789667897, |
|
"grad_norm": 0.4404621422290802, |
|
"learning_rate": 5.222432972163482e-06, |
|
"loss": 2.9088, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.8686346863468635, |
|
"grad_norm": 0.4398937225341797, |
|
"learning_rate": 5.165268071064455e-06, |
|
"loss": 2.7826, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 0.8693726937269373, |
|
"grad_norm": 0.4395955204963684, |
|
"learning_rate": 5.108400713469546e-06, |
|
"loss": 2.8196, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.870110701107011, |
|
"grad_norm": 0.43461933732032776, |
|
"learning_rate": 5.051831276776203e-06, |
|
"loss": 2.8663, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 0.8708487084870848, |
|
"grad_norm": 0.4447794258594513, |
|
"learning_rate": 4.995560136404709e-06, |
|
"loss": 2.8519, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.8715867158671586, |
|
"grad_norm": 0.4266679286956787, |
|
"learning_rate": 4.939587665795736e-06, |
|
"loss": 2.8062, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 0.8723247232472324, |
|
"grad_norm": 0.4411248564720154, |
|
"learning_rate": 4.88391423640786e-06, |
|
"loss": 2.8758, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.8730627306273063, |
|
"grad_norm": 0.44381076097488403, |
|
"learning_rate": 4.828540217715066e-06, |
|
"loss": 2.7979, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 0.8738007380073801, |
|
"grad_norm": 0.44569119811058044, |
|
"learning_rate": 4.773465977204311e-06, |
|
"loss": 2.8081, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.8745387453874539, |
|
"grad_norm": 0.48127833008766174, |
|
"learning_rate": 4.718691880373094e-06, |
|
"loss": 2.8617, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.8752767527675277, |
|
"grad_norm": 0.45613643527030945, |
|
"learning_rate": 4.664218290727035e-06, |
|
"loss": 2.8187, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.8760147601476015, |
|
"grad_norm": 0.440491646528244, |
|
"learning_rate": 4.610045569777444e-06, |
|
"loss": 2.8023, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 0.8767527675276753, |
|
"grad_norm": 0.4358707368373871, |
|
"learning_rate": 4.5561740770389275e-06, |
|
"loss": 2.8102, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.8774907749077491, |
|
"grad_norm": 0.43503841757774353, |
|
"learning_rate": 4.502604170027019e-06, |
|
"loss": 2.8204, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 0.8782287822878229, |
|
"grad_norm": 0.4486919343471527, |
|
"learning_rate": 4.449336204255777e-06, |
|
"loss": 2.8827, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.8789667896678967, |
|
"grad_norm": 0.43869447708129883, |
|
"learning_rate": 4.396370533235455e-06, |
|
"loss": 2.8374, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 0.8797047970479704, |
|
"grad_norm": 0.45128440856933594, |
|
"learning_rate": 4.343707508470135e-06, |
|
"loss": 2.8906, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.8804428044280442, |
|
"grad_norm": 0.46216467022895813, |
|
"learning_rate": 4.291347479455405e-06, |
|
"loss": 2.8381, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 0.8811808118081181, |
|
"grad_norm": 0.4366297721862793, |
|
"learning_rate": 4.2392907936760265e-06, |
|
"loss": 2.8183, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.8819188191881919, |
|
"grad_norm": 0.45038753747940063, |
|
"learning_rate": 4.187537796603658e-06, |
|
"loss": 2.7906, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.8826568265682657, |
|
"grad_norm": 0.45959797501564026, |
|
"learning_rate": 4.136088831694524e-06, |
|
"loss": 2.8724, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.8833948339483395, |
|
"grad_norm": 0.4413219392299652, |
|
"learning_rate": 4.084944240387168e-06, |
|
"loss": 2.8541, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 0.8841328413284133, |
|
"grad_norm": 0.47469910979270935, |
|
"learning_rate": 4.034104362100155e-06, |
|
"loss": 2.9288, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.8848708487084871, |
|
"grad_norm": 0.43708014488220215, |
|
"learning_rate": 3.983569534229864e-06, |
|
"loss": 2.7833, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 0.8856088560885609, |
|
"grad_norm": 0.44569307565689087, |
|
"learning_rate": 3.933340092148202e-06, |
|
"loss": 2.8684, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8863468634686347, |
|
"grad_norm": 0.462568998336792, |
|
"learning_rate": 3.883416369200399e-06, |
|
"loss": 2.8399, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 0.8870848708487085, |
|
"grad_norm": 0.4384634494781494, |
|
"learning_rate": 3.8337986967028e-06, |
|
"loss": 2.837, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.8878228782287823, |
|
"grad_norm": 0.46717679500579834, |
|
"learning_rate": 3.7844874039406674e-06, |
|
"loss": 2.8523, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 0.888560885608856, |
|
"grad_norm": 0.4314653277397156, |
|
"learning_rate": 3.7354828181659695e-06, |
|
"loss": 2.8815, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.8892988929889298, |
|
"grad_norm": 0.43344810605049133, |
|
"learning_rate": 3.6867852645952494e-06, |
|
"loss": 2.7918, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.8900369003690037, |
|
"grad_norm": 0.46255967020988464, |
|
"learning_rate": 3.6383950664074405e-06, |
|
"loss": 2.8106, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.8907749077490775, |
|
"grad_norm": 0.44985824823379517, |
|
"learning_rate": 3.5903125447417196e-06, |
|
"loss": 2.8244, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 0.8915129151291513, |
|
"grad_norm": 0.441011518239975, |
|
"learning_rate": 3.5425380186953904e-06, |
|
"loss": 2.8061, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.8922509225092251, |
|
"grad_norm": 0.4453372359275818, |
|
"learning_rate": 3.495071805321759e-06, |
|
"loss": 2.9384, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.8929889298892989, |
|
"grad_norm": 0.43761390447616577, |
|
"learning_rate": 3.447914219628029e-06, |
|
"loss": 2.7863, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.8937269372693727, |
|
"grad_norm": 0.4433492124080658, |
|
"learning_rate": 3.4010655745731865e-06, |
|
"loss": 2.8553, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 0.8944649446494465, |
|
"grad_norm": 0.43299391865730286, |
|
"learning_rate": 3.354526181066003e-06, |
|
"loss": 2.7823, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.8952029520295203, |
|
"grad_norm": 0.45678773522377014, |
|
"learning_rate": 3.308296347962875e-06, |
|
"loss": 2.7281, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 0.8959409594095941, |
|
"grad_norm": 0.4413972795009613, |
|
"learning_rate": 3.2623763820658237e-06, |
|
"loss": 2.8478, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.8966789667896679, |
|
"grad_norm": 0.44608476758003235, |
|
"learning_rate": 3.2167665881204567e-06, |
|
"loss": 2.7823, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.8974169741697416, |
|
"grad_norm": 0.4420614242553711, |
|
"learning_rate": 3.171467268813938e-06, |
|
"loss": 2.8281, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.8981549815498155, |
|
"grad_norm": 0.4385377764701843, |
|
"learning_rate": 3.1264787247729908e-06, |
|
"loss": 2.7918, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 0.8988929889298893, |
|
"grad_norm": 0.44008246064186096, |
|
"learning_rate": 3.0818012545618835e-06, |
|
"loss": 2.793, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.8996309963099631, |
|
"grad_norm": 0.44634199142456055, |
|
"learning_rate": 3.0374351546804514e-06, |
|
"loss": 2.7829, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 0.9003690036900369, |
|
"grad_norm": 0.4375803768634796, |
|
"learning_rate": 2.9933807195621445e-06, |
|
"loss": 2.8107, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.9011070110701107, |
|
"grad_norm": 0.4388578534126282, |
|
"learning_rate": 2.9496382415720723e-06, |
|
"loss": 2.8524, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 0.9018450184501845, |
|
"grad_norm": 0.43253517150878906, |
|
"learning_rate": 2.9062080110050515e-06, |
|
"loss": 2.8215, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.9025830258302583, |
|
"grad_norm": 0.4246656894683838, |
|
"learning_rate": 2.8630903160836773e-06, |
|
"loss": 2.835, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 0.9033210332103321, |
|
"grad_norm": 0.4635641872882843, |
|
"learning_rate": 2.820285442956422e-06, |
|
"loss": 2.829, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.9040590405904059, |
|
"grad_norm": 0.4323824644088745, |
|
"learning_rate": 2.7777936756957333e-06, |
|
"loss": 2.7945, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.9047970479704797, |
|
"grad_norm": 0.4489029347896576, |
|
"learning_rate": 2.7356152962961567e-06, |
|
"loss": 2.8904, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.9055350553505535, |
|
"grad_norm": 0.4545091390609741, |
|
"learning_rate": 2.6937505846724165e-06, |
|
"loss": 2.8889, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 0.9062730627306274, |
|
"grad_norm": 0.4438563585281372, |
|
"learning_rate": 2.6521998186576357e-06, |
|
"loss": 2.836, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.9070110701107011, |
|
"grad_norm": 0.4264052212238312, |
|
"learning_rate": 2.610963274001438e-06, |
|
"loss": 2.7639, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 0.9077490774907749, |
|
"grad_norm": 0.4508605897426605, |
|
"learning_rate": 2.5700412243681417e-06, |
|
"loss": 2.7735, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.9084870848708487, |
|
"grad_norm": 0.4573262929916382, |
|
"learning_rate": 2.5294339413349076e-06, |
|
"loss": 2.8901, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 0.9092250922509225, |
|
"grad_norm": 0.4440000057220459, |
|
"learning_rate": 2.4891416943900014e-06, |
|
"loss": 2.8662, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.9099630996309963, |
|
"grad_norm": 0.4513186812400818, |
|
"learning_rate": 2.449164750930938e-06, |
|
"loss": 2.8268, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 0.9107011070110701, |
|
"grad_norm": 0.43622398376464844, |
|
"learning_rate": 2.409503376262762e-06, |
|
"loss": 2.8246, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.9114391143911439, |
|
"grad_norm": 0.44066351652145386, |
|
"learning_rate": 2.3701578335962206e-06, |
|
"loss": 2.7924, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.9121771217712177, |
|
"grad_norm": 0.4405202269554138, |
|
"learning_rate": 2.3311283840460994e-06, |
|
"loss": 2.8639, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.9129151291512915, |
|
"grad_norm": 0.4488193094730377, |
|
"learning_rate": 2.292415286629418e-06, |
|
"loss": 2.8531, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 0.9136531365313653, |
|
"grad_norm": 0.4245339632034302, |
|
"learning_rate": 2.254018798263763e-06, |
|
"loss": 2.8349, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.9143911439114392, |
|
"grad_norm": 0.43623387813568115, |
|
"learning_rate": 2.2159391737655466e-06, |
|
"loss": 2.8225, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 0.915129151291513, |
|
"grad_norm": 0.4482229948043823, |
|
"learning_rate": 2.1781766658483303e-06, |
|
"loss": 2.7716, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.9158671586715867, |
|
"grad_norm": 0.450795441865921, |
|
"learning_rate": 2.1407315251211422e-06, |
|
"loss": 2.7796, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 0.9166051660516605, |
|
"grad_norm": 0.45314326882362366, |
|
"learning_rate": 2.103604000086856e-06, |
|
"loss": 2.8009, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.9173431734317343, |
|
"grad_norm": 0.44693273305892944, |
|
"learning_rate": 2.066794337140443e-06, |
|
"loss": 2.8486, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 0.9180811808118081, |
|
"grad_norm": 0.43216079473495483, |
|
"learning_rate": 2.0303027805674445e-06, |
|
"loss": 2.7234, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.9188191881918819, |
|
"grad_norm": 0.45111674070358276, |
|
"learning_rate": 1.994129572542286e-06, |
|
"loss": 2.7963, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.9195571955719557, |
|
"grad_norm": 0.46144166588783264, |
|
"learning_rate": 1.958274953126693e-06, |
|
"loss": 2.8314, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.9202952029520295, |
|
"grad_norm": 0.45646706223487854, |
|
"learning_rate": 1.922739160268089e-06, |
|
"loss": 2.8796, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 0.9210332103321033, |
|
"grad_norm": 0.49224853515625, |
|
"learning_rate": 1.8875224297980332e-06, |
|
"loss": 2.7904, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.9217712177121771, |
|
"grad_norm": 0.44804316759109497, |
|
"learning_rate": 1.8526249954306241e-06, |
|
"loss": 2.7583, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 0.922509225092251, |
|
"grad_norm": 0.43229466676712036, |
|
"learning_rate": 1.8180470887609769e-06, |
|
"loss": 2.8608, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.9232472324723248, |
|
"grad_norm": 0.43958374857902527, |
|
"learning_rate": 1.7837889392636864e-06, |
|
"loss": 2.8282, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 0.9239852398523986, |
|
"grad_norm": 0.4417596459388733, |
|
"learning_rate": 1.7498507742912784e-06, |
|
"loss": 2.8048, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.9247232472324723, |
|
"grad_norm": 0.4306926429271698, |
|
"learning_rate": 1.7162328190727217e-06, |
|
"loss": 2.8095, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 0.9254612546125461, |
|
"grad_norm": 0.439455509185791, |
|
"learning_rate": 1.682935296711935e-06, |
|
"loss": 2.7822, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.9261992619926199, |
|
"grad_norm": 0.4519449472427368, |
|
"learning_rate": 1.6499584281862935e-06, |
|
"loss": 2.8494, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.9269372693726937, |
|
"grad_norm": 0.4483802318572998, |
|
"learning_rate": 1.6173024323451747e-06, |
|
"loss": 2.8629, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.9276752767527675, |
|
"grad_norm": 0.4460211396217346, |
|
"learning_rate": 1.5849675259084872e-06, |
|
"loss": 2.8258, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 0.9284132841328413, |
|
"grad_norm": 0.43958115577697754, |
|
"learning_rate": 1.5529539234652668e-06, |
|
"loss": 2.8093, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.9291512915129151, |
|
"grad_norm": 0.46250835061073303, |
|
"learning_rate": 1.5212618374722155e-06, |
|
"loss": 2.828, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 0.9298892988929889, |
|
"grad_norm": 0.46097636222839355, |
|
"learning_rate": 1.4898914782523143e-06, |
|
"loss": 2.8305, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.9306273062730628, |
|
"grad_norm": 0.4385923445224762, |
|
"learning_rate": 1.458843053993403e-06, |
|
"loss": 2.7875, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 0.9313653136531366, |
|
"grad_norm": 0.44254031777381897, |
|
"learning_rate": 1.4281167707468457e-06, |
|
"loss": 2.8113, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.9321033210332104, |
|
"grad_norm": 0.4598987102508545, |
|
"learning_rate": 1.3977128324261068e-06, |
|
"loss": 2.8511, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 0.9328413284132842, |
|
"grad_norm": 0.4526178240776062, |
|
"learning_rate": 1.3676314408054391e-06, |
|
"loss": 2.7979, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.933579335793358, |
|
"grad_norm": 0.45094090700149536, |
|
"learning_rate": 1.3378727955185244e-06, |
|
"loss": 2.8319, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.9343173431734317, |
|
"grad_norm": 0.45027512311935425, |
|
"learning_rate": 1.3084370940571577e-06, |
|
"loss": 2.8245, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.9350553505535055, |
|
"grad_norm": 0.4329124391078949, |
|
"learning_rate": 1.2793245317699321e-06, |
|
"loss": 2.7542, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 0.9357933579335793, |
|
"grad_norm": 0.4586227536201477, |
|
"learning_rate": 1.2505353018609444e-06, |
|
"loss": 2.7729, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.9365313653136531, |
|
"grad_norm": 0.4397171437740326, |
|
"learning_rate": 1.2220695953885031e-06, |
|
"loss": 2.8164, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 0.9372693726937269, |
|
"grad_norm": 0.4415930211544037, |
|
"learning_rate": 1.1939276012638723e-06, |
|
"loss": 2.8644, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9380073800738007, |
|
"grad_norm": 0.43980923295021057, |
|
"learning_rate": 1.1661095062500237e-06, |
|
"loss": 2.8716, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.9387453874538746, |
|
"grad_norm": 0.46194180846214294, |
|
"learning_rate": 1.1386154949603934e-06, |
|
"loss": 2.8307, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.9394833948339484, |
|
"grad_norm": 0.4496355652809143, |
|
"learning_rate": 1.1114457498576258e-06, |
|
"loss": 2.7868, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 0.9402214022140222, |
|
"grad_norm": 0.4483359456062317, |
|
"learning_rate": 1.0846004512524211e-06, |
|
"loss": 2.8357, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.940959409594096, |
|
"grad_norm": 0.44404512643814087, |
|
"learning_rate": 1.0580797773022733e-06, |
|
"loss": 2.8843, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.9416974169741698, |
|
"grad_norm": 0.4440787136554718, |
|
"learning_rate": 1.03188390401035e-06, |
|
"loss": 2.8038, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.9424354243542435, |
|
"grad_norm": 0.4445192813873291, |
|
"learning_rate": 1.006013005224271e-06, |
|
"loss": 2.813, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 0.9431734317343173, |
|
"grad_norm": 0.4234587550163269, |
|
"learning_rate": 9.80467252634998e-07, |
|
"loss": 2.8414, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.9439114391143911, |
|
"grad_norm": 0.4393916726112366, |
|
"learning_rate": 9.552468157756622e-07, |
|
"loss": 2.7851, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 0.9446494464944649, |
|
"grad_norm": 0.4591200053691864, |
|
"learning_rate": 9.303518620204677e-07, |
|
"loss": 2.8378, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.9453874538745387, |
|
"grad_norm": 0.43322470784187317, |
|
"learning_rate": 9.057825565835399e-07, |
|
"loss": 2.7366, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 0.9461254612546125, |
|
"grad_norm": 0.4324533939361572, |
|
"learning_rate": 8.815390625178887e-07, |
|
"loss": 2.7483, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.9468634686346864, |
|
"grad_norm": 0.4632011950016022, |
|
"learning_rate": 8.576215407142651e-07, |
|
"loss": 2.7874, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 0.9476014760147602, |
|
"grad_norm": 0.4332893490791321, |
|
"learning_rate": 8.340301499001446e-07, |
|
"loss": 2.8252, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.948339483394834, |
|
"grad_norm": 0.436294287443161, |
|
"learning_rate": 8.107650466386285e-07, |
|
"loss": 2.8445, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.9490774907749078, |
|
"grad_norm": 0.43967026472091675, |
|
"learning_rate": 7.878263853274281e-07, |
|
"loss": 2.8411, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.9498154981549816, |
|
"grad_norm": 0.45120909810066223, |
|
"learning_rate": 7.652143181978655e-07, |
|
"loss": 2.8118, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 0.9505535055350554, |
|
"grad_norm": 0.4368390738964081, |
|
"learning_rate": 7.429289953138019e-07, |
|
"loss": 2.8086, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.9512915129151291, |
|
"grad_norm": 0.4452465772628784, |
|
"learning_rate": 7.209705645706944e-07, |
|
"loss": 2.8468, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 0.9520295202952029, |
|
"grad_norm": 0.4445231258869171, |
|
"learning_rate": 6.993391716946019e-07, |
|
"loss": 2.8114, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.9527675276752767, |
|
"grad_norm": 0.43402281403541565, |
|
"learning_rate": 6.780349602411918e-07, |
|
"loss": 2.8352, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 0.9535055350553505, |
|
"grad_norm": 0.45803192257881165, |
|
"learning_rate": 6.570580715948404e-07, |
|
"loss": 2.8013, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.9542435424354243, |
|
"grad_norm": 0.45193520188331604, |
|
"learning_rate": 6.364086449676232e-07, |
|
"loss": 2.8368, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 0.9549815498154982, |
|
"grad_norm": 0.44040247797966003, |
|
"learning_rate": 6.160868173984591e-07, |
|
"loss": 2.8559, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.955719557195572, |
|
"grad_norm": 0.4719098210334778, |
|
"learning_rate": 5.960927237521563e-07, |
|
"loss": 2.85, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.9564575645756458, |
|
"grad_norm": 0.4502539336681366, |
|
"learning_rate": 5.764264967185462e-07, |
|
"loss": 2.9074, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.9571955719557196, |
|
"grad_norm": 0.4299696683883667, |
|
"learning_rate": 5.570882668115784e-07, |
|
"loss": 2.7595, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 0.9579335793357934, |
|
"grad_norm": 0.44181373715400696, |
|
"learning_rate": 5.380781623684661e-07, |
|
"loss": 2.8024, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.9586715867158672, |
|
"grad_norm": 0.437763512134552, |
|
"learning_rate": 5.193963095488419e-07, |
|
"loss": 2.8231, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 0.959409594095941, |
|
"grad_norm": 0.4234910011291504, |
|
"learning_rate": 5.010428323339033e-07, |
|
"loss": 2.8898, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9601476014760147, |
|
"grad_norm": 0.45260801911354065, |
|
"learning_rate": 4.830178525256079e-07, |
|
"loss": 2.8558, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 0.9608856088560885, |
|
"grad_norm": 0.4440422058105469, |
|
"learning_rate": 4.653214897458513e-07, |
|
"loss": 2.8007, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.9616236162361623, |
|
"grad_norm": 0.4362104833126068, |
|
"learning_rate": 4.4795386143567374e-07, |
|
"loss": 2.8271, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 0.9623616236162361, |
|
"grad_norm": 0.44079768657684326, |
|
"learning_rate": 4.309150828544939e-07, |
|
"loss": 2.8371, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.9630996309963099, |
|
"grad_norm": 0.46145325899124146, |
|
"learning_rate": 4.1420526707933727e-07, |
|
"loss": 2.8808, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.9638376383763838, |
|
"grad_norm": 0.4297032058238983, |
|
"learning_rate": 3.978245250040702e-07, |
|
"loss": 2.8506, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.9645756457564576, |
|
"grad_norm": 0.4474189579486847, |
|
"learning_rate": 3.817729653386892e-07, |
|
"loss": 2.8261, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 0.9653136531365314, |
|
"grad_norm": 0.43458986282348633, |
|
"learning_rate": 3.660506946085829e-07, |
|
"loss": 2.8319, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.9660516605166052, |
|
"grad_norm": 0.4418502151966095, |
|
"learning_rate": 3.506578171538377e-07, |
|
"loss": 2.8326, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 0.966789667896679, |
|
"grad_norm": 0.4373183846473694, |
|
"learning_rate": 3.355944351285278e-07, |
|
"loss": 2.7896, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.9675276752767528, |
|
"grad_norm": 0.4467260241508484, |
|
"learning_rate": 3.2086064850004314e-07, |
|
"loss": 2.8499, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 0.9682656826568266, |
|
"grad_norm": 0.45079532265663147, |
|
"learning_rate": 3.064565550484455e-07, |
|
"loss": 2.8005, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.9690036900369003, |
|
"grad_norm": 0.4311223328113556, |
|
"learning_rate": 2.9238225036579693e-07, |
|
"loss": 2.8419, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 0.9697416974169741, |
|
"grad_norm": 0.4524695575237274, |
|
"learning_rate": 2.7863782785552685e-07, |
|
"loss": 2.8581, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.9704797047970479, |
|
"grad_norm": 0.4483130872249603, |
|
"learning_rate": 2.65223378731827e-07, |
|
"loss": 2.8275, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.9712177121771217, |
|
"grad_norm": 0.4370816946029663, |
|
"learning_rate": 2.521389920190298e-07, |
|
"loss": 2.8673, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.9719557195571956, |
|
"grad_norm": 0.444195032119751, |
|
"learning_rate": 2.3938475455103083e-07, |
|
"loss": 2.9407, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 0.9726937269372694, |
|
"grad_norm": 0.44004592299461365, |
|
"learning_rate": 2.269607509707006e-07, |
|
"loss": 2.8481, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.9734317343173432, |
|
"grad_norm": 0.44630327820777893, |
|
"learning_rate": 2.1486706372932375e-07, |
|
"loss": 2.7954, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 0.974169741697417, |
|
"grad_norm": 0.42796429991722107, |
|
"learning_rate": 2.031037730860774e-07, |
|
"loss": 2.8533, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.9749077490774908, |
|
"grad_norm": 0.4611528217792511, |
|
"learning_rate": 1.916709571074482e-07, |
|
"loss": 2.8151, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 0.9756457564575646, |
|
"grad_norm": 0.451028972864151, |
|
"learning_rate": 1.8056869166677703e-07, |
|
"loss": 2.8355, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.9763837638376384, |
|
"grad_norm": 0.4451844096183777, |
|
"learning_rate": 1.6979705044369297e-07, |
|
"loss": 2.8121, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 0.9771217712177122, |
|
"grad_norm": 0.4613220989704132, |
|
"learning_rate": 1.5935610492366915e-07, |
|
"loss": 2.9067, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.977859778597786, |
|
"grad_norm": 0.44495347142219543, |
|
"learning_rate": 1.4924592439753416e-07, |
|
"loss": 2.7666, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.9785977859778597, |
|
"grad_norm": 0.4585348963737488, |
|
"learning_rate": 1.394665759610003e-07, |
|
"loss": 2.7254, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.9793357933579335, |
|
"grad_norm": 0.43729352951049805, |
|
"learning_rate": 1.3001812451423068e-07, |
|
"loss": 2.778, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 0.9800738007380074, |
|
"grad_norm": 0.450089693069458, |
|
"learning_rate": 1.209006327614226e-07, |
|
"loss": 2.809, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.9808118081180812, |
|
"grad_norm": 0.43959712982177734, |
|
"learning_rate": 1.1211416121035823e-07, |
|
"loss": 2.8325, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 0.981549815498155, |
|
"grad_norm": 0.4504597783088684, |
|
"learning_rate": 1.036587681720269e-07, |
|
"loss": 2.7841, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.9822878228782288, |
|
"grad_norm": 0.44741228222846985, |
|
"learning_rate": 9.55345097602256e-08, |
|
"loss": 2.8358, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 0.9830258302583026, |
|
"grad_norm": 0.4463639557361603, |
|
"learning_rate": 8.774143989119798e-08, |
|
"loss": 2.8313, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.9837638376383764, |
|
"grad_norm": 0.4775594472885132, |
|
"learning_rate": 8.027961028328479e-08, |
|
"loss": 2.8781, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.9845018450184502, |
|
"grad_norm": 0.4243060350418091, |
|
"learning_rate": 7.314907045653519e-08, |
|
"loss": 2.7926, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.985239852398524, |
|
"grad_norm": 0.43475958704948425, |
|
"learning_rate": 6.634986773244034e-08, |
|
"loss": 2.7885, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.9859778597785978, |
|
"grad_norm": 0.4415262043476105, |
|
"learning_rate": 5.988204723356705e-08, |
|
"loss": 2.7721, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.9867158671586715, |
|
"grad_norm": 0.438672810792923, |
|
"learning_rate": 5.374565188329683e-08, |
|
"loss": 2.8138, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 0.9874538745387453, |
|
"grad_norm": 0.46068814396858215, |
|
"learning_rate": 4.794072240550951e-08, |
|
"loss": 2.7988, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.9881918819188192, |
|
"grad_norm": 0.44185954332351685, |
|
"learning_rate": 4.246729732434451e-08, |
|
"loss": 2.7823, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 0.988929889298893, |
|
"grad_norm": 0.4282056391239166, |
|
"learning_rate": 3.7325412963912235e-08, |
|
"loss": 2.872, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.9896678966789668, |
|
"grad_norm": 0.46537652611732483, |
|
"learning_rate": 3.251510344807751e-08, |
|
"loss": 2.9374, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 0.9904059040590406, |
|
"grad_norm": 0.4430101215839386, |
|
"learning_rate": 2.8036400700232058e-08, |
|
"loss": 2.7839, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.9911439114391144, |
|
"grad_norm": 0.45416316390037537, |
|
"learning_rate": 2.3889334443055744e-08, |
|
"loss": 2.8689, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 0.9918819188191882, |
|
"grad_norm": 0.4388124346733093, |
|
"learning_rate": 2.007393219836118e-08, |
|
"loss": 2.9239, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.992619926199262, |
|
"grad_norm": 0.43018996715545654, |
|
"learning_rate": 1.6590219286871655e-08, |
|
"loss": 2.8412, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.9933579335793358, |
|
"grad_norm": 0.42218539118766785, |
|
"learning_rate": 1.3438218828076832e-08, |
|
"loss": 2.7462, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.9940959409594096, |
|
"grad_norm": 0.4494752883911133, |
|
"learning_rate": 1.0617951740077292e-08, |
|
"loss": 2.8598, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 0.9948339483394834, |
|
"grad_norm": 0.41235294938087463, |
|
"learning_rate": 8.12943673943467e-09, |
|
"loss": 2.8083, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.9955719557195571, |
|
"grad_norm": 0.4434475004673004, |
|
"learning_rate": 5.9726903410661786e-09, |
|
"loss": 2.929, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 0.996309963099631, |
|
"grad_norm": 0.43739476799964905, |
|
"learning_rate": 4.147726858100276e-09, |
|
"loss": 2.844, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.9970479704797048, |
|
"grad_norm": 0.46633192896842957, |
|
"learning_rate": 2.6545584018211613e-09, |
|
"loss": 2.8096, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 0.9977859778597786, |
|
"grad_norm": 0.4500004053115845, |
|
"learning_rate": 1.4931948815744e-09, |
|
"loss": 2.8317, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.9985239852398524, |
|
"grad_norm": 0.45538780093193054, |
|
"learning_rate": 6.636440046892123e-10, |
|
"loss": 2.8792, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 0.9992619926199262, |
|
"grad_norm": 0.4632636308670044, |
|
"learning_rate": 1.6591127643961202e-10, |
|
"loss": 2.8205, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4356023073196411, |
|
"learning_rate": 0.0, |
|
"loss": 2.8161, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 13550, |
|
"total_flos": 5.404563590201999e+18, |
|
"train_loss": 3.236852196415412, |
|
"train_runtime": 292848.6684, |
|
"train_samples_per_second": 0.74, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 13550, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.404563590201999e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|