|
{ |
|
"best_metric": 0.7808219178082192, |
|
"best_model_checkpoint": "MAE-CT-M1N0-M12_v8_split2_v3/checkpoint-9514", |
|
"epoch": 147.006, |
|
"eval_steps": 500, |
|
"global_step": 10500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009523809523809524, |
|
"grad_norm": 2.1773910522460938, |
|
"learning_rate": 9.523809523809525e-08, |
|
"loss": 0.6826, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0019047619047619048, |
|
"grad_norm": 1.9768496751785278, |
|
"learning_rate": 1.904761904761905e-07, |
|
"loss": 0.679, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002857142857142857, |
|
"grad_norm": 4.951999187469482, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.6363, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0038095238095238095, |
|
"grad_norm": 2.1496081352233887, |
|
"learning_rate": 3.80952380952381e-07, |
|
"loss": 0.688, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.004761904761904762, |
|
"grad_norm": 3.6749255657196045, |
|
"learning_rate": 4.7619047619047623e-07, |
|
"loss": 0.6768, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.005714285714285714, |
|
"grad_norm": 2.2195637226104736, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.6139, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.006666666666666667, |
|
"grad_norm": 2.312976598739624, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.6593, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0067619047619047615, |
|
"eval_accuracy": 0.6301369863013698, |
|
"eval_loss": 0.6615375280380249, |
|
"eval_runtime": 15.1304, |
|
"eval_samples_per_second": 4.825, |
|
"eval_steps_per_second": 1.256, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.000857142857143, |
|
"grad_norm": 2.679697036743164, |
|
"learning_rate": 7.61904761904762e-07, |
|
"loss": 0.6401, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0018095238095237, |
|
"grad_norm": 2.0064761638641357, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.6628, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0027619047619047, |
|
"grad_norm": 4.904491901397705, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 0.6491, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0037142857142858, |
|
"grad_norm": 2.616330623626709, |
|
"learning_rate": 1.0476190476190478e-06, |
|
"loss": 0.6896, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0046666666666666, |
|
"grad_norm": 4.088841438293457, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.6674, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0056190476190476, |
|
"grad_norm": 5.650446891784668, |
|
"learning_rate": 1.2380952380952382e-06, |
|
"loss": 0.6174, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0065714285714287, |
|
"grad_norm": 5.058526039123535, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.6079, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0067619047619047, |
|
"eval_accuracy": 0.6301369863013698, |
|
"eval_loss": 0.657329797744751, |
|
"eval_runtime": 16.6466, |
|
"eval_samples_per_second": 4.385, |
|
"eval_steps_per_second": 1.141, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.0007619047619047, |
|
"grad_norm": 8.990111351013184, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.6702, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.001714285714286, |
|
"grad_norm": 4.864251136779785, |
|
"learning_rate": 1.523809523809524e-06, |
|
"loss": 0.6142, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.002666666666667, |
|
"grad_norm": 7.566549777984619, |
|
"learning_rate": 1.6190476190476193e-06, |
|
"loss": 0.6563, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0036190476190474, |
|
"grad_norm": 6.286524295806885, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.6098, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.0045714285714284, |
|
"grad_norm": 12.885725021362305, |
|
"learning_rate": 1.8095238095238097e-06, |
|
"loss": 0.7503, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0055238095238095, |
|
"grad_norm": 3.7384145259857178, |
|
"learning_rate": 1.904761904761905e-06, |
|
"loss": 0.6165, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0064761904761905, |
|
"grad_norm": 7.935666561126709, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.6428, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.006761904761905, |
|
"eval_accuracy": 0.6301369863013698, |
|
"eval_loss": 0.6509902477264404, |
|
"eval_runtime": 16.1655, |
|
"eval_samples_per_second": 4.516, |
|
"eval_steps_per_second": 1.175, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.0006666666666666, |
|
"grad_norm": 9.862627029418945, |
|
"learning_rate": 2.0952380952380955e-06, |
|
"loss": 0.6517, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0016190476190476, |
|
"grad_norm": 20.06699562072754, |
|
"learning_rate": 2.1904761904761908e-06, |
|
"loss": 0.5473, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0025714285714287, |
|
"grad_norm": 7.37990140914917, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.6826, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.0035238095238097, |
|
"grad_norm": 5.7714924812316895, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.6766, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0044761904761903, |
|
"grad_norm": 5.849754333496094, |
|
"learning_rate": 2.4761904761904764e-06, |
|
"loss": 0.5214, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0054285714285713, |
|
"grad_norm": 8.858195304870605, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.6706, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.0063809523809524, |
|
"grad_norm": 5.670506954193115, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.7179, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.006761904761905, |
|
"eval_accuracy": 0.6301369863013698, |
|
"eval_loss": 0.6320936679840088, |
|
"eval_runtime": 15.3272, |
|
"eval_samples_per_second": 4.763, |
|
"eval_steps_per_second": 1.24, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.000571428571429, |
|
"grad_norm": 5.734492778778076, |
|
"learning_rate": 2.7619047619047625e-06, |
|
"loss": 0.6192, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.0015238095238095, |
|
"grad_norm": 3.9660820960998535, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.5594, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.00247619047619, |
|
"grad_norm": 6.561830520629883, |
|
"learning_rate": 2.9523809523809525e-06, |
|
"loss": 0.5109, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.003428571428572, |
|
"grad_norm": 21.888965606689453, |
|
"learning_rate": 3.047619047619048e-06, |
|
"loss": 0.7406, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.004380952380952, |
|
"grad_norm": 9.803459167480469, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.6563, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.005333333333334, |
|
"grad_norm": 8.188459396362305, |
|
"learning_rate": 3.2380952380952385e-06, |
|
"loss": 0.661, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.006285714285714, |
|
"grad_norm": 10.706894874572754, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.6131, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.0067619047619045, |
|
"eval_accuracy": 0.6301369863013698, |
|
"eval_loss": 0.6464080810546875, |
|
"eval_runtime": 14.937, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 1.272, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 5.00047619047619, |
|
"grad_norm": 4.722347736358643, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.5342, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.001428571428572, |
|
"grad_norm": 15.417410850524902, |
|
"learning_rate": 3.523809523809524e-06, |
|
"loss": 0.6338, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.002380952380952, |
|
"grad_norm": 11.414576530456543, |
|
"learning_rate": 3.6190476190476194e-06, |
|
"loss": 0.5611, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.003333333333333, |
|
"grad_norm": 5.671505928039551, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.5605, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.0042857142857144, |
|
"grad_norm": 17.15395164489746, |
|
"learning_rate": 3.80952380952381e-06, |
|
"loss": 0.5381, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.005238095238095, |
|
"grad_norm": 25.007305145263672, |
|
"learning_rate": 3.9047619047619055e-06, |
|
"loss": 0.4602, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.0061904761904765, |
|
"grad_norm": 20.704477310180664, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.6769, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.0067619047619045, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 0.5554103851318359, |
|
"eval_runtime": 15.153, |
|
"eval_samples_per_second": 4.818, |
|
"eval_steps_per_second": 1.254, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.000380952380953, |
|
"grad_norm": 17.35660743713379, |
|
"learning_rate": 4.095238095238096e-06, |
|
"loss": 0.6151, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.001333333333333, |
|
"grad_norm": 11.369240760803223, |
|
"learning_rate": 4.190476190476191e-06, |
|
"loss": 0.479, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.002285714285715, |
|
"grad_norm": 20.25254249572754, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.3898, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.003238095238095, |
|
"grad_norm": 43.16263198852539, |
|
"learning_rate": 4.3809523809523815e-06, |
|
"loss": 0.3741, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.004190476190476, |
|
"grad_norm": 14.151541709899902, |
|
"learning_rate": 4.476190476190477e-06, |
|
"loss": 0.5788, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.005142857142857, |
|
"grad_norm": 29.54399871826172, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.5542, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.006095238095238, |
|
"grad_norm": 20.1972599029541, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.7054, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.0067619047619045, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 0.5056493878364563, |
|
"eval_runtime": 15.8055, |
|
"eval_samples_per_second": 4.619, |
|
"eval_steps_per_second": 1.202, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 7.000285714285714, |
|
"grad_norm": 23.355619430541992, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.5019, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0012380952380955, |
|
"grad_norm": 24.143203735351562, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.4324, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.002190476190476, |
|
"grad_norm": 7.906174659729004, |
|
"learning_rate": 4.952380952380953e-06, |
|
"loss": 0.2821, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.003142857142858, |
|
"grad_norm": 93.18160247802734, |
|
"learning_rate": 5.047619047619048e-06, |
|
"loss": 0.5616, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.004095238095238, |
|
"grad_norm": 106.39690399169922, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.317, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.005047619047619, |
|
"grad_norm": 54.72929000854492, |
|
"learning_rate": 5.2380952380952384e-06, |
|
"loss": 0.3681, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.006, |
|
"grad_norm": 25.610824584960938, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.758, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.0067619047619045, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 0.5271527767181396, |
|
"eval_runtime": 15.4918, |
|
"eval_samples_per_second": 4.712, |
|
"eval_steps_per_second": 1.226, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 8.000190476190475, |
|
"grad_norm": 22.395870208740234, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.4034, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.001142857142858, |
|
"grad_norm": 11.43806266784668, |
|
"learning_rate": 5.523809523809525e-06, |
|
"loss": 0.4353, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.002095238095238, |
|
"grad_norm": 8.536911010742188, |
|
"learning_rate": 5.619047619047619e-06, |
|
"loss": 0.2642, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.003047619047619, |
|
"grad_norm": 34.64637756347656, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.5705, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.004, |
|
"grad_norm": 68.64958953857422, |
|
"learning_rate": 5.8095238095238106e-06, |
|
"loss": 0.3396, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.00495238095238, |
|
"grad_norm": 16.429523468017578, |
|
"learning_rate": 5.904761904761905e-06, |
|
"loss": 0.6441, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.005904761904763, |
|
"grad_norm": 22.96882438659668, |
|
"learning_rate": 6e-06, |
|
"loss": 0.5288, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 8.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 0.5493802428245544, |
|
"eval_runtime": 14.9497, |
|
"eval_samples_per_second": 4.883, |
|
"eval_steps_per_second": 1.271, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.000095238095238, |
|
"grad_norm": 4.000064849853516, |
|
"learning_rate": 6.095238095238096e-06, |
|
"loss": 0.3453, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.001047619047618, |
|
"grad_norm": 2.3602702617645264, |
|
"learning_rate": 6.1904761904761914e-06, |
|
"loss": 0.2382, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.002, |
|
"grad_norm": 3.782233953475952, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.1652, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.002952380952381, |
|
"grad_norm": 39.49138259887695, |
|
"learning_rate": 6.380952380952381e-06, |
|
"loss": 0.2159, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.003904761904762, |
|
"grad_norm": 94.00725555419922, |
|
"learning_rate": 6.476190476190477e-06, |
|
"loss": 0.3012, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.004857142857142, |
|
"grad_norm": 75.73612976074219, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.6118, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.005809523809523, |
|
"grad_norm": 15.038615226745605, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.4917, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.006761904761905, |
|
"grad_norm": 93.08511352539062, |
|
"learning_rate": 6.761904761904763e-06, |
|
"loss": 0.3878, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 9.006761904761905, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 0.518031656742096, |
|
"eval_runtime": 15.3307, |
|
"eval_samples_per_second": 4.762, |
|
"eval_steps_per_second": 1.239, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.00095238095238, |
|
"grad_norm": 0.7588180303573608, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.2134, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.001904761904761, |
|
"grad_norm": 17.79435157775879, |
|
"learning_rate": 6.952380952380952e-06, |
|
"loss": 0.5438, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.002857142857144, |
|
"grad_norm": 12.763163566589355, |
|
"learning_rate": 7.047619047619048e-06, |
|
"loss": 0.3931, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.003809523809524, |
|
"grad_norm": 2.9164113998413086, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.3114, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.004761904761905, |
|
"grad_norm": 107.55001068115234, |
|
"learning_rate": 7.238095238095239e-06, |
|
"loss": 0.8739, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.005714285714285, |
|
"grad_norm": 38.44641876220703, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.2957, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 10.006666666666666, |
|
"grad_norm": 2.34265398979187, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.2466, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 10.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 0.7316363453865051, |
|
"eval_runtime": 15.8497, |
|
"eval_samples_per_second": 4.606, |
|
"eval_steps_per_second": 1.199, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 11.000857142857143, |
|
"grad_norm": 14.003036499023438, |
|
"learning_rate": 7.523809523809524e-06, |
|
"loss": 0.6049, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.001809523809523, |
|
"grad_norm": 35.401275634765625, |
|
"learning_rate": 7.61904761904762e-06, |
|
"loss": 0.257, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.002761904761904, |
|
"grad_norm": 0.565825343132019, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0747, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.003714285714286, |
|
"grad_norm": 61.133934020996094, |
|
"learning_rate": 7.809523809523811e-06, |
|
"loss": 0.5517, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.004666666666667, |
|
"grad_norm": 31.394474029541016, |
|
"learning_rate": 7.904761904761904e-06, |
|
"loss": 0.3729, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 11.005619047619048, |
|
"grad_norm": 148.84947204589844, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4434, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.006571428571428, |
|
"grad_norm": 23.926483154296875, |
|
"learning_rate": 8.095238095238097e-06, |
|
"loss": 0.8338, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 11.006761904761905, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 1.1720706224441528, |
|
"eval_runtime": 15.3598, |
|
"eval_samples_per_second": 4.753, |
|
"eval_steps_per_second": 1.237, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 12.000761904761905, |
|
"grad_norm": 14.266438484191895, |
|
"learning_rate": 8.190476190476192e-06, |
|
"loss": 0.6715, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.001714285714286, |
|
"grad_norm": 32.107173919677734, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.1961, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.002666666666666, |
|
"grad_norm": 88.14974212646484, |
|
"learning_rate": 8.380952380952382e-06, |
|
"loss": 0.2895, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.003619047619047, |
|
"grad_norm": 68.69145965576172, |
|
"learning_rate": 8.476190476190477e-06, |
|
"loss": 0.3144, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 12.00457142857143, |
|
"grad_norm": 0.5290740132331848, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.3309, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.00552380952381, |
|
"grad_norm": 25.69615364074707, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.5129, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 12.00647619047619, |
|
"grad_norm": 2.943721294403076, |
|
"learning_rate": 8.761904761904763e-06, |
|
"loss": 0.603, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 12.006761904761905, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 0.7357335686683655, |
|
"eval_runtime": 15.5104, |
|
"eval_samples_per_second": 4.707, |
|
"eval_steps_per_second": 1.225, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 13.000666666666667, |
|
"grad_norm": 45.28472900390625, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.4108, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.001619047619048, |
|
"grad_norm": 50.090789794921875, |
|
"learning_rate": 8.952380952380953e-06, |
|
"loss": 0.2307, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.002571428571429, |
|
"grad_norm": 0.19888624548912048, |
|
"learning_rate": 9.047619047619049e-06, |
|
"loss": 0.5937, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.00352380952381, |
|
"grad_norm": 0.4607914388179779, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.2653, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.00447619047619, |
|
"grad_norm": 6.823062419891357, |
|
"learning_rate": 9.238095238095239e-06, |
|
"loss": 0.143, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 13.005428571428572, |
|
"grad_norm": 58.4351692199707, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.4181, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.006380952380953, |
|
"grad_norm": 4.692024230957031, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.2309, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 13.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 1.196142554283142, |
|
"eval_runtime": 15.237, |
|
"eval_samples_per_second": 4.791, |
|
"eval_steps_per_second": 1.247, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 14.000571428571428, |
|
"grad_norm": 0.3075767159461975, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.1516, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.00152380952381, |
|
"grad_norm": 0.18465390801429749, |
|
"learning_rate": 9.61904761904762e-06, |
|
"loss": 0.0162, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.002476190476191, |
|
"grad_norm": 5.012660026550293, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.4848, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.003428571428572, |
|
"grad_norm": 49.672149658203125, |
|
"learning_rate": 9.80952380952381e-06, |
|
"loss": 0.3613, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 14.004380952380952, |
|
"grad_norm": 2.5375776290893555, |
|
"learning_rate": 9.904761904761906e-06, |
|
"loss": 0.4663, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.005333333333333, |
|
"grad_norm": 12.158143043518066, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6027, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.006285714285715, |
|
"grad_norm": 0.09932547807693481, |
|
"learning_rate": 9.989417989417989e-06, |
|
"loss": 0.2656, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 14.006761904761905, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 1.1104544401168823, |
|
"eval_runtime": 16.2891, |
|
"eval_samples_per_second": 4.482, |
|
"eval_steps_per_second": 1.166, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 15.00047619047619, |
|
"grad_norm": 0.19421745836734772, |
|
"learning_rate": 9.97883597883598e-06, |
|
"loss": 0.1117, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 15.001428571428571, |
|
"grad_norm": 0.04361328110098839, |
|
"learning_rate": 9.968253968253969e-06, |
|
"loss": 0.4367, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.002380952380953, |
|
"grad_norm": 0.12471567094326019, |
|
"learning_rate": 9.957671957671959e-06, |
|
"loss": 0.2725, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 15.003333333333334, |
|
"grad_norm": 20.92134666442871, |
|
"learning_rate": 9.947089947089947e-06, |
|
"loss": 0.2468, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.004285714285714, |
|
"grad_norm": 42.768619537353516, |
|
"learning_rate": 9.936507936507937e-06, |
|
"loss": 0.1634, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 15.005238095238095, |
|
"grad_norm": 1.069857120513916, |
|
"learning_rate": 9.925925925925927e-06, |
|
"loss": 0.0557, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.006190476190476, |
|
"grad_norm": 3.843510866165161, |
|
"learning_rate": 9.915343915343916e-06, |
|
"loss": 0.5578, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 15.006761904761905, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 1.3217192888259888, |
|
"eval_runtime": 19.8127, |
|
"eval_samples_per_second": 3.684, |
|
"eval_steps_per_second": 0.959, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 16.00038095238095, |
|
"grad_norm": 0.07862857729196548, |
|
"learning_rate": 9.904761904761906e-06, |
|
"loss": 0.4283, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.001333333333335, |
|
"grad_norm": 1.592122197151184, |
|
"learning_rate": 9.894179894179896e-06, |
|
"loss": 0.0992, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 16.002285714285716, |
|
"grad_norm": 0.013593673706054688, |
|
"learning_rate": 9.883597883597884e-06, |
|
"loss": 0.338, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 16.003238095238096, |
|
"grad_norm": 2.4370782375335693, |
|
"learning_rate": 9.873015873015874e-06, |
|
"loss": 0.4465, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 16.004190476190477, |
|
"grad_norm": 82.60801696777344, |
|
"learning_rate": 9.862433862433864e-06, |
|
"loss": 0.356, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 16.005142857142857, |
|
"grad_norm": 4.153460502624512, |
|
"learning_rate": 9.851851851851852e-06, |
|
"loss": 0.1129, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 16.006095238095238, |
|
"grad_norm": 30.529008865356445, |
|
"learning_rate": 9.841269841269842e-06, |
|
"loss": 0.2875, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 1.2617907524108887, |
|
"eval_runtime": 17.587, |
|
"eval_samples_per_second": 4.151, |
|
"eval_steps_per_second": 1.08, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 17.000285714285713, |
|
"grad_norm": 0.2053322196006775, |
|
"learning_rate": 9.830687830687832e-06, |
|
"loss": 0.0168, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 17.001238095238094, |
|
"grad_norm": 54.472354888916016, |
|
"learning_rate": 9.82010582010582e-06, |
|
"loss": 0.3274, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 17.002190476190478, |
|
"grad_norm": 0.04339295253157616, |
|
"learning_rate": 9.80952380952381e-06, |
|
"loss": 0.1369, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 17.00314285714286, |
|
"grad_norm": 0.022288190200924873, |
|
"learning_rate": 9.7989417989418e-06, |
|
"loss": 0.1943, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 17.00409523809524, |
|
"grad_norm": 0.15292459726333618, |
|
"learning_rate": 9.788359788359789e-06, |
|
"loss": 0.2149, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.00504761904762, |
|
"grad_norm": 0.014596667140722275, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 0.3248, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.006, |
|
"grad_norm": 0.04519602656364441, |
|
"learning_rate": 9.767195767195769e-06, |
|
"loss": 0.4332, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 17.006761904761905, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 1.3749818801879883, |
|
"eval_runtime": 18.1834, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 1.045, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 18.000190476190475, |
|
"grad_norm": 0.18823349475860596, |
|
"learning_rate": 9.756613756613757e-06, |
|
"loss": 0.3385, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 18.001142857142856, |
|
"grad_norm": 0.1467002034187317, |
|
"learning_rate": 9.746031746031747e-06, |
|
"loss": 0.109, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 18.002095238095237, |
|
"grad_norm": 158.585693359375, |
|
"learning_rate": 9.735449735449735e-06, |
|
"loss": 0.3271, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.00304761904762, |
|
"grad_norm": 0.12998898327350616, |
|
"learning_rate": 9.724867724867725e-06, |
|
"loss": 0.2255, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.004, |
|
"grad_norm": 111.00196075439453, |
|
"learning_rate": 9.714285714285715e-06, |
|
"loss": 0.5387, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 18.004952380952382, |
|
"grad_norm": 0.07534196972846985, |
|
"learning_rate": 9.703703703703703e-06, |
|
"loss": 0.0355, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 18.005904761904763, |
|
"grad_norm": 0.1609535962343216, |
|
"learning_rate": 9.693121693121693e-06, |
|
"loss": 0.4794, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 18.006761904761905, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 1.536882758140564, |
|
"eval_runtime": 18.4676, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 1.029, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 19.000095238095238, |
|
"grad_norm": 0.008525008335709572, |
|
"learning_rate": 9.682539682539683e-06, |
|
"loss": 0.2529, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.00104761904762, |
|
"grad_norm": 24.530174255371094, |
|
"learning_rate": 9.671957671957672e-06, |
|
"loss": 0.1124, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 19.002, |
|
"grad_norm": 0.33917537331581116, |
|
"learning_rate": 9.661375661375663e-06, |
|
"loss": 0.0324, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 19.00295238095238, |
|
"grad_norm": 97.87699890136719, |
|
"learning_rate": 9.650793650793652e-06, |
|
"loss": 0.167, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 19.003904761904764, |
|
"grad_norm": 0.010749807581305504, |
|
"learning_rate": 9.64021164021164e-06, |
|
"loss": 0.6815, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 19.004857142857144, |
|
"grad_norm": 257.39935302734375, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.3253, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.005809523809525, |
|
"grad_norm": 0.015413103625178337, |
|
"learning_rate": 9.61904761904762e-06, |
|
"loss": 0.2588, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 19.006761904761905, |
|
"grad_norm": 0.02472815476357937, |
|
"learning_rate": 9.60846560846561e-06, |
|
"loss": 0.3151, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 19.006761904761905, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 1.2066000699996948, |
|
"eval_runtime": 18.6618, |
|
"eval_samples_per_second": 3.912, |
|
"eval_steps_per_second": 1.018, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 20.00095238095238, |
|
"grad_norm": 0.3033144176006317, |
|
"learning_rate": 9.597883597883598e-06, |
|
"loss": 0.3416, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 20.00190476190476, |
|
"grad_norm": 0.1329488307237625, |
|
"learning_rate": 9.587301587301588e-06, |
|
"loss": 0.7527, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 20.002857142857142, |
|
"grad_norm": 0.027109788730740547, |
|
"learning_rate": 9.576719576719578e-06, |
|
"loss": 0.0944, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.003809523809522, |
|
"grad_norm": 104.4439468383789, |
|
"learning_rate": 9.566137566137567e-06, |
|
"loss": 0.308, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 20.004761904761907, |
|
"grad_norm": 1.7383038997650146, |
|
"learning_rate": 9.555555555555556e-06, |
|
"loss": 0.0219, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 20.005714285714287, |
|
"grad_norm": 35.80508041381836, |
|
"learning_rate": 9.544973544973546e-06, |
|
"loss": 0.3035, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 20.006666666666668, |
|
"grad_norm": 0.024829713627696037, |
|
"learning_rate": 9.534391534391535e-06, |
|
"loss": 0.2433, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 20.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 1.6149253845214844, |
|
"eval_runtime": 17.0751, |
|
"eval_samples_per_second": 4.275, |
|
"eval_steps_per_second": 1.113, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 21.000857142857143, |
|
"grad_norm": 2.3730125427246094, |
|
"learning_rate": 9.523809523809525e-06, |
|
"loss": 0.0434, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.001809523809523, |
|
"grad_norm": 0.0377766489982605, |
|
"learning_rate": 9.513227513227515e-06, |
|
"loss": 0.0014, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 21.002761904761904, |
|
"grad_norm": 0.019763244315981865, |
|
"learning_rate": 9.502645502645503e-06, |
|
"loss": 0.0976, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.003714285714285, |
|
"grad_norm": 0.017320644110441208, |
|
"learning_rate": 9.492063492063493e-06, |
|
"loss": 0.2573, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 21.004666666666665, |
|
"grad_norm": 335.51092529296875, |
|
"learning_rate": 9.481481481481483e-06, |
|
"loss": 0.436, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 21.005619047619046, |
|
"grad_norm": 35.196353912353516, |
|
"learning_rate": 9.470899470899471e-06, |
|
"loss": 0.2655, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 21.00657142857143, |
|
"grad_norm": 168.40211486816406, |
|
"learning_rate": 9.460317460317461e-06, |
|
"loss": 0.1373, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 21.006761904761905, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 1.6915591955184937, |
|
"eval_runtime": 17.3309, |
|
"eval_samples_per_second": 4.212, |
|
"eval_steps_per_second": 1.096, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 22.000761904761905, |
|
"grad_norm": 0.034249383956193924, |
|
"learning_rate": 9.449735449735451e-06, |
|
"loss": 0.0381, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 22.001714285714286, |
|
"grad_norm": 0.039076317101716995, |
|
"learning_rate": 9.43915343915344e-06, |
|
"loss": 0.0004, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 22.002666666666666, |
|
"grad_norm": 101.02141571044922, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.1968, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 22.003619047619047, |
|
"grad_norm": 0.02349863387644291, |
|
"learning_rate": 9.417989417989418e-06, |
|
"loss": 0.1554, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.004571428571428, |
|
"grad_norm": 0.04626596346497536, |
|
"learning_rate": 9.407407407407408e-06, |
|
"loss": 0.0012, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 22.005523809523808, |
|
"grad_norm": 0.008542805910110474, |
|
"learning_rate": 9.396825396825398e-06, |
|
"loss": 0.1149, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 22.00647619047619, |
|
"grad_norm": 0.11814195662736893, |
|
"learning_rate": 9.386243386243386e-06, |
|
"loss": 0.0864, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 22.006761904761905, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.367408275604248, |
|
"eval_runtime": 18.6025, |
|
"eval_samples_per_second": 3.924, |
|
"eval_steps_per_second": 1.021, |
|
"step": 1633 |
|
}, |
|
{ |
|
"epoch": 23.000666666666667, |
|
"grad_norm": 0.843479335308075, |
|
"learning_rate": 9.375661375661376e-06, |
|
"loss": 0.1686, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 23.001619047619048, |
|
"grad_norm": 0.07934589684009552, |
|
"learning_rate": 9.365079365079366e-06, |
|
"loss": 0.1767, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 23.00257142857143, |
|
"grad_norm": 0.01831832155585289, |
|
"learning_rate": 9.354497354497354e-06, |
|
"loss": 0.2201, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 23.00352380952381, |
|
"grad_norm": 0.008153264410793781, |
|
"learning_rate": 9.343915343915344e-06, |
|
"loss": 0.0003, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 23.00447619047619, |
|
"grad_norm": 0.009105395525693893, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.0006, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 23.00542857142857, |
|
"grad_norm": 0.021583031862974167, |
|
"learning_rate": 9.322751322751323e-06, |
|
"loss": 0.265, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 23.00638095238095, |
|
"grad_norm": 0.012740103527903557, |
|
"learning_rate": 9.312169312169313e-06, |
|
"loss": 0.2188, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 23.006761904761905, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 2.404127836227417, |
|
"eval_runtime": 21.2975, |
|
"eval_samples_per_second": 3.428, |
|
"eval_steps_per_second": 0.892, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 24.00057142857143, |
|
"grad_norm": 0.03421939164400101, |
|
"learning_rate": 9.301587301587303e-06, |
|
"loss": 0.2348, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.00152380952381, |
|
"grad_norm": 198.67616271972656, |
|
"learning_rate": 9.291005291005291e-06, |
|
"loss": 0.3142, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 24.00247619047619, |
|
"grad_norm": 3.602328300476074, |
|
"learning_rate": 9.280423280423281e-06, |
|
"loss": 0.004, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 24.00342857142857, |
|
"grad_norm": 1.9957921504974365, |
|
"learning_rate": 9.26984126984127e-06, |
|
"loss": 0.3429, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 24.004380952380952, |
|
"grad_norm": 0.020262375473976135, |
|
"learning_rate": 9.25925925925926e-06, |
|
"loss": 0.0009, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.005333333333333, |
|
"grad_norm": 168.87521362304688, |
|
"learning_rate": 9.248677248677249e-06, |
|
"loss": 0.4748, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 24.006285714285713, |
|
"grad_norm": 16.499473571777344, |
|
"learning_rate": 9.238095238095239e-06, |
|
"loss": 0.089, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 24.006761904761905, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 1.8638439178466797, |
|
"eval_runtime": 21.4844, |
|
"eval_samples_per_second": 3.398, |
|
"eval_steps_per_second": 0.884, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 25.000476190476192, |
|
"grad_norm": 0.009172641672194004, |
|
"learning_rate": 9.227513227513229e-06, |
|
"loss": 0.0032, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 25.001428571428573, |
|
"grad_norm": 2.413815498352051, |
|
"learning_rate": 9.216931216931217e-06, |
|
"loss": 0.358, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 25.002380952380953, |
|
"grad_norm": 0.0387704111635685, |
|
"learning_rate": 9.206349206349207e-06, |
|
"loss": 0.112, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.003333333333334, |
|
"grad_norm": 0.009773080237209797, |
|
"learning_rate": 9.195767195767197e-06, |
|
"loss": 0.2075, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 25.004285714285714, |
|
"grad_norm": 2.395778179168701, |
|
"learning_rate": 9.185185185185186e-06, |
|
"loss": 0.001, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 25.005238095238095, |
|
"grad_norm": 0.005421150010079145, |
|
"learning_rate": 9.174603174603176e-06, |
|
"loss": 0.0004, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 25.006190476190476, |
|
"grad_norm": 0.01611645519733429, |
|
"learning_rate": 9.164021164021166e-06, |
|
"loss": 0.0911, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 25.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.0675227642059326, |
|
"eval_runtime": 13.4234, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 1.415, |
|
"step": 1846 |
|
}, |
|
{ |
|
"epoch": 26.00038095238095, |
|
"grad_norm": 0.004970152862370014, |
|
"learning_rate": 9.153439153439154e-06, |
|
"loss": 0.0006, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 26.001333333333335, |
|
"grad_norm": 214.16207885742188, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.0221, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 26.002285714285716, |
|
"grad_norm": 194.74468994140625, |
|
"learning_rate": 9.132275132275134e-06, |
|
"loss": 0.1677, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 26.003238095238096, |
|
"grad_norm": 0.004110685084015131, |
|
"learning_rate": 9.121693121693122e-06, |
|
"loss": 0.0005, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 26.004190476190477, |
|
"grad_norm": 5.675693035125732, |
|
"learning_rate": 9.111111111111112e-06, |
|
"loss": 0.0173, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 26.005142857142857, |
|
"grad_norm": 0.004518811125308275, |
|
"learning_rate": 9.1005291005291e-06, |
|
"loss": 0.1337, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.006095238095238, |
|
"grad_norm": 0.0045015704818069935, |
|
"learning_rate": 9.08994708994709e-06, |
|
"loss": 0.137, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 26.006761904761905, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 1.8597570657730103, |
|
"eval_runtime": 13.5073, |
|
"eval_samples_per_second": 5.404, |
|
"eval_steps_per_second": 1.407, |
|
"step": 1917 |
|
}, |
|
{ |
|
"epoch": 27.000285714285713, |
|
"grad_norm": 0.004443326033651829, |
|
"learning_rate": 9.07936507936508e-06, |
|
"loss": 0.0003, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 27.001238095238094, |
|
"grad_norm": 75.04074096679688, |
|
"learning_rate": 9.068783068783069e-06, |
|
"loss": 0.0029, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 27.002190476190478, |
|
"grad_norm": 0.003498647827655077, |
|
"learning_rate": 9.058201058201059e-06, |
|
"loss": 0.2078, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 27.00314285714286, |
|
"grad_norm": 0.02224644646048546, |
|
"learning_rate": 9.047619047619049e-06, |
|
"loss": 0.0011, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 27.00409523809524, |
|
"grad_norm": 0.03713711351156235, |
|
"learning_rate": 9.037037037037037e-06, |
|
"loss": 0.0003, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 27.00504761904762, |
|
"grad_norm": 0.053550925105810165, |
|
"learning_rate": 9.026455026455027e-06, |
|
"loss": 0.0503, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 27.006, |
|
"grad_norm": 0.0075949616730213165, |
|
"learning_rate": 9.015873015873017e-06, |
|
"loss": 0.1882, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 27.006761904761905, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 1.689678430557251, |
|
"eval_runtime": 13.7634, |
|
"eval_samples_per_second": 5.304, |
|
"eval_steps_per_second": 1.38, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 28.000190476190475, |
|
"grad_norm": 0.033333685249090195, |
|
"learning_rate": 9.005291005291005e-06, |
|
"loss": 0.264, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 28.001142857142856, |
|
"grad_norm": 0.01675398275256157, |
|
"learning_rate": 8.994708994708995e-06, |
|
"loss": 0.0012, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.002095238095237, |
|
"grad_norm": 0.20471233129501343, |
|
"learning_rate": 8.984126984126985e-06, |
|
"loss": 0.0002, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 28.00304761904762, |
|
"grad_norm": 0.035939306020736694, |
|
"learning_rate": 8.973544973544973e-06, |
|
"loss": 0.0818, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 28.004, |
|
"grad_norm": 0.007089742925018072, |
|
"learning_rate": 8.962962962962963e-06, |
|
"loss": 0.1263, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 28.004952380952382, |
|
"grad_norm": 0.01347515732049942, |
|
"learning_rate": 8.952380952380953e-06, |
|
"loss": 0.1183, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 28.005904761904763, |
|
"grad_norm": 6.9969658851623535, |
|
"learning_rate": 8.941798941798942e-06, |
|
"loss": 0.1562, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 28.006761904761905, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.626549243927002, |
|
"eval_runtime": 15.1818, |
|
"eval_samples_per_second": 4.808, |
|
"eval_steps_per_second": 1.251, |
|
"step": 2059 |
|
}, |
|
{ |
|
"epoch": 29.000095238095238, |
|
"grad_norm": 0.0020758844912052155, |
|
"learning_rate": 8.931216931216932e-06, |
|
"loss": 0.7325, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 29.00104761904762, |
|
"grad_norm": 0.02904440276324749, |
|
"learning_rate": 8.920634920634922e-06, |
|
"loss": 0.0002, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 29.002, |
|
"grad_norm": 0.030916374176740646, |
|
"learning_rate": 8.910052910052912e-06, |
|
"loss": 0.0126, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 29.00295238095238, |
|
"grad_norm": 0.00556580675765872, |
|
"learning_rate": 8.8994708994709e-06, |
|
"loss": 0.0886, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 29.003904761904764, |
|
"grad_norm": 0.0061585246585309505, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1779, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.004857142857144, |
|
"grad_norm": 0.01303119771182537, |
|
"learning_rate": 8.87830687830688e-06, |
|
"loss": 0.0006, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 29.005809523809525, |
|
"grad_norm": 0.01168767735362053, |
|
"learning_rate": 8.867724867724868e-06, |
|
"loss": 0.4101, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 29.006761904761905, |
|
"grad_norm": 0.008081017062067986, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.0003, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 29.006761904761905, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 1.672070026397705, |
|
"eval_runtime": 15.2751, |
|
"eval_samples_per_second": 4.779, |
|
"eval_steps_per_second": 1.244, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 30.00095238095238, |
|
"grad_norm": 0.016996605321764946, |
|
"learning_rate": 8.846560846560848e-06, |
|
"loss": 0.2131, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 30.00190476190476, |
|
"grad_norm": 0.03582558408379555, |
|
"learning_rate": 8.835978835978837e-06, |
|
"loss": 0.0498, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 30.002857142857142, |
|
"grad_norm": 0.02985311672091484, |
|
"learning_rate": 8.825396825396827e-06, |
|
"loss": 0.0008, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 30.003809523809522, |
|
"grad_norm": 0.005227754358202219, |
|
"learning_rate": 8.814814814814817e-06, |
|
"loss": 0.1267, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 30.004761904761907, |
|
"grad_norm": 0.004671523813158274, |
|
"learning_rate": 8.804232804232805e-06, |
|
"loss": 0.0002, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 30.005714285714287, |
|
"grad_norm": 0.016684025526046753, |
|
"learning_rate": 8.793650793650795e-06, |
|
"loss": 0.001, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 30.006666666666668, |
|
"grad_norm": 0.01212390512228012, |
|
"learning_rate": 8.783068783068783e-06, |
|
"loss": 0.1783, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 30.006761904761905, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.0133984088897705, |
|
"eval_runtime": 14.4776, |
|
"eval_samples_per_second": 5.042, |
|
"eval_steps_per_second": 1.312, |
|
"step": 2201 |
|
}, |
|
{ |
|
"epoch": 31.000857142857143, |
|
"grad_norm": 0.005572069901973009, |
|
"learning_rate": 8.772486772486773e-06, |
|
"loss": 0.0002, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 31.001809523809523, |
|
"grad_norm": 0.030820639804005623, |
|
"learning_rate": 8.761904761904763e-06, |
|
"loss": 0.0008, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 31.002761904761904, |
|
"grad_norm": 43.24854278564453, |
|
"learning_rate": 8.751322751322751e-06, |
|
"loss": 0.1547, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 31.003714285714285, |
|
"grad_norm": 286.11175537109375, |
|
"learning_rate": 8.740740740740741e-06, |
|
"loss": 0.3087, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 31.004666666666665, |
|
"grad_norm": 0.01184868160635233, |
|
"learning_rate": 8.730158730158731e-06, |
|
"loss": 0.0033, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 31.005619047619046, |
|
"grad_norm": 0.012461334466934204, |
|
"learning_rate": 8.71957671957672e-06, |
|
"loss": 0.1977, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 31.00657142857143, |
|
"grad_norm": 0.06772441416978836, |
|
"learning_rate": 8.70899470899471e-06, |
|
"loss": 0.0041, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 31.006761904761905, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 1.8351718187332153, |
|
"eval_runtime": 14.814, |
|
"eval_samples_per_second": 4.928, |
|
"eval_steps_per_second": 1.283, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 32.0007619047619, |
|
"grad_norm": 0.010238522663712502, |
|
"learning_rate": 8.6984126984127e-06, |
|
"loss": 0.0418, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 32.001714285714286, |
|
"grad_norm": 0.009037792682647705, |
|
"learning_rate": 8.687830687830688e-06, |
|
"loss": 0.0002, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 32.00266666666667, |
|
"grad_norm": 0.015116223134100437, |
|
"learning_rate": 8.677248677248678e-06, |
|
"loss": 0.0012, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 32.00361904761905, |
|
"grad_norm": 0.07763515412807465, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.0712, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 32.00457142857143, |
|
"grad_norm": 0.0024017663672566414, |
|
"learning_rate": 8.656084656084656e-06, |
|
"loss": 0.0002, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 32.00552380952381, |
|
"grad_norm": 0.00618686294183135, |
|
"learning_rate": 8.645502645502646e-06, |
|
"loss": 0.0005, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 32.00647619047619, |
|
"grad_norm": 0.02092176117002964, |
|
"learning_rate": 8.634920634920636e-06, |
|
"loss": 0.0001, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 32.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.317070960998535, |
|
"eval_runtime": 14.3096, |
|
"eval_samples_per_second": 5.101, |
|
"eval_steps_per_second": 1.328, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 33.00066666666667, |
|
"grad_norm": 1.6639690399169922, |
|
"learning_rate": 8.624338624338624e-06, |
|
"loss": 0.0005, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 33.001619047619045, |
|
"grad_norm": 0.12533986568450928, |
|
"learning_rate": 8.613756613756614e-06, |
|
"loss": 0.1396, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 33.00257142857143, |
|
"grad_norm": 0.004117908421903849, |
|
"learning_rate": 8.603174603174604e-06, |
|
"loss": 0.166, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 33.00352380952381, |
|
"grad_norm": 0.005937062669545412, |
|
"learning_rate": 8.592592592592593e-06, |
|
"loss": 0.099, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 33.00447619047619, |
|
"grad_norm": 0.003989997319877148, |
|
"learning_rate": 8.582010582010583e-06, |
|
"loss": 0.1205, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 33.005428571428574, |
|
"grad_norm": 0.004362111911177635, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.009, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 33.00638095238095, |
|
"grad_norm": 0.003777262754738331, |
|
"learning_rate": 8.560846560846563e-06, |
|
"loss": 0.0001, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 33.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.254392385482788, |
|
"eval_runtime": 13.6693, |
|
"eval_samples_per_second": 5.34, |
|
"eval_steps_per_second": 1.39, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 34.000571428571426, |
|
"grad_norm": 0.004854101687669754, |
|
"learning_rate": 8.550264550264551e-06, |
|
"loss": 0.0018, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 34.00152380952381, |
|
"grad_norm": 0.0038497683126479387, |
|
"learning_rate": 8.53968253968254e-06, |
|
"loss": 0.1879, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 34.00247619047619, |
|
"grad_norm": 0.02858874946832657, |
|
"learning_rate": 8.529100529100531e-06, |
|
"loss": 0.0002, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 34.00342857142857, |
|
"grad_norm": 0.0020952511113137007, |
|
"learning_rate": 8.518518518518519e-06, |
|
"loss": 0.3701, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 34.004380952380956, |
|
"grad_norm": 0.004034761805087328, |
|
"learning_rate": 8.507936507936509e-06, |
|
"loss": 0.1456, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 34.00533333333333, |
|
"grad_norm": 0.36531704664230347, |
|
"learning_rate": 8.497354497354499e-06, |
|
"loss": 0.3613, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 34.00628571428572, |
|
"grad_norm": 0.0031751752831041813, |
|
"learning_rate": 8.486772486772487e-06, |
|
"loss": 0.0443, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 34.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.0804617404937744, |
|
"eval_runtime": 14.2643, |
|
"eval_samples_per_second": 5.118, |
|
"eval_steps_per_second": 1.332, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 35.00047619047619, |
|
"grad_norm": 0.0141215231269598, |
|
"learning_rate": 8.476190476190477e-06, |
|
"loss": 0.0001, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 35.00142857142857, |
|
"grad_norm": 0.005832843482494354, |
|
"learning_rate": 8.465608465608466e-06, |
|
"loss": 0.151, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 35.00238095238095, |
|
"grad_norm": 0.004920718260109425, |
|
"learning_rate": 8.455026455026456e-06, |
|
"loss": 0.0002, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 35.00333333333333, |
|
"grad_norm": 0.0038942135870456696, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.2402, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 35.004285714285714, |
|
"grad_norm": 0.06160876899957657, |
|
"learning_rate": 8.433862433862434e-06, |
|
"loss": 0.0001, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 35.0052380952381, |
|
"grad_norm": 0.0025770808570086956, |
|
"learning_rate": 8.423280423280424e-06, |
|
"loss": 0.1076, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 35.006190476190476, |
|
"grad_norm": 0.012355489656329155, |
|
"learning_rate": 8.412698412698414e-06, |
|
"loss": 0.0052, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 35.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.5060980319976807, |
|
"eval_runtime": 13.5732, |
|
"eval_samples_per_second": 5.378, |
|
"eval_steps_per_second": 1.4, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 36.00038095238095, |
|
"grad_norm": 0.0018953619292005897, |
|
"learning_rate": 8.402116402116402e-06, |
|
"loss": 0.0204, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 36.001333333333335, |
|
"grad_norm": 0.0030179175082594156, |
|
"learning_rate": 8.391534391534392e-06, |
|
"loss": 0.0995, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 36.00228571428571, |
|
"grad_norm": 0.005785011220723391, |
|
"learning_rate": 8.380952380952382e-06, |
|
"loss": 0.0001, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 36.003238095238096, |
|
"grad_norm": 0.002400570083409548, |
|
"learning_rate": 8.37037037037037e-06, |
|
"loss": 0.2971, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 36.00419047619047, |
|
"grad_norm": 60.340904235839844, |
|
"learning_rate": 8.35978835978836e-06, |
|
"loss": 0.2018, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 36.00514285714286, |
|
"grad_norm": 0.0017768730176612735, |
|
"learning_rate": 8.34920634920635e-06, |
|
"loss": 0.0014, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 36.00609523809524, |
|
"grad_norm": 0.0068740625865757465, |
|
"learning_rate": 8.338624338624339e-06, |
|
"loss": 0.1231, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 36.0067619047619, |
|
"eval_accuracy": 0.6438356164383562, |
|
"eval_loss": 2.2596001625061035, |
|
"eval_runtime": 13.5983, |
|
"eval_samples_per_second": 5.368, |
|
"eval_steps_per_second": 1.397, |
|
"step": 2627 |
|
}, |
|
{ |
|
"epoch": 37.00028571428572, |
|
"grad_norm": 0.02054959535598755, |
|
"learning_rate": 8.328042328042329e-06, |
|
"loss": 0.122, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 37.001238095238094, |
|
"grad_norm": 0.11884596943855286, |
|
"learning_rate": 8.317460317460319e-06, |
|
"loss": 0.0003, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 37.00219047619048, |
|
"grad_norm": 0.1037268340587616, |
|
"learning_rate": 8.306878306878307e-06, |
|
"loss": 0.0015, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 37.003142857142855, |
|
"grad_norm": 0.011001362465322018, |
|
"learning_rate": 8.296296296296297e-06, |
|
"loss": 0.0001, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 37.00409523809524, |
|
"grad_norm": 0.017799966037273407, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.0528, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 37.005047619047616, |
|
"grad_norm": 0.01603088155388832, |
|
"learning_rate": 8.275132275132275e-06, |
|
"loss": 0.0003, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 37.006, |
|
"grad_norm": 0.0017548603937029839, |
|
"learning_rate": 8.264550264550265e-06, |
|
"loss": 0.0001, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 37.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.4168407917022705, |
|
"eval_runtime": 13.4181, |
|
"eval_samples_per_second": 5.44, |
|
"eval_steps_per_second": 1.416, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 38.000190476190475, |
|
"grad_norm": 0.0024463327135890722, |
|
"learning_rate": 8.253968253968254e-06, |
|
"loss": 0.0001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 38.00114285714286, |
|
"grad_norm": 0.0063362400978803635, |
|
"learning_rate": 8.243386243386245e-06, |
|
"loss": 0.0001, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 38.00209523809524, |
|
"grad_norm": 0.0032534021884202957, |
|
"learning_rate": 8.232804232804234e-06, |
|
"loss": 0.0001, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 38.00304761904762, |
|
"grad_norm": 0.004250057507306337, |
|
"learning_rate": 8.222222222222222e-06, |
|
"loss": 0.0001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 38.004, |
|
"grad_norm": 0.00851590558886528, |
|
"learning_rate": 8.211640211640213e-06, |
|
"loss": 0.0001, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 38.00495238095238, |
|
"grad_norm": 0.002357149263843894, |
|
"learning_rate": 8.201058201058202e-06, |
|
"loss": 0.0001, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 38.00590476190476, |
|
"grad_norm": 0.00219311797991395, |
|
"learning_rate": 8.190476190476192e-06, |
|
"loss": 0.0001, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 38.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.4288012981414795, |
|
"eval_runtime": 13.7745, |
|
"eval_samples_per_second": 5.3, |
|
"eval_steps_per_second": 1.379, |
|
"step": 2769 |
|
}, |
|
{ |
|
"epoch": 39.00009523809524, |
|
"grad_norm": 0.0019391386304050684, |
|
"learning_rate": 8.179894179894182e-06, |
|
"loss": 0.0001, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 39.00104761904762, |
|
"grad_norm": 0.0017324964283034205, |
|
"learning_rate": 8.16931216931217e-06, |
|
"loss": 0.0001, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 39.002, |
|
"grad_norm": 319.5306396484375, |
|
"learning_rate": 8.15873015873016e-06, |
|
"loss": 0.0211, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 39.00295238095238, |
|
"grad_norm": 0.008279277011752129, |
|
"learning_rate": 8.148148148148148e-06, |
|
"loss": 0.0001, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 39.003904761904764, |
|
"grad_norm": 50.196842193603516, |
|
"learning_rate": 8.137566137566138e-06, |
|
"loss": 0.0043, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 39.00485714285714, |
|
"grad_norm": 439.8645324707031, |
|
"learning_rate": 8.126984126984128e-06, |
|
"loss": 0.0968, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 39.005809523809525, |
|
"grad_norm": 0.0013006924418732524, |
|
"learning_rate": 8.116402116402117e-06, |
|
"loss": 0.0001, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 39.0067619047619, |
|
"grad_norm": 0.005911983083933592, |
|
"learning_rate": 8.105820105820107e-06, |
|
"loss": 0.0667, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 39.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.674278497695923, |
|
"eval_runtime": 13.7973, |
|
"eval_samples_per_second": 5.291, |
|
"eval_steps_per_second": 1.377, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 40.000952380952384, |
|
"grad_norm": 0.004958099219948053, |
|
"learning_rate": 8.095238095238097e-06, |
|
"loss": 0.1277, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 40.00190476190476, |
|
"grad_norm": 0.042543746531009674, |
|
"learning_rate": 8.084656084656085e-06, |
|
"loss": 0.0001, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 40.002857142857145, |
|
"grad_norm": 0.0019746676553040743, |
|
"learning_rate": 8.074074074074075e-06, |
|
"loss": 0.0464, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 40.00380952380952, |
|
"grad_norm": 0.0063486467115581036, |
|
"learning_rate": 8.063492063492065e-06, |
|
"loss": 0.1274, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 40.00476190476191, |
|
"grad_norm": 0.0018023444572463632, |
|
"learning_rate": 8.052910052910053e-06, |
|
"loss": 0.0001, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 40.005714285714284, |
|
"grad_norm": 0.0017704421188682318, |
|
"learning_rate": 8.042328042328043e-06, |
|
"loss": 0.0001, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 40.00666666666667, |
|
"grad_norm": 0.006013723090291023, |
|
"learning_rate": 8.031746031746033e-06, |
|
"loss": 0.0001, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 40.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.4384541511535645, |
|
"eval_runtime": 13.8197, |
|
"eval_samples_per_second": 5.282, |
|
"eval_steps_per_second": 1.375, |
|
"step": 2911 |
|
}, |
|
{ |
|
"epoch": 41.00085714285714, |
|
"grad_norm": 0.0018943676259368658, |
|
"learning_rate": 8.021164021164021e-06, |
|
"loss": 0.0001, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 41.00180952380953, |
|
"grad_norm": 0.0028211427852511406, |
|
"learning_rate": 8.010582010582011e-06, |
|
"loss": 0.0001, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 41.002761904761904, |
|
"grad_norm": 0.0016440409235656261, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0001, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 41.00371428571429, |
|
"grad_norm": 0.005344245117157698, |
|
"learning_rate": 7.98941798941799e-06, |
|
"loss": 0.0001, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 41.004666666666665, |
|
"grad_norm": 0.003290182678028941, |
|
"learning_rate": 7.97883597883598e-06, |
|
"loss": 0.0001, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 41.00561904761905, |
|
"grad_norm": 0.001862400327809155, |
|
"learning_rate": 7.968253968253968e-06, |
|
"loss": 0.0001, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 41.00657142857143, |
|
"grad_norm": 0.0017746849916875362, |
|
"learning_rate": 7.957671957671958e-06, |
|
"loss": 0.0001, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 41.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.022097110748291, |
|
"eval_runtime": 13.9061, |
|
"eval_samples_per_second": 5.25, |
|
"eval_steps_per_second": 1.366, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 42.0007619047619, |
|
"grad_norm": 0.003049792954698205, |
|
"learning_rate": 7.947089947089948e-06, |
|
"loss": 0.0001, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 42.001714285714286, |
|
"grad_norm": 0.0017019949154928327, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.1309, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 42.00266666666667, |
|
"grad_norm": 0.0011635601986199617, |
|
"learning_rate": 7.925925925925926e-06, |
|
"loss": 0.0001, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 42.00361904761905, |
|
"grad_norm": 221.78436279296875, |
|
"learning_rate": 7.915343915343916e-06, |
|
"loss": 0.1793, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 42.00457142857143, |
|
"grad_norm": 0.02079232968389988, |
|
"learning_rate": 7.904761904761904e-06, |
|
"loss": 0.0001, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 42.00552380952381, |
|
"grad_norm": 0.014244151301681995, |
|
"learning_rate": 7.894179894179896e-06, |
|
"loss": 0.0001, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 42.00647619047619, |
|
"grad_norm": 2.4623591899871826, |
|
"learning_rate": 7.883597883597884e-06, |
|
"loss": 0.0561, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 42.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 2.1502933502197266, |
|
"eval_runtime": 13.6939, |
|
"eval_samples_per_second": 5.331, |
|
"eval_steps_per_second": 1.387, |
|
"step": 3053 |
|
}, |
|
{ |
|
"epoch": 43.00066666666667, |
|
"grad_norm": 0.00719720171764493, |
|
"learning_rate": 7.873015873015873e-06, |
|
"loss": 0.0001, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 43.001619047619045, |
|
"grad_norm": 0.00430461298674345, |
|
"learning_rate": 7.862433862433863e-06, |
|
"loss": 0.0376, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 43.00257142857143, |
|
"grad_norm": 0.0019277018727734685, |
|
"learning_rate": 7.851851851851853e-06, |
|
"loss": 0.0001, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 43.00352380952381, |
|
"grad_norm": 0.1772233247756958, |
|
"learning_rate": 7.841269841269843e-06, |
|
"loss": 0.1042, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 43.00447619047619, |
|
"grad_norm": 0.025268716737627983, |
|
"learning_rate": 7.830687830687831e-06, |
|
"loss": 0.3745, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 43.005428571428574, |
|
"grad_norm": 0.003945184871554375, |
|
"learning_rate": 7.820105820105821e-06, |
|
"loss": 0.0378, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 43.00638095238095, |
|
"grad_norm": 0.006263280753046274, |
|
"learning_rate": 7.809523809523811e-06, |
|
"loss": 0.005, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 43.0067619047619, |
|
"eval_accuracy": 0.6575342465753424, |
|
"eval_loss": 3.0310723781585693, |
|
"eval_runtime": 13.9271, |
|
"eval_samples_per_second": 5.242, |
|
"eval_steps_per_second": 1.364, |
|
"step": 3124 |
|
}, |
|
{ |
|
"epoch": 44.000571428571426, |
|
"grad_norm": 0.002363319043070078, |
|
"learning_rate": 7.7989417989418e-06, |
|
"loss": 0.0005, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 44.00152380952381, |
|
"grad_norm": 0.00214759330265224, |
|
"learning_rate": 7.78835978835979e-06, |
|
"loss": 0.0033, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 44.00247619047619, |
|
"grad_norm": 0.0017539850668981671, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.0008, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 44.00342857142857, |
|
"grad_norm": 0.0034868153743445873, |
|
"learning_rate": 7.767195767195767e-06, |
|
"loss": 0.0001, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 44.004380952380956, |
|
"grad_norm": 0.0029717048164457083, |
|
"learning_rate": 7.756613756613757e-06, |
|
"loss": 0.0001, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 44.00533333333333, |
|
"grad_norm": 0.01103096455335617, |
|
"learning_rate": 7.746031746031747e-06, |
|
"loss": 0.0002, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 44.00628571428572, |
|
"grad_norm": 0.0015669335843995214, |
|
"learning_rate": 7.735449735449736e-06, |
|
"loss": 0.0, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 44.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.2170116901397705, |
|
"eval_runtime": 13.8023, |
|
"eval_samples_per_second": 5.289, |
|
"eval_steps_per_second": 1.377, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 45.00047619047619, |
|
"grad_norm": 0.0024978877045214176, |
|
"learning_rate": 7.724867724867726e-06, |
|
"loss": 0.1397, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 45.00142857142857, |
|
"grad_norm": 0.006718004588037729, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0001, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 45.00238095238095, |
|
"grad_norm": 0.0021593060810118914, |
|
"learning_rate": 7.703703703703704e-06, |
|
"loss": 0.0001, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 45.00333333333333, |
|
"grad_norm": 0.02300015650689602, |
|
"learning_rate": 7.693121693121694e-06, |
|
"loss": 0.2088, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 45.004285714285714, |
|
"grad_norm": 0.03867880627512932, |
|
"learning_rate": 7.682539682539684e-06, |
|
"loss": 0.2577, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 45.0052380952381, |
|
"grad_norm": 0.001522450940683484, |
|
"learning_rate": 7.671957671957672e-06, |
|
"loss": 0.0016, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 45.006190476190476, |
|
"grad_norm": 0.038878075778484344, |
|
"learning_rate": 7.661375661375662e-06, |
|
"loss": 0.2196, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 45.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.067211866378784, |
|
"eval_runtime": 13.8506, |
|
"eval_samples_per_second": 5.271, |
|
"eval_steps_per_second": 1.372, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 46.00038095238095, |
|
"grad_norm": 0.18913382291793823, |
|
"learning_rate": 7.65079365079365e-06, |
|
"loss": 0.1269, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 46.001333333333335, |
|
"grad_norm": 0.06659168750047684, |
|
"learning_rate": 7.64021164021164e-06, |
|
"loss": 0.0002, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 46.00228571428571, |
|
"grad_norm": 0.001147576724179089, |
|
"learning_rate": 7.62962962962963e-06, |
|
"loss": 0.0002, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 46.003238095238096, |
|
"grad_norm": 0.0035413168370723724, |
|
"learning_rate": 7.61904761904762e-06, |
|
"loss": 0.0002, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 46.00419047619047, |
|
"grad_norm": 0.0068791331723332405, |
|
"learning_rate": 7.60846560846561e-06, |
|
"loss": 0.2518, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 46.00514285714286, |
|
"grad_norm": 181.49005126953125, |
|
"learning_rate": 7.597883597883599e-06, |
|
"loss": 0.1522, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 46.00609523809524, |
|
"grad_norm": 0.001169729745015502, |
|
"learning_rate": 7.587301587301588e-06, |
|
"loss": 0.1848, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 46.0067619047619, |
|
"eval_accuracy": 0.6575342465753424, |
|
"eval_loss": 2.500297784805298, |
|
"eval_runtime": 14.2078, |
|
"eval_samples_per_second": 5.138, |
|
"eval_steps_per_second": 1.337, |
|
"step": 3337 |
|
}, |
|
{ |
|
"epoch": 47.00028571428572, |
|
"grad_norm": 0.0010710316710174084, |
|
"learning_rate": 7.576719576719578e-06, |
|
"loss": 0.0001, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 47.001238095238094, |
|
"grad_norm": 0.0021415085066109896, |
|
"learning_rate": 7.566137566137567e-06, |
|
"loss": 0.0986, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 47.00219047619048, |
|
"grad_norm": 0.04945885390043259, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.131, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 47.003142857142855, |
|
"grad_norm": 0.028383105993270874, |
|
"learning_rate": 7.544973544973545e-06, |
|
"loss": 0.001, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 47.00409523809524, |
|
"grad_norm": 0.004415535368025303, |
|
"learning_rate": 7.534391534391535e-06, |
|
"loss": 0.0001, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 47.005047619047616, |
|
"grad_norm": 0.006136058364063501, |
|
"learning_rate": 7.523809523809524e-06, |
|
"loss": 0.1968, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 47.006, |
|
"grad_norm": 0.014326406642794609, |
|
"learning_rate": 7.5132275132275136e-06, |
|
"loss": 0.2445, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 47.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.034447431564331, |
|
"eval_runtime": 18.3616, |
|
"eval_samples_per_second": 3.976, |
|
"eval_steps_per_second": 1.035, |
|
"step": 3408 |
|
}, |
|
{ |
|
"epoch": 48.000190476190475, |
|
"grad_norm": 0.7491527199745178, |
|
"learning_rate": 7.5026455026455035e-06, |
|
"loss": 0.0002, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 48.00114285714286, |
|
"grad_norm": 0.0026449006982147694, |
|
"learning_rate": 7.492063492063493e-06, |
|
"loss": 0.0012, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 48.00209523809524, |
|
"grad_norm": 0.017544981092214584, |
|
"learning_rate": 7.481481481481482e-06, |
|
"loss": 0.0001, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 48.00304761904762, |
|
"grad_norm": 0.005183749366551638, |
|
"learning_rate": 7.470899470899472e-06, |
|
"loss": 0.0001, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 48.004, |
|
"grad_norm": 0.002835033694282174, |
|
"learning_rate": 7.460317460317461e-06, |
|
"loss": 0.202, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 48.00495238095238, |
|
"grad_norm": 0.0014751210110262036, |
|
"learning_rate": 7.44973544973545e-06, |
|
"loss": 0.0002, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 48.00590476190476, |
|
"grad_norm": 0.004137114156037569, |
|
"learning_rate": 7.439153439153439e-06, |
|
"loss": 0.3096, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 48.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.404043674468994, |
|
"eval_runtime": 16.2239, |
|
"eval_samples_per_second": 4.5, |
|
"eval_steps_per_second": 1.171, |
|
"step": 3479 |
|
}, |
|
{ |
|
"epoch": 49.00009523809524, |
|
"grad_norm": 0.08745479583740234, |
|
"learning_rate": 7.428571428571429e-06, |
|
"loss": 0.0001, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 49.00104761904762, |
|
"grad_norm": 0.0012825162848457694, |
|
"learning_rate": 7.417989417989418e-06, |
|
"loss": 0.0014, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 49.002, |
|
"grad_norm": 0.0021935352124273777, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 49.00295238095238, |
|
"grad_norm": 0.0031665137503296137, |
|
"learning_rate": 7.3968253968253975e-06, |
|
"loss": 0.0001, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 49.003904761904764, |
|
"grad_norm": 0.0069634681567549706, |
|
"learning_rate": 7.386243386243387e-06, |
|
"loss": 0.0223, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 49.00485714285714, |
|
"grad_norm": 0.0008838098729029298, |
|
"learning_rate": 7.375661375661376e-06, |
|
"loss": 0.0002, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 49.005809523809525, |
|
"grad_norm": 0.0043453024700284, |
|
"learning_rate": 7.3650793650793666e-06, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 49.0067619047619, |
|
"grad_norm": 0.0030397106893360615, |
|
"learning_rate": 7.354497354497355e-06, |
|
"loss": 0.0, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 49.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.32244610786438, |
|
"eval_runtime": 15.5054, |
|
"eval_samples_per_second": 4.708, |
|
"eval_steps_per_second": 1.225, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 50.000952380952384, |
|
"grad_norm": 0.003725986462086439, |
|
"learning_rate": 7.343915343915344e-06, |
|
"loss": 0.0, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 50.00190476190476, |
|
"grad_norm": 0.0018120300956070423, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 0.0001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 50.002857142857145, |
|
"grad_norm": 0.014024248346686363, |
|
"learning_rate": 7.322751322751324e-06, |
|
"loss": 0.0006, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 50.00380952380952, |
|
"grad_norm": 0.0008814082248136401, |
|
"learning_rate": 7.312169312169313e-06, |
|
"loss": 0.0, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 50.00476190476191, |
|
"grad_norm": 0.0016708805924281478, |
|
"learning_rate": 7.301587301587301e-06, |
|
"loss": 0.0837, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 50.005714285714284, |
|
"grad_norm": 0.0031952844001352787, |
|
"learning_rate": 7.291005291005292e-06, |
|
"loss": 0.1672, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 50.00666666666667, |
|
"grad_norm": 0.0025649177841842175, |
|
"learning_rate": 7.280423280423281e-06, |
|
"loss": 0.0, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 50.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.6102144718170166, |
|
"eval_runtime": 13.4793, |
|
"eval_samples_per_second": 5.416, |
|
"eval_steps_per_second": 1.41, |
|
"step": 3621 |
|
}, |
|
{ |
|
"epoch": 51.00085714285714, |
|
"grad_norm": 0.036289017647504807, |
|
"learning_rate": 7.2698412698412705e-06, |
|
"loss": 0.0, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 51.00180952380953, |
|
"grad_norm": 0.0017578883562237024, |
|
"learning_rate": 7.2592592592592605e-06, |
|
"loss": 0.0, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 51.002761904761904, |
|
"grad_norm": 0.001723015564493835, |
|
"learning_rate": 7.24867724867725e-06, |
|
"loss": 0.0, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 51.00371428571429, |
|
"grad_norm": 0.0011370504507794976, |
|
"learning_rate": 7.238095238095239e-06, |
|
"loss": 0.0, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 51.004666666666665, |
|
"grad_norm": 0.0005091355997137725, |
|
"learning_rate": 7.227513227513228e-06, |
|
"loss": 0.0001, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 51.00561904761905, |
|
"grad_norm": 0.0006327593582682312, |
|
"learning_rate": 7.216931216931218e-06, |
|
"loss": 0.0966, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 51.00657142857143, |
|
"grad_norm": 0.0016729255439713597, |
|
"learning_rate": 7.206349206349207e-06, |
|
"loss": 0.2334, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 51.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 3.000962972640991, |
|
"eval_runtime": 13.4815, |
|
"eval_samples_per_second": 5.415, |
|
"eval_steps_per_second": 1.409, |
|
"step": 3692 |
|
}, |
|
{ |
|
"epoch": 52.0007619047619, |
|
"grad_norm": 0.0014584249584004283, |
|
"learning_rate": 7.195767195767196e-06, |
|
"loss": 0.0007, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 52.001714285714286, |
|
"grad_norm": 0.7437462210655212, |
|
"learning_rate": 7.185185185185186e-06, |
|
"loss": 0.1845, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 52.00266666666667, |
|
"grad_norm": 0.001144355977885425, |
|
"learning_rate": 7.174603174603175e-06, |
|
"loss": 0.0001, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 52.00361904761905, |
|
"grad_norm": 0.00172973214648664, |
|
"learning_rate": 7.1640211640211644e-06, |
|
"loss": 0.0, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 52.00457142857143, |
|
"grad_norm": 0.0027641530614346266, |
|
"learning_rate": 7.1534391534391544e-06, |
|
"loss": 0.0002, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 52.00552380952381, |
|
"grad_norm": 0.0009939733427017927, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.0, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 52.00647619047619, |
|
"grad_norm": 0.0012999430764466524, |
|
"learning_rate": 7.132275132275133e-06, |
|
"loss": 0.0001, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 52.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.264735698699951, |
|
"eval_runtime": 13.8243, |
|
"eval_samples_per_second": 5.281, |
|
"eval_steps_per_second": 1.374, |
|
"step": 3763 |
|
}, |
|
{ |
|
"epoch": 53.00066666666667, |
|
"grad_norm": 0.0011330973356962204, |
|
"learning_rate": 7.121693121693122e-06, |
|
"loss": 0.0602, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 53.001619047619045, |
|
"grad_norm": 0.04610966145992279, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.0, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 53.00257142857143, |
|
"grad_norm": 0.0013028762768954039, |
|
"learning_rate": 7.100529100529101e-06, |
|
"loss": 0.0001, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 53.00352380952381, |
|
"grad_norm": 0.0012335841311141849, |
|
"learning_rate": 7.08994708994709e-06, |
|
"loss": 0.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 53.00447619047619, |
|
"grad_norm": 0.0016848533414304256, |
|
"learning_rate": 7.07936507936508e-06, |
|
"loss": 0.0, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 53.005428571428574, |
|
"grad_norm": 0.0025865156203508377, |
|
"learning_rate": 7.068783068783069e-06, |
|
"loss": 0.0019, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 53.00638095238095, |
|
"grad_norm": 0.017396176233887672, |
|
"learning_rate": 7.058201058201058e-06, |
|
"loss": 0.0001, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 53.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.280632972717285, |
|
"eval_runtime": 18.311, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 1.038, |
|
"step": 3834 |
|
}, |
|
{ |
|
"epoch": 54.000571428571426, |
|
"grad_norm": 0.0008356795297004282, |
|
"learning_rate": 7.047619047619048e-06, |
|
"loss": 0.0, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 54.00152380952381, |
|
"grad_norm": 0.01968306489288807, |
|
"learning_rate": 7.0370370370370375e-06, |
|
"loss": 0.0, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 54.00247619047619, |
|
"grad_norm": 0.06151906028389931, |
|
"learning_rate": 7.026455026455027e-06, |
|
"loss": 0.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 54.00342857142857, |
|
"grad_norm": 0.0011022702092304826, |
|
"learning_rate": 7.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 54.004380952380956, |
|
"grad_norm": 0.0006049445364624262, |
|
"learning_rate": 7.005291005291006e-06, |
|
"loss": 0.0, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 54.00533333333333, |
|
"grad_norm": 0.0009984824573621154, |
|
"learning_rate": 6.994708994708995e-06, |
|
"loss": 0.0, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 54.00628571428572, |
|
"grad_norm": 0.0011386544210836291, |
|
"learning_rate": 6.984126984126984e-06, |
|
"loss": 0.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 54.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.5543222427368164, |
|
"eval_runtime": 18.6581, |
|
"eval_samples_per_second": 3.913, |
|
"eval_steps_per_second": 1.018, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 55.00047619047619, |
|
"grad_norm": 0.0012212592409923673, |
|
"learning_rate": 6.973544973544975e-06, |
|
"loss": 0.0, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 55.00142857142857, |
|
"grad_norm": 0.0013299413258209825, |
|
"learning_rate": 6.962962962962964e-06, |
|
"loss": 0.0, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 55.00238095238095, |
|
"grad_norm": 0.00048118969425559044, |
|
"learning_rate": 6.952380952380952e-06, |
|
"loss": 0.0062, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 55.00333333333333, |
|
"grad_norm": 0.0026790364645421505, |
|
"learning_rate": 6.941798941798943e-06, |
|
"loss": 0.0, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 55.004285714285714, |
|
"grad_norm": 0.0005716817104257643, |
|
"learning_rate": 6.931216931216932e-06, |
|
"loss": 0.0007, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 55.0052380952381, |
|
"grad_norm": 0.0015618937322869897, |
|
"learning_rate": 6.920634920634921e-06, |
|
"loss": 0.0, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 55.006190476190476, |
|
"grad_norm": 0.001083777635358274, |
|
"learning_rate": 6.9100529100529105e-06, |
|
"loss": 0.0, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 55.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.620253562927246, |
|
"eval_runtime": 18.8252, |
|
"eval_samples_per_second": 3.878, |
|
"eval_steps_per_second": 1.009, |
|
"step": 3976 |
|
}, |
|
{ |
|
"epoch": 56.00038095238095, |
|
"grad_norm": 0.0008544697193428874, |
|
"learning_rate": 6.8994708994709005e-06, |
|
"loss": 0.0, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 56.001333333333335, |
|
"grad_norm": 0.0015285807894542813, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.0476, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 56.00228571428571, |
|
"grad_norm": 0.0009055461850948632, |
|
"learning_rate": 6.878306878306879e-06, |
|
"loss": 0.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 56.003238095238096, |
|
"grad_norm": 0.0003566649102140218, |
|
"learning_rate": 6.867724867724869e-06, |
|
"loss": 0.0, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 56.00419047619047, |
|
"grad_norm": 0.0005908096209168434, |
|
"learning_rate": 6.857142857142858e-06, |
|
"loss": 0.0008, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 56.00514285714286, |
|
"grad_norm": 0.001364821451716125, |
|
"learning_rate": 6.846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 56.00609523809524, |
|
"grad_norm": 0.00044720666483044624, |
|
"learning_rate": 6.835978835978837e-06, |
|
"loss": 0.2117, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 56.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.548649311065674, |
|
"eval_runtime": 18.8332, |
|
"eval_samples_per_second": 3.876, |
|
"eval_steps_per_second": 1.009, |
|
"step": 4047 |
|
}, |
|
{ |
|
"epoch": 57.00028571428572, |
|
"grad_norm": 0.0007962590316310525, |
|
"learning_rate": 6.825396825396826e-06, |
|
"loss": 0.0, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 57.001238095238094, |
|
"grad_norm": 0.0007638200768269598, |
|
"learning_rate": 6.814814814814815e-06, |
|
"loss": 0.2585, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 57.00219047619048, |
|
"grad_norm": 0.001399993896484375, |
|
"learning_rate": 6.8042328042328045e-06, |
|
"loss": 0.1294, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 57.003142857142855, |
|
"grad_norm": 0.0018357918597757816, |
|
"learning_rate": 6.7936507936507944e-06, |
|
"loss": 0.3048, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 57.00409523809524, |
|
"grad_norm": 0.002194765256717801, |
|
"learning_rate": 6.783068783068784e-06, |
|
"loss": 0.3206, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 57.005047619047616, |
|
"grad_norm": 0.005216788966208696, |
|
"learning_rate": 6.772486772486773e-06, |
|
"loss": 0.0244, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 57.006, |
|
"grad_norm": 0.011147862300276756, |
|
"learning_rate": 6.761904761904763e-06, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 57.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.207242727279663, |
|
"eval_runtime": 19.0121, |
|
"eval_samples_per_second": 3.84, |
|
"eval_steps_per_second": 0.999, |
|
"step": 4118 |
|
}, |
|
{ |
|
"epoch": 58.000190476190475, |
|
"grad_norm": 0.005046378821134567, |
|
"learning_rate": 6.751322751322752e-06, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 58.00114285714286, |
|
"grad_norm": 0.008579636923968792, |
|
"learning_rate": 6.740740740740741e-06, |
|
"loss": 0.0002, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 58.00209523809524, |
|
"grad_norm": 0.0005268456880003214, |
|
"learning_rate": 6.730158730158731e-06, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 58.00304761904762, |
|
"grad_norm": 0.0019031857373192906, |
|
"learning_rate": 6.71957671957672e-06, |
|
"loss": 0.0001, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 58.004, |
|
"grad_norm": 0.0008498723618686199, |
|
"learning_rate": 6.708994708994709e-06, |
|
"loss": 0.0009, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 58.00495238095238, |
|
"grad_norm": 0.0006440227152779698, |
|
"learning_rate": 6.698412698412698e-06, |
|
"loss": 0.0, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 58.00590476190476, |
|
"grad_norm": 0.0011853055329993367, |
|
"learning_rate": 6.687830687830688e-06, |
|
"loss": 0.0001, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 58.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.4930429458618164, |
|
"eval_runtime": 14.5486, |
|
"eval_samples_per_second": 5.018, |
|
"eval_steps_per_second": 1.306, |
|
"step": 4189 |
|
}, |
|
{ |
|
"epoch": 59.00009523809524, |
|
"grad_norm": 0.08018586784601212, |
|
"learning_rate": 6.6772486772486775e-06, |
|
"loss": 0.0001, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 59.00104761904762, |
|
"grad_norm": 0.004129878710955381, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 59.002, |
|
"grad_norm": 0.020192867144942284, |
|
"learning_rate": 6.656084656084657e-06, |
|
"loss": 0.0003, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 59.00295238095238, |
|
"grad_norm": 0.0006513457628898323, |
|
"learning_rate": 6.645502645502646e-06, |
|
"loss": 0.2299, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 59.003904761904764, |
|
"grad_norm": 0.0016079139895737171, |
|
"learning_rate": 6.634920634920635e-06, |
|
"loss": 0.0887, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 59.00485714285714, |
|
"grad_norm": 0.15609946846961975, |
|
"learning_rate": 6.624338624338626e-06, |
|
"loss": 0.0001, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 59.005809523809525, |
|
"grad_norm": 0.0012843705480918288, |
|
"learning_rate": 6.613756613756615e-06, |
|
"loss": 0.0001, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 59.0067619047619, |
|
"grad_norm": 0.0064285434782505035, |
|
"learning_rate": 6.603174603174603e-06, |
|
"loss": 0.0001, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 59.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.326201915740967, |
|
"eval_runtime": 13.9086, |
|
"eval_samples_per_second": 5.249, |
|
"eval_steps_per_second": 1.366, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 60.000952380952384, |
|
"grad_norm": 0.0017877172213047743, |
|
"learning_rate": 6.592592592592592e-06, |
|
"loss": 0.0, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 60.00190476190476, |
|
"grad_norm": 0.006697217468172312, |
|
"learning_rate": 6.582010582010583e-06, |
|
"loss": 0.0, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 60.002857142857145, |
|
"grad_norm": 0.0012999498285353184, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.2442, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 60.00380952380952, |
|
"grad_norm": 0.005209555383771658, |
|
"learning_rate": 6.560846560846561e-06, |
|
"loss": 0.0001, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 60.00476190476191, |
|
"grad_norm": 0.00042439354001544416, |
|
"learning_rate": 6.550264550264551e-06, |
|
"loss": 0.0878, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 60.005714285714284, |
|
"grad_norm": 286.1653137207031, |
|
"learning_rate": 6.5396825396825405e-06, |
|
"loss": 0.1338, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 60.00666666666667, |
|
"grad_norm": 318.0029602050781, |
|
"learning_rate": 6.52910052910053e-06, |
|
"loss": 0.3114, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 60.0067619047619, |
|
"eval_accuracy": 0.6575342465753424, |
|
"eval_loss": 3.0585222244262695, |
|
"eval_runtime": 13.484, |
|
"eval_samples_per_second": 5.414, |
|
"eval_steps_per_second": 1.409, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 61.00085714285714, |
|
"grad_norm": 0.04456228017807007, |
|
"learning_rate": 6.51851851851852e-06, |
|
"loss": 0.0001, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 61.00180952380953, |
|
"grad_norm": 0.005493298638612032, |
|
"learning_rate": 6.507936507936509e-06, |
|
"loss": 0.0002, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 61.002761904761904, |
|
"grad_norm": 0.006687607616186142, |
|
"learning_rate": 6.497354497354498e-06, |
|
"loss": 0.0092, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 61.00371428571429, |
|
"grad_norm": 79.56556701660156, |
|
"learning_rate": 6.486772486772487e-06, |
|
"loss": 0.22, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 61.004666666666665, |
|
"grad_norm": 0.003407603595405817, |
|
"learning_rate": 6.476190476190477e-06, |
|
"loss": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 61.00561904761905, |
|
"grad_norm": 0.011827602051198483, |
|
"learning_rate": 6.465608465608466e-06, |
|
"loss": 0.0301, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 61.00657142857143, |
|
"grad_norm": 0.0032097159419208765, |
|
"learning_rate": 6.455026455026455e-06, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 61.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.0491414070129395, |
|
"eval_runtime": 14.1172, |
|
"eval_samples_per_second": 5.171, |
|
"eval_steps_per_second": 1.346, |
|
"step": 4402 |
|
}, |
|
{ |
|
"epoch": 62.0007619047619, |
|
"grad_norm": 0.004382165614515543, |
|
"learning_rate": 6.444444444444445e-06, |
|
"loss": 0.0526, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 62.001714285714286, |
|
"grad_norm": 0.004551396239548922, |
|
"learning_rate": 6.4338624338624345e-06, |
|
"loss": 0.0005, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 62.00266666666667, |
|
"grad_norm": 0.0009644835954532027, |
|
"learning_rate": 6.423280423280424e-06, |
|
"loss": 0.0001, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 62.00361904761905, |
|
"grad_norm": 0.0023691938258707523, |
|
"learning_rate": 6.412698412698414e-06, |
|
"loss": 0.0002, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 62.00457142857143, |
|
"grad_norm": 0.0037353537045419216, |
|
"learning_rate": 6.402116402116403e-06, |
|
"loss": 0.0001, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 62.00552380952381, |
|
"grad_norm": 0.0017645591869950294, |
|
"learning_rate": 6.391534391534392e-06, |
|
"loss": 0.2158, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 62.00647619047619, |
|
"grad_norm": 0.010423078201711178, |
|
"learning_rate": 6.380952380952381e-06, |
|
"loss": 0.0057, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 62.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.0623250007629395, |
|
"eval_runtime": 13.7043, |
|
"eval_samples_per_second": 5.327, |
|
"eval_steps_per_second": 1.386, |
|
"step": 4473 |
|
}, |
|
{ |
|
"epoch": 63.00066666666667, |
|
"grad_norm": 0.0027216938324272633, |
|
"learning_rate": 6.370370370370371e-06, |
|
"loss": 0.0219, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 63.001619047619045, |
|
"grad_norm": 0.030670195817947388, |
|
"learning_rate": 6.35978835978836e-06, |
|
"loss": 0.0003, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 63.00257142857143, |
|
"grad_norm": 0.006318194791674614, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 63.00352380952381, |
|
"grad_norm": 0.0013084968086332083, |
|
"learning_rate": 6.338624338624339e-06, |
|
"loss": 0.0752, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 63.00447619047619, |
|
"grad_norm": 0.002325527835637331, |
|
"learning_rate": 6.328042328042328e-06, |
|
"loss": 0.0009, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 63.005428571428574, |
|
"grad_norm": 0.00270919781178236, |
|
"learning_rate": 6.3174603174603175e-06, |
|
"loss": 0.0, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 63.00638095238095, |
|
"grad_norm": 0.0013514786260202527, |
|
"learning_rate": 6.3068783068783075e-06, |
|
"loss": 0.0, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 63.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.5215301513671875, |
|
"eval_runtime": 13.6656, |
|
"eval_samples_per_second": 5.342, |
|
"eval_steps_per_second": 1.39, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 64.00057142857143, |
|
"grad_norm": 0.00188708386849612, |
|
"learning_rate": 6.296296296296297e-06, |
|
"loss": 0.0001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 64.0015238095238, |
|
"grad_norm": 0.0009275791817344725, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.1747, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 64.00247619047619, |
|
"grad_norm": 0.003444901667535305, |
|
"learning_rate": 6.275132275132275e-06, |
|
"loss": 0.0001, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 64.00342857142857, |
|
"grad_norm": 0.003165893955156207, |
|
"learning_rate": 6.264550264550266e-06, |
|
"loss": 0.0004, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 64.00438095238096, |
|
"grad_norm": 0.0017013954930007458, |
|
"learning_rate": 6.253968253968254e-06, |
|
"loss": 0.0, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 64.00533333333334, |
|
"grad_norm": 0.005363965407013893, |
|
"learning_rate": 6.243386243386243e-06, |
|
"loss": 0.0002, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 64.00628571428571, |
|
"grad_norm": 0.04103761166334152, |
|
"learning_rate": 6.232804232804234e-06, |
|
"loss": 0.0319, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 64.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.464803695678711, |
|
"eval_runtime": 13.7, |
|
"eval_samples_per_second": 5.328, |
|
"eval_steps_per_second": 1.387, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 65.00047619047619, |
|
"grad_norm": 0.0030973798129707575, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.0005, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 65.00142857142858, |
|
"grad_norm": 0.0011419994989410043, |
|
"learning_rate": 6.211640211640212e-06, |
|
"loss": 0.0, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 65.00238095238095, |
|
"grad_norm": 0.00028223267872817814, |
|
"learning_rate": 6.201058201058202e-06, |
|
"loss": 0.0, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 65.00333333333333, |
|
"grad_norm": 0.0009731514728628099, |
|
"learning_rate": 6.1904761904761914e-06, |
|
"loss": 0.1274, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 65.00428571428571, |
|
"grad_norm": 0.0018894043751060963, |
|
"learning_rate": 6.1798941798941806e-06, |
|
"loss": 0.0005, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 65.0052380952381, |
|
"grad_norm": 0.0011393952881917357, |
|
"learning_rate": 6.16931216931217e-06, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 65.00619047619048, |
|
"grad_norm": 0.0021387801971286535, |
|
"learning_rate": 6.15873015873016e-06, |
|
"loss": 0.2879, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 65.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 2.488492727279663, |
|
"eval_runtime": 13.8507, |
|
"eval_samples_per_second": 5.27, |
|
"eval_steps_per_second": 1.372, |
|
"step": 4686 |
|
}, |
|
{ |
|
"epoch": 66.00038095238095, |
|
"grad_norm": 0.0009078598232008517, |
|
"learning_rate": 6.148148148148149e-06, |
|
"loss": 0.144, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 66.00133333333333, |
|
"grad_norm": 0.0008898447849787772, |
|
"learning_rate": 6.137566137566138e-06, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 66.00228571428572, |
|
"grad_norm": 0.08059752732515335, |
|
"learning_rate": 6.126984126984128e-06, |
|
"loss": 0.2594, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 66.00323809523809, |
|
"grad_norm": 0.016780750826001167, |
|
"learning_rate": 6.116402116402117e-06, |
|
"loss": 0.1642, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 66.00419047619047, |
|
"grad_norm": 13.7327299118042, |
|
"learning_rate": 6.105820105820106e-06, |
|
"loss": 0.1256, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 66.00514285714286, |
|
"grad_norm": 0.0023813629522919655, |
|
"learning_rate": 6.095238095238096e-06, |
|
"loss": 0.0008, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 66.00609523809524, |
|
"grad_norm": 0.0017860322259366512, |
|
"learning_rate": 6.084656084656085e-06, |
|
"loss": 0.0001, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 66.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 1.8653825521469116, |
|
"eval_runtime": 13.7468, |
|
"eval_samples_per_second": 5.31, |
|
"eval_steps_per_second": 1.382, |
|
"step": 4757 |
|
}, |
|
{ |
|
"epoch": 67.00028571428571, |
|
"grad_norm": 0.007275663781911135, |
|
"learning_rate": 6.0740740740740745e-06, |
|
"loss": 0.0001, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 67.0012380952381, |
|
"grad_norm": 0.019806934520602226, |
|
"learning_rate": 6.063492063492064e-06, |
|
"loss": 0.0, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 67.00219047619048, |
|
"grad_norm": 0.0051077078096568584, |
|
"learning_rate": 6.052910052910054e-06, |
|
"loss": 0.0001, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 67.00314285714286, |
|
"grad_norm": 0.001753455027937889, |
|
"learning_rate": 6.042328042328043e-06, |
|
"loss": 0.0001, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 67.00409523809523, |
|
"grad_norm": 0.007550004869699478, |
|
"learning_rate": 6.031746031746032e-06, |
|
"loss": 0.0001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 67.00504761904762, |
|
"grad_norm": 0.0010189657332375646, |
|
"learning_rate": 6.021164021164022e-06, |
|
"loss": 0.0, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 67.006, |
|
"grad_norm": 0.48897579312324524, |
|
"learning_rate": 6.010582010582011e-06, |
|
"loss": 0.0001, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 67.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.3099887371063232, |
|
"eval_runtime": 13.6937, |
|
"eval_samples_per_second": 5.331, |
|
"eval_steps_per_second": 1.388, |
|
"step": 4828 |
|
}, |
|
{ |
|
"epoch": 68.00019047619048, |
|
"grad_norm": 0.001033710315823555, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0001, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 68.00114285714285, |
|
"grad_norm": 0.0007215305231511593, |
|
"learning_rate": 5.989417989417989e-06, |
|
"loss": 0.1713, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 68.00209523809524, |
|
"grad_norm": 0.004004966001957655, |
|
"learning_rate": 5.978835978835979e-06, |
|
"loss": 0.1816, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 68.00304761904762, |
|
"grad_norm": 0.002507429337128997, |
|
"learning_rate": 5.968253968253968e-06, |
|
"loss": 0.0, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 68.004, |
|
"grad_norm": 0.0003998406755272299, |
|
"learning_rate": 5.9576719576719576e-06, |
|
"loss": 0.0001, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 68.00495238095237, |
|
"grad_norm": 0.0010914745507761836, |
|
"learning_rate": 5.9470899470899475e-06, |
|
"loss": 0.2406, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 68.00590476190476, |
|
"grad_norm": 0.006196844857186079, |
|
"learning_rate": 5.936507936507937e-06, |
|
"loss": 0.0001, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 68.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.087306499481201, |
|
"eval_runtime": 13.5032, |
|
"eval_samples_per_second": 5.406, |
|
"eval_steps_per_second": 1.407, |
|
"step": 4899 |
|
}, |
|
{ |
|
"epoch": 69.00009523809524, |
|
"grad_norm": 0.002998295472934842, |
|
"learning_rate": 5.925925925925926e-06, |
|
"loss": 0.0001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 69.00104761904763, |
|
"grad_norm": 0.004137849435210228, |
|
"learning_rate": 5.915343915343917e-06, |
|
"loss": 0.0002, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 69.002, |
|
"grad_norm": 0.004944485612213612, |
|
"learning_rate": 5.904761904761905e-06, |
|
"loss": 0.0001, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 69.00295238095238, |
|
"grad_norm": 0.0007010090630501509, |
|
"learning_rate": 5.894179894179894e-06, |
|
"loss": 0.0, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 69.00390476190476, |
|
"grad_norm": 0.014743346720933914, |
|
"learning_rate": 5.883597883597883e-06, |
|
"loss": 0.2235, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 69.00485714285715, |
|
"grad_norm": 0.05026319622993469, |
|
"learning_rate": 5.873015873015874e-06, |
|
"loss": 0.0001, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 69.00580952380952, |
|
"grad_norm": 0.0011177206179127097, |
|
"learning_rate": 5.862433862433863e-06, |
|
"loss": 0.0229, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 69.0067619047619, |
|
"grad_norm": 0.0014997220132499933, |
|
"learning_rate": 5.8518518518518515e-06, |
|
"loss": 0.1614, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 69.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.018893003463745, |
|
"eval_runtime": 16.6669, |
|
"eval_samples_per_second": 4.38, |
|
"eval_steps_per_second": 1.14, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 70.00095238095238, |
|
"grad_norm": 0.058722980320453644, |
|
"learning_rate": 5.841269841269842e-06, |
|
"loss": 0.0001, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 70.00190476190477, |
|
"grad_norm": 0.004790253005921841, |
|
"learning_rate": 5.8306878306878314e-06, |
|
"loss": 0.0002, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 70.00285714285714, |
|
"grad_norm": 0.0236496664583683, |
|
"learning_rate": 5.820105820105821e-06, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 70.00380952380952, |
|
"grad_norm": 0.004921732936054468, |
|
"learning_rate": 5.8095238095238106e-06, |
|
"loss": 0.0039, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 70.0047619047619, |
|
"grad_norm": 0.05060591176152229, |
|
"learning_rate": 5.7989417989418e-06, |
|
"loss": 0.0001, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 70.00571428571429, |
|
"grad_norm": 0.0011126629542559385, |
|
"learning_rate": 5.788359788359789e-06, |
|
"loss": 0.2252, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 70.00666666666666, |
|
"grad_norm": 0.006434687413275242, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 70.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.515955924987793, |
|
"eval_runtime": 16.0865, |
|
"eval_samples_per_second": 4.538, |
|
"eval_steps_per_second": 1.181, |
|
"step": 5041 |
|
}, |
|
{ |
|
"epoch": 71.00085714285714, |
|
"grad_norm": 0.007736885920166969, |
|
"learning_rate": 5.767195767195768e-06, |
|
"loss": 0.0083, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 71.00180952380953, |
|
"grad_norm": 0.08464159816503525, |
|
"learning_rate": 5.756613756613757e-06, |
|
"loss": 0.0001, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 71.00276190476191, |
|
"grad_norm": 0.0017586707836017013, |
|
"learning_rate": 5.746031746031746e-06, |
|
"loss": 0.0004, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 71.00371428571428, |
|
"grad_norm": 0.0041831196285784245, |
|
"learning_rate": 5.735449735449736e-06, |
|
"loss": 0.0435, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 71.00466666666667, |
|
"grad_norm": 0.0019000729080289602, |
|
"learning_rate": 5.724867724867725e-06, |
|
"loss": 0.0002, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 71.00561904761905, |
|
"grad_norm": 0.0008430950692854822, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 71.00657142857143, |
|
"grad_norm": 0.0011215987615287304, |
|
"learning_rate": 5.7037037037037045e-06, |
|
"loss": 0.0114, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 71.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.0018436908721924, |
|
"eval_runtime": 16.1537, |
|
"eval_samples_per_second": 4.519, |
|
"eval_steps_per_second": 1.176, |
|
"step": 5112 |
|
}, |
|
{ |
|
"epoch": 72.0007619047619, |
|
"grad_norm": 0.0026962740812450647, |
|
"learning_rate": 5.693121693121694e-06, |
|
"loss": 0.2243, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 72.00171428571429, |
|
"grad_norm": 0.006148957181721926, |
|
"learning_rate": 5.682539682539683e-06, |
|
"loss": 0.0001, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 72.00266666666667, |
|
"grad_norm": 118.13774108886719, |
|
"learning_rate": 5.671957671957672e-06, |
|
"loss": 0.0047, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 72.00361904761905, |
|
"grad_norm": 0.0032365689985454082, |
|
"learning_rate": 5.661375661375662e-06, |
|
"loss": 0.0927, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 72.00457142857142, |
|
"grad_norm": 0.002246755175292492, |
|
"learning_rate": 5.650793650793651e-06, |
|
"loss": 0.0001, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 72.00552380952381, |
|
"grad_norm": 0.0006160863558761775, |
|
"learning_rate": 5.64021164021164e-06, |
|
"loss": 0.0001, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 72.00647619047619, |
|
"grad_norm": 0.001819523749873042, |
|
"learning_rate": 5.62962962962963e-06, |
|
"loss": 0.0823, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 72.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.290499210357666, |
|
"eval_runtime": 16.2562, |
|
"eval_samples_per_second": 4.491, |
|
"eval_steps_per_second": 1.169, |
|
"step": 5183 |
|
}, |
|
{ |
|
"epoch": 73.00066666666666, |
|
"grad_norm": 0.0022564074024558067, |
|
"learning_rate": 5.619047619047619e-06, |
|
"loss": 0.0027, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 73.00161904761904, |
|
"grad_norm": 0.001252816873602569, |
|
"learning_rate": 5.6084656084656084e-06, |
|
"loss": 0.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 73.00257142857143, |
|
"grad_norm": 0.0025996342301368713, |
|
"learning_rate": 5.597883597883598e-06, |
|
"loss": 0.0, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 73.00352380952381, |
|
"grad_norm": 0.010484711267054081, |
|
"learning_rate": 5.5873015873015876e-06, |
|
"loss": 0.0085, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 73.0044761904762, |
|
"grad_norm": 0.015050379559397697, |
|
"learning_rate": 5.576719576719577e-06, |
|
"loss": 0.1896, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 73.00542857142857, |
|
"grad_norm": 0.033333856612443924, |
|
"learning_rate": 5.566137566137566e-06, |
|
"loss": 0.0001, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 73.00638095238095, |
|
"grad_norm": 0.0009271789458580315, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.0001, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 73.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.2781801223754883, |
|
"eval_runtime": 19.2978, |
|
"eval_samples_per_second": 3.783, |
|
"eval_steps_per_second": 0.985, |
|
"step": 5254 |
|
}, |
|
{ |
|
"epoch": 74.00057142857143, |
|
"grad_norm": 0.0016572705935686827, |
|
"learning_rate": 5.544973544973545e-06, |
|
"loss": 0.136, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 74.0015238095238, |
|
"grad_norm": 0.009578842669725418, |
|
"learning_rate": 5.534391534391534e-06, |
|
"loss": 0.0001, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 74.00247619047619, |
|
"grad_norm": 0.0018173794960603118, |
|
"learning_rate": 5.523809523809525e-06, |
|
"loss": 0.1654, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 74.00342857142857, |
|
"grad_norm": 0.1126793846487999, |
|
"learning_rate": 5.513227513227514e-06, |
|
"loss": 0.0001, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 74.00438095238096, |
|
"grad_norm": 0.0017416345654055476, |
|
"learning_rate": 5.502645502645503e-06, |
|
"loss": 0.0003, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 74.00533333333334, |
|
"grad_norm": 0.00208139605820179, |
|
"learning_rate": 5.492063492063493e-06, |
|
"loss": 0.1975, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 74.00628571428571, |
|
"grad_norm": 0.0028318113181740046, |
|
"learning_rate": 5.481481481481482e-06, |
|
"loss": 0.0, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 74.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.4494576454162598, |
|
"eval_runtime": 16.1349, |
|
"eval_samples_per_second": 4.524, |
|
"eval_steps_per_second": 1.178, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 75.00047619047619, |
|
"grad_norm": 0.0006662168307229877, |
|
"learning_rate": 5.4708994708994715e-06, |
|
"loss": 0.0001, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 75.00142857142858, |
|
"grad_norm": 0.0007147782016545534, |
|
"learning_rate": 5.460317460317461e-06, |
|
"loss": 0.0, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 75.00238095238095, |
|
"grad_norm": 0.0011023067636415362, |
|
"learning_rate": 5.449735449735451e-06, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 75.00333333333333, |
|
"grad_norm": 0.002628614194691181, |
|
"learning_rate": 5.43915343915344e-06, |
|
"loss": 0.0, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 75.00428571428571, |
|
"grad_norm": 0.0013626012951135635, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.0002, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 75.0052380952381, |
|
"grad_norm": 0.0009591359994374216, |
|
"learning_rate": 5.417989417989419e-06, |
|
"loss": 0.0, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 75.00619047619048, |
|
"grad_norm": 2.4272632598876953, |
|
"learning_rate": 5.407407407407408e-06, |
|
"loss": 0.3044, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 75.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.441681385040283, |
|
"eval_runtime": 16.1368, |
|
"eval_samples_per_second": 4.524, |
|
"eval_steps_per_second": 1.177, |
|
"step": 5396 |
|
}, |
|
{ |
|
"epoch": 76.00038095238095, |
|
"grad_norm": 0.002158108865842223, |
|
"learning_rate": 5.396825396825397e-06, |
|
"loss": 0.003, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 76.00133333333333, |
|
"grad_norm": 0.005607745610177517, |
|
"learning_rate": 5.386243386243387e-06, |
|
"loss": 0.0, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 76.00228571428572, |
|
"grad_norm": 0.0019246706506237388, |
|
"learning_rate": 5.375661375661376e-06, |
|
"loss": 0.0, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 76.00323809523809, |
|
"grad_norm": 78.57376098632812, |
|
"learning_rate": 5.365079365079365e-06, |
|
"loss": 0.2387, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 76.00419047619047, |
|
"grad_norm": 0.011334899812936783, |
|
"learning_rate": 5.3544973544973545e-06, |
|
"loss": 0.0001, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 76.00514285714286, |
|
"grad_norm": 0.004917099606245756, |
|
"learning_rate": 5.3439153439153445e-06, |
|
"loss": 0.1924, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 76.00609523809524, |
|
"grad_norm": 0.002014393685385585, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.0, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 76.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.5167994499206543, |
|
"eval_runtime": 16.3785, |
|
"eval_samples_per_second": 4.457, |
|
"eval_steps_per_second": 1.16, |
|
"step": 5467 |
|
}, |
|
{ |
|
"epoch": 77.00028571428571, |
|
"grad_norm": 0.0003913980326615274, |
|
"learning_rate": 5.322751322751323e-06, |
|
"loss": 0.0001, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 77.0012380952381, |
|
"grad_norm": 0.0007715081446804106, |
|
"learning_rate": 5.312169312169313e-06, |
|
"loss": 0.0003, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 77.00219047619048, |
|
"grad_norm": 0.0018913410604000092, |
|
"learning_rate": 5.301587301587302e-06, |
|
"loss": 0.0001, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 77.00314285714286, |
|
"grad_norm": 0.0008994215750135481, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 77.00409523809523, |
|
"grad_norm": 0.003617028007283807, |
|
"learning_rate": 5.280423280423281e-06, |
|
"loss": 0.0001, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 77.00504761904762, |
|
"grad_norm": 0.009494593366980553, |
|
"learning_rate": 5.26984126984127e-06, |
|
"loss": 0.0463, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 77.006, |
|
"grad_norm": 0.0005333954468369484, |
|
"learning_rate": 5.259259259259259e-06, |
|
"loss": 0.0007, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 77.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.94057297706604, |
|
"eval_runtime": 16.6902, |
|
"eval_samples_per_second": 4.374, |
|
"eval_steps_per_second": 1.138, |
|
"step": 5538 |
|
}, |
|
{ |
|
"epoch": 78.00019047619048, |
|
"grad_norm": 378.4049377441406, |
|
"learning_rate": 5.2486772486772485e-06, |
|
"loss": 0.0565, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 78.00114285714285, |
|
"grad_norm": 0.0016546836122870445, |
|
"learning_rate": 5.2380952380952384e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 78.00209523809524, |
|
"grad_norm": 0.005440497305244207, |
|
"learning_rate": 5.227513227513228e-06, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 78.00304761904762, |
|
"grad_norm": 0.0026880651712417603, |
|
"learning_rate": 5.216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 78.004, |
|
"grad_norm": 0.0014611100777983665, |
|
"learning_rate": 5.2063492063492076e-06, |
|
"loss": 0.0055, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 78.00495238095237, |
|
"grad_norm": 0.0003321405383758247, |
|
"learning_rate": 5.195767195767196e-06, |
|
"loss": 0.0, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 78.00590476190476, |
|
"grad_norm": 0.0027943067252635956, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0001, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 78.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.653313636779785, |
|
"eval_runtime": 23.4163, |
|
"eval_samples_per_second": 3.117, |
|
"eval_steps_per_second": 0.811, |
|
"step": 5609 |
|
}, |
|
{ |
|
"epoch": 79.00009523809524, |
|
"grad_norm": 0.0004976940690539777, |
|
"learning_rate": 5.174603174603176e-06, |
|
"loss": 0.0001, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 79.00104761904763, |
|
"grad_norm": 0.21835973858833313, |
|
"learning_rate": 5.164021164021165e-06, |
|
"loss": 0.0001, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 79.002, |
|
"grad_norm": 0.00033309441641904414, |
|
"learning_rate": 5.153439153439154e-06, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 79.00295238095238, |
|
"grad_norm": 0.0018042289884760976, |
|
"learning_rate": 5.142857142857142e-06, |
|
"loss": 0.0, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 79.00390476190476, |
|
"grad_norm": 0.001108268043026328, |
|
"learning_rate": 5.132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 79.00485714285715, |
|
"grad_norm": 0.0016181732062250376, |
|
"learning_rate": 5.121693121693122e-06, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 79.00580952380952, |
|
"grad_norm": 0.0004302693123463541, |
|
"learning_rate": 5.1111111111111115e-06, |
|
"loss": 0.0, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 79.0067619047619, |
|
"grad_norm": 0.0002925188164226711, |
|
"learning_rate": 5.1005291005291015e-06, |
|
"loss": 0.0, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 79.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.4311904907226562, |
|
"eval_runtime": 14.4342, |
|
"eval_samples_per_second": 5.057, |
|
"eval_steps_per_second": 1.316, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 80.00095238095238, |
|
"grad_norm": 0.0005595943075604737, |
|
"learning_rate": 5.089947089947091e-06, |
|
"loss": 0.0, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 80.00190476190477, |
|
"grad_norm": 0.0006927040521986783, |
|
"learning_rate": 5.07936507936508e-06, |
|
"loss": 0.0002, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 80.00285714285714, |
|
"grad_norm": 0.0012094530975446105, |
|
"learning_rate": 5.06878306878307e-06, |
|
"loss": 0.0001, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 80.00380952380952, |
|
"grad_norm": 0.008542956784367561, |
|
"learning_rate": 5.058201058201059e-06, |
|
"loss": 0.2416, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 80.0047619047619, |
|
"grad_norm": 0.0006435521063394845, |
|
"learning_rate": 5.047619047619048e-06, |
|
"loss": 0.0107, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 80.00571428571429, |
|
"grad_norm": 7.058255195617676, |
|
"learning_rate": 5.037037037037037e-06, |
|
"loss": 0.0721, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 80.00666666666666, |
|
"grad_norm": 0.00283995782956481, |
|
"learning_rate": 5.026455026455027e-06, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 80.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.5024490356445312, |
|
"eval_runtime": 14.3641, |
|
"eval_samples_per_second": 5.082, |
|
"eval_steps_per_second": 1.323, |
|
"step": 5751 |
|
}, |
|
{ |
|
"epoch": 81.00085714285714, |
|
"grad_norm": 0.06330319494009018, |
|
"learning_rate": 5.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 81.00180952380953, |
|
"grad_norm": 0.002878734841942787, |
|
"learning_rate": 5.005291005291005e-06, |
|
"loss": 0.1835, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 81.00276190476191, |
|
"grad_norm": 0.0006428166525438428, |
|
"learning_rate": 4.9947089947089946e-06, |
|
"loss": 0.0002, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 81.00371428571428, |
|
"grad_norm": 0.0006276570493355393, |
|
"learning_rate": 4.9841269841269845e-06, |
|
"loss": 0.0, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 81.00466666666667, |
|
"grad_norm": 0.0005557615077123046, |
|
"learning_rate": 4.973544973544974e-06, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 81.00561904761905, |
|
"grad_norm": 0.004504733718931675, |
|
"learning_rate": 4.962962962962964e-06, |
|
"loss": 0.0, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 81.00657142857143, |
|
"grad_norm": 0.0007715458050370216, |
|
"learning_rate": 4.952380952380953e-06, |
|
"loss": 0.0, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 81.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.41776704788208, |
|
"eval_runtime": 14.1995, |
|
"eval_samples_per_second": 5.141, |
|
"eval_steps_per_second": 1.338, |
|
"step": 5822 |
|
}, |
|
{ |
|
"epoch": 82.0007619047619, |
|
"grad_norm": 0.0024374607019126415, |
|
"learning_rate": 4.941798941798942e-06, |
|
"loss": 0.0, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 82.00171428571429, |
|
"grad_norm": 0.0007062715594656765, |
|
"learning_rate": 4.931216931216932e-06, |
|
"loss": 0.0, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 82.00266666666667, |
|
"grad_norm": 0.0005352974403649569, |
|
"learning_rate": 4.920634920634921e-06, |
|
"loss": 0.0, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 82.00361904761905, |
|
"grad_norm": 0.0006415756652131677, |
|
"learning_rate": 4.91005291005291e-06, |
|
"loss": 0.1604, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 82.00457142857142, |
|
"grad_norm": 0.003098833141848445, |
|
"learning_rate": 4.8994708994709e-06, |
|
"loss": 0.0, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 82.00552380952381, |
|
"grad_norm": 0.0011778516927734017, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.0003, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 82.00647619047619, |
|
"grad_norm": 0.000274023215752095, |
|
"learning_rate": 4.8783068783068785e-06, |
|
"loss": 0.0, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 82.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.587204933166504, |
|
"eval_runtime": 16.0391, |
|
"eval_samples_per_second": 4.551, |
|
"eval_steps_per_second": 1.185, |
|
"step": 5893 |
|
}, |
|
{ |
|
"epoch": 83.00066666666666, |
|
"grad_norm": 0.0007703937008045614, |
|
"learning_rate": 4.867724867724868e-06, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 83.00161904761904, |
|
"grad_norm": 0.00031386056798510253, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.0, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 83.00257142857143, |
|
"grad_norm": 0.001709269592538476, |
|
"learning_rate": 4.846560846560847e-06, |
|
"loss": 0.1225, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 83.00352380952381, |
|
"grad_norm": 0.001658945926465094, |
|
"learning_rate": 4.835978835978836e-06, |
|
"loss": 0.0059, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 83.0044761904762, |
|
"grad_norm": 0.0007320309523493052, |
|
"learning_rate": 4.825396825396826e-06, |
|
"loss": 0.0, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 83.00542857142857, |
|
"grad_norm": 0.0008329296833835542, |
|
"learning_rate": 4.814814814814815e-06, |
|
"loss": 0.0001, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 83.00638095238095, |
|
"grad_norm": 0.0006591449491679668, |
|
"learning_rate": 4.804232804232805e-06, |
|
"loss": 0.0, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 83.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.027384042739868, |
|
"eval_runtime": 17.1875, |
|
"eval_samples_per_second": 4.247, |
|
"eval_steps_per_second": 1.105, |
|
"step": 5964 |
|
}, |
|
{ |
|
"epoch": 84.00057142857143, |
|
"grad_norm": 0.0015914272516965866, |
|
"learning_rate": 4.793650793650794e-06, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 84.0015238095238, |
|
"grad_norm": 0.0012222842779010534, |
|
"learning_rate": 4.783068783068783e-06, |
|
"loss": 0.0001, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 84.00247619047619, |
|
"grad_norm": 0.16174960136413574, |
|
"learning_rate": 4.772486772486773e-06, |
|
"loss": 0.0, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 84.00342857142857, |
|
"grad_norm": 0.0007423576898872852, |
|
"learning_rate": 4.761904761904762e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 84.00438095238096, |
|
"grad_norm": 0.0006728505832143128, |
|
"learning_rate": 4.7513227513227515e-06, |
|
"loss": 0.0, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 84.00533333333334, |
|
"grad_norm": 0.0012397300451993942, |
|
"learning_rate": 4.7407407407407415e-06, |
|
"loss": 0.001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 84.00628571428571, |
|
"grad_norm": 0.0008281469345092773, |
|
"learning_rate": 4.730158730158731e-06, |
|
"loss": 0.1991, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 84.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.566321611404419, |
|
"eval_runtime": 17.789, |
|
"eval_samples_per_second": 4.104, |
|
"eval_steps_per_second": 1.068, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 85.00047619047619, |
|
"grad_norm": 0.0005280555342324078, |
|
"learning_rate": 4.71957671957672e-06, |
|
"loss": 0.0002, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 85.00142857142858, |
|
"grad_norm": 0.03912334889173508, |
|
"learning_rate": 4.708994708994709e-06, |
|
"loss": 0.0, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 85.00238095238095, |
|
"grad_norm": 0.0007767456700094044, |
|
"learning_rate": 4.698412698412699e-06, |
|
"loss": 0.0, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 85.00333333333333, |
|
"grad_norm": 0.0005672441329807043, |
|
"learning_rate": 4.687830687830688e-06, |
|
"loss": 0.1355, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 85.00428571428571, |
|
"grad_norm": 0.00222939089871943, |
|
"learning_rate": 4.677248677248677e-06, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 85.0052380952381, |
|
"grad_norm": 0.000355520227458328, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 85.00619047619048, |
|
"grad_norm": 0.0005006153369322419, |
|
"learning_rate": 4.656084656084656e-06, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 85.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.6205379962921143, |
|
"eval_runtime": 16.6162, |
|
"eval_samples_per_second": 4.393, |
|
"eval_steps_per_second": 1.143, |
|
"step": 6106 |
|
}, |
|
{ |
|
"epoch": 86.00038095238095, |
|
"grad_norm": 0.000478866946650669, |
|
"learning_rate": 4.6455026455026454e-06, |
|
"loss": 0.0001, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 86.00133333333333, |
|
"grad_norm": 0.0005052104825153947, |
|
"learning_rate": 4.634920634920635e-06, |
|
"loss": 0.1825, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 86.00228571428572, |
|
"grad_norm": 0.0005532368086278439, |
|
"learning_rate": 4.6243386243386246e-06, |
|
"loss": 0.0, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 86.00323809523809, |
|
"grad_norm": 0.004795570392161608, |
|
"learning_rate": 4.6137566137566145e-06, |
|
"loss": 0.0004, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 86.00419047619047, |
|
"grad_norm": 0.00046328658936545253, |
|
"learning_rate": 4.603174603174604e-06, |
|
"loss": 0.1033, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 86.00514285714286, |
|
"grad_norm": 0.00032769489916972816, |
|
"learning_rate": 4.592592592592593e-06, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 86.00609523809524, |
|
"grad_norm": 0.000338953104801476, |
|
"learning_rate": 4.582010582010583e-06, |
|
"loss": 0.1705, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 86.0067619047619, |
|
"eval_accuracy": 0.6575342465753424, |
|
"eval_loss": 2.7275164127349854, |
|
"eval_runtime": 17.5228, |
|
"eval_samples_per_second": 4.166, |
|
"eval_steps_per_second": 1.084, |
|
"step": 6177 |
|
}, |
|
{ |
|
"epoch": 87.00028571428571, |
|
"grad_norm": 0.0006086576031520963, |
|
"learning_rate": 4.571428571428572e-06, |
|
"loss": 0.1124, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 87.0012380952381, |
|
"grad_norm": 0.0007903364603407681, |
|
"learning_rate": 4.560846560846561e-06, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 87.00219047619048, |
|
"grad_norm": 0.01424381136894226, |
|
"learning_rate": 4.55026455026455e-06, |
|
"loss": 0.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 87.00314285714286, |
|
"grad_norm": 0.0015253698220476508, |
|
"learning_rate": 4.53968253968254e-06, |
|
"loss": 0.0, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 87.00409523809523, |
|
"grad_norm": 0.0004983572289347649, |
|
"learning_rate": 4.529100529100529e-06, |
|
"loss": 0.0, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 87.00504761904762, |
|
"grad_norm": 0.0004745838523376733, |
|
"learning_rate": 4.5185185185185185e-06, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 87.006, |
|
"grad_norm": 0.00034416132257319987, |
|
"learning_rate": 4.5079365079365085e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 87.0067619047619, |
|
"eval_accuracy": 0.6438356164383562, |
|
"eval_loss": 2.9716460704803467, |
|
"eval_runtime": 13.1857, |
|
"eval_samples_per_second": 5.536, |
|
"eval_steps_per_second": 1.441, |
|
"step": 6248 |
|
}, |
|
{ |
|
"epoch": 88.00019047619048, |
|
"grad_norm": 0.0002928862813860178, |
|
"learning_rate": 4.497354497354498e-06, |
|
"loss": 0.0, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 88.00114285714285, |
|
"grad_norm": 0.0006411031354218721, |
|
"learning_rate": 4.486772486772487e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 88.00209523809524, |
|
"grad_norm": 0.0034191946033388376, |
|
"learning_rate": 4.476190476190477e-06, |
|
"loss": 0.0, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 88.00304761904762, |
|
"grad_norm": 0.0008901719120331109, |
|
"learning_rate": 4.465608465608466e-06, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 88.004, |
|
"grad_norm": 0.00043028598884120584, |
|
"learning_rate": 4.455026455026456e-06, |
|
"loss": 0.0, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 88.00495238095237, |
|
"grad_norm": 0.0006183036020956933, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 88.00590476190476, |
|
"grad_norm": 0.0029537761583924294, |
|
"learning_rate": 4.433862433862434e-06, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 88.0067619047619, |
|
"eval_accuracy": 0.6438356164383562, |
|
"eval_loss": 2.910144567489624, |
|
"eval_runtime": 13.3156, |
|
"eval_samples_per_second": 5.482, |
|
"eval_steps_per_second": 1.427, |
|
"step": 6319 |
|
}, |
|
{ |
|
"epoch": 89.00009523809524, |
|
"grad_norm": 0.001594062428921461, |
|
"learning_rate": 4.423280423280424e-06, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 89.00104761904763, |
|
"grad_norm": 0.0010166416177526116, |
|
"learning_rate": 4.412698412698413e-06, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 89.002, |
|
"grad_norm": 0.0006591346464119852, |
|
"learning_rate": 4.402116402116402e-06, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 89.00295238095238, |
|
"grad_norm": 0.00046361569548025727, |
|
"learning_rate": 4.3915343915343915e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 89.00390476190476, |
|
"grad_norm": 0.004378519020974636, |
|
"learning_rate": 4.3809523809523815e-06, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 89.00485714285715, |
|
"grad_norm": 0.0004076876793988049, |
|
"learning_rate": 4.370370370370371e-06, |
|
"loss": 0.1512, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 89.00580952380952, |
|
"grad_norm": 44.422767639160156, |
|
"learning_rate": 4.35978835978836e-06, |
|
"loss": 0.2323, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 89.0067619047619, |
|
"grad_norm": 0.004708809778094292, |
|
"learning_rate": 4.34920634920635e-06, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 89.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.4763612747192383, |
|
"eval_runtime": 13.9231, |
|
"eval_samples_per_second": 5.243, |
|
"eval_steps_per_second": 1.365, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 90.00095238095238, |
|
"grad_norm": 0.0005598313291557133, |
|
"learning_rate": 4.338624338624339e-06, |
|
"loss": 0.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 90.00190476190477, |
|
"grad_norm": 0.0034571285359561443, |
|
"learning_rate": 4.328042328042328e-06, |
|
"loss": 0.0, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 90.00285714285714, |
|
"grad_norm": 0.0009946099016815424, |
|
"learning_rate": 4.317460317460318e-06, |
|
"loss": 0.0001, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 90.00380952380952, |
|
"grad_norm": 0.00043478538282215595, |
|
"learning_rate": 4.306878306878307e-06, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 90.0047619047619, |
|
"grad_norm": 0.0013073618756607175, |
|
"learning_rate": 4.296296296296296e-06, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 90.00571428571429, |
|
"grad_norm": 0.000736343557946384, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 90.00666666666666, |
|
"grad_norm": 0.0011041646357625723, |
|
"learning_rate": 4.2751322751322754e-06, |
|
"loss": 0.0, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 90.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.5321733951568604, |
|
"eval_runtime": 15.0988, |
|
"eval_samples_per_second": 4.835, |
|
"eval_steps_per_second": 1.258, |
|
"step": 6461 |
|
}, |
|
{ |
|
"epoch": 91.00085714285714, |
|
"grad_norm": 0.0007017655298113823, |
|
"learning_rate": 4.2645502645502654e-06, |
|
"loss": 0.0, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 91.00180952380953, |
|
"grad_norm": 0.0008016679785214365, |
|
"learning_rate": 4.2539682539682546e-06, |
|
"loss": 0.0, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 91.00276190476191, |
|
"grad_norm": 0.008655794896185398, |
|
"learning_rate": 4.243386243386244e-06, |
|
"loss": 0.0001, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 91.00371428571428, |
|
"grad_norm": 0.004245663061738014, |
|
"learning_rate": 4.232804232804233e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 91.00466666666667, |
|
"grad_norm": 0.00046286502038128674, |
|
"learning_rate": 4.222222222222223e-06, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 91.00561904761905, |
|
"grad_norm": 0.0005634785629808903, |
|
"learning_rate": 4.211640211640212e-06, |
|
"loss": 0.0, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 91.00657142857143, |
|
"grad_norm": 0.009624041616916656, |
|
"learning_rate": 4.201058201058201e-06, |
|
"loss": 0.0, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 91.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.7222588062286377, |
|
"eval_runtime": 13.4391, |
|
"eval_samples_per_second": 5.432, |
|
"eval_steps_per_second": 1.414, |
|
"step": 6532 |
|
}, |
|
{ |
|
"epoch": 92.0007619047619, |
|
"grad_norm": 0.00039595706039108336, |
|
"learning_rate": 4.190476190476191e-06, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 92.00171428571429, |
|
"grad_norm": 0.0006271820166148245, |
|
"learning_rate": 4.17989417989418e-06, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 92.00266666666667, |
|
"grad_norm": 0.0009222283260896802, |
|
"learning_rate": 4.169312169312169e-06, |
|
"loss": 0.0, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 92.00361904761905, |
|
"grad_norm": 0.00042221101466566324, |
|
"learning_rate": 4.158730158730159e-06, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 92.00457142857142, |
|
"grad_norm": 0.00037234349292702973, |
|
"learning_rate": 4.1481481481481485e-06, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 92.00552380952381, |
|
"grad_norm": 0.0008514290093444288, |
|
"learning_rate": 4.137566137566138e-06, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 92.00647619047619, |
|
"grad_norm": 0.0002387171844020486, |
|
"learning_rate": 4.126984126984127e-06, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 92.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.6964991092681885, |
|
"eval_runtime": 15.9161, |
|
"eval_samples_per_second": 4.587, |
|
"eval_steps_per_second": 1.194, |
|
"step": 6603 |
|
}, |
|
{ |
|
"epoch": 93.00066666666666, |
|
"grad_norm": 0.00039790949085727334, |
|
"learning_rate": 4.116402116402117e-06, |
|
"loss": 0.0, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 93.00161904761904, |
|
"grad_norm": 0.002076992765069008, |
|
"learning_rate": 4.105820105820107e-06, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 93.00257142857143, |
|
"grad_norm": 0.0008443538681603968, |
|
"learning_rate": 4.095238095238096e-06, |
|
"loss": 0.0, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 93.00352380952381, |
|
"grad_norm": 0.0003063073963858187, |
|
"learning_rate": 4.084656084656085e-06, |
|
"loss": 0.0, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 93.0044761904762, |
|
"grad_norm": 0.0040846578776836395, |
|
"learning_rate": 4.074074074074074e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 93.00542857142857, |
|
"grad_norm": 0.002012980170547962, |
|
"learning_rate": 4.063492063492064e-06, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 93.00638095238095, |
|
"grad_norm": 0.00030123526812531054, |
|
"learning_rate": 4.052910052910053e-06, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 93.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.689643621444702, |
|
"eval_runtime": 15.4088, |
|
"eval_samples_per_second": 4.738, |
|
"eval_steps_per_second": 1.233, |
|
"step": 6674 |
|
}, |
|
{ |
|
"epoch": 94.00057142857143, |
|
"grad_norm": 0.0004771147505380213, |
|
"learning_rate": 4.042328042328042e-06, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 94.0015238095238, |
|
"grad_norm": 0.00046914530685171485, |
|
"learning_rate": 4.031746031746032e-06, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 94.00247619047619, |
|
"grad_norm": 0.0002539911074563861, |
|
"learning_rate": 4.0211640211640215e-06, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 94.00342857142857, |
|
"grad_norm": 0.0004079829959664494, |
|
"learning_rate": 4.010582010582011e-06, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 94.00438095238096, |
|
"grad_norm": 0.0004812943807337433, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 94.00533333333334, |
|
"grad_norm": 0.0009666074765846133, |
|
"learning_rate": 3.98941798941799e-06, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 94.00628571428571, |
|
"grad_norm": 0.0024105387274175882, |
|
"learning_rate": 3.978835978835979e-06, |
|
"loss": 0.0001, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 94.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.711493730545044, |
|
"eval_runtime": 17.6193, |
|
"eval_samples_per_second": 4.143, |
|
"eval_steps_per_second": 1.078, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 95.00047619047619, |
|
"grad_norm": 0.0007906637620180845, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.0007, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 95.00142857142858, |
|
"grad_norm": 0.0007153503247536719, |
|
"learning_rate": 3.957671957671958e-06, |
|
"loss": 0.0, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 95.00238095238095, |
|
"grad_norm": 0.0006736738723702729, |
|
"learning_rate": 3.947089947089948e-06, |
|
"loss": 0.0035, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 95.00333333333333, |
|
"grad_norm": 0.0005118944100104272, |
|
"learning_rate": 3.936507936507936e-06, |
|
"loss": 0.0001, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 95.00428571428571, |
|
"grad_norm": 0.00033245744998566806, |
|
"learning_rate": 3.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 95.0052380952381, |
|
"grad_norm": 0.00025822161114774644, |
|
"learning_rate": 3.9153439153439155e-06, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 95.00619047619048, |
|
"grad_norm": 0.0016400256427004933, |
|
"learning_rate": 3.9047619047619055e-06, |
|
"loss": 0.0, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 95.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.612645149230957, |
|
"eval_runtime": 13.4682, |
|
"eval_samples_per_second": 5.42, |
|
"eval_steps_per_second": 1.411, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 96.00038095238095, |
|
"grad_norm": 0.0007826865767128766, |
|
"learning_rate": 3.894179894179895e-06, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 96.00133333333333, |
|
"grad_norm": 0.001356912194751203, |
|
"learning_rate": 3.883597883597884e-06, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 96.00228571428572, |
|
"grad_norm": 0.0005574782844632864, |
|
"learning_rate": 3.873015873015874e-06, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 96.00323809523809, |
|
"grad_norm": 0.001946461503393948, |
|
"learning_rate": 3.862433862433863e-06, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 96.00419047619047, |
|
"grad_norm": 0.0023338485043495893, |
|
"learning_rate": 3.851851851851852e-06, |
|
"loss": 0.0, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 96.00514285714286, |
|
"grad_norm": 0.000405468133976683, |
|
"learning_rate": 3.841269841269842e-06, |
|
"loss": 0.0, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 96.00609523809524, |
|
"grad_norm": 0.00028908386593684554, |
|
"learning_rate": 3.830687830687831e-06, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 96.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.6572108268737793, |
|
"eval_runtime": 14.7571, |
|
"eval_samples_per_second": 4.947, |
|
"eval_steps_per_second": 1.288, |
|
"step": 6887 |
|
}, |
|
{ |
|
"epoch": 97.00028571428571, |
|
"grad_norm": 0.0006389078916981816, |
|
"learning_rate": 3.82010582010582e-06, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 97.0012380952381, |
|
"grad_norm": 0.0009557904559187591, |
|
"learning_rate": 3.80952380952381e-06, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 97.00219047619048, |
|
"grad_norm": 0.00033472280483692884, |
|
"learning_rate": 3.7989417989417994e-06, |
|
"loss": 0.0, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 97.00314285714286, |
|
"grad_norm": 0.0003657563356682658, |
|
"learning_rate": 3.788359788359789e-06, |
|
"loss": 0.0, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 97.00409523809523, |
|
"grad_norm": 0.0006084573687985539, |
|
"learning_rate": 3.777777777777778e-06, |
|
"loss": 0.0, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 97.00504761904762, |
|
"grad_norm": 0.0012074554106220603, |
|
"learning_rate": 3.7671957671957676e-06, |
|
"loss": 0.0, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 97.006, |
|
"grad_norm": 0.0017749678809195757, |
|
"learning_rate": 3.7566137566137568e-06, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 97.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.70671010017395, |
|
"eval_runtime": 14.4437, |
|
"eval_samples_per_second": 5.054, |
|
"eval_steps_per_second": 1.315, |
|
"step": 6958 |
|
}, |
|
{ |
|
"epoch": 98.00019047619048, |
|
"grad_norm": 0.0003391267382539809, |
|
"learning_rate": 3.7460317460317463e-06, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 98.00114285714285, |
|
"grad_norm": 0.0009979301830753684, |
|
"learning_rate": 3.735449735449736e-06, |
|
"loss": 0.0061, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 98.00209523809524, |
|
"grad_norm": 0.0004160029347985983, |
|
"learning_rate": 3.724867724867725e-06, |
|
"loss": 0.031, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 98.00304761904762, |
|
"grad_norm": 0.0004707147309090942, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.0, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 98.004, |
|
"grad_norm": 0.00035539219970814884, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 98.00495238095237, |
|
"grad_norm": 0.00016932585276663303, |
|
"learning_rate": 3.6931216931216933e-06, |
|
"loss": 0.0, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 98.00590476190476, |
|
"grad_norm": 0.0003616622416302562, |
|
"learning_rate": 3.6825396825396833e-06, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 98.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.0454657077789307, |
|
"eval_runtime": 14.1374, |
|
"eval_samples_per_second": 5.164, |
|
"eval_steps_per_second": 1.344, |
|
"step": 7029 |
|
}, |
|
{ |
|
"epoch": 99.00009523809524, |
|
"grad_norm": 0.0021474002860486507, |
|
"learning_rate": 3.671957671957672e-06, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 99.00104761904763, |
|
"grad_norm": 0.00020825969113502651, |
|
"learning_rate": 3.661375661375662e-06, |
|
"loss": 0.203, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 99.002, |
|
"grad_norm": 0.00019286935275886208, |
|
"learning_rate": 3.6507936507936507e-06, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 99.00295238095238, |
|
"grad_norm": 0.004486401099711657, |
|
"learning_rate": 3.6402116402116407e-06, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 99.00390476190476, |
|
"grad_norm": 0.005953132174909115, |
|
"learning_rate": 3.6296296296296302e-06, |
|
"loss": 0.2312, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 99.00485714285715, |
|
"grad_norm": 0.004665942396968603, |
|
"learning_rate": 3.6190476190476194e-06, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 99.00580952380952, |
|
"grad_norm": 0.001427503302693367, |
|
"learning_rate": 3.608465608465609e-06, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 99.0067619047619, |
|
"grad_norm": 0.0008896868675947189, |
|
"learning_rate": 3.597883597883598e-06, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 99.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.809708595275879, |
|
"eval_runtime": 17.4402, |
|
"eval_samples_per_second": 4.186, |
|
"eval_steps_per_second": 1.089, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 100.00095238095238, |
|
"grad_norm": 0.0008946351008489728, |
|
"learning_rate": 3.5873015873015877e-06, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 100.00190476190477, |
|
"grad_norm": 0.0008274571737274528, |
|
"learning_rate": 3.5767195767195772e-06, |
|
"loss": 0.0, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 100.00285714285714, |
|
"grad_norm": 0.0006609994452446699, |
|
"learning_rate": 3.5661375661375664e-06, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 100.00380952380952, |
|
"grad_norm": 0.0006090060342103243, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 100.0047619047619, |
|
"grad_norm": 0.0004244809097144753, |
|
"learning_rate": 3.544973544973545e-06, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 100.00571428571429, |
|
"grad_norm": 0.00048318656627088785, |
|
"learning_rate": 3.5343915343915346e-06, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 100.00666666666666, |
|
"grad_norm": 0.0022462387569248676, |
|
"learning_rate": 3.523809523809524e-06, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 100.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.856843948364258, |
|
"eval_runtime": 16.8499, |
|
"eval_samples_per_second": 4.332, |
|
"eval_steps_per_second": 1.128, |
|
"step": 7171 |
|
}, |
|
{ |
|
"epoch": 101.00085714285714, |
|
"grad_norm": 0.00037937192246317863, |
|
"learning_rate": 3.5132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 101.00180952380953, |
|
"grad_norm": 0.00037272131885401905, |
|
"learning_rate": 3.502645502645503e-06, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 101.00276190476191, |
|
"grad_norm": 0.000506947748363018, |
|
"learning_rate": 3.492063492063492e-06, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 101.00371428571428, |
|
"grad_norm": 0.0011173028033226728, |
|
"learning_rate": 3.481481481481482e-06, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 101.00466666666667, |
|
"grad_norm": 0.0008309134282171726, |
|
"learning_rate": 3.4708994708994716e-06, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 101.00561904761905, |
|
"grad_norm": 0.00031402852619066834, |
|
"learning_rate": 3.4603174603174607e-06, |
|
"loss": 0.0, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 101.00657142857143, |
|
"grad_norm": 0.000444849458290264, |
|
"learning_rate": 3.4497354497354503e-06, |
|
"loss": 0.0, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 101.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.9188148975372314, |
|
"eval_runtime": 17.3074, |
|
"eval_samples_per_second": 4.218, |
|
"eval_steps_per_second": 1.098, |
|
"step": 7242 |
|
}, |
|
{ |
|
"epoch": 102.0007619047619, |
|
"grad_norm": 0.001609991304576397, |
|
"learning_rate": 3.4391534391534394e-06, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 102.00171428571429, |
|
"grad_norm": 0.00038633521762676537, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.0, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 102.00266666666667, |
|
"grad_norm": 0.0011818393832072616, |
|
"learning_rate": 3.4179894179894185e-06, |
|
"loss": 0.0, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 102.00361904761905, |
|
"grad_norm": 0.000522238842677325, |
|
"learning_rate": 3.4074074074074077e-06, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 102.00457142857142, |
|
"grad_norm": 0.0017618115525692701, |
|
"learning_rate": 3.3968253968253972e-06, |
|
"loss": 0.0, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 102.00552380952381, |
|
"grad_norm": 0.0009881546720862389, |
|
"learning_rate": 3.3862433862433864e-06, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 102.00647619047619, |
|
"grad_norm": 0.0009878567652776837, |
|
"learning_rate": 3.375661375661376e-06, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 102.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.967825174331665, |
|
"eval_runtime": 17.7063, |
|
"eval_samples_per_second": 4.123, |
|
"eval_steps_per_second": 1.073, |
|
"step": 7313 |
|
}, |
|
{ |
|
"epoch": 103.00066666666666, |
|
"grad_norm": 0.0013936512405052781, |
|
"learning_rate": 3.3650793650793655e-06, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 103.00161904761904, |
|
"grad_norm": 0.021675368770956993, |
|
"learning_rate": 3.3544973544973546e-06, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 103.00257142857143, |
|
"grad_norm": 0.00028048918466083705, |
|
"learning_rate": 3.343915343915344e-06, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 103.00352380952381, |
|
"grad_norm": 0.0011989453341811895, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 103.0044761904762, |
|
"grad_norm": 0.004449484869837761, |
|
"learning_rate": 3.322751322751323e-06, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 103.00542857142857, |
|
"grad_norm": 0.00032475724583491683, |
|
"learning_rate": 3.312169312169313e-06, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 103.00638095238095, |
|
"grad_norm": 0.00039636989822611213, |
|
"learning_rate": 3.3015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 103.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.0187313556671143, |
|
"eval_runtime": 14.9913, |
|
"eval_samples_per_second": 4.869, |
|
"eval_steps_per_second": 1.267, |
|
"step": 7384 |
|
}, |
|
{ |
|
"epoch": 104.00057142857143, |
|
"grad_norm": 0.00029685118352063, |
|
"learning_rate": 3.2910052910052916e-06, |
|
"loss": 0.0, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 104.0015238095238, |
|
"grad_norm": 0.00016953314479906112, |
|
"learning_rate": 3.2804232804232807e-06, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 104.00247619047619, |
|
"grad_norm": 0.0006629812996834517, |
|
"learning_rate": 3.2698412698412703e-06, |
|
"loss": 0.0, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 104.00342857142857, |
|
"grad_norm": 0.00020793879230041057, |
|
"learning_rate": 3.25925925925926e-06, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 104.00438095238096, |
|
"grad_norm": 0.00029682304011657834, |
|
"learning_rate": 3.248677248677249e-06, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 104.00533333333334, |
|
"grad_norm": 0.000494521576911211, |
|
"learning_rate": 3.2380952380952385e-06, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 104.00628571428571, |
|
"grad_norm": 0.0015921973390504718, |
|
"learning_rate": 3.2275132275132277e-06, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 104.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.042276620864868, |
|
"eval_runtime": 14.2245, |
|
"eval_samples_per_second": 5.132, |
|
"eval_steps_per_second": 1.336, |
|
"step": 7455 |
|
}, |
|
{ |
|
"epoch": 105.00047619047619, |
|
"grad_norm": 0.00046305853174999356, |
|
"learning_rate": 3.2169312169312172e-06, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 105.00142857142858, |
|
"grad_norm": 0.0002641775063239038, |
|
"learning_rate": 3.206349206349207e-06, |
|
"loss": 0.0, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 105.00238095238095, |
|
"grad_norm": 0.00023569005134049803, |
|
"learning_rate": 3.195767195767196e-06, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 105.00333333333333, |
|
"grad_norm": 0.0009397692629136145, |
|
"learning_rate": 3.1851851851851855e-06, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 105.00428571428571, |
|
"grad_norm": 0.000642178812995553, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 105.0052380952381, |
|
"grad_norm": 0.0007158418302424252, |
|
"learning_rate": 3.164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 105.00619047619048, |
|
"grad_norm": 0.0005456962389871478, |
|
"learning_rate": 3.1534391534391538e-06, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 105.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.0591580867767334, |
|
"eval_runtime": 14.8193, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 1.282, |
|
"step": 7526 |
|
}, |
|
{ |
|
"epoch": 106.00038095238095, |
|
"grad_norm": 0.0004314870457164943, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 106.00133333333333, |
|
"grad_norm": 0.00031960938940756023, |
|
"learning_rate": 3.132275132275133e-06, |
|
"loss": 0.0, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 106.00228571428572, |
|
"grad_norm": 0.002196405315771699, |
|
"learning_rate": 3.1216931216931216e-06, |
|
"loss": 0.0, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 106.00323809523809, |
|
"grad_norm": 0.0007430929690599442, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 106.00419047619047, |
|
"grad_norm": 0.013663827441632748, |
|
"learning_rate": 3.100529100529101e-06, |
|
"loss": 0.0, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 106.00514285714286, |
|
"grad_norm": 0.00045246328227221966, |
|
"learning_rate": 3.0899470899470903e-06, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 106.00609523809524, |
|
"grad_norm": 0.00018808837921824306, |
|
"learning_rate": 3.07936507936508e-06, |
|
"loss": 0.0, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 106.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.0763936042785645, |
|
"eval_runtime": 16.1963, |
|
"eval_samples_per_second": 4.507, |
|
"eval_steps_per_second": 1.173, |
|
"step": 7597 |
|
}, |
|
{ |
|
"epoch": 107.00028571428571, |
|
"grad_norm": 0.00047636666567996144, |
|
"learning_rate": 3.068783068783069e-06, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 107.0012380952381, |
|
"grad_norm": 0.0005603940808214247, |
|
"learning_rate": 3.0582010582010585e-06, |
|
"loss": 0.0, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 107.00219047619048, |
|
"grad_norm": 0.00046778249088674784, |
|
"learning_rate": 3.047619047619048e-06, |
|
"loss": 0.0, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 107.00314285714286, |
|
"grad_norm": 0.003669192548841238, |
|
"learning_rate": 3.0370370370370372e-06, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 107.00409523809523, |
|
"grad_norm": 0.002282995730638504, |
|
"learning_rate": 3.026455026455027e-06, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 107.00504761904762, |
|
"grad_norm": 0.0003627469704952091, |
|
"learning_rate": 3.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 107.006, |
|
"grad_norm": 0.00038858724292367697, |
|
"learning_rate": 3.0052910052910055e-06, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 107.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.094923973083496, |
|
"eval_runtime": 13.0162, |
|
"eval_samples_per_second": 5.608, |
|
"eval_steps_per_second": 1.46, |
|
"step": 7668 |
|
}, |
|
{ |
|
"epoch": 108.00019047619048, |
|
"grad_norm": 0.0007781111053191125, |
|
"learning_rate": 2.9947089947089946e-06, |
|
"loss": 0.0, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 108.00114285714285, |
|
"grad_norm": 0.00021683322847820818, |
|
"learning_rate": 2.984126984126984e-06, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 108.00209523809524, |
|
"grad_norm": 0.007576378993690014, |
|
"learning_rate": 2.9735449735449738e-06, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 108.00304761904762, |
|
"grad_norm": 0.0002780180366244167, |
|
"learning_rate": 2.962962962962963e-06, |
|
"loss": 0.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 108.004, |
|
"grad_norm": 0.004254285246133804, |
|
"learning_rate": 2.9523809523809525e-06, |
|
"loss": 0.0, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 108.00495238095237, |
|
"grad_norm": 0.0005138629348948598, |
|
"learning_rate": 2.9417989417989416e-06, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 108.00590476190476, |
|
"grad_norm": 0.00029949008603580296, |
|
"learning_rate": 2.9312169312169316e-06, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 108.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.125063419342041, |
|
"eval_runtime": 13.0857, |
|
"eval_samples_per_second": 5.579, |
|
"eval_steps_per_second": 1.452, |
|
"step": 7739 |
|
}, |
|
{ |
|
"epoch": 109.00009523809524, |
|
"grad_norm": 0.0006573015125468373, |
|
"learning_rate": 2.920634920634921e-06, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 109.00104761904763, |
|
"grad_norm": 0.0008629634394310415, |
|
"learning_rate": 2.9100529100529103e-06, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 109.002, |
|
"grad_norm": 0.0003830840578302741, |
|
"learning_rate": 2.8994708994709e-06, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 109.00295238095238, |
|
"grad_norm": 0.0009122428018599749, |
|
"learning_rate": 2.888888888888889e-06, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 109.00390476190476, |
|
"grad_norm": 0.00022861312027089298, |
|
"learning_rate": 2.8783068783068786e-06, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 109.00485714285715, |
|
"grad_norm": 0.0005911447224207222, |
|
"learning_rate": 2.867724867724868e-06, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 109.00580952380952, |
|
"grad_norm": 0.00029654608806595206, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 109.0067619047619, |
|
"grad_norm": 0.00019855661957990378, |
|
"learning_rate": 2.846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 109.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 3.1508212089538574, |
|
"eval_runtime": 13.2729, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.431, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 110.00095238095238, |
|
"grad_norm": 0.00021638018370140344, |
|
"learning_rate": 2.835978835978836e-06, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 110.00190476190477, |
|
"grad_norm": 0.00039808437577448785, |
|
"learning_rate": 2.8253968253968255e-06, |
|
"loss": 0.0, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 110.00285714285714, |
|
"grad_norm": 0.00034315523225814104, |
|
"learning_rate": 2.814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 110.00380952380952, |
|
"grad_norm": 0.00046854885295033455, |
|
"learning_rate": 2.8042328042328042e-06, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 110.0047619047619, |
|
"grad_norm": 0.00020149444753769785, |
|
"learning_rate": 2.7936507936507938e-06, |
|
"loss": 0.1539, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 110.00571428571429, |
|
"grad_norm": 0.0013558556092903018, |
|
"learning_rate": 2.783068783068783e-06, |
|
"loss": 0.0021, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 110.00666666666666, |
|
"grad_norm": 0.000865154608618468, |
|
"learning_rate": 2.7724867724867725e-06, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 110.0067619047619, |
|
"eval_accuracy": 0.6986301369863014, |
|
"eval_loss": 2.7330446243286133, |
|
"eval_runtime": 13.1533, |
|
"eval_samples_per_second": 5.55, |
|
"eval_steps_per_second": 1.445, |
|
"step": 7881 |
|
}, |
|
{ |
|
"epoch": 111.00085714285714, |
|
"grad_norm": 0.0007956172921694815, |
|
"learning_rate": 2.7619047619047625e-06, |
|
"loss": 0.0, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 111.00180952380953, |
|
"grad_norm": 0.001300819218158722, |
|
"learning_rate": 2.7513227513227516e-06, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 111.00276190476191, |
|
"grad_norm": 0.0003041352319996804, |
|
"learning_rate": 2.740740740740741e-06, |
|
"loss": 0.0, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 111.00371428571428, |
|
"grad_norm": 0.00022135638573672622, |
|
"learning_rate": 2.7301587301587303e-06, |
|
"loss": 0.0, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 111.00466666666667, |
|
"grad_norm": 0.0002552993828430772, |
|
"learning_rate": 2.71957671957672e-06, |
|
"loss": 0.0, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 111.00561904761905, |
|
"grad_norm": 0.00045482220593839884, |
|
"learning_rate": 2.7089947089947094e-06, |
|
"loss": 0.0, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 111.00657142857143, |
|
"grad_norm": 0.00021129060769453645, |
|
"learning_rate": 2.6984126984126986e-06, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 111.0067619047619, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 2.5366127490997314, |
|
"eval_runtime": 13.0404, |
|
"eval_samples_per_second": 5.598, |
|
"eval_steps_per_second": 1.457, |
|
"step": 7952 |
|
}, |
|
{ |
|
"epoch": 112.0007619047619, |
|
"grad_norm": 0.0002526868775021285, |
|
"learning_rate": 2.687830687830688e-06, |
|
"loss": 0.0, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 112.00171428571429, |
|
"grad_norm": 0.0002749340783338994, |
|
"learning_rate": 2.6772486772486773e-06, |
|
"loss": 0.0, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 112.00266666666667, |
|
"grad_norm": 0.00016855007561389357, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 112.00361904761905, |
|
"grad_norm": 29.284297943115234, |
|
"learning_rate": 2.6560846560846564e-06, |
|
"loss": 0.0011, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 112.00457142857142, |
|
"grad_norm": 0.009005044586956501, |
|
"learning_rate": 2.6455026455026455e-06, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 112.00552380952381, |
|
"grad_norm": 0.002663363702595234, |
|
"learning_rate": 2.634920634920635e-06, |
|
"loss": 0.0001, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 112.00647619047619, |
|
"grad_norm": 0.0003704349510371685, |
|
"learning_rate": 2.6243386243386242e-06, |
|
"loss": 0.0, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 112.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.791083574295044, |
|
"eval_runtime": 13.3463, |
|
"eval_samples_per_second": 5.47, |
|
"eval_steps_per_second": 1.424, |
|
"step": 8023 |
|
}, |
|
{ |
|
"epoch": 113.00066666666666, |
|
"grad_norm": 0.002777799731120467, |
|
"learning_rate": 2.613756613756614e-06, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 113.00161904761904, |
|
"grad_norm": 0.00025223082047887146, |
|
"learning_rate": 2.6031746031746038e-06, |
|
"loss": 0.0, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 113.00257142857143, |
|
"grad_norm": 0.00021542073227465153, |
|
"learning_rate": 2.5925925925925925e-06, |
|
"loss": 0.0, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 113.00352380952381, |
|
"grad_norm": 0.00017471532919444144, |
|
"learning_rate": 2.5820105820105825e-06, |
|
"loss": 0.0, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 113.0044761904762, |
|
"grad_norm": 0.000242567082750611, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.0, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 113.00542857142857, |
|
"grad_norm": 0.00022389466175809503, |
|
"learning_rate": 2.560846560846561e-06, |
|
"loss": 0.0, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 113.00638095238095, |
|
"grad_norm": 0.0001458414481021464, |
|
"learning_rate": 2.5502645502645507e-06, |
|
"loss": 0.0, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 113.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.6361753940582275, |
|
"eval_runtime": 13.4797, |
|
"eval_samples_per_second": 5.416, |
|
"eval_steps_per_second": 1.41, |
|
"step": 8094 |
|
}, |
|
{ |
|
"epoch": 114.00057142857143, |
|
"grad_norm": 0.0007376170833595097, |
|
"learning_rate": 2.53968253968254e-06, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 114.0015238095238, |
|
"grad_norm": 0.0005933254724368453, |
|
"learning_rate": 2.5291005291005294e-06, |
|
"loss": 0.0, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 114.00247619047619, |
|
"grad_norm": 0.0005948114558123052, |
|
"learning_rate": 2.5185185185185186e-06, |
|
"loss": 0.1729, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 114.00342857142857, |
|
"grad_norm": 0.0013701848220080137, |
|
"learning_rate": 2.507936507936508e-06, |
|
"loss": 0.2449, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 114.00438095238096, |
|
"grad_norm": 0.0013559028739109635, |
|
"learning_rate": 2.4973544973544973e-06, |
|
"loss": 0.0, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 114.00533333333334, |
|
"grad_norm": 25.319177627563477, |
|
"learning_rate": 2.486772486772487e-06, |
|
"loss": 0.001, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 114.00628571428571, |
|
"grad_norm": 0.0011747395619750023, |
|
"learning_rate": 2.4761904761904764e-06, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 114.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.4666783809661865, |
|
"eval_runtime": 13.5501, |
|
"eval_samples_per_second": 5.387, |
|
"eval_steps_per_second": 1.402, |
|
"step": 8165 |
|
}, |
|
{ |
|
"epoch": 115.00047619047619, |
|
"grad_norm": 0.0002828662982210517, |
|
"learning_rate": 2.465608465608466e-06, |
|
"loss": 0.0, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 115.00142857142858, |
|
"grad_norm": 0.0008794226450845599, |
|
"learning_rate": 2.455026455026455e-06, |
|
"loss": 0.0, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 115.00238095238095, |
|
"grad_norm": 0.001528170658275485, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.0, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 115.00333333333333, |
|
"grad_norm": 0.000787918281275779, |
|
"learning_rate": 2.433862433862434e-06, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 115.00428571428571, |
|
"grad_norm": 0.001971907913684845, |
|
"learning_rate": 2.4232804232804234e-06, |
|
"loss": 0.0, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 115.0052380952381, |
|
"grad_norm": 0.00044486342812888324, |
|
"learning_rate": 2.412698412698413e-06, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 115.00619047619048, |
|
"grad_norm": 0.00047971829189918935, |
|
"learning_rate": 2.4021164021164025e-06, |
|
"loss": 0.0, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 115.0067619047619, |
|
"eval_accuracy": 0.684931506849315, |
|
"eval_loss": 2.524650812149048, |
|
"eval_runtime": 13.0994, |
|
"eval_samples_per_second": 5.573, |
|
"eval_steps_per_second": 1.45, |
|
"step": 8236 |
|
}, |
|
{ |
|
"epoch": 116.00038095238095, |
|
"grad_norm": 0.000269345473498106, |
|
"learning_rate": 2.3915343915343916e-06, |
|
"loss": 0.0, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 116.00133333333333, |
|
"grad_norm": 0.000654588162433356, |
|
"learning_rate": 2.380952380952381e-06, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 116.00228571428572, |
|
"grad_norm": 0.002269661519676447, |
|
"learning_rate": 2.3703703703703707e-06, |
|
"loss": 0.0, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 116.00323809523809, |
|
"grad_norm": 0.00038912022137083113, |
|
"learning_rate": 2.35978835978836e-06, |
|
"loss": 0.0, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 116.00419047619047, |
|
"grad_norm": 0.0040027485229074955, |
|
"learning_rate": 2.3492063492063494e-06, |
|
"loss": 0.0, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 116.00514285714286, |
|
"grad_norm": 0.002949787536635995, |
|
"learning_rate": 2.3386243386243386e-06, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 116.00609523809524, |
|
"grad_norm": 0.0003639089991338551, |
|
"learning_rate": 2.328042328042328e-06, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 116.0067619047619, |
|
"eval_accuracy": 0.6712328767123288, |
|
"eval_loss": 2.615229845046997, |
|
"eval_runtime": 13.0724, |
|
"eval_samples_per_second": 5.584, |
|
"eval_steps_per_second": 1.453, |
|
"step": 8307 |
|
}, |
|
{ |
|
"epoch": 117.00028571428571, |
|
"grad_norm": 0.0006976649747230113, |
|
"learning_rate": 2.3174603174603177e-06, |
|
"loss": 0.0, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 117.0012380952381, |
|
"grad_norm": 0.0008079927647486329, |
|
"learning_rate": 2.3068783068783073e-06, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 117.00219047619048, |
|
"grad_norm": 0.0005299202166497707, |
|
"learning_rate": 2.2962962962962964e-06, |
|
"loss": 0.0001, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 117.00314285714286, |
|
"grad_norm": 0.0022638612426817417, |
|
"learning_rate": 2.285714285714286e-06, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 117.00409523809523, |
|
"grad_norm": 0.0013422874035313725, |
|
"learning_rate": 2.275132275132275e-06, |
|
"loss": 0.0001, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 117.00504761904762, |
|
"grad_norm": 0.0006376210367307067, |
|
"learning_rate": 2.2645502645502647e-06, |
|
"loss": 0.0, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 117.006, |
|
"grad_norm": 0.001159311388619244, |
|
"learning_rate": 2.2539682539682542e-06, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 117.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.6152660846710205, |
|
"eval_runtime": 13.0967, |
|
"eval_samples_per_second": 5.574, |
|
"eval_steps_per_second": 1.451, |
|
"step": 8378 |
|
}, |
|
{ |
|
"epoch": 118.00019047619048, |
|
"grad_norm": 0.00016380433226004243, |
|
"learning_rate": 2.2433862433862434e-06, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 118.00114285714285, |
|
"grad_norm": 0.0003558326861821115, |
|
"learning_rate": 2.232804232804233e-06, |
|
"loss": 0.0, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 118.00209523809524, |
|
"grad_norm": 0.0002517969987820834, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0053, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 118.00304761904762, |
|
"grad_norm": 0.00020131978089921176, |
|
"learning_rate": 2.211640211640212e-06, |
|
"loss": 0.0003, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 118.004, |
|
"grad_norm": 0.0005578813143074512, |
|
"learning_rate": 2.201058201058201e-06, |
|
"loss": 0.0, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 118.00495238095237, |
|
"grad_norm": 0.00020661774033214897, |
|
"learning_rate": 2.1904761904761908e-06, |
|
"loss": 0.0, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 118.00590476190476, |
|
"grad_norm": 0.0007093061576597393, |
|
"learning_rate": 2.17989417989418e-06, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 118.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.5004801750183105, |
|
"eval_runtime": 13.2434, |
|
"eval_samples_per_second": 5.512, |
|
"eval_steps_per_second": 1.435, |
|
"step": 8449 |
|
}, |
|
{ |
|
"epoch": 119.00009523809524, |
|
"grad_norm": 0.0013267587637528777, |
|
"learning_rate": 2.1693121693121695e-06, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 119.00104761904763, |
|
"grad_norm": 0.00019657477969303727, |
|
"learning_rate": 2.158730158730159e-06, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 119.002, |
|
"grad_norm": 0.00021630125411320478, |
|
"learning_rate": 2.148148148148148e-06, |
|
"loss": 0.0, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 119.00295238095238, |
|
"grad_norm": 0.0013466409873217344, |
|
"learning_rate": 2.1375661375661377e-06, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 119.00390476190476, |
|
"grad_norm": 0.0003468025824986398, |
|
"learning_rate": 2.1269841269841273e-06, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 119.00485714285715, |
|
"grad_norm": 0.00021876097889617085, |
|
"learning_rate": 2.1164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 119.00580952380952, |
|
"grad_norm": 0.0002601834712550044, |
|
"learning_rate": 2.105820105820106e-06, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 119.0067619047619, |
|
"grad_norm": 9.906197374220937e-05, |
|
"learning_rate": 2.0952380952380955e-06, |
|
"loss": 0.0, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 119.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.5095863342285156, |
|
"eval_runtime": 13.478, |
|
"eval_samples_per_second": 5.416, |
|
"eval_steps_per_second": 1.41, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 120.00095238095238, |
|
"grad_norm": 0.0007687858887948096, |
|
"learning_rate": 2.0846560846560847e-06, |
|
"loss": 0.0, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 120.00190476190477, |
|
"grad_norm": 0.0005936938687227666, |
|
"learning_rate": 2.0740740740740742e-06, |
|
"loss": 0.0, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 120.00285714285714, |
|
"grad_norm": 0.0006451211520470679, |
|
"learning_rate": 2.0634920634920634e-06, |
|
"loss": 0.0, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 120.00380952380952, |
|
"grad_norm": 0.00022526290558744222, |
|
"learning_rate": 2.0529100529100534e-06, |
|
"loss": 0.0, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 120.0047619047619, |
|
"grad_norm": 0.0002888664021156728, |
|
"learning_rate": 2.0423280423280425e-06, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 120.00571428571429, |
|
"grad_norm": 0.00027360659441910684, |
|
"learning_rate": 2.031746031746032e-06, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 120.00666666666666, |
|
"grad_norm": 0.00036791572347283363, |
|
"learning_rate": 2.021164021164021e-06, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 120.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.5173282623291016, |
|
"eval_runtime": 20.6118, |
|
"eval_samples_per_second": 3.542, |
|
"eval_steps_per_second": 0.922, |
|
"step": 8591 |
|
}, |
|
{ |
|
"epoch": 121.00085714285714, |
|
"grad_norm": 0.0019504308002069592, |
|
"learning_rate": 2.0105820105820108e-06, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 121.00180952380953, |
|
"grad_norm": 0.00012742081889882684, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 121.00276190476191, |
|
"grad_norm": 0.00021107718930579722, |
|
"learning_rate": 1.9894179894179895e-06, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 121.00371428571428, |
|
"grad_norm": 0.000727532955352217, |
|
"learning_rate": 1.978835978835979e-06, |
|
"loss": 0.0, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 121.00466666666667, |
|
"grad_norm": 0.0003416416293475777, |
|
"learning_rate": 1.968253968253968e-06, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 121.00561904761905, |
|
"grad_norm": 0.00017602296429686248, |
|
"learning_rate": 1.9576719576719577e-06, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 121.00657142857143, |
|
"grad_norm": 0.0001626180310267955, |
|
"learning_rate": 1.9470899470899473e-06, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 121.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.522595167160034, |
|
"eval_runtime": 15.3165, |
|
"eval_samples_per_second": 4.766, |
|
"eval_steps_per_second": 1.24, |
|
"step": 8662 |
|
}, |
|
{ |
|
"epoch": 122.0007619047619, |
|
"grad_norm": 0.00031321553979068995, |
|
"learning_rate": 1.936507936507937e-06, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 122.00171428571429, |
|
"grad_norm": 0.00030723740928806365, |
|
"learning_rate": 1.925925925925926e-06, |
|
"loss": 0.0, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 122.00266666666667, |
|
"grad_norm": 0.00022486411035060883, |
|
"learning_rate": 1.9153439153439156e-06, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 122.00361904761905, |
|
"grad_norm": 0.0005100387497805059, |
|
"learning_rate": 1.904761904761905e-06, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 122.00457142857142, |
|
"grad_norm": 0.0001284780155401677, |
|
"learning_rate": 1.8941798941798945e-06, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 122.00552380952381, |
|
"grad_norm": 0.0003152410499751568, |
|
"learning_rate": 1.8835978835978838e-06, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 122.00647619047619, |
|
"grad_norm": 0.0002786237746477127, |
|
"learning_rate": 1.8730158730158732e-06, |
|
"loss": 0.0, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 122.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.531287670135498, |
|
"eval_runtime": 15.1054, |
|
"eval_samples_per_second": 4.833, |
|
"eval_steps_per_second": 1.258, |
|
"step": 8733 |
|
}, |
|
{ |
|
"epoch": 123.00066666666666, |
|
"grad_norm": 0.0002176911075366661, |
|
"learning_rate": 1.8624338624338625e-06, |
|
"loss": 0.0894, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 123.00161904761904, |
|
"grad_norm": 0.00015445193275809288, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 123.00257142857143, |
|
"grad_norm": 0.00014010295853950083, |
|
"learning_rate": 1.8412698412698416e-06, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 123.00352380952381, |
|
"grad_norm": 0.0015225009992718697, |
|
"learning_rate": 1.830687830687831e-06, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 123.0044761904762, |
|
"grad_norm": 0.0009147560340352356, |
|
"learning_rate": 1.8201058201058203e-06, |
|
"loss": 0.0, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 123.00542857142857, |
|
"grad_norm": 0.0033724787645041943, |
|
"learning_rate": 1.8095238095238097e-06, |
|
"loss": 0.0, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 123.00638095238095, |
|
"grad_norm": 0.0007049464038573205, |
|
"learning_rate": 1.798941798941799e-06, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 123.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.6165266036987305, |
|
"eval_runtime": 16.957, |
|
"eval_samples_per_second": 4.305, |
|
"eval_steps_per_second": 1.12, |
|
"step": 8804 |
|
}, |
|
{ |
|
"epoch": 124.00057142857143, |
|
"grad_norm": 0.00017877879145089537, |
|
"learning_rate": 1.7883597883597886e-06, |
|
"loss": 0.0078, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 124.0015238095238, |
|
"grad_norm": 0.00032923344406299293, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 124.00247619047619, |
|
"grad_norm": 0.0002218599256593734, |
|
"learning_rate": 1.7671957671957673e-06, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 124.00342857142857, |
|
"grad_norm": 0.0002749300911091268, |
|
"learning_rate": 1.7566137566137567e-06, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 124.00438095238096, |
|
"grad_norm": 0.0003476983984000981, |
|
"learning_rate": 1.746031746031746e-06, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 124.00533333333334, |
|
"grad_norm": 0.00046555098379030824, |
|
"learning_rate": 1.7354497354497358e-06, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 124.00628571428571, |
|
"grad_norm": 0.0003248922585044056, |
|
"learning_rate": 1.7248677248677251e-06, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 124.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.44909930229187, |
|
"eval_runtime": 15.2748, |
|
"eval_samples_per_second": 4.779, |
|
"eval_steps_per_second": 1.244, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 125.00047619047619, |
|
"grad_norm": 0.0003055214765481651, |
|
"learning_rate": 1.7142857142857145e-06, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 125.00142857142858, |
|
"grad_norm": 0.0009096021531149745, |
|
"learning_rate": 1.7037037037037038e-06, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 125.00238095238095, |
|
"grad_norm": 0.0002497760870028287, |
|
"learning_rate": 1.6931216931216932e-06, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 125.00333333333333, |
|
"grad_norm": 0.001024364260956645, |
|
"learning_rate": 1.6825396825396827e-06, |
|
"loss": 0.0, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 125.00428571428571, |
|
"grad_norm": 0.00013686090824194252, |
|
"learning_rate": 1.671957671957672e-06, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 125.0052380952381, |
|
"grad_norm": 0.005116929765790701, |
|
"learning_rate": 1.6613756613756614e-06, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 125.00619047619048, |
|
"grad_norm": 0.00035035223118029535, |
|
"learning_rate": 1.6507936507936508e-06, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 125.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.342148542404175, |
|
"eval_runtime": 15.2848, |
|
"eval_samples_per_second": 4.776, |
|
"eval_steps_per_second": 1.243, |
|
"step": 8946 |
|
}, |
|
{ |
|
"epoch": 126.00038095238095, |
|
"grad_norm": 0.00012745351705234498, |
|
"learning_rate": 1.6402116402116404e-06, |
|
"loss": 0.0, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 126.00133333333333, |
|
"grad_norm": 0.0006788919563405216, |
|
"learning_rate": 1.62962962962963e-06, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 126.00228571428572, |
|
"grad_norm": 0.0005730336997658014, |
|
"learning_rate": 1.6190476190476193e-06, |
|
"loss": 0.0, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 126.00323809523809, |
|
"grad_norm": 0.00025528325932100415, |
|
"learning_rate": 1.6084656084656086e-06, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 126.00419047619047, |
|
"grad_norm": 0.0005645015044137836, |
|
"learning_rate": 1.597883597883598e-06, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 126.00514285714286, |
|
"grad_norm": 0.000495299173053354, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 126.00609523809524, |
|
"grad_norm": 0.00029814988374710083, |
|
"learning_rate": 1.5767195767195769e-06, |
|
"loss": 0.0, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 126.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.3381338119506836, |
|
"eval_runtime": 15.1917, |
|
"eval_samples_per_second": 4.805, |
|
"eval_steps_per_second": 1.251, |
|
"step": 9017 |
|
}, |
|
{ |
|
"epoch": 127.00028571428571, |
|
"grad_norm": 0.00038966414285823703, |
|
"learning_rate": 1.5661375661375664e-06, |
|
"loss": 0.0, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 127.0012380952381, |
|
"grad_norm": 0.0003006823535542935, |
|
"learning_rate": 1.5555555555555558e-06, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 127.00219047619048, |
|
"grad_norm": 0.00020581170974764973, |
|
"learning_rate": 1.5449735449735451e-06, |
|
"loss": 0.0, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 127.00314285714286, |
|
"grad_norm": 0.0003945867356378585, |
|
"learning_rate": 1.5343915343915345e-06, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 127.00409523809523, |
|
"grad_norm": 0.0003842693695332855, |
|
"learning_rate": 1.523809523809524e-06, |
|
"loss": 0.0, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 127.00504761904762, |
|
"grad_norm": 0.00013588345609605312, |
|
"learning_rate": 1.5132275132275134e-06, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 127.006, |
|
"grad_norm": 0.00021416005620267242, |
|
"learning_rate": 1.5026455026455028e-06, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 127.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.3614723682403564, |
|
"eval_runtime": 15.4444, |
|
"eval_samples_per_second": 4.727, |
|
"eval_steps_per_second": 1.23, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 128.00019047619048, |
|
"grad_norm": 0.0002484635915607214, |
|
"learning_rate": 1.492063492063492e-06, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 128.00114285714287, |
|
"grad_norm": 0.00036805597483180463, |
|
"learning_rate": 1.4814814814814815e-06, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 128.00209523809525, |
|
"grad_norm": 0.0006575100123882294, |
|
"learning_rate": 1.4708994708994708e-06, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 128.0030476190476, |
|
"grad_norm": 0.00018772153998725116, |
|
"learning_rate": 1.4603174603174606e-06, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 128.004, |
|
"grad_norm": 0.00034466315992176533, |
|
"learning_rate": 1.44973544973545e-06, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 128.00495238095237, |
|
"grad_norm": 0.00042705878149718046, |
|
"learning_rate": 1.4391534391534393e-06, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 128.00590476190476, |
|
"grad_norm": 0.00016666139708831906, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 128.0067619047619, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 2.442920207977295, |
|
"eval_runtime": 15.2795, |
|
"eval_samples_per_second": 4.778, |
|
"eval_steps_per_second": 1.243, |
|
"step": 9159 |
|
}, |
|
{ |
|
"epoch": 129.00009523809524, |
|
"grad_norm": 0.00020781603234354407, |
|
"learning_rate": 1.417989417989418e-06, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 129.00104761904763, |
|
"grad_norm": 0.0002236000436823815, |
|
"learning_rate": 1.4074074074074075e-06, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 129.002, |
|
"grad_norm": 0.00029241846641525626, |
|
"learning_rate": 1.3968253968253969e-06, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 129.0029523809524, |
|
"grad_norm": 0.00022293177607934922, |
|
"learning_rate": 1.3862433862433862e-06, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 129.00390476190475, |
|
"grad_norm": 0.0002516314561944455, |
|
"learning_rate": 1.3756613756613758e-06, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 129.00485714285713, |
|
"grad_norm": 0.0002649887464940548, |
|
"learning_rate": 1.3650793650793652e-06, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 129.00580952380952, |
|
"grad_norm": 0.000248853990342468, |
|
"learning_rate": 1.3544973544973547e-06, |
|
"loss": 0.0, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 129.0067619047619, |
|
"grad_norm": 0.0001870815467555076, |
|
"learning_rate": 1.343915343915344e-06, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 129.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.626615524291992, |
|
"eval_runtime": 15.0645, |
|
"eval_samples_per_second": 4.846, |
|
"eval_steps_per_second": 1.261, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 130.00095238095238, |
|
"grad_norm": 0.00024522689636796713, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 130.00190476190477, |
|
"grad_norm": 0.00015474161773454398, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 130.00285714285715, |
|
"grad_norm": 0.00021853228099644184, |
|
"learning_rate": 1.3121693121693121e-06, |
|
"loss": 0.0, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 130.00380952380954, |
|
"grad_norm": 0.0007916698814369738, |
|
"learning_rate": 1.3015873015873019e-06, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 130.0047619047619, |
|
"grad_norm": 0.0002638715668581426, |
|
"learning_rate": 1.2910052910052912e-06, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 130.00571428571428, |
|
"grad_norm": 0.0005141887231729925, |
|
"learning_rate": 1.2804232804232806e-06, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 130.00666666666666, |
|
"grad_norm": 0.000244722148636356, |
|
"learning_rate": 1.26984126984127e-06, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 130.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.628141403198242, |
|
"eval_runtime": 14.903, |
|
"eval_samples_per_second": 4.898, |
|
"eval_steps_per_second": 1.275, |
|
"step": 9301 |
|
}, |
|
{ |
|
"epoch": 131.00085714285714, |
|
"grad_norm": 0.0004123387625440955, |
|
"learning_rate": 1.2592592592592593e-06, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 131.00180952380953, |
|
"grad_norm": 0.00025080115301534534, |
|
"learning_rate": 1.2486772486772486e-06, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 131.0027619047619, |
|
"grad_norm": 0.0002924806030932814, |
|
"learning_rate": 1.2380952380952382e-06, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 131.0037142857143, |
|
"grad_norm": 0.0003221347869839519, |
|
"learning_rate": 1.2275132275132276e-06, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 131.00466666666668, |
|
"grad_norm": 0.00011789855489041656, |
|
"learning_rate": 1.216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 131.00561904761904, |
|
"grad_norm": 0.00014974681835155934, |
|
"learning_rate": 1.2063492063492065e-06, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 131.00657142857142, |
|
"grad_norm": 0.00016921368660405278, |
|
"learning_rate": 1.1957671957671958e-06, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 131.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.632081985473633, |
|
"eval_runtime": 18.1652, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 1.046, |
|
"step": 9372 |
|
}, |
|
{ |
|
"epoch": 132.0007619047619, |
|
"grad_norm": 0.00015733508917037398, |
|
"learning_rate": 1.1851851851851854e-06, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 132.00171428571429, |
|
"grad_norm": 0.00016954125021584332, |
|
"learning_rate": 1.1746031746031747e-06, |
|
"loss": 0.0, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 132.00266666666667, |
|
"grad_norm": 0.0002530089404899627, |
|
"learning_rate": 1.164021164021164e-06, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 132.00361904761905, |
|
"grad_norm": 0.00021686778927687556, |
|
"learning_rate": 1.1534391534391536e-06, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 132.00457142857144, |
|
"grad_norm": 0.0003280985984019935, |
|
"learning_rate": 1.142857142857143e-06, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 132.00552380952382, |
|
"grad_norm": 0.00023514677013736218, |
|
"learning_rate": 1.1322751322751323e-06, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 132.00647619047618, |
|
"grad_norm": 0.0002899873361457139, |
|
"learning_rate": 1.1216931216931217e-06, |
|
"loss": 0.0, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 132.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.635000228881836, |
|
"eval_runtime": 19.2931, |
|
"eval_samples_per_second": 3.784, |
|
"eval_steps_per_second": 0.985, |
|
"step": 9443 |
|
}, |
|
{ |
|
"epoch": 133.00066666666666, |
|
"grad_norm": 0.0003683580143842846, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 133.00161904761904, |
|
"grad_norm": 0.0002963476290460676, |
|
"learning_rate": 1.1005291005291006e-06, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 133.00257142857143, |
|
"grad_norm": 0.00015299726510420442, |
|
"learning_rate": 1.08994708994709e-06, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 133.0035238095238, |
|
"grad_norm": 0.00014682565233670175, |
|
"learning_rate": 1.0793650793650795e-06, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 133.0044761904762, |
|
"grad_norm": 0.00018223447841592133, |
|
"learning_rate": 1.0687830687830689e-06, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 133.00542857142858, |
|
"grad_norm": 0.0004476811154745519, |
|
"learning_rate": 1.0582010582010582e-06, |
|
"loss": 0.191, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 133.00638095238097, |
|
"grad_norm": 0.00021226915123406798, |
|
"learning_rate": 1.0476190476190478e-06, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 133.0067619047619, |
|
"eval_accuracy": 0.7808219178082192, |
|
"eval_loss": 2.5210020542144775, |
|
"eval_runtime": 13.3228, |
|
"eval_samples_per_second": 5.479, |
|
"eval_steps_per_second": 1.426, |
|
"step": 9514 |
|
}, |
|
{ |
|
"epoch": 134.00057142857142, |
|
"grad_norm": 0.00019762705778703094, |
|
"learning_rate": 1.0370370370370371e-06, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 134.0015238095238, |
|
"grad_norm": 0.001722596469335258, |
|
"learning_rate": 1.0264550264550267e-06, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 134.0024761904762, |
|
"grad_norm": 0.00013699405826628208, |
|
"learning_rate": 1.015873015873016e-06, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 134.00342857142857, |
|
"grad_norm": 0.0003934580599889159, |
|
"learning_rate": 1.0052910052910054e-06, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 134.00438095238096, |
|
"grad_norm": 0.00029361352790147066, |
|
"learning_rate": 9.947089947089947e-07, |
|
"loss": 0.0, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 134.00533333333334, |
|
"grad_norm": 0.00011465285933809355, |
|
"learning_rate": 9.84126984126984e-07, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 134.00628571428572, |
|
"grad_norm": 0.00014121955609880388, |
|
"learning_rate": 9.735449735449736e-07, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 134.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.557199239730835, |
|
"eval_runtime": 13.1919, |
|
"eval_samples_per_second": 5.534, |
|
"eval_steps_per_second": 1.44, |
|
"step": 9585 |
|
}, |
|
{ |
|
"epoch": 135.00047619047618, |
|
"grad_norm": 0.000587943650316447, |
|
"learning_rate": 9.62962962962963e-07, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 135.00142857142856, |
|
"grad_norm": 0.00026095641078427434, |
|
"learning_rate": 9.523809523809525e-07, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 135.00238095238095, |
|
"grad_norm": 0.00023201369913294911, |
|
"learning_rate": 9.417989417989419e-07, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 135.00333333333333, |
|
"grad_norm": 8.678707672515884e-05, |
|
"learning_rate": 9.312169312169313e-07, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 135.00428571428571, |
|
"grad_norm": 0.00020305972429923713, |
|
"learning_rate": 9.206349206349208e-07, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 135.0052380952381, |
|
"grad_norm": 0.0002525858290027827, |
|
"learning_rate": 9.100529100529102e-07, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 135.00619047619048, |
|
"grad_norm": 0.0003141445340588689, |
|
"learning_rate": 8.994708994708995e-07, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 135.0067619047619, |
|
"eval_accuracy": 0.7671232876712328, |
|
"eval_loss": 2.541918992996216, |
|
"eval_runtime": 13.3786, |
|
"eval_samples_per_second": 5.456, |
|
"eval_steps_per_second": 1.42, |
|
"step": 9656 |
|
}, |
|
{ |
|
"epoch": 136.00038095238097, |
|
"grad_norm": 0.0005338073242455721, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 136.00133333333332, |
|
"grad_norm": 0.0005123758455738425, |
|
"learning_rate": 8.783068783068783e-07, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 136.0022857142857, |
|
"grad_norm": 0.0005338129121810198, |
|
"learning_rate": 8.677248677248679e-07, |
|
"loss": 0.0, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 136.0032380952381, |
|
"grad_norm": 0.00018759489466901869, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 136.00419047619047, |
|
"grad_norm": 0.00013946475519333035, |
|
"learning_rate": 8.465608465608466e-07, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 136.00514285714286, |
|
"grad_norm": 0.00021313272009138018, |
|
"learning_rate": 8.35978835978836e-07, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 136.00609523809524, |
|
"grad_norm": 0.00012443563900887966, |
|
"learning_rate": 8.253968253968254e-07, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 136.0067619047619, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 2.5427541732788086, |
|
"eval_runtime": 16.7485, |
|
"eval_samples_per_second": 4.359, |
|
"eval_steps_per_second": 1.134, |
|
"step": 9727 |
|
}, |
|
{ |
|
"epoch": 137.00028571428572, |
|
"grad_norm": 0.0002751411811914295, |
|
"learning_rate": 8.14814814814815e-07, |
|
"loss": 0.0, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 137.0012380952381, |
|
"grad_norm": 0.00017545548325870186, |
|
"learning_rate": 8.042328042328043e-07, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 137.00219047619046, |
|
"grad_norm": 0.000109168715425767, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 137.00314285714285, |
|
"grad_norm": 0.00022859223827254027, |
|
"learning_rate": 7.830687830687832e-07, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 137.00409523809523, |
|
"grad_norm": 0.0002865736314561218, |
|
"learning_rate": 7.724867724867726e-07, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 137.00504761904762, |
|
"grad_norm": 0.00021152329281903803, |
|
"learning_rate": 7.61904761904762e-07, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 137.006, |
|
"grad_norm": 0.00015338289085775614, |
|
"learning_rate": 7.513227513227514e-07, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 137.0067619047619, |
|
"eval_accuracy": 0.7534246575342466, |
|
"eval_loss": 2.564866304397583, |
|
"eval_runtime": 18.7744, |
|
"eval_samples_per_second": 3.888, |
|
"eval_steps_per_second": 1.012, |
|
"step": 9798 |
|
}, |
|
{ |
|
"epoch": 138.00019047619048, |
|
"grad_norm": 0.00016186927678063512, |
|
"learning_rate": 7.407407407407407e-07, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 138.00114285714287, |
|
"grad_norm": 0.0005098835681565106, |
|
"learning_rate": 7.301587301587303e-07, |
|
"loss": 0.0, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 138.00209523809525, |
|
"grad_norm": 0.00022757577244192362, |
|
"learning_rate": 7.195767195767196e-07, |
|
"loss": 0.0333, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 138.0030476190476, |
|
"grad_norm": 0.00017647018830757588, |
|
"learning_rate": 7.08994708994709e-07, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 138.004, |
|
"grad_norm": 0.0001440487540094182, |
|
"learning_rate": 6.984126984126984e-07, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 138.00495238095237, |
|
"grad_norm": 0.00019855830760207027, |
|
"learning_rate": 6.878306878306879e-07, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 138.00590476190476, |
|
"grad_norm": 0.00011215502308914438, |
|
"learning_rate": 6.772486772486774e-07, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 138.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.7969422340393066, |
|
"eval_runtime": 18.3133, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 1.037, |
|
"step": 9869 |
|
}, |
|
{ |
|
"epoch": 139.00009523809524, |
|
"grad_norm": 0.00022895568690728396, |
|
"learning_rate": 6.666666666666667e-07, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 139.00104761904763, |
|
"grad_norm": 0.00027200000477023423, |
|
"learning_rate": 6.560846560846561e-07, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 139.002, |
|
"grad_norm": 0.0003704431001096964, |
|
"learning_rate": 6.455026455026456e-07, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 139.0029523809524, |
|
"grad_norm": 0.0002268942625960335, |
|
"learning_rate": 6.34920634920635e-07, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 139.00390476190475, |
|
"grad_norm": 0.00012369590695016086, |
|
"learning_rate": 6.243386243386243e-07, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 139.00485714285713, |
|
"grad_norm": 0.0007778329309076071, |
|
"learning_rate": 6.137566137566138e-07, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 139.00580952380952, |
|
"grad_norm": 0.00018602225463837385, |
|
"learning_rate": 6.031746031746032e-07, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 139.0067619047619, |
|
"grad_norm": 0.0001342453178949654, |
|
"learning_rate": 5.925925925925927e-07, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 139.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.80258846282959, |
|
"eval_runtime": 18.539, |
|
"eval_samples_per_second": 3.938, |
|
"eval_steps_per_second": 1.025, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 140.00095238095238, |
|
"grad_norm": 0.00012229051208123565, |
|
"learning_rate": 5.82010582010582e-07, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 140.00190476190477, |
|
"grad_norm": 0.00013425771612673998, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 140.00285714285715, |
|
"grad_norm": 0.0002464202116243541, |
|
"learning_rate": 5.608465608465608e-07, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 140.00380952380954, |
|
"grad_norm": 0.00034413309185765684, |
|
"learning_rate": 5.502645502645503e-07, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 140.0047619047619, |
|
"grad_norm": 0.00018081619055010378, |
|
"learning_rate": 5.396825396825398e-07, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 140.00571428571428, |
|
"grad_norm": 0.00016579817747697234, |
|
"learning_rate": 5.291005291005291e-07, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 140.00666666666666, |
|
"grad_norm": 0.0001416023151250556, |
|
"learning_rate": 5.185185185185186e-07, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 140.0067619047619, |
|
"eval_accuracy": 0.7123287671232876, |
|
"eval_loss": 2.8066341876983643, |
|
"eval_runtime": 18.6478, |
|
"eval_samples_per_second": 3.915, |
|
"eval_steps_per_second": 1.019, |
|
"step": 10011 |
|
}, |
|
{ |
|
"epoch": 141.00085714285714, |
|
"grad_norm": 0.00018761874525807798, |
|
"learning_rate": 5.07936507936508e-07, |
|
"loss": 0.0, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 141.00180952380953, |
|
"grad_norm": 0.0002681266632862389, |
|
"learning_rate": 4.973544973544974e-07, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 141.0027619047619, |
|
"grad_norm": 0.00020291414693929255, |
|
"learning_rate": 4.867724867724868e-07, |
|
"loss": 0.0006, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 141.0037142857143, |
|
"grad_norm": 0.0015192608116194606, |
|
"learning_rate": 4.7619047619047623e-07, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 141.00466666666668, |
|
"grad_norm": 0.00034483778290450573, |
|
"learning_rate": 4.6560846560846563e-07, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 141.00561904761904, |
|
"grad_norm": 0.00031573971500620246, |
|
"learning_rate": 4.550264550264551e-07, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 141.00657142857142, |
|
"grad_norm": 0.0001705311005935073, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 141.0067619047619, |
|
"eval_accuracy": 0.7397260273972602, |
|
"eval_loss": 2.6292834281921387, |
|
"eval_runtime": 18.2166, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 1.043, |
|
"step": 10082 |
|
}, |
|
{ |
|
"epoch": 142.0007619047619, |
|
"grad_norm": 0.0002706398954614997, |
|
"learning_rate": 4.3386243386243395e-07, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 142.00171428571429, |
|
"grad_norm": 0.0001493979652877897, |
|
"learning_rate": 4.232804232804233e-07, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 142.00266666666667, |
|
"grad_norm": 0.0003185007080901414, |
|
"learning_rate": 4.126984126984127e-07, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 142.00361904761905, |
|
"grad_norm": 0.00015999308379832655, |
|
"learning_rate": 4.0211640211640215e-07, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 142.00457142857144, |
|
"grad_norm": 0.00015013849770184606, |
|
"learning_rate": 3.915343915343916e-07, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 142.00552380952382, |
|
"grad_norm": 0.00012767186854034662, |
|
"learning_rate": 3.80952380952381e-07, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 142.00647619047618, |
|
"grad_norm": 0.00013092839799355716, |
|
"learning_rate": 3.7037037037037036e-07, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 142.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.6858787536621094, |
|
"eval_runtime": 15.5225, |
|
"eval_samples_per_second": 4.703, |
|
"eval_steps_per_second": 1.224, |
|
"step": 10153 |
|
}, |
|
{ |
|
"epoch": 143.00066666666666, |
|
"grad_norm": 0.00038538037915714085, |
|
"learning_rate": 3.597883597883598e-07, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 143.00161904761904, |
|
"grad_norm": 0.00017552358622197062, |
|
"learning_rate": 3.492063492063492e-07, |
|
"loss": 0.0, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 143.00257142857143, |
|
"grad_norm": 0.000381525227567181, |
|
"learning_rate": 3.386243386243387e-07, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 143.0035238095238, |
|
"grad_norm": 0.0001535443152533844, |
|
"learning_rate": 3.2804232804232803e-07, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 143.0044761904762, |
|
"grad_norm": 0.00018170243129134178, |
|
"learning_rate": 3.174603174603175e-07, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 143.00542857142858, |
|
"grad_norm": 0.00019048094691243023, |
|
"learning_rate": 3.068783068783069e-07, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 143.00638095238097, |
|
"grad_norm": 0.00048235792201012373, |
|
"learning_rate": 2.9629629629629634e-07, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 143.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.6885600090026855, |
|
"eval_runtime": 14.9304, |
|
"eval_samples_per_second": 4.889, |
|
"eval_steps_per_second": 1.273, |
|
"step": 10224 |
|
}, |
|
{ |
|
"epoch": 144.00057142857142, |
|
"grad_norm": 0.0001256720715900883, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 144.0015238095238, |
|
"grad_norm": 0.00017404030950274318, |
|
"learning_rate": 2.7513227513227515e-07, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 144.0024761904762, |
|
"grad_norm": 0.00011612015805440024, |
|
"learning_rate": 2.6455026455026455e-07, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 144.00342857142857, |
|
"grad_norm": 0.0001568691077409312, |
|
"learning_rate": 2.53968253968254e-07, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 144.00438095238096, |
|
"grad_norm": 0.0001277936971746385, |
|
"learning_rate": 2.433862433862434e-07, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 144.00533333333334, |
|
"grad_norm": 0.00034027377841994166, |
|
"learning_rate": 2.3280423280423281e-07, |
|
"loss": 0.0, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 144.00628571428572, |
|
"grad_norm": 0.0001803150080377236, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 144.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.7222938537597656, |
|
"eval_runtime": 14.1421, |
|
"eval_samples_per_second": 5.162, |
|
"eval_steps_per_second": 1.344, |
|
"step": 10295 |
|
}, |
|
{ |
|
"epoch": 145.00047619047618, |
|
"grad_norm": 0.00014836977061349899, |
|
"learning_rate": 2.1164021164021165e-07, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 145.00142857142856, |
|
"grad_norm": 0.0002304692898178473, |
|
"learning_rate": 2.0105820105820108e-07, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 145.00238095238095, |
|
"grad_norm": 0.00023921544197946787, |
|
"learning_rate": 1.904761904761905e-07, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 145.00333333333333, |
|
"grad_norm": 0.00011568454647203907, |
|
"learning_rate": 1.798941798941799e-07, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 145.00428571428571, |
|
"grad_norm": 8.033207893371582, |
|
"learning_rate": 1.6931216931216934e-07, |
|
"loss": 0.0003, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 145.0052380952381, |
|
"grad_norm": 0.000271545082796365, |
|
"learning_rate": 1.5873015873015874e-07, |
|
"loss": 0.0, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 145.00619047619048, |
|
"grad_norm": 0.0002849562733899802, |
|
"learning_rate": 1.4814814814814817e-07, |
|
"loss": 0.0, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 145.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.787161111831665, |
|
"eval_runtime": 13.2484, |
|
"eval_samples_per_second": 5.51, |
|
"eval_steps_per_second": 1.434, |
|
"step": 10366 |
|
}, |
|
{ |
|
"epoch": 146.00038095238097, |
|
"grad_norm": 0.0001854184956755489, |
|
"learning_rate": 1.3756613756613757e-07, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 146.00133333333332, |
|
"grad_norm": 0.0001862685603555292, |
|
"learning_rate": 1.26984126984127e-07, |
|
"loss": 0.0, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 146.0022857142857, |
|
"grad_norm": 0.00014588805788662285, |
|
"learning_rate": 1.1640211640211641e-07, |
|
"loss": 0.0, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 146.0032380952381, |
|
"grad_norm": 0.00012009394413325936, |
|
"learning_rate": 1.0582010582010582e-07, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 146.00419047619047, |
|
"grad_norm": 0.00022948795231059194, |
|
"learning_rate": 9.523809523809525e-08, |
|
"loss": 0.0, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 146.00514285714286, |
|
"grad_norm": 9.62372068897821e-05, |
|
"learning_rate": 8.465608465608467e-08, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 146.00609523809524, |
|
"grad_norm": 0.00013434002175927162, |
|
"learning_rate": 7.407407407407409e-08, |
|
"loss": 0.0, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 146.0067619047619, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.788707971572876, |
|
"eval_runtime": 13.3352, |
|
"eval_samples_per_second": 5.474, |
|
"eval_steps_per_second": 1.425, |
|
"step": 10437 |
|
}, |
|
{ |
|
"epoch": 147.00028571428572, |
|
"grad_norm": 0.00018250966968480498, |
|
"learning_rate": 6.34920634920635e-08, |
|
"loss": 0.0, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 147.0012380952381, |
|
"grad_norm": 0.00015822089335415512, |
|
"learning_rate": 5.291005291005291e-08, |
|
"loss": 0.0, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 147.00219047619046, |
|
"grad_norm": 0.0001716633269097656, |
|
"learning_rate": 4.2328042328042335e-08, |
|
"loss": 0.0, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 147.00314285714285, |
|
"grad_norm": 0.00010248153557768092, |
|
"learning_rate": 3.174603174603175e-08, |
|
"loss": 0.0, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 147.00409523809523, |
|
"grad_norm": 0.0007682426949031651, |
|
"learning_rate": 2.1164021164021167e-08, |
|
"loss": 0.0, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 147.00504761904762, |
|
"grad_norm": 0.0001468470727559179, |
|
"learning_rate": 1.0582010582010584e-08, |
|
"loss": 0.0, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"grad_norm": 0.0003807171597145498, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.726027397260274, |
|
"eval_loss": 2.788776397705078, |
|
"eval_runtime": 14.0192, |
|
"eval_samples_per_second": 5.207, |
|
"eval_steps_per_second": 1.355, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"step": 10500, |
|
"total_flos": 1.83132143505767e+20, |
|
"train_loss": 0.08905730428299008, |
|
"train_runtime": 20256.6491, |
|
"train_samples_per_second": 2.073, |
|
"train_steps_per_second": 0.518 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.7808219178082192, |
|
"eval_loss": 2.5210022926330566, |
|
"eval_runtime": 16.7537, |
|
"eval_samples_per_second": 4.357, |
|
"eval_steps_per_second": 1.134, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 147.006, |
|
"eval_accuracy": 0.7808219178082192, |
|
"eval_loss": 2.5210020542144775, |
|
"eval_runtime": 14.1674, |
|
"eval_samples_per_second": 5.153, |
|
"eval_steps_per_second": 1.341, |
|
"step": 10500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.83132143505767e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|