|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 150.0, |
|
"global_step": 3450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2e-06, |
|
"loss": 20.1322, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.02e-05, |
|
"loss": 20.2807, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.5599999999999996e-05, |
|
"loss": 19.9863, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1599999999999996e-05, |
|
"loss": 16.998, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.7599999999999997e-05, |
|
"loss": 11.753, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.36e-05, |
|
"loss": 8.5319, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.96e-05, |
|
"loss": 7.0837, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.56e-05, |
|
"loss": 6.0396, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 5.1599999999999994e-05, |
|
"loss": 5.5626, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5.76e-05, |
|
"loss": 5.1697, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 6.359999999999999e-05, |
|
"loss": 4.7468, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 6.96e-05, |
|
"loss": 4.6084, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 7.56e-05, |
|
"loss": 4.0859, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 8.16e-05, |
|
"loss": 3.9953, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 8.759999999999999e-05, |
|
"loss": 3.7522, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 9.36e-05, |
|
"loss": 3.5674, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 9.96e-05, |
|
"loss": 3.4598, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00010559999999999998, |
|
"loss": 3.42, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.00011159999999999999, |
|
"loss": 3.3579, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0001176, |
|
"loss": 3.3237, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 0.0001236, |
|
"loss": 3.2857, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.00012959999999999998, |
|
"loss": 3.2553, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001356, |
|
"loss": 3.2545, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.00014159999999999997, |
|
"loss": 3.2053, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.00014759999999999998, |
|
"loss": 3.249, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 0.0001536, |
|
"loss": 3.2334, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 0.0001596, |
|
"loss": 3.2278, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 0.0001656, |
|
"loss": 3.206, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 0.00017159999999999997, |
|
"loss": 3.2164, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00017759999999999998, |
|
"loss": 3.2048, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 0.0001836, |
|
"loss": 3.2024, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 0.00018959999999999997, |
|
"loss": 3.2113, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 0.00019559999999999998, |
|
"loss": 3.2023, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 0.0002016, |
|
"loss": 3.2135, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 0.00020759999999999998, |
|
"loss": 3.2014, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 0.00021359999999999996, |
|
"loss": 3.1402, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 0.00021959999999999997, |
|
"loss": 3.093, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 0.00022559999999999998, |
|
"loss": 2.9955, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0002316, |
|
"loss": 2.8186, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.0002376, |
|
"loss": 2.5288, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_loss": 2.0544939041137695, |
|
"eval_runtime": 2.3101, |
|
"eval_samples_per_second": 96.532, |
|
"eval_steps_per_second": 3.03, |
|
"eval_wer": 0.6038511871377827, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 0.00024359999999999999, |
|
"loss": 2.1609, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 0.00024959999999999994, |
|
"loss": 1.8764, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 0.0002556, |
|
"loss": 1.6154, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 0.00026159999999999996, |
|
"loss": 1.4481, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 0.0002676, |
|
"loss": 1.2572, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0002736, |
|
"loss": 1.2195, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 0.00027959999999999997, |
|
"loss": 1.0625, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 0.00028559999999999995, |
|
"loss": 1.0071, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 0.0002916, |
|
"loss": 0.9312, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 0.8633, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"learning_rate": 0.00029052631578947366, |
|
"loss": 0.8454, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 0.0002747368421052631, |
|
"loss": 0.7471, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"learning_rate": 0.0002589473684210526, |
|
"loss": 0.7783, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 23.48, |
|
"learning_rate": 0.00024315789473684207, |
|
"loss": 0.64, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 0.00022736842105263157, |
|
"loss": 0.6077, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 0.00021157894736842102, |
|
"loss": 0.5887, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 0.0001957894736842105, |
|
"loss": 0.5389, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.4779, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 25.65, |
|
"learning_rate": 0.00016421052631578948, |
|
"loss": 0.4971, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 0.00014842105263157893, |
|
"loss": 0.4876, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"learning_rate": 0.0001326315789473684, |
|
"loss": 0.4645, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"learning_rate": 0.00011684210526315788, |
|
"loss": 0.3915, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 27.39, |
|
"learning_rate": 0.00010105263157894735, |
|
"loss": 0.3831, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 8.526315789473684e-05, |
|
"loss": 0.3738, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"learning_rate": 6.947368421052631e-05, |
|
"loss": 0.3274, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 5.3684210526315784e-05, |
|
"loss": 0.3508, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"learning_rate": 3.789473684210526e-05, |
|
"loss": 0.3293, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 2.2105263157894733e-05, |
|
"loss": 0.3146, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 6.3157894736842095e-06, |
|
"loss": 0.2893, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 690, |
|
"total_flos": 3.7988686760446525e+18, |
|
"train_loss": 3.560509018276049, |
|
"train_runtime": 533.4671, |
|
"train_samples_per_second": 40.49, |
|
"train_steps_per_second": 1.293 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 5.999999999999999e-06, |
|
"loss": 0.3166, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 30.87, |
|
"learning_rate": 1.1999999999999999e-05, |
|
"loss": 0.3066, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.2917, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 31.74, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 0.2866, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 32.17, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.2835, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"learning_rate": 3.5999999999999994e-05, |
|
"loss": 0.2765, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2951, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 33.48, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 0.2685, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 33.91, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 0.3011, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 34.35, |
|
"learning_rate": 5.94e-05, |
|
"loss": 0.273, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 6.479999999999999e-05, |
|
"loss": 0.2616, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"eval_loss": 0.4040215313434601, |
|
"eval_runtime": 2.307, |
|
"eval_samples_per_second": 96.662, |
|
"eval_steps_per_second": 3.034, |
|
"eval_wer": 0.12422882781828379, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 35.22, |
|
"learning_rate": 7.079999999999999e-05, |
|
"loss": 0.2468, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 35.65, |
|
"learning_rate": 7.68e-05, |
|
"loss": 0.2571, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 36.09, |
|
"learning_rate": 8.28e-05, |
|
"loss": 0.2412, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 36.52, |
|
"learning_rate": 8.879999999999999e-05, |
|
"loss": 0.2392, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"learning_rate": 9.479999999999999e-05, |
|
"loss": 0.2594, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 0.0001008, |
|
"loss": 0.2413, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 37.83, |
|
"learning_rate": 0.00010679999999999998, |
|
"loss": 0.2502, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 38.26, |
|
"learning_rate": 0.00011279999999999999, |
|
"loss": 0.248, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 38.7, |
|
"learning_rate": 0.0001188, |
|
"loss": 0.2272, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 0.00012479999999999997, |
|
"loss": 0.2359, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 0.00013079999999999998, |
|
"loss": 0.2299, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0001368, |
|
"loss": 0.2321, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 0.00014279999999999997, |
|
"loss": 0.2385, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 40.87, |
|
"learning_rate": 0.00014879999999999998, |
|
"loss": 0.2183, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 41.3, |
|
"learning_rate": 0.0001548, |
|
"loss": 0.2318, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 41.74, |
|
"learning_rate": 0.0001608, |
|
"loss": 0.241, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 42.17, |
|
"learning_rate": 0.0001668, |
|
"loss": 0.2314, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"learning_rate": 0.00017279999999999997, |
|
"loss": 0.2195, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 43.04, |
|
"learning_rate": 0.00017879999999999998, |
|
"loss": 0.232, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"learning_rate": 0.0001848, |
|
"loss": 0.196, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 43.91, |
|
"learning_rate": 0.00019079999999999998, |
|
"loss": 0.2384, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 44.35, |
|
"learning_rate": 0.00019679999999999999, |
|
"loss": 0.2293, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 0.0002028, |
|
"loss": 0.2241, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 45.22, |
|
"learning_rate": 0.00020879999999999998, |
|
"loss": 0.1895, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 0.00021479999999999996, |
|
"loss": 0.2248, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 46.09, |
|
"learning_rate": 0.00022079999999999997, |
|
"loss": 0.2288, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 46.52, |
|
"learning_rate": 0.00022679999999999998, |
|
"loss": 0.2115, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"learning_rate": 0.0002328, |
|
"loss": 0.2041, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 47.39, |
|
"learning_rate": 0.0002388, |
|
"loss": 0.2281, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 0.0002448, |
|
"loss": 0.2306, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 48.26, |
|
"learning_rate": 0.00025079999999999997, |
|
"loss": 0.2039, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 0.00025679999999999995, |
|
"loss": 0.2167, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 49.13, |
|
"learning_rate": 0.0002628, |
|
"loss": 0.2339, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"learning_rate": 0.0002688, |
|
"loss": 0.2366, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.0002748, |
|
"loss": 0.1972, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 1150, |
|
"total_flos": 6.332515423493292e+18, |
|
"train_loss": 0.09717807769775391, |
|
"train_runtime": 228.6677, |
|
"train_samples_per_second": 157.434, |
|
"train_steps_per_second": 5.029 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 3.675e-06, |
|
"loss": 1.0586, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 56.52, |
|
"learning_rate": 1.1099999999999999e-05, |
|
"loss": 0.9511, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 60.87, |
|
"learning_rate": 1.8599999999999998e-05, |
|
"loss": 0.8827, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"learning_rate": 2.6024999999999996e-05, |
|
"loss": 0.8123, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"eval_loss": 0.38979411125183105, |
|
"eval_runtime": 2.5496, |
|
"eval_samples_per_second": 87.465, |
|
"eval_steps_per_second": 2.746, |
|
"eval_wer": 0.11226397457468686, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"learning_rate": 3.3524999999999995e-05, |
|
"loss": 0.7657, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 73.91, |
|
"learning_rate": 4.1025e-05, |
|
"loss": 0.7197, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 78.26, |
|
"learning_rate": 4.845e-05, |
|
"loss": 0.6831, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 82.61, |
|
"learning_rate": 5.595e-05, |
|
"loss": 0.6424, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"learning_rate": 6.345e-05, |
|
"loss": 0.6314, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 86.96, |
|
"eval_loss": 0.3785865306854248, |
|
"eval_runtime": 2.183, |
|
"eval_samples_per_second": 102.155, |
|
"eval_steps_per_second": 3.207, |
|
"eval_wer": 0.10431856421761077, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"learning_rate": 7.094999999999999e-05, |
|
"loss": 0.5952, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 95.65, |
|
"learning_rate": 7.359183673469387e-05, |
|
"loss": 0.5769, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 7.053061224489794e-05, |
|
"loss": 0.5631, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 104.35, |
|
"learning_rate": 6.746938775510203e-05, |
|
"loss": 0.5318, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 108.7, |
|
"learning_rate": 6.440816326530611e-05, |
|
"loss": 0.5046, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 108.7, |
|
"eval_loss": 0.38604071736335754, |
|
"eval_runtime": 2.1232, |
|
"eval_samples_per_second": 105.03, |
|
"eval_steps_per_second": 3.297, |
|
"eval_wer": 0.10515984296130118, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 113.04, |
|
"learning_rate": 6.13469387755102e-05, |
|
"loss": 0.5035, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 117.39, |
|
"learning_rate": 5.828571428571428e-05, |
|
"loss": 0.4646, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 121.74, |
|
"learning_rate": 5.525510204081632e-05, |
|
"loss": 0.4552, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 126.09, |
|
"learning_rate": 5.21938775510204e-05, |
|
"loss": 0.4378, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"learning_rate": 4.9132653061224486e-05, |
|
"loss": 0.4264, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 130.43, |
|
"eval_loss": 0.37559226155281067, |
|
"eval_runtime": 2.1713, |
|
"eval_samples_per_second": 102.705, |
|
"eval_steps_per_second": 3.224, |
|
"eval_wer": 0.10001869508319312, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 134.78, |
|
"learning_rate": 4.607142857142857e-05, |
|
"loss": 0.4114, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 139.13, |
|
"learning_rate": 4.3010204081632646e-05, |
|
"loss": 0.3945, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 143.48, |
|
"learning_rate": 3.994897959183673e-05, |
|
"loss": 0.3842, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 147.83, |
|
"learning_rate": 3.688775510204081e-05, |
|
"loss": 0.3757, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"step": 3450, |
|
"total_flos": 1.8994436456459534e+19, |
|
"train_loss": 0.38979322350543477, |
|
"train_runtime": 1100.4507, |
|
"train_samples_per_second": 98.142, |
|
"train_steps_per_second": 3.135 |
|
} |
|
], |
|
"max_steps": 3450, |
|
"num_train_epochs": 150, |
|
"total_flos": 1.8994436456459534e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|