{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 9650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.52,
      "learning_rate": 3.7125e-06,
      "loss": 19.5412,
      "step": 100
    },
    {
      "epoch": 1.04,
      "learning_rate": 7.4625e-06,
      "loss": 11.0922,
      "step": 200
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 7.1984,
      "step": 300
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.49625e-05,
      "loss": 5.9337,
      "step": 400
    },
    {
      "epoch": 2.59,
      "learning_rate": 1.8712499999999997e-05,
      "loss": 4.9733,
      "step": 500
    },
    {
      "epoch": 2.59,
      "eval_loss": 5.06973934173584,
      "eval_runtime": 115.8922,
      "eval_samples_per_second": 23.237,
      "eval_steps_per_second": 2.908,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 3.11,
      "learning_rate": 2.2462499999999997e-05,
      "loss": 4.2014,
      "step": 600
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.6212499999999997e-05,
      "loss": 3.704,
      "step": 700
    },
    {
      "epoch": 4.15,
      "learning_rate": 2.99625e-05,
      "loss": 3.5005,
      "step": 800
    },
    {
      "epoch": 4.66,
      "learning_rate": 3.37125e-05,
      "loss": 3.4424,
      "step": 900
    },
    {
      "epoch": 5.18,
      "learning_rate": 3.7462499999999996e-05,
      "loss": 3.3839,
      "step": 1000
    },
    {
      "epoch": 5.18,
      "eval_loss": 3.3517656326293945,
      "eval_runtime": 115.48,
      "eval_samples_per_second": 23.32,
      "eval_steps_per_second": 2.918,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 5.7,
      "learning_rate": 4.12125e-05,
      "loss": 3.3036,
      "step": 1100
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.4962499999999995e-05,
      "loss": 3.2565,
      "step": 1200
    },
    {
      "epoch": 6.74,
      "learning_rate": 4.871249999999999e-05,
      "loss": 2.9583,
      "step": 1300
    },
    {
      "epoch": 7.25,
      "learning_rate": 5.2462499999999994e-05,
      "loss": 2.3824,
      "step": 1400
    },
    {
      "epoch": 7.77,
      "learning_rate": 5.62125e-05,
      "loss": 2.0596,
      "step": 1500
    },
    {
      "epoch": 7.77,
      "eval_loss": 1.3991833925247192,
      "eval_runtime": 116.0149,
      "eval_samples_per_second": 23.213,
      "eval_steps_per_second": 2.905,
      "eval_wer": 0.7869276218611522,
      "step": 1500
    },
    {
      "epoch": 8.29,
      "learning_rate": 5.9962499999999994e-05,
      "loss": 1.9024,
      "step": 1600
    },
    {
      "epoch": 8.81,
      "learning_rate": 6.37125e-05,
      "loss": 1.7879,
      "step": 1700
    },
    {
      "epoch": 9.33,
      "learning_rate": 6.746249999999999e-05,
      "loss": 1.7072,
      "step": 1800
    },
    {
      "epoch": 9.84,
      "learning_rate": 7.121249999999999e-05,
      "loss": 1.6689,
      "step": 1900
    },
    {
      "epoch": 10.36,
      "learning_rate": 7.49625e-05,
      "loss": 1.6102,
      "step": 2000
    },
    {
      "epoch": 10.36,
      "eval_loss": 1.0711737871170044,
      "eval_runtime": 115.3994,
      "eval_samples_per_second": 23.336,
      "eval_steps_per_second": 2.92,
      "eval_wer": 0.6754062038404727,
      "step": 2000
    },
    {
      "epoch": 10.88,
      "learning_rate": 7.40392156862745e-05,
      "loss": 1.5881,
      "step": 2100
    },
    {
      "epoch": 11.4,
      "learning_rate": 7.305882352941176e-05,
      "loss": 1.5371,
      "step": 2200
    },
    {
      "epoch": 11.92,
      "learning_rate": 7.208823529411764e-05,
      "loss": 1.5062,
      "step": 2300
    },
    {
      "epoch": 12.44,
      "learning_rate": 7.11078431372549e-05,
      "loss": 1.4704,
      "step": 2400
    },
    {
      "epoch": 12.95,
      "learning_rate": 7.012745098039215e-05,
      "loss": 1.4587,
      "step": 2500
    },
    {
      "epoch": 12.95,
      "eval_loss": 0.9280298352241516,
      "eval_runtime": 115.6447,
      "eval_samples_per_second": 23.287,
      "eval_steps_per_second": 2.914,
      "eval_wer": 0.6361244460856721,
      "step": 2500
    },
    {
      "epoch": 13.47,
      "learning_rate": 6.91470588235294e-05,
      "loss": 1.4201,
      "step": 2600
    },
    {
      "epoch": 13.99,
      "learning_rate": 6.81764705882353e-05,
      "loss": 1.4221,
      "step": 2700
    },
    {
      "epoch": 14.51,
      "learning_rate": 6.719607843137255e-05,
      "loss": 1.3937,
      "step": 2800
    },
    {
      "epoch": 15.03,
      "learning_rate": 6.62156862745098e-05,
      "loss": 1.3984,
      "step": 2900
    },
    {
      "epoch": 15.54,
      "learning_rate": 6.523529411764705e-05,
      "loss": 1.3667,
      "step": 3000
    },
    {
      "epoch": 15.54,
      "eval_loss": 0.9280989766120911,
      "eval_runtime": 115.1284,
      "eval_samples_per_second": 23.391,
      "eval_steps_per_second": 2.927,
      "eval_wer": 0.6155372968980798,
      "step": 3000
    },
    {
      "epoch": 16.06,
      "learning_rate": 6.425490196078431e-05,
      "loss": 1.367,
      "step": 3100
    },
    {
      "epoch": 16.58,
      "learning_rate": 6.327450980392156e-05,
      "loss": 1.3535,
      "step": 3200
    },
    {
      "epoch": 17.1,
      "learning_rate": 6.229411764705881e-05,
      "loss": 1.3265,
      "step": 3300
    },
    {
      "epoch": 17.62,
      "learning_rate": 6.131372549019608e-05,
      "loss": 1.3202,
      "step": 3400
    },
    {
      "epoch": 18.13,
      "learning_rate": 6.033333333333333e-05,
      "loss": 1.3042,
      "step": 3500
    },
    {
      "epoch": 18.13,
      "eval_loss": 0.9037219882011414,
      "eval_runtime": 114.717,
      "eval_samples_per_second": 23.475,
      "eval_steps_per_second": 2.938,
      "eval_wer": 0.5921344165435746,
      "step": 3500
    },
    {
      "epoch": 18.65,
      "learning_rate": 5.9352941176470584e-05,
      "loss": 1.3016,
      "step": 3600
    },
    {
      "epoch": 19.17,
      "learning_rate": 5.837254901960784e-05,
      "loss": 1.2862,
      "step": 3700
    },
    {
      "epoch": 19.69,
      "learning_rate": 5.739215686274509e-05,
      "loss": 1.2773,
      "step": 3800
    },
    {
      "epoch": 20.21,
      "learning_rate": 5.641176470588235e-05,
      "loss": 1.2543,
      "step": 3900
    },
    {
      "epoch": 20.73,
      "learning_rate": 5.54313725490196e-05,
      "loss": 1.2544,
      "step": 4000
    },
    {
      "epoch": 20.73,
      "eval_loss": 0.8996412754058838,
      "eval_runtime": 115.5118,
      "eval_samples_per_second": 23.314,
      "eval_steps_per_second": 2.917,
      "eval_wer": 0.5824409158050221,
      "step": 4000
    },
    {
      "epoch": 21.24,
      "learning_rate": 5.445098039215686e-05,
      "loss": 1.2419,
      "step": 4100
    },
    {
      "epoch": 21.76,
      "learning_rate": 5.347058823529411e-05,
      "loss": 1.2347,
      "step": 4200
    },
    {
      "epoch": 22.28,
      "learning_rate": 5.2490196078431365e-05,
      "loss": 1.2373,
      "step": 4300
    },
    {
      "epoch": 22.8,
      "learning_rate": 5.150980392156863e-05,
      "loss": 1.2337,
      "step": 4400
    },
    {
      "epoch": 23.32,
      "learning_rate": 5.052941176470588e-05,
      "loss": 1.2274,
      "step": 4500
    },
    {
      "epoch": 23.32,
      "eval_loss": 0.8933804631233215,
      "eval_runtime": 115.4101,
      "eval_samples_per_second": 23.334,
      "eval_steps_per_second": 2.92,
      "eval_wer": 0.5797175036927622,
      "step": 4500
    },
    {
      "epoch": 23.83,
      "learning_rate": 4.9549019607843137e-05,
      "loss": 1.2091,
      "step": 4600
    },
    {
      "epoch": 24.35,
      "learning_rate": 4.856862745098039e-05,
      "loss": 1.1947,
      "step": 4700
    },
    {
      "epoch": 24.87,
      "learning_rate": 4.759803921568627e-05,
      "loss": 1.1901,
      "step": 4800
    },
    {
      "epoch": 25.39,
      "learning_rate": 4.6617647058823525e-05,
      "loss": 1.1731,
      "step": 4900
    },
    {
      "epoch": 25.91,
      "learning_rate": 4.5637254901960776e-05,
      "loss": 1.1763,
      "step": 5000
    },
    {
      "epoch": 25.91,
      "eval_loss": 0.8642701506614685,
      "eval_runtime": 115.2886,
      "eval_samples_per_second": 23.359,
      "eval_steps_per_second": 2.923,
      "eval_wer": 0.5759785819793205,
      "step": 5000
    },
    {
      "epoch": 26.42,
      "learning_rate": 4.465686274509803e-05,
      "loss": 1.1661,
      "step": 5100
    },
    {
      "epoch": 26.94,
      "learning_rate": 4.367647058823529e-05,
      "loss": 1.1628,
      "step": 5200
    },
    {
      "epoch": 27.46,
      "learning_rate": 4.269607843137254e-05,
      "loss": 1.1618,
      "step": 5300
    },
    {
      "epoch": 27.98,
      "learning_rate": 4.1715686274509805e-05,
      "loss": 1.1468,
      "step": 5400
    },
    {
      "epoch": 28.5,
      "learning_rate": 4.0735294117647055e-05,
      "loss": 1.149,
      "step": 5500
    },
    {
      "epoch": 28.5,
      "eval_loss": 0.8251490592956543,
      "eval_runtime": 116.3124,
      "eval_samples_per_second": 23.153,
      "eval_steps_per_second": 2.897,
      "eval_wer": 0.5543759231905465,
      "step": 5500
    },
    {
      "epoch": 29.02,
      "learning_rate": 3.975490196078431e-05,
      "loss": 1.1572,
      "step": 5600
    },
    {
      "epoch": 29.53,
      "learning_rate": 3.877450980392157e-05,
      "loss": 1.1389,
      "step": 5700
    },
    {
      "epoch": 30.05,
      "learning_rate": 3.779411764705882e-05,
      "loss": 1.1337,
      "step": 5800
    },
    {
      "epoch": 30.57,
      "learning_rate": 3.681372549019607e-05,
      "loss": 1.1226,
      "step": 5900
    },
    {
      "epoch": 31.09,
      "learning_rate": 3.5833333333333335e-05,
      "loss": 1.1207,
      "step": 6000
    },
    {
      "epoch": 31.09,
      "eval_loss": 0.8505932092666626,
      "eval_runtime": 115.2841,
      "eval_samples_per_second": 23.36,
      "eval_steps_per_second": 2.923,
      "eval_wer": 0.5527141802067946,
      "step": 6000
    },
    {
      "epoch": 31.61,
      "learning_rate": 3.4852941176470585e-05,
      "loss": 1.1243,
      "step": 6100
    },
    {
      "epoch": 32.12,
      "learning_rate": 3.387254901960784e-05,
      "loss": 1.1156,
      "step": 6200
    },
    {
      "epoch": 32.64,
      "learning_rate": 3.289215686274509e-05,
      "loss": 1.1158,
      "step": 6300
    },
    {
      "epoch": 33.16,
      "learning_rate": 3.191176470588235e-05,
      "loss": 1.0918,
      "step": 6400
    },
    {
      "epoch": 33.68,
      "learning_rate": 3.09313725490196e-05,
      "loss": 1.091,
      "step": 6500
    },
    {
      "epoch": 33.68,
      "eval_loss": 0.8370497822761536,
      "eval_runtime": 115.5394,
      "eval_samples_per_second": 23.308,
      "eval_steps_per_second": 2.917,
      "eval_wer": 0.5365583456425406,
      "step": 6500
    },
    {
      "epoch": 34.2,
      "learning_rate": 2.995098039215686e-05,
      "loss": 1.0892,
      "step": 6600
    },
    {
      "epoch": 34.72,
      "learning_rate": 2.8980392156862746e-05,
      "loss": 1.0799,
      "step": 6700
    },
    {
      "epoch": 35.23,
      "learning_rate": 2.8e-05,
      "loss": 1.0671,
      "step": 6800
    },
    {
      "epoch": 35.75,
      "learning_rate": 2.7019607843137253e-05,
      "loss": 1.07,
      "step": 6900
    },
    {
      "epoch": 36.27,
      "learning_rate": 2.6049019607843135e-05,
      "loss": 1.0613,
      "step": 7000
    },
    {
      "epoch": 36.27,
      "eval_loss": 0.8345041275024414,
      "eval_runtime": 116.5707,
      "eval_samples_per_second": 23.102,
      "eval_steps_per_second": 2.891,
      "eval_wer": 0.5351735598227474,
      "step": 7000
    },
    {
      "epoch": 36.79,
      "learning_rate": 2.506862745098039e-05,
      "loss": 1.0797,
      "step": 7100
    },
    {
      "epoch": 37.31,
      "learning_rate": 2.4088235294117646e-05,
      "loss": 1.0787,
      "step": 7200
    },
    {
      "epoch": 37.82,
      "learning_rate": 2.31078431372549e-05,
      "loss": 1.0706,
      "step": 7300
    },
    {
      "epoch": 38.34,
      "learning_rate": 2.2127450980392153e-05,
      "loss": 1.0585,
      "step": 7400
    },
    {
      "epoch": 38.86,
      "learning_rate": 2.114705882352941e-05,
      "loss": 1.0495,
      "step": 7500
    },
    {
      "epoch": 38.86,
      "eval_loss": 0.8380374908447266,
      "eval_runtime": 115.7848,
      "eval_samples_per_second": 23.259,
      "eval_steps_per_second": 2.911,
      "eval_wer": 0.5321270310192023,
      "step": 7500
    },
    {
      "epoch": 39.38,
      "learning_rate": 2.0166666666666664e-05,
      "loss": 1.0456,
      "step": 7600
    },
    {
      "epoch": 39.9,
      "learning_rate": 1.9186274509803922e-05,
      "loss": 1.0446,
      "step": 7700
    },
    {
      "epoch": 40.41,
      "learning_rate": 1.8205882352941176e-05,
      "loss": 1.0353,
      "step": 7800
    },
    {
      "epoch": 40.93,
      "learning_rate": 1.722549019607843e-05,
      "loss": 1.0375,
      "step": 7900
    },
    {
      "epoch": 41.45,
      "learning_rate": 1.6245098039215687e-05,
      "loss": 1.0345,
      "step": 8000
    },
    {
      "epoch": 41.45,
      "eval_loss": 0.828546941280365,
      "eval_runtime": 115.0908,
      "eval_samples_per_second": 23.399,
      "eval_steps_per_second": 2.928,
      "eval_wer": 0.5269110044313147,
      "step": 8000
    },
    {
      "epoch": 41.97,
      "learning_rate": 1.526470588235294e-05,
      "loss": 1.0196,
      "step": 8100
    },
    {
      "epoch": 42.49,
      "learning_rate": 1.4284313725490196e-05,
      "loss": 1.0265,
      "step": 8200
    },
    {
      "epoch": 43.01,
      "learning_rate": 1.330392156862745e-05,
      "loss": 1.028,
      "step": 8300
    },
    {
      "epoch": 43.52,
      "learning_rate": 1.2323529411764704e-05,
      "loss": 1.0281,
      "step": 8400
    },
    {
      "epoch": 44.04,
      "learning_rate": 1.1343137254901961e-05,
      "loss": 1.0297,
      "step": 8500
    },
    {
      "epoch": 44.04,
      "eval_loss": 0.7836087346076965,
      "eval_runtime": 116.6272,
      "eval_samples_per_second": 23.091,
      "eval_steps_per_second": 2.89,
      "eval_wer": 0.5141248153618907,
      "step": 8500
    },
    {
      "epoch": 44.56,
      "learning_rate": 1.0362745098039215e-05,
      "loss": 1.0097,
      "step": 8600
    },
    {
      "epoch": 45.08,
      "learning_rate": 9.382352941176469e-06,
      "loss": 1.0046,
      "step": 8700
    },
    {
      "epoch": 45.6,
      "learning_rate": 8.401960784313724e-06,
      "loss": 1.0082,
      "step": 8800
    },
    {
      "epoch": 46.11,
      "learning_rate": 7.42156862745098e-06,
      "loss": 1.0065,
      "step": 8900
    },
    {
      "epoch": 46.63,
      "learning_rate": 6.4411764705882346e-06,
      "loss": 1.027,
      "step": 9000
    },
    {
      "epoch": 46.63,
      "eval_loss": 0.8119935989379883,
      "eval_runtime": 115.6592,
      "eval_samples_per_second": 23.284,
      "eval_steps_per_second": 2.914,
      "eval_wer": 0.5179560561299852,
      "step": 9000
    },
    {
      "epoch": 47.15,
      "learning_rate": 5.460784313725489e-06,
      "loss": 0.9917,
      "step": 9100
    },
    {
      "epoch": 47.67,
      "learning_rate": 4.480392156862745e-06,
      "loss": 0.9925,
      "step": 9200
    },
    {
      "epoch": 48.19,
      "learning_rate": 3.5e-06,
      "loss": 0.9924,
      "step": 9300
    },
    {
      "epoch": 48.7,
      "learning_rate": 2.5196078431372547e-06,
      "loss": 0.983,
      "step": 9400
    },
    {
      "epoch": 49.22,
      "learning_rate": 1.5392156862745098e-06,
      "loss": 0.9876,
      "step": 9500
    },
    {
      "epoch": 49.22,
      "eval_loss": 0.8109092116355896,
      "eval_runtime": 115.9344,
      "eval_samples_per_second": 23.229,
      "eval_steps_per_second": 2.907,
      "eval_wer": 0.5187869276218612,
      "step": 9500
    },
    {
      "epoch": 49.74,
      "learning_rate": 5.588235294117647e-07,
      "loss": 0.973,
      "step": 9600
    },
    {
      "epoch": 50.0,
      "step": 9650,
      "total_flos": 4.0125393308879946e+19,
      "train_loss": 1.853262206161578,
      "train_runtime": 19140.1846,
      "train_samples_per_second": 16.134,
      "train_steps_per_second": 0.504
    }
  ],
  "max_steps": 9650,
  "num_train_epochs": 50,
  "total_flos": 4.0125393308879946e+19,
  "trial_name": null,
  "trial_params": null
}