|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 150.0, |
|
"global_step": 4350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 7.2e-06, |
|
"loss": 16.2031, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.47e-05, |
|
"loss": 6.6774, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 2.2199999999999998e-05, |
|
"loss": 4.7603, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 2.97e-05, |
|
"loss": 3.9198, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 3.7199999999999996e-05, |
|
"loss": 3.5558, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"eval_loss": 3.5216922760009766, |
|
"eval_runtime": 2.4797, |
|
"eval_samples_per_second": 108.882, |
|
"eval_steps_per_second": 3.629, |
|
"eval_wer": 0.9812865497076023, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 4.4699999999999996e-05, |
|
"loss": 3.4799, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 5.2199999999999995e-05, |
|
"loss": 3.4271, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 5.97e-05, |
|
"loss": 3.2779, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 31.03, |
|
"learning_rate": 6.72e-05, |
|
"loss": 2.9317, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"learning_rate": 7.47e-05, |
|
"loss": 2.2202, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"eval_loss": 0.9991578459739685, |
|
"eval_runtime": 1.9665, |
|
"eval_samples_per_second": 137.299, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.2637958532695375, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 37.93, |
|
"learning_rate": 7.285074626865672e-05, |
|
"loss": 1.835, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 41.38, |
|
"learning_rate": 7.061194029850745e-05, |
|
"loss": 1.653, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 44.83, |
|
"learning_rate": 6.83731343283582e-05, |
|
"loss": 1.5243, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 48.28, |
|
"learning_rate": 6.613432835820895e-05, |
|
"loss": 1.4343, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 51.72, |
|
"learning_rate": 6.38955223880597e-05, |
|
"loss": 1.376, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 51.72, |
|
"eval_loss": 0.5396047830581665, |
|
"eval_runtime": 1.8956, |
|
"eval_samples_per_second": 142.435, |
|
"eval_steps_per_second": 4.748, |
|
"eval_wer": 0.1946836788942052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 55.17, |
|
"learning_rate": 6.165671641791044e-05, |
|
"loss": 1.3168, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 58.62, |
|
"learning_rate": 5.941791044776119e-05, |
|
"loss": 1.2773, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 62.07, |
|
"learning_rate": 5.717910447761193e-05, |
|
"loss": 1.2455, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 65.52, |
|
"learning_rate": 5.494029850746268e-05, |
|
"loss": 1.1881, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 68.97, |
|
"learning_rate": 5.272388059701492e-05, |
|
"loss": 1.1565, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 68.97, |
|
"eval_loss": 0.4707144498825073, |
|
"eval_runtime": 1.8963, |
|
"eval_samples_per_second": 142.381, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.1773524720893142, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 72.41, |
|
"learning_rate": 5.048507462686567e-05, |
|
"loss": 1.1295, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 75.86, |
|
"learning_rate": 4.8268656716417906e-05, |
|
"loss": 1.0983, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 79.31, |
|
"learning_rate": 4.6029850746268655e-05, |
|
"loss": 1.0716, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 82.76, |
|
"learning_rate": 4.37910447761194e-05, |
|
"loss": 1.0582, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 86.21, |
|
"learning_rate": 4.155223880597015e-05, |
|
"loss": 1.0112, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 86.21, |
|
"eval_loss": 0.46029242873191833, |
|
"eval_runtime": 1.9007, |
|
"eval_samples_per_second": 142.05, |
|
"eval_steps_per_second": 4.735, |
|
"eval_wer": 0.17533227006911217, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 89.66, |
|
"learning_rate": 3.931343283582089e-05, |
|
"loss": 0.9962, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 93.1, |
|
"learning_rate": 3.707462686567164e-05, |
|
"loss": 0.9746, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 96.55, |
|
"learning_rate": 3.483582089552238e-05, |
|
"loss": 0.9592, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 3.259701492537313e-05, |
|
"loss": 0.9364, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 103.45, |
|
"learning_rate": 3.0358208955223878e-05, |
|
"loss": 0.9387, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 103.45, |
|
"eval_loss": 0.4466392397880554, |
|
"eval_runtime": 1.9017, |
|
"eval_samples_per_second": 141.977, |
|
"eval_steps_per_second": 4.733, |
|
"eval_wer": 0.1721424774056353, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 106.9, |
|
"learning_rate": 2.811940298507462e-05, |
|
"loss": 0.9223, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 110.34, |
|
"learning_rate": 2.588059701492537e-05, |
|
"loss": 0.8839, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 113.79, |
|
"learning_rate": 2.364179104477612e-05, |
|
"loss": 0.8845, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 117.24, |
|
"learning_rate": 2.1402985074626863e-05, |
|
"loss": 0.8604, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 120.69, |
|
"learning_rate": 1.9164179104477612e-05, |
|
"loss": 0.8526, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 120.69, |
|
"eval_loss": 0.43604278564453125, |
|
"eval_runtime": 1.8975, |
|
"eval_samples_per_second": 142.29, |
|
"eval_steps_per_second": 4.743, |
|
"eval_wer": 0.17044125465178098, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 124.14, |
|
"learning_rate": 1.6925373134328355e-05, |
|
"loss": 0.8311, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 127.59, |
|
"learning_rate": 1.4686567164179104e-05, |
|
"loss": 0.832, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 131.03, |
|
"learning_rate": 1.244776119402985e-05, |
|
"loss": 0.812, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 134.48, |
|
"learning_rate": 1.0208955223880597e-05, |
|
"loss": 0.8016, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 137.93, |
|
"learning_rate": 7.970149253731343e-06, |
|
"loss": 0.8018, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 137.93, |
|
"eval_loss": 0.437362939119339, |
|
"eval_runtime": 1.9635, |
|
"eval_samples_per_second": 137.506, |
|
"eval_steps_per_second": 4.584, |
|
"eval_wer": 0.1720361509835194, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 141.38, |
|
"learning_rate": 5.731343283582089e-06, |
|
"loss": 0.7863, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 144.83, |
|
"learning_rate": 3.4925373134328353e-06, |
|
"loss": 0.7862, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 148.28, |
|
"learning_rate": 1.2537313432835818e-06, |
|
"loss": 0.7983, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"step": 4350, |
|
"total_flos": 1.8721826713154617e+19, |
|
"train_loss": 1.9741196327647943, |
|
"train_runtime": 1808.1454, |
|
"train_samples_per_second": 75.077, |
|
"train_steps_per_second": 2.406 |
|
} |
|
], |
|
"max_steps": 4350, |
|
"num_train_epochs": 150, |
|
"total_flos": 1.8721826713154617e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|