|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.67427122940431, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.2638113498687744, |
|
"eval_runtime": 149.9992, |
|
"eval_samples_per_second": 31.487, |
|
"eval_steps_per_second": 2.627, |
|
"eval_wer": 0.9359337678636492, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.970000000000001e-05, |
|
"loss": 2.6089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 0.7277476787567139, |
|
"eval_runtime": 148.226, |
|
"eval_samples_per_second": 31.864, |
|
"eval_steps_per_second": 2.658, |
|
"eval_wer": 0.24067851503987692, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.5800275802612305, |
|
"eval_runtime": 148.0487, |
|
"eval_samples_per_second": 31.902, |
|
"eval_steps_per_second": 2.661, |
|
"eval_wer": 0.1745475891664305, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.892222222222223e-05, |
|
"loss": 0.6019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.48867326974868774, |
|
"eval_runtime": 148.4526, |
|
"eval_samples_per_second": 31.815, |
|
"eval_steps_per_second": 2.654, |
|
"eval_wer": 0.15135014776729688, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 0.466572642326355, |
|
"eval_runtime": 148.0238, |
|
"eval_samples_per_second": 31.907, |
|
"eval_steps_per_second": 2.662, |
|
"eval_wer": 0.14213999433221328, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 7.78111111111111e-05, |
|
"loss": 0.4722, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.44257038831710815, |
|
"eval_runtime": 149.7003, |
|
"eval_samples_per_second": 31.55, |
|
"eval_steps_per_second": 2.632, |
|
"eval_wer": 0.14505485607870128, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 0.41759932041168213, |
|
"eval_runtime": 149.0588, |
|
"eval_samples_per_second": 31.685, |
|
"eval_steps_per_second": 2.643, |
|
"eval_wer": 0.12481276061697907, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 6.671111111111111e-05, |
|
"loss": 0.4278, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 0.4364745318889618, |
|
"eval_runtime": 148.5505, |
|
"eval_samples_per_second": 31.794, |
|
"eval_steps_per_second": 2.652, |
|
"eval_wer": 0.12388162422573985, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 0.3815610408782959, |
|
"eval_runtime": 149.3382, |
|
"eval_samples_per_second": 31.626, |
|
"eval_steps_per_second": 2.638, |
|
"eval_wer": 0.1177280272053763, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 5.560000000000001e-05, |
|
"loss": 0.369, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_loss": 0.4113306403160095, |
|
"eval_runtime": 159.7075, |
|
"eval_samples_per_second": 29.573, |
|
"eval_steps_per_second": 2.467, |
|
"eval_wer": 0.11716124853244808, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 0.3862614035606384, |
|
"eval_runtime": 152.4609, |
|
"eval_samples_per_second": 30.978, |
|
"eval_steps_per_second": 2.584, |
|
"eval_wer": 0.1230112141208858, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.448888888888889e-05, |
|
"loss": 0.341, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_loss": 0.384976863861084, |
|
"eval_runtime": 159.7493, |
|
"eval_samples_per_second": 29.565, |
|
"eval_steps_per_second": 2.466, |
|
"eval_wer": 0.1116149143759362, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"eval_loss": 0.401400089263916, |
|
"eval_runtime": 155.917, |
|
"eval_samples_per_second": 30.292, |
|
"eval_steps_per_second": 2.527, |
|
"eval_wer": 0.11406420792680458, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 3.337777777777778e-05, |
|
"loss": 0.3119, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_loss": 0.39530250430107117, |
|
"eval_runtime": 165.7417, |
|
"eval_samples_per_second": 28.496, |
|
"eval_steps_per_second": 2.377, |
|
"eval_wer": 0.10782964252459415, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 0.4018384516239166, |
|
"eval_runtime": 163.4263, |
|
"eval_samples_per_second": 28.9, |
|
"eval_steps_per_second": 2.411, |
|
"eval_wer": 0.10801182138374965, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 2.2277777777777778e-05, |
|
"loss": 0.3008, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"eval_loss": 0.3963571786880493, |
|
"eval_runtime": 172.5171, |
|
"eval_samples_per_second": 27.377, |
|
"eval_steps_per_second": 2.284, |
|
"eval_wer": 0.10744504271082142, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"eval_loss": 0.39167362451553345, |
|
"eval_runtime": 165.6389, |
|
"eval_samples_per_second": 28.514, |
|
"eval_steps_per_second": 2.379, |
|
"eval_wer": 0.10780940042913242, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 1.1166666666666668e-05, |
|
"loss": 0.2741, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"eval_loss": 0.3961273431777954, |
|
"eval_runtime": 164.9191, |
|
"eval_samples_per_second": 28.638, |
|
"eval_steps_per_second": 2.389, |
|
"eval_wer": 0.10568398040565159, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"eval_loss": 0.39744970202445984, |
|
"eval_runtime": 164.8733, |
|
"eval_samples_per_second": 28.646, |
|
"eval_steps_per_second": 2.39, |
|
"eval_wer": 0.10529938059187888, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 5.555555555555556e-08, |
|
"loss": 0.2531, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"eval_loss": 0.40417608618736267, |
|
"eval_runtime": 164.6908, |
|
"eval_samples_per_second": 28.678, |
|
"eval_steps_per_second": 2.392, |
|
"eval_wer": 0.10485405449172099, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"step": 10000, |
|
"total_flos": 2.7136568655380324e+19, |
|
"train_loss": 0.5960800704956055, |
|
"train_runtime": 13155.4001, |
|
"train_samples_per_second": 18.243, |
|
"train_steps_per_second": 0.76 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 13, |
|
"total_flos": 2.7136568655380324e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|