|
{ |
|
"best_metric": 6.499429874572406, |
|
"best_model_checkpoint": "../whisper-medium-NST-uf-linlr/checkpoint-13000", |
|
"epoch": 2.1946, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.47e-06, |
|
"loss": 1.7524, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.970000000000001e-06, |
|
"loss": 0.3854, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.4700000000000005e-06, |
|
"loss": 0.2369, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.970000000000001e-06, |
|
"loss": 0.2046, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.34258419275283813, |
|
"eval_runtime": 49.7358, |
|
"eval_samples_per_second": 2.011, |
|
"eval_steps_per_second": 0.141, |
|
"eval_wer": 15.279361459521096, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.87e-06, |
|
"loss": 0.1852, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.738421052631579e-06, |
|
"loss": 0.167, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.606842105263159e-06, |
|
"loss": 0.1547, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.475263157894737e-06, |
|
"loss": 0.148, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.3284447491168976, |
|
"eval_runtime": 52.426, |
|
"eval_samples_per_second": 1.907, |
|
"eval_steps_per_second": 0.134, |
|
"eval_wer": 10.832383124287343, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.343684210526317e-06, |
|
"loss": 0.1401, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.212105263157895e-06, |
|
"loss": 0.1298, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.080526315789475e-06, |
|
"loss": 0.1252, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.948947368421053e-06, |
|
"loss": 0.121, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.30924132466316223, |
|
"eval_runtime": 57.6047, |
|
"eval_samples_per_second": 1.736, |
|
"eval_steps_per_second": 0.122, |
|
"eval_wer": 12.884834663625996, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.817368421052631e-06, |
|
"loss": 0.1169, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.685789473684211e-06, |
|
"loss": 0.1143, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.55421052631579e-06, |
|
"loss": 0.1128, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.42263157894737e-06, |
|
"loss": 0.1089, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.2808048129081726, |
|
"eval_runtime": 46.0082, |
|
"eval_samples_per_second": 2.174, |
|
"eval_steps_per_second": 0.152, |
|
"eval_wer": 10.490307867730902, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.291052631578948e-06, |
|
"loss": 0.1041, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.159473684210528e-06, |
|
"loss": 0.1073, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.027894736842106e-06, |
|
"loss": 0.0996, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.896315789473686e-06, |
|
"loss": 0.0976, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.26165536046028137, |
|
"eval_runtime": 47.1016, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 0.149, |
|
"eval_wer": 9.92018244013683, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.764736842105264e-06, |
|
"loss": 0.099, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.633157894736842e-06, |
|
"loss": 0.0925, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.501578947368422e-06, |
|
"loss": 0.0918, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.370000000000001e-06, |
|
"loss": 0.0901, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.26038578152656555, |
|
"eval_runtime": 72.3813, |
|
"eval_samples_per_second": 1.382, |
|
"eval_steps_per_second": 0.097, |
|
"eval_wer": 21.892816419612316, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.238421052631579e-06, |
|
"loss": 0.0881, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.106842105263159e-06, |
|
"loss": 0.0908, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.975263157894737e-06, |
|
"loss": 0.0887, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.843684210526317e-06, |
|
"loss": 0.0834, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.2877318859100342, |
|
"eval_runtime": 47.1887, |
|
"eval_samples_per_second": 2.119, |
|
"eval_steps_per_second": 0.148, |
|
"eval_wer": 9.35005701254276, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.712105263157895e-06, |
|
"loss": 0.0828, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.580526315789474e-06, |
|
"loss": 0.0825, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.448947368421053e-06, |
|
"loss": 0.085, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.317368421052632e-06, |
|
"loss": 0.0825, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.2793583571910858, |
|
"eval_runtime": 46.2565, |
|
"eval_samples_per_second": 2.162, |
|
"eval_steps_per_second": 0.151, |
|
"eval_wer": 9.35005701254276, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.185789473684211e-06, |
|
"loss": 0.0783, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.05421052631579e-06, |
|
"loss": 0.0708, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.922631578947369e-06, |
|
"loss": 0.0605, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.791052631578948e-06, |
|
"loss": 0.0553, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.2844734191894531, |
|
"eval_runtime": 54.3571, |
|
"eval_samples_per_second": 1.84, |
|
"eval_steps_per_second": 0.129, |
|
"eval_wer": 9.578107183580387, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.659473684210527e-06, |
|
"loss": 0.0505, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.527894736842105e-06, |
|
"loss": 0.0475, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5.396842105263158e-06, |
|
"loss": 0.0465, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.265263157894738e-06, |
|
"loss": 0.0472, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 0.2814468741416931, |
|
"eval_runtime": 74.4128, |
|
"eval_samples_per_second": 1.344, |
|
"eval_steps_per_second": 0.094, |
|
"eval_wer": 24.173318129988598, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.133684210526316e-06, |
|
"loss": 0.0471, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5.002105263157895e-06, |
|
"loss": 0.0431, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.870526315789474e-06, |
|
"loss": 0.0396, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.738947368421053e-06, |
|
"loss": 0.0409, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.30840516090393066, |
|
"eval_runtime": 47.429, |
|
"eval_samples_per_second": 2.108, |
|
"eval_steps_per_second": 0.148, |
|
"eval_wer": 8.095781071835804, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.607368421052632e-06, |
|
"loss": 0.0401, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.475789473684211e-06, |
|
"loss": 0.041, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.344210526315789e-06, |
|
"loss": 0.0394, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.212631578947368e-06, |
|
"loss": 0.041, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.2865241765975952, |
|
"eval_runtime": 57.8167, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.121, |
|
"eval_wer": 9.236031927023944, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.0810526315789474e-06, |
|
"loss": 0.0383, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.9494736842105265e-06, |
|
"loss": 0.0404, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.817894736842106e-06, |
|
"loss": 0.0374, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.6863157894736847e-06, |
|
"loss": 0.0353, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.2827776074409485, |
|
"eval_runtime": 49.2884, |
|
"eval_samples_per_second": 2.029, |
|
"eval_steps_per_second": 0.142, |
|
"eval_wer": 6.499429874572406, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.554736842105264e-06, |
|
"loss": 0.0387, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.423157894736842e-06, |
|
"loss": 0.0353, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.292105263157895e-06, |
|
"loss": 0.0353, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.160526315789474e-06, |
|
"loss": 0.0348, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 0.27084633708000183, |
|
"eval_runtime": 56.2306, |
|
"eval_samples_per_second": 1.778, |
|
"eval_steps_per_second": 0.124, |
|
"eval_wer": 7.525655644241732, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.028947368421053e-06, |
|
"loss": 0.0367, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.897368421052632e-06, |
|
"loss": 0.0369, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7657894736842104e-06, |
|
"loss": 0.0354, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.6342105263157895e-06, |
|
"loss": 0.0349, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.28421100974082947, |
|
"eval_runtime": 69.8567, |
|
"eval_samples_per_second": 1.432, |
|
"eval_steps_per_second": 0.1, |
|
"eval_wer": 23.033067274800455, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.5026315789473686e-06, |
|
"loss": 0.033, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.3710526315789477e-06, |
|
"loss": 0.0351, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.24e-06, |
|
"loss": 0.0355, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.1084210526315792e-06, |
|
"loss": 0.0361, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 0.2769256830215454, |
|
"eval_runtime": 54.7414, |
|
"eval_samples_per_second": 1.827, |
|
"eval_steps_per_second": 0.128, |
|
"eval_wer": 10.148232611174459, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9773684210526317e-06, |
|
"loss": 0.0338, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8457894736842108e-06, |
|
"loss": 0.03, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7142105263157897e-06, |
|
"loss": 0.0263, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5826315789473687e-06, |
|
"loss": 0.0249, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.2934916913509369, |
|
"eval_runtime": 60.1234, |
|
"eval_samples_per_second": 1.663, |
|
"eval_steps_per_second": 0.116, |
|
"eval_wer": 8.893956670467503, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.4510526315789474e-06, |
|
"loss": 0.0214, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.3194736842105263e-06, |
|
"loss": 0.0197, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.1878947368421054e-06, |
|
"loss": 0.0208, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.0563157894736843e-06, |
|
"loss": 0.0204, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.28742682933807373, |
|
"eval_runtime": 70.388, |
|
"eval_samples_per_second": 1.421, |
|
"eval_steps_per_second": 0.099, |
|
"eval_wer": 12.428734321550742, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.247368421052633e-07, |
|
"loss": 0.0207, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 7.931578947368422e-07, |
|
"loss": 0.0188, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 6.615789473684211e-07, |
|
"loss": 0.0167, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.3e-07, |
|
"loss": 0.0175, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.28821495175361633, |
|
"eval_runtime": 69.297, |
|
"eval_samples_per_second": 1.443, |
|
"eval_steps_per_second": 0.101, |
|
"eval_wer": 12.998859749144811, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.98421052631579e-07, |
|
"loss": 0.0184, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.6684210526315793e-07, |
|
"loss": 0.0186, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3526315789473686e-07, |
|
"loss": 0.0179, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.6842105263157898e-09, |
|
"loss": 0.0197, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 0.3006799817085266, |
|
"eval_runtime": 45.4742, |
|
"eval_samples_per_second": 2.199, |
|
"eval_steps_per_second": 0.154, |
|
"eval_wer": 9.122006841505131, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"step": 20000, |
|
"total_flos": 2.077757165223936e+20, |
|
"train_loss": 0.09227749185562134, |
|
"train_runtime": 214472.7404, |
|
"train_samples_per_second": 6.714, |
|
"train_steps_per_second": 0.093 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 2.077757165223936e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|