|
{ |
|
"best_metric": 42.76086285863452, |
|
"best_model_checkpoint": "./checkpoint-10000", |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.4e-07, |
|
"loss": 1.6662, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.400000000000001e-07, |
|
"loss": 1.5882, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.44e-06, |
|
"loss": 1.4859, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.94e-06, |
|
"loss": 1.3801, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4400000000000004e-06, |
|
"loss": 1.2195, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9400000000000002e-06, |
|
"loss": 1.2027, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.44e-06, |
|
"loss": 1.1254, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.94e-06, |
|
"loss": 1.0992, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.440000000000001e-06, |
|
"loss": 1.1116, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.94e-06, |
|
"loss": 1.1071, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.4400000000000004e-06, |
|
"loss": 1.0727, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.94e-06, |
|
"loss": 0.9899, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.440000000000001e-06, |
|
"loss": 1.0348, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.9400000000000005e-06, |
|
"loss": 1.0282, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.440000000000001e-06, |
|
"loss": 0.9794, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.94e-06, |
|
"loss": 0.9765, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.44e-06, |
|
"loss": 0.9348, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.94e-06, |
|
"loss": 0.9781, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.440000000000001e-06, |
|
"loss": 0.9395, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.940000000000001e-06, |
|
"loss": 0.9068, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.976842105263158e-06, |
|
"loss": 0.9286, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.950526315789475e-06, |
|
"loss": 0.9343, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.92421052631579e-06, |
|
"loss": 0.9175, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.897894736842107e-06, |
|
"loss": 0.8807, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.871578947368422e-06, |
|
"loss": 0.9134, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.845263157894738e-06, |
|
"loss": 0.8834, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.818947368421053e-06, |
|
"loss": 0.8511, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.79263157894737e-06, |
|
"loss": 0.8868, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.766315789473685e-06, |
|
"loss": 0.8664, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.74e-06, |
|
"loss": 0.8324, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.713684210526317e-06, |
|
"loss": 0.8426, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.687368421052632e-06, |
|
"loss": 0.8381, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.661052631578948e-06, |
|
"loss": 0.8184, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.634736842105265e-06, |
|
"loss": 0.8261, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.60842105263158e-06, |
|
"loss": 0.8248, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.582105263157897e-06, |
|
"loss": 0.803, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.555789473684211e-06, |
|
"loss": 0.7759, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.529473684210528e-06, |
|
"loss": 0.8195, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.503157894736843e-06, |
|
"loss": 0.8211, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.476842105263158e-06, |
|
"loss": 0.8057, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.450526315789475e-06, |
|
"loss": 0.825, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.42421052631579e-06, |
|
"loss": 0.7549, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.397894736842106e-06, |
|
"loss": 0.8096, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.371578947368421e-06, |
|
"loss": 0.7947, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.345263157894738e-06, |
|
"loss": 0.812, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.318947368421053e-06, |
|
"loss": 0.7615, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.292631578947368e-06, |
|
"loss": 0.781, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.266315789473685e-06, |
|
"loss": 0.7546, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.240000000000001e-06, |
|
"loss": 0.7737, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.213684210526316e-06, |
|
"loss": 0.7733, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.187368421052633e-06, |
|
"loss": 0.7421, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.161052631578948e-06, |
|
"loss": 0.7283, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.134736842105265e-06, |
|
"loss": 0.7855, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.10842105263158e-06, |
|
"loss": 0.7727, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.082105263157896e-06, |
|
"loss": 0.7846, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.055789473684211e-06, |
|
"loss": 0.7535, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.029473684210526e-06, |
|
"loss": 0.7695, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.003157894736843e-06, |
|
"loss": 0.7598, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.976842105263158e-06, |
|
"loss": 0.765, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.950526315789474e-06, |
|
"loss": 0.7752, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.92421052631579e-06, |
|
"loss": 0.7359, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.897894736842106e-06, |
|
"loss": 0.8049, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.871578947368423e-06, |
|
"loss": 0.7799, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.845263157894738e-06, |
|
"loss": 0.7506, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.818947368421054e-06, |
|
"loss": 0.7106, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.79263157894737e-06, |
|
"loss": 0.6927, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.766315789473684e-06, |
|
"loss": 0.7267, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.740000000000001e-06, |
|
"loss": 0.7268, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.713684210526316e-06, |
|
"loss": 0.7288, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.687368421052633e-06, |
|
"loss": 0.7342, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.661052631578948e-06, |
|
"loss": 0.7446, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.634736842105264e-06, |
|
"loss": 0.7206, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.60842105263158e-06, |
|
"loss": 0.6896, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.582105263157894e-06, |
|
"loss": 0.6801, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.555789473684213e-06, |
|
"loss": 0.7053, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.529473684210528e-06, |
|
"loss": 0.7464, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.503157894736843e-06, |
|
"loss": 0.7012, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.47684210526316e-06, |
|
"loss": 0.7474, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.450526315789474e-06, |
|
"loss": 0.7307, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.42421052631579e-06, |
|
"loss": 0.7005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.7135426998138428, |
|
"eval_runtime": 436.6989, |
|
"eval_samples_per_second": 11.461, |
|
"eval_steps_per_second": 0.36, |
|
"eval_wer": 51.536581314221976, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.397894736842106e-06, |
|
"loss": 0.7708, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.371578947368422e-06, |
|
"loss": 0.7349, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.345263157894737e-06, |
|
"loss": 0.7119, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.318947368421052e-06, |
|
"loss": 0.7018, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.292631578947369e-06, |
|
"loss": 0.7114, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.266315789473684e-06, |
|
"loss": 0.701, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.24e-06, |
|
"loss": 0.6855, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.213684210526316e-06, |
|
"loss": 0.7156, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.187368421052632e-06, |
|
"loss": 0.6985, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.161052631578949e-06, |
|
"loss": 0.705, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.134736842105264e-06, |
|
"loss": 0.6979, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.10842105263158e-06, |
|
"loss": 0.6929, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.082105263157896e-06, |
|
"loss": 0.6943, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.05578947368421e-06, |
|
"loss": 0.707, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.029473684210527e-06, |
|
"loss": 0.678, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.003157894736842e-06, |
|
"loss": 0.6805, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.976842105263159e-06, |
|
"loss": 0.6967, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.950526315789474e-06, |
|
"loss": 0.6595, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.92421052631579e-06, |
|
"loss": 0.653, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.897894736842106e-06, |
|
"loss": 0.687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.87157894736842e-06, |
|
"loss": 0.6772, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.845263157894739e-06, |
|
"loss": 0.6975, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.818947368421054e-06, |
|
"loss": 0.6932, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.792631578947369e-06, |
|
"loss": 0.7012, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.766315789473685e-06, |
|
"loss": 0.686, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.74e-06, |
|
"loss": 0.6454, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.713684210526317e-06, |
|
"loss": 0.6852, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.687368421052632e-06, |
|
"loss": 0.6506, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.661052631578949e-06, |
|
"loss": 0.6917, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.634736842105264e-06, |
|
"loss": 0.6545, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.6084210526315795e-06, |
|
"loss": 0.6586, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.582105263157895e-06, |
|
"loss": 0.679, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.555789473684211e-06, |
|
"loss": 0.6672, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.529473684210526e-06, |
|
"loss": 0.6611, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.503157894736842e-06, |
|
"loss": 0.6907, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.476842105263159e-06, |
|
"loss": 0.6636, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.4505263157894744e-06, |
|
"loss": 0.6438, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.42421052631579e-06, |
|
"loss": 0.6208, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.397894736842106e-06, |
|
"loss": 0.638, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.371578947368422e-06, |
|
"loss": 0.6909, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.345263157894738e-06, |
|
"loss": 0.6642, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.3189473684210535e-06, |
|
"loss": 0.6624, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.292631578947369e-06, |
|
"loss": 0.6324, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.266315789473684e-06, |
|
"loss": 0.6556, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.24e-06, |
|
"loss": 0.7184, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.213684210526316e-06, |
|
"loss": 0.6502, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.187368421052632e-06, |
|
"loss": 0.6639, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.161052631578948e-06, |
|
"loss": 0.639, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.134736842105264e-06, |
|
"loss": 0.6593, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.10842105263158e-06, |
|
"loss": 0.6519, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.082105263157896e-06, |
|
"loss": 0.6465, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.055789473684212e-06, |
|
"loss": 0.6401, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.0294736842105275e-06, |
|
"loss": 0.6329, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.0031578947368425e-06, |
|
"loss": 0.6461, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.976842105263158e-06, |
|
"loss": 0.6286, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.950526315789474e-06, |
|
"loss": 0.646, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.92421052631579e-06, |
|
"loss": 0.6578, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.897894736842106e-06, |
|
"loss": 0.6468, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.871578947368422e-06, |
|
"loss": 0.6224, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.845263157894737e-06, |
|
"loss": 0.6628, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.818947368421052e-06, |
|
"loss": 0.6386, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.792631578947368e-06, |
|
"loss": 0.6612, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.766315789473685e-06, |
|
"loss": 0.6707, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.740000000000001e-06, |
|
"loss": 0.6297, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.7136842105263165e-06, |
|
"loss": 0.626, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.687368421052632e-06, |
|
"loss": 0.6481, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.661052631578948e-06, |
|
"loss": 0.6489, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.634736842105264e-06, |
|
"loss": 0.6169, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.60842105263158e-06, |
|
"loss": 0.6204, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.582105263157896e-06, |
|
"loss": 0.6138, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.5557894736842106e-06, |
|
"loss": 0.609, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.529473684210526e-06, |
|
"loss": 0.6137, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.503157894736842e-06, |
|
"loss": 0.6208, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.476842105263158e-06, |
|
"loss": 0.6497, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.450526315789474e-06, |
|
"loss": 0.6193, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.42421052631579e-06, |
|
"loss": 0.6229, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.397894736842106e-06, |
|
"loss": 0.5991, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.371578947368422e-06, |
|
"loss": 0.6543, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.345263157894738e-06, |
|
"loss": 0.648, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.318947368421054e-06, |
|
"loss": 0.6267, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.6309367418289185, |
|
"eval_runtime": 403.8252, |
|
"eval_samples_per_second": 12.394, |
|
"eval_steps_per_second": 0.389, |
|
"eval_wer": 50.94330667552624, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.292631578947369e-06, |
|
"loss": 0.635, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.267368421052632e-06, |
|
"loss": 0.6179, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.241052631578948e-06, |
|
"loss": 0.6384, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.214736842105264e-06, |
|
"loss": 0.6188, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.188421052631579e-06, |
|
"loss": 0.6474, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.1621052631578945e-06, |
|
"loss": 0.6328, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.13578947368421e-06, |
|
"loss": 0.6264, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.109473684210527e-06, |
|
"loss": 0.5977, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.083157894736843e-06, |
|
"loss": 0.6131, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.056842105263159e-06, |
|
"loss": 0.6194, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.0305263157894745e-06, |
|
"loss": 0.6223, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.00421052631579e-06, |
|
"loss": 0.6101, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.977894736842106e-06, |
|
"loss": 0.6336, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.951578947368422e-06, |
|
"loss": 0.6282, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.925263157894737e-06, |
|
"loss": 0.6566, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.898947368421053e-06, |
|
"loss": 0.6117, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.8726315789473685e-06, |
|
"loss": 0.6015, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.846315789473684e-06, |
|
"loss": 0.6123, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.82e-06, |
|
"loss": 0.5862, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.793684210526316e-06, |
|
"loss": 0.6229, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.767368421052632e-06, |
|
"loss": 0.618, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.7410526315789485e-06, |
|
"loss": 0.5953, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.714736842105264e-06, |
|
"loss": 0.5907, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.68842105263158e-06, |
|
"loss": 0.6315, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.662105263157895e-06, |
|
"loss": 0.6302, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.635789473684211e-06, |
|
"loss": 0.5802, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.609473684210527e-06, |
|
"loss": 0.6067, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.5831578947368425e-06, |
|
"loss": 0.6164, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.556842105263158e-06, |
|
"loss": 0.6171, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.530526315789474e-06, |
|
"loss": 0.6182, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.50421052631579e-06, |
|
"loss": 0.6245, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.477894736842105e-06, |
|
"loss": 0.6152, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.451578947368421e-06, |
|
"loss": 0.6387, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.425263157894737e-06, |
|
"loss": 0.6096, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.398947368421053e-06, |
|
"loss": 0.6575, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.372631578947369e-06, |
|
"loss": 0.5805, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.346315789473685e-06, |
|
"loss": 0.5909, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.320000000000001e-06, |
|
"loss": 0.5884, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.2936842105263166e-06, |
|
"loss": 0.6108, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.267368421052632e-06, |
|
"loss": 0.6104, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.241052631578948e-06, |
|
"loss": 0.6116, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.214736842105263e-06, |
|
"loss": 0.6191, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.188421052631579e-06, |
|
"loss": 0.6403, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.162105263157895e-06, |
|
"loss": 0.6045, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.135789473684211e-06, |
|
"loss": 0.5822, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.1094736842105264e-06, |
|
"loss": 0.5876, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.083157894736842e-06, |
|
"loss": 0.634, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.056842105263158e-06, |
|
"loss": 0.6033, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.030526315789475e-06, |
|
"loss": 0.6072, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.0042105263157906e-06, |
|
"loss": 0.5876, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.9778947368421055e-06, |
|
"loss": 0.6234, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.951578947368421e-06, |
|
"loss": 0.6089, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.925263157894737e-06, |
|
"loss": 0.5988, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.898947368421053e-06, |
|
"loss": 0.6183, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.872631578947369e-06, |
|
"loss": 0.6051, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.846315789473685e-06, |
|
"loss": 0.575, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8200000000000004e-06, |
|
"loss": 0.6117, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.793684210526316e-06, |
|
"loss": 0.5778, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.767368421052632e-06, |
|
"loss": 0.6341, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.741052631578948e-06, |
|
"loss": 0.6157, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.714736842105264e-06, |
|
"loss": 0.5898, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.6884210526315795e-06, |
|
"loss": 0.575, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.662105263157895e-06, |
|
"loss": 0.6109, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.63578947368421e-06, |
|
"loss": 0.5851, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.609473684210526e-06, |
|
"loss": 0.6109, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.583157894736843e-06, |
|
"loss": 0.6034, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.556842105263159e-06, |
|
"loss": 0.5678, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5305263157894744e-06, |
|
"loss": 0.6222, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.504210526315789e-06, |
|
"loss": 0.5833, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.477894736842105e-06, |
|
"loss": 0.5756, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.451578947368421e-06, |
|
"loss": 0.6017, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.425263157894737e-06, |
|
"loss": 0.6033, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.3989473684210535e-06, |
|
"loss": 0.5788, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.3726315789473685e-06, |
|
"loss": 0.5869, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.346315789473684e-06, |
|
"loss": 0.5965, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.32e-06, |
|
"loss": 0.5983, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.293684210526316e-06, |
|
"loss": 0.5778, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.267368421052632e-06, |
|
"loss": 0.565, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.241052631578948e-06, |
|
"loss": 0.6058, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.214736842105263e-06, |
|
"loss": 0.5886, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.5892367362976074, |
|
"eval_runtime": 422.3368, |
|
"eval_samples_per_second": 11.851, |
|
"eval_steps_per_second": 0.372, |
|
"eval_wer": 50.02254443627044, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.188421052631579e-06, |
|
"loss": 0.5986, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.162105263157895e-06, |
|
"loss": 0.5945, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.135789473684211e-06, |
|
"loss": 0.591, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.109473684210527e-06, |
|
"loss": 0.5771, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.08421052631579e-06, |
|
"loss": 0.5946, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.057894736842106e-06, |
|
"loss": 0.5638, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.031578947368422e-06, |
|
"loss": 0.597, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.005263157894737e-06, |
|
"loss": 0.5793, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9789473684210525e-06, |
|
"loss": 0.5645, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.952631578947368e-06, |
|
"loss": 0.6107, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.926315789473685e-06, |
|
"loss": 0.5951, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 0.5811, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.873684210526316e-06, |
|
"loss": 0.5749, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8473684210526316e-06, |
|
"loss": 0.5914, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.821052631578947e-06, |
|
"loss": 0.5615, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.794736842105263e-06, |
|
"loss": 0.5701, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.768421052631579e-06, |
|
"loss": 0.5854, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7421052631578953e-06, |
|
"loss": 0.5495, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.715789473684211e-06, |
|
"loss": 0.6006, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.6894736842105265e-06, |
|
"loss": 0.6187, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.6631578947368423e-06, |
|
"loss": 0.5812, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.636842105263158e-06, |
|
"loss": 0.5769, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.610526315789474e-06, |
|
"loss": 0.5981, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.58421052631579e-06, |
|
"loss": 0.6079, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.5578947368421056e-06, |
|
"loss": 0.5721, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.5315789473684214e-06, |
|
"loss": 0.5537, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.505263157894737e-06, |
|
"loss": 0.5549, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.478947368421053e-06, |
|
"loss": 0.5688, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.4526315789473684e-06, |
|
"loss": 0.5795, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.4263157894736842e-06, |
|
"loss": 0.574, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.5672, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.3736842105263163e-06, |
|
"loss": 0.5893, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.347368421052632e-06, |
|
"loss": 0.5996, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.3210526315789475e-06, |
|
"loss": 0.5472, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2947368421052633e-06, |
|
"loss": 0.5832, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.268421052631579e-06, |
|
"loss": 0.5587, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2421052631578945e-06, |
|
"loss": 0.588, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.215789473684211e-06, |
|
"loss": 0.5712, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.1894736842105266e-06, |
|
"loss": 0.6115, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.1631578947368424e-06, |
|
"loss": 0.57, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.1368421052631582e-06, |
|
"loss": 0.5756, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.1105263157894736e-06, |
|
"loss": 0.5401, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0842105263157895e-06, |
|
"loss": 0.5939, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0578947368421053e-06, |
|
"loss": 0.5911, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0315789473684215e-06, |
|
"loss": 0.5722, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.0052631578947373e-06, |
|
"loss": 0.5766, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9789473684210527e-06, |
|
"loss": 0.5718, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9526315789473685e-06, |
|
"loss": 0.5511, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9263157894736844e-06, |
|
"loss": 0.5551, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9e-06, |
|
"loss": 0.5906, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.8736842105263164e-06, |
|
"loss": 0.5383, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.847368421052632e-06, |
|
"loss": 0.5771, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.8210526315789476e-06, |
|
"loss": 0.5755, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7947368421052635e-06, |
|
"loss": 0.5551, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7684210526315793e-06, |
|
"loss": 0.564, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7421052631578947e-06, |
|
"loss": 0.5542, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7157894736842105e-06, |
|
"loss": 0.5679, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6894736842105267e-06, |
|
"loss": 0.5954, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6631578947368426e-06, |
|
"loss": 0.5502, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6368421052631584e-06, |
|
"loss": 0.5569, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6105263157894738e-06, |
|
"loss": 0.5477, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5842105263157896e-06, |
|
"loss": 0.5707, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5578947368421054e-06, |
|
"loss": 0.5519, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5315789473684212e-06, |
|
"loss": 0.5938, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5052631578947375e-06, |
|
"loss": 0.5453, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.478947368421053e-06, |
|
"loss": 0.5627, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.4526315789473687e-06, |
|
"loss": 0.5972, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.4263157894736845e-06, |
|
"loss": 0.573, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.5764, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.373684210526316e-06, |
|
"loss": 0.5714, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.347368421052632e-06, |
|
"loss": 0.5668, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.3210526315789473e-06, |
|
"loss": 0.5775, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.294736842105263e-06, |
|
"loss": 0.5845, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.268421052631579e-06, |
|
"loss": 0.5707, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.242105263157895e-06, |
|
"loss": 0.5894, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2157894736842106e-06, |
|
"loss": 0.5615, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.1894736842105264e-06, |
|
"loss": 0.5488, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1631578947368423e-06, |
|
"loss": 0.5586, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.136842105263158e-06, |
|
"loss": 0.5904, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.110526315789474e-06, |
|
"loss": 0.5627, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.567868173122406, |
|
"eval_runtime": 396.2585, |
|
"eval_samples_per_second": 12.631, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 43.945039037471226, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.0842105263157897e-06, |
|
"loss": 0.5628, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0578947368421055e-06, |
|
"loss": 0.5972, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.031578947368421e-06, |
|
"loss": 0.5498, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.005263157894737e-06, |
|
"loss": 0.5645, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.978947368421053e-06, |
|
"loss": 0.5648, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.9526315789473684e-06, |
|
"loss": 0.5667, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9263157894736846e-06, |
|
"loss": 0.5696, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 0.5914, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8736842105263158e-06, |
|
"loss": 0.5556, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8473684210526319e-06, |
|
"loss": 0.5576, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8210526315789475e-06, |
|
"loss": 0.5615, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7947368421052633e-06, |
|
"loss": 0.5495, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.768421052631579e-06, |
|
"loss": 0.5661, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.742105263157895e-06, |
|
"loss": 0.544, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.716842105263158e-06, |
|
"loss": 0.5326, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6905263157894739e-06, |
|
"loss": 0.5353, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6642105263157895e-06, |
|
"loss": 0.5401, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.6378947368421053e-06, |
|
"loss": 0.5504, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6115789473684211e-06, |
|
"loss": 0.5611, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.585263157894737e-06, |
|
"loss": 0.5457, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.5589473684210526e-06, |
|
"loss": 0.5333, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.5326315789473686e-06, |
|
"loss": 0.5699, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5063157894736844e-06, |
|
"loss": 0.5316, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.48e-06, |
|
"loss": 0.5738, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.453684210526316e-06, |
|
"loss": 0.565, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4273684210526317e-06, |
|
"loss": 0.571, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4010526315789475e-06, |
|
"loss": 0.5665, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.374736842105263e-06, |
|
"loss": 0.5527, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.3484210526315791e-06, |
|
"loss": 0.5277, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.322105263157895e-06, |
|
"loss": 0.522, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2957894736842105e-06, |
|
"loss": 0.5784, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2694736842105266e-06, |
|
"loss": 0.5685, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2431578947368422e-06, |
|
"loss": 0.5707, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.216842105263158e-06, |
|
"loss": 0.5582, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1905263157894738e-06, |
|
"loss": 0.5629, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1642105263157896e-06, |
|
"loss": 0.5391, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1378947368421052e-06, |
|
"loss": 0.5384, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1115789473684213e-06, |
|
"loss": 0.5458, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0852631578947369e-06, |
|
"loss": 0.5788, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0589473684210527e-06, |
|
"loss": 0.5698, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0326315789473685e-06, |
|
"loss": 0.5349, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0063157894736843e-06, |
|
"loss": 0.5231, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 0.5363, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.536842105263158e-07, |
|
"loss": 0.566, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.273684210526317e-07, |
|
"loss": 0.5928, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.010526315789474e-07, |
|
"loss": 0.5697, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.747368421052632e-07, |
|
"loss": 0.5603, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.48421052631579e-07, |
|
"loss": 0.5458, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.221052631578947e-07, |
|
"loss": 0.5733, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.957894736842107e-07, |
|
"loss": 0.5626, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.694736842105263e-07, |
|
"loss": 0.5243, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.431578947368422e-07, |
|
"loss": 0.5582, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.16842105263158e-07, |
|
"loss": 0.5822, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.905263157894737e-07, |
|
"loss": 0.543, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.642105263157895e-07, |
|
"loss": 0.5528, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.378947368421053e-07, |
|
"loss": 0.5618, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.115789473684211e-07, |
|
"loss": 0.5527, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.852631578947369e-07, |
|
"loss": 0.5681, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.589473684210526e-07, |
|
"loss": 0.5638, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.326315789473684e-07, |
|
"loss": 0.568, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.063157894736842e-07, |
|
"loss": 0.5424, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 0.5734, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5368421052631583e-07, |
|
"loss": 0.5366, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.273684210526316e-07, |
|
"loss": 0.5485, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.0105263157894736e-07, |
|
"loss": 0.5489, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.7473684210526323e-07, |
|
"loss": 0.5682, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.48421052631579e-07, |
|
"loss": 0.5699, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.2210526315789476e-07, |
|
"loss": 0.5595, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.9578947368421053e-07, |
|
"loss": 0.5514, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6947368421052635e-07, |
|
"loss": 0.5848, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.431578947368421e-07, |
|
"loss": 0.5723, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.168421052631579e-07, |
|
"loss": 0.5605, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.9052631578947372e-07, |
|
"loss": 0.5862, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.642105263157895e-07, |
|
"loss": 0.5483, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3789473684210528e-07, |
|
"loss": 0.5528, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1157894736842106e-07, |
|
"loss": 0.5322, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.526315789473685e-08, |
|
"loss": 0.5614, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.8947368421052637e-08, |
|
"loss": 0.5457, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.263157894736842e-08, |
|
"loss": 0.5877, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.315789473684211e-09, |
|
"loss": 0.5694, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5582889914512634, |
|
"eval_runtime": 400.4487, |
|
"eval_samples_per_second": 12.498, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 42.76086285863452, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 10000, |
|
"total_flos": 7.8780432384e+18, |
|
"train_loss": 0.6556177062988281, |
|
"train_runtime": 23200.9159, |
|
"train_samples_per_second": 13.793, |
|
"train_steps_per_second": 0.431 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 2000, |
|
"total_flos": 7.8780432384e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|