|
{ |
|
"best_metric": 12.226690123146518, |
|
"best_model_checkpoint": "./checkpoint-6000", |
|
"epoch": 0.2, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.0453611334320685e-06, |
|
"loss": 1.2966, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.229195710491767e-06, |
|
"loss": 0.8695, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.903829450223392e-06, |
|
"loss": 0.7429, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.377725845391017e-06, |
|
"loss": 0.6562, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.743343231239583e-06, |
|
"loss": 0.6217, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.041073861170494e-06, |
|
"loss": 0.5393, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.292222957399574e-06, |
|
"loss": 0.5282, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.509413541357755e-06, |
|
"loss": 0.5062, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.700744577655557e-06, |
|
"loss": 0.4942, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.871723942761204e-06, |
|
"loss": 0.4508, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.026267958246849e-06, |
|
"loss": 0.4466, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.16726106663399e-06, |
|
"loss": 0.4026, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.296889251455016e-06, |
|
"loss": 0.37, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.416848797368692e-06, |
|
"loss": 0.3864, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.528482449516371e-06, |
|
"loss": 0.3801, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.632871309784314e-06, |
|
"loss": 0.3893, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.73089868785391e-06, |
|
"loss": 0.4079, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.823295589572114e-06, |
|
"loss": 0.3554, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.910673836465484e-06, |
|
"loss": 0.3197, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.993550644973805e-06, |
|
"loss": 0.3405, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.98871794871795e-06, |
|
"loss": 0.3328, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.975897435897436e-06, |
|
"loss": 0.3147, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.963076923076925e-06, |
|
"loss": 0.2954, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.950256410256412e-06, |
|
"loss": 0.3034, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.937435897435898e-06, |
|
"loss": 0.2931, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.924615384615385e-06, |
|
"loss": 0.2798, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.911794871794874e-06, |
|
"loss": 0.2619, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.89897435897436e-06, |
|
"loss": 0.3157, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.886153846153846e-06, |
|
"loss": 0.2937, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.873333333333334e-06, |
|
"loss": 0.2606, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.860512820512821e-06, |
|
"loss": 0.2482, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.847692307692308e-06, |
|
"loss": 0.2539, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.834871794871795e-06, |
|
"loss": 0.2501, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.822051282051283e-06, |
|
"loss": 0.241, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.80923076923077e-06, |
|
"loss": 0.2323, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.796410256410257e-06, |
|
"loss": 0.2507, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.783589743589744e-06, |
|
"loss": 0.2157, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.770769230769232e-06, |
|
"loss": 0.2356, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.757948717948719e-06, |
|
"loss": 0.2352, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.745128205128206e-06, |
|
"loss": 0.2181, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.2607421875, |
|
"eval_runtime": 228.2189, |
|
"eval_samples_per_second": 4.382, |
|
"eval_steps_per_second": 0.14, |
|
"eval_wer": 18.384016084443328, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.733333333333334e-06, |
|
"loss": 0.2279, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.720512820512822e-06, |
|
"loss": 0.225, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.707692307692308e-06, |
|
"loss": 0.2238, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.694871794871794e-06, |
|
"loss": 0.228, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.682051282051283e-06, |
|
"loss": 0.2261, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.66923076923077e-06, |
|
"loss": 0.196, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.656410256410257e-06, |
|
"loss": 0.1962, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.643589743589743e-06, |
|
"loss": 0.1931, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.630769230769232e-06, |
|
"loss": 0.1827, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.617948717948719e-06, |
|
"loss": 0.1815, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.605128205128206e-06, |
|
"loss": 0.1972, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.592307692307692e-06, |
|
"loss": 0.1681, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.579487179487181e-06, |
|
"loss": 0.151, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.566666666666668e-06, |
|
"loss": 0.1743, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.553846153846155e-06, |
|
"loss": 0.1808, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.541025641025641e-06, |
|
"loss": 0.1984, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.52820512820513e-06, |
|
"loss": 0.2098, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.515384615384617e-06, |
|
"loss": 0.1741, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.502564102564103e-06, |
|
"loss": 0.1489, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.48974358974359e-06, |
|
"loss": 0.1655, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.476923076923079e-06, |
|
"loss": 0.1643, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.464102564102566e-06, |
|
"loss": 0.151, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.451282051282052e-06, |
|
"loss": 0.1431, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.43846153846154e-06, |
|
"loss": 0.1503, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.425641025641026e-06, |
|
"loss": 0.1491, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.412820512820515e-06, |
|
"loss": 0.1408, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.1294, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.387179487179488e-06, |
|
"loss": 0.1618, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.374358974358975e-06, |
|
"loss": 0.1484, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.361538461538462e-06, |
|
"loss": 0.1298, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.348717948717949e-06, |
|
"loss": 0.1231, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.335897435897436e-06, |
|
"loss": 0.1241, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.323076923076924e-06, |
|
"loss": 0.1215, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.310256410256411e-06, |
|
"loss": 0.1225, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.297435897435898e-06, |
|
"loss": 0.1122, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.284615384615385e-06, |
|
"loss": 0.1191, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.271794871794873e-06, |
|
"loss": 0.0995, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.25897435897436e-06, |
|
"loss": 0.1199, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.246153846153847e-06, |
|
"loss": 0.1243, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.233333333333334e-06, |
|
"loss": 0.1089, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.2489013671875, |
|
"eval_runtime": 209.1058, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 0.153, |
|
"eval_wer": 16.486554410655945, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.221538461538462e-06, |
|
"loss": 0.1231, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.208717948717948e-06, |
|
"loss": 0.1173, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.195897435897437e-06, |
|
"loss": 0.1234, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.183076923076924e-06, |
|
"loss": 0.1244, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.17025641025641e-06, |
|
"loss": 0.1223, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.157435897435897e-06, |
|
"loss": 0.1027, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.144615384615386e-06, |
|
"loss": 0.1022, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.131794871794873e-06, |
|
"loss": 0.1013, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.11897435897436e-06, |
|
"loss": 0.0949, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.106153846153846e-06, |
|
"loss": 0.0971, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.093333333333333e-06, |
|
"loss": 0.1098, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.080512820512822e-06, |
|
"loss": 0.0861, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.067692307692309e-06, |
|
"loss": 0.0765, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.054871794871795e-06, |
|
"loss": 0.0933, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.042051282051282e-06, |
|
"loss": 0.1025, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.02923076923077e-06, |
|
"loss": 0.1138, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.016410256410258e-06, |
|
"loss": 0.1198, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.003589743589744e-06, |
|
"loss": 0.0981, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.990769230769231e-06, |
|
"loss": 0.0807, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.97794871794872e-06, |
|
"loss": 0.0875, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.965128205128207e-06, |
|
"loss": 0.0911, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.952307692307693e-06, |
|
"loss": 0.0796, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.93948717948718e-06, |
|
"loss": 0.0766, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.926666666666669e-06, |
|
"loss": 0.0862, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.913846153846154e-06, |
|
"loss": 0.0822, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.90102564102564e-06, |
|
"loss": 0.0757, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.88820512820513e-06, |
|
"loss": 0.0674, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.875384615384616e-06, |
|
"loss": 0.0913, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.862564102564103e-06, |
|
"loss": 0.0815, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.84974358974359e-06, |
|
"loss": 0.0687, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.836923076923078e-06, |
|
"loss": 0.0649, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.824102564102565e-06, |
|
"loss": 0.0647, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.811282051282052e-06, |
|
"loss": 0.0607, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.798461538461539e-06, |
|
"loss": 0.065, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.785641025641025e-06, |
|
"loss": 0.058, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.772820512820514e-06, |
|
"loss": 0.0618, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.76e-06, |
|
"loss": 0.0491, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.747179487179488e-06, |
|
"loss": 0.0646, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.734358974358974e-06, |
|
"loss": 0.0673, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.721538461538463e-06, |
|
"loss": 0.0564, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 0.261474609375, |
|
"eval_runtime": 212.2501, |
|
"eval_samples_per_second": 4.711, |
|
"eval_steps_per_second": 0.151, |
|
"eval_wer": 16.14727318421714, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.70974358974359e-06, |
|
"loss": 0.1893, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.696923076923078e-06, |
|
"loss": 0.1774, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.684102564102565e-06, |
|
"loss": 0.1866, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.671282051282051e-06, |
|
"loss": 0.1896, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.658461538461538e-06, |
|
"loss": 0.1925, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.645641025641027e-06, |
|
"loss": 0.2126, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.632820512820514e-06, |
|
"loss": 0.2004, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.62e-06, |
|
"loss": 0.1636, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.607179487179487e-06, |
|
"loss": 0.1597, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.594358974358976e-06, |
|
"loss": 0.1768, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.581538461538463e-06, |
|
"loss": 0.1684, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.56871794871795e-06, |
|
"loss": 0.1739, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.555897435897436e-06, |
|
"loss": 0.1706, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.543076923076923e-06, |
|
"loss": 0.1701, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.530256410256412e-06, |
|
"loss": 0.1564, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.517435897435898e-06, |
|
"loss": 0.1627, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.504615384615385e-06, |
|
"loss": 0.1586, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.491794871794872e-06, |
|
"loss": 0.1579, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.47897435897436e-06, |
|
"loss": 0.1676, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.466153846153847e-06, |
|
"loss": 0.1558, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.453333333333334e-06, |
|
"loss": 0.1624, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.440512820512821e-06, |
|
"loss": 0.1459, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.427692307692308e-06, |
|
"loss": 0.151, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.414871794871795e-06, |
|
"loss": 0.1524, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.402051282051282e-06, |
|
"loss": 0.1444, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.38923076923077e-06, |
|
"loss": 0.1218, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.376410256410257e-06, |
|
"loss": 0.1207, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.363589743589744e-06, |
|
"loss": 0.1199, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.35076923076923e-06, |
|
"loss": 0.1154, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.337948717948719e-06, |
|
"loss": 0.1075, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.325128205128206e-06, |
|
"loss": 0.1005, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.312307692307693e-06, |
|
"loss": 0.1053, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.29948717948718e-06, |
|
"loss": 0.1087, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.286666666666668e-06, |
|
"loss": 0.1207, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 8.273846153846155e-06, |
|
"loss": 0.1099, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.261025641025642e-06, |
|
"loss": 0.1054, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.248205128205129e-06, |
|
"loss": 0.1019, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.235384615384615e-06, |
|
"loss": 0.0974, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.222564102564104e-06, |
|
"loss": 0.0975, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.20974358974359e-06, |
|
"loss": 0.1031, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.1995849609375, |
|
"eval_runtime": 214.2177, |
|
"eval_samples_per_second": 4.668, |
|
"eval_steps_per_second": 0.149, |
|
"eval_wer": 14.224679567730586, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.197948717948719e-06, |
|
"loss": 0.1122, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.185128205128206e-06, |
|
"loss": 0.1038, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.172307692307692e-06, |
|
"loss": 0.1172, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.15948717948718e-06, |
|
"loss": 0.1251, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.146666666666668e-06, |
|
"loss": 0.1306, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.133846153846155e-06, |
|
"loss": 0.1043, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.121025641025641e-06, |
|
"loss": 0.1095, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.108205128205128e-06, |
|
"loss": 0.1194, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.095384615384617e-06, |
|
"loss": 0.1209, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.082564102564104e-06, |
|
"loss": 0.1108, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.06974358974359e-06, |
|
"loss": 0.1059, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.056923076923077e-06, |
|
"loss": 0.0923, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.044102564102566e-06, |
|
"loss": 0.1027, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.031282051282053e-06, |
|
"loss": 0.1008, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.01846153846154e-06, |
|
"loss": 0.0885, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.005641025641026e-06, |
|
"loss": 0.0916, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.992820512820515e-06, |
|
"loss": 0.1045, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.980000000000002e-06, |
|
"loss": 0.0954, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.967179487179488e-06, |
|
"loss": 0.1013, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.954358974358975e-06, |
|
"loss": 0.0983, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.941538461538462e-06, |
|
"loss": 0.1021, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.928717948717949e-06, |
|
"loss": 0.1024, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.915897435897436e-06, |
|
"loss": 0.1068, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.903076923076922e-06, |
|
"loss": 0.1057, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.890256410256411e-06, |
|
"loss": 0.1144, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.877435897435898e-06, |
|
"loss": 0.1401, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 7.864615384615385e-06, |
|
"loss": 0.1234, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.851794871794871e-06, |
|
"loss": 0.1364, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.83897435897436e-06, |
|
"loss": 0.1367, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.826153846153847e-06, |
|
"loss": 0.1399, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.813333333333334e-06, |
|
"loss": 0.1387, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.80051282051282e-06, |
|
"loss": 0.1429, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.787692307692309e-06, |
|
"loss": 0.134, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.774871794871796e-06, |
|
"loss": 0.1615, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.762051282051283e-06, |
|
"loss": 0.1502, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 7.74923076923077e-06, |
|
"loss": 0.1574, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.736410256410258e-06, |
|
"loss": 0.1529, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.723589743589745e-06, |
|
"loss": 0.1406, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.710769230769232e-06, |
|
"loss": 0.1363, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.697948717948718e-06, |
|
"loss": 0.14, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.18212890625, |
|
"eval_runtime": 217.0927, |
|
"eval_samples_per_second": 4.606, |
|
"eval_steps_per_second": 0.147, |
|
"eval_wer": 13.005780346820808, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.686153846153846e-06, |
|
"loss": 0.1421, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.673333333333333e-06, |
|
"loss": 0.1507, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.660512820512822e-06, |
|
"loss": 0.1445, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.647692307692309e-06, |
|
"loss": 0.1517, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.634871794871795e-06, |
|
"loss": 0.1363, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.622051282051282e-06, |
|
"loss": 0.1548, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.60923076923077e-06, |
|
"loss": 0.1403, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.596410256410257e-06, |
|
"loss": 0.1543, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.5835897435897444e-06, |
|
"loss": 0.1386, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.570769230769231e-06, |
|
"loss": 0.1194, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.557948717948719e-06, |
|
"loss": 0.1075, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.545128205128206e-06, |
|
"loss": 0.1076, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.5323076923076934e-06, |
|
"loss": 0.1034, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.51948717948718e-06, |
|
"loss": 0.1096, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.506666666666668e-06, |
|
"loss": 0.1051, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.493846153846155e-06, |
|
"loss": 0.0989, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.481025641025642e-06, |
|
"loss": 0.1002, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.468205128205129e-06, |
|
"loss": 0.0961, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.455384615384615e-06, |
|
"loss": 0.1017, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.442564102564103e-06, |
|
"loss": 0.088, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.42974358974359e-06, |
|
"loss": 0.0921, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.416923076923077e-06, |
|
"loss": 0.0921, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.404102564102564e-06, |
|
"loss": 0.0853, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.391282051282052e-06, |
|
"loss": 0.0752, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.378461538461539e-06, |
|
"loss": 0.0705, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.365641025641026e-06, |
|
"loss": 0.0743, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.352820512820513e-06, |
|
"loss": 0.0859, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7.340000000000001e-06, |
|
"loss": 0.0759, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.327179487179488e-06, |
|
"loss": 0.0821, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.3143589743589745e-06, |
|
"loss": 0.0824, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.301538461538462e-06, |
|
"loss": 0.0747, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.288717948717949e-06, |
|
"loss": 0.072, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.275897435897437e-06, |
|
"loss": 0.0785, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.2630769230769235e-06, |
|
"loss": 0.0751, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 7.250256410256411e-06, |
|
"loss": 0.0822, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.237435897435898e-06, |
|
"loss": 0.101, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.224615384615386e-06, |
|
"loss": 0.0896, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.2117948717948725e-06, |
|
"loss": 0.1193, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.19897435897436e-06, |
|
"loss": 0.0841, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.186153846153847e-06, |
|
"loss": 0.0872, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.1734619140625, |
|
"eval_runtime": 224.4062, |
|
"eval_samples_per_second": 4.456, |
|
"eval_steps_per_second": 0.143, |
|
"eval_wer": 12.226690123146518, |
|
"step": 6000 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 3.9191228855447716e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|