{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 50.0,
  "global_step": 5750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.87,
      "learning_rate": 3.3949999999999997e-06,
      "loss": 13.6823,
      "step": 100
    },
    {
      "epoch": 1.74,
      "learning_rate": 6.895e-06,
      "loss": 7.5854,
      "step": 200
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.0394999999999998e-05,
      "loss": 4.3711,
      "step": 300
    },
    {
      "epoch": 2.61,
      "eval_loss": 4.312221050262451,
      "eval_runtime": 80.91,
      "eval_samples_per_second": 21.011,
      "eval_steps_per_second": 1.322,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.3895e-05,
      "loss": 3.8129,
      "step": 400
    },
    {
      "epoch": 4.35,
      "learning_rate": 1.7395e-05,
      "loss": 3.4258,
      "step": 500
    },
    {
      "epoch": 5.22,
      "learning_rate": 2.0894999999999996e-05,
      "loss": 3.1653,
      "step": 600
    },
    {
      "epoch": 5.22,
      "eval_loss": 3.115588426589966,
      "eval_runtime": 81.1622,
      "eval_samples_per_second": 20.946,
      "eval_steps_per_second": 1.318,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 6.09,
      "learning_rate": 2.4394999999999996e-05,
      "loss": 3.0356,
      "step": 700
    },
    {
      "epoch": 6.96,
      "learning_rate": 2.7895e-05,
      "loss": 2.9791,
      "step": 800
    },
    {
      "epoch": 7.83,
      "learning_rate": 3.1395e-05,
      "loss": 2.8904,
      "step": 900
    },
    {
      "epoch": 7.83,
      "eval_loss": 2.842055320739746,
      "eval_runtime": 84.1755,
      "eval_samples_per_second": 20.196,
      "eval_steps_per_second": 1.271,
      "eval_wer": 0.9918110836031232,
      "step": 900
    },
    {
      "epoch": 8.7,
      "learning_rate": 3.4895e-05,
      "loss": 2.1422,
      "step": 1000
    },
    {
      "epoch": 9.57,
      "learning_rate": 3.8394999999999994e-05,
      "loss": 1.2257,
      "step": 1100
    },
    {
      "epoch": 10.43,
      "learning_rate": 4.1895e-05,
      "loss": 0.9207,
      "step": 1200
    },
    {
      "epoch": 10.43,
      "eval_loss": 0.9894591569900513,
      "eval_runtime": 82.3044,
      "eval_samples_per_second": 20.655,
      "eval_steps_per_second": 1.3,
      "eval_wer": 0.8688503777058338,
      "step": 1200
    },
    {
      "epoch": 11.3,
      "learning_rate": 4.5394999999999995e-05,
      "loss": 0.7881,
      "step": 1300
    },
    {
      "epoch": 12.17,
      "learning_rate": 4.8895e-05,
      "loss": 0.7047,
      "step": 1400
    },
    {
      "epoch": 13.04,
      "learning_rate": 5.2395e-05,
      "loss": 0.6384,
      "step": 1500
    },
    {
      "epoch": 13.04,
      "eval_loss": 0.6993927359580994,
      "eval_runtime": 82.4631,
      "eval_samples_per_second": 20.615,
      "eval_steps_per_second": 1.298,
      "eval_wer": 0.7700120611946931,
      "step": 1500
    },
    {
      "epoch": 13.91,
      "learning_rate": 5.589499999999999e-05,
      "loss": 0.5989,
      "step": 1600
    },
    {
      "epoch": 14.78,
      "learning_rate": 5.9394999999999996e-05,
      "loss": 0.5601,
      "step": 1700
    },
    {
      "epoch": 15.65,
      "learning_rate": 6.289499999999999e-05,
      "loss": 0.5215,
      "step": 1800
    },
    {
      "epoch": 15.65,
      "eval_loss": 0.5627515912055969,
      "eval_runtime": 81.3944,
      "eval_samples_per_second": 20.886,
      "eval_steps_per_second": 1.315,
      "eval_wer": 0.6443217164984447,
      "step": 1800
    },
    {
      "epoch": 16.52,
      "learning_rate": 6.639499999999999e-05,
      "loss": 0.4949,
      "step": 1900
    },
    {
      "epoch": 17.39,
      "learning_rate": 6.9895e-05,
      "loss": 0.4869,
      "step": 2000
    },
    {
      "epoch": 18.26,
      "learning_rate": 6.818933333333333e-05,
      "loss": 0.4573,
      "step": 2100
    },
    {
      "epoch": 18.26,
      "eval_loss": 0.5316212177276611,
      "eval_runtime": 82.2602,
      "eval_samples_per_second": 20.666,
      "eval_steps_per_second": 1.301,
      "eval_wer": 0.6174062083412684,
      "step": 2100
    },
    {
      "epoch": 19.13,
      "learning_rate": 6.632266666666666e-05,
      "loss": 0.438,
      "step": 2200
    },
    {
      "epoch": 20.0,
      "learning_rate": 6.4456e-05,
      "loss": 0.4153,
      "step": 2300
    },
    {
      "epoch": 20.87,
      "learning_rate": 6.258933333333333e-05,
      "loss": 0.3875,
      "step": 2400
    },
    {
      "epoch": 20.87,
      "eval_loss": 0.4931696653366089,
      "eval_runtime": 80.8801,
      "eval_samples_per_second": 21.019,
      "eval_steps_per_second": 1.323,
      "eval_wer": 0.5778581857423982,
      "step": 2400
    },
    {
      "epoch": 21.74,
      "learning_rate": 6.072266666666667e-05,
      "loss": 0.3807,
      "step": 2500
    },
    {
      "epoch": 22.61,
      "learning_rate": 5.8855999999999993e-05,
      "loss": 0.3715,
      "step": 2600
    },
    {
      "epoch": 23.48,
      "learning_rate": 5.6989333333333333e-05,
      "loss": 0.3562,
      "step": 2700
    },
    {
      "epoch": 23.48,
      "eval_loss": 0.4971640110015869,
      "eval_runtime": 82.278,
      "eval_samples_per_second": 20.662,
      "eval_steps_per_second": 1.3,
      "eval_wer": 0.547514759093506,
      "step": 2700
    },
    {
      "epoch": 24.35,
      "learning_rate": 5.512266666666666e-05,
      "loss": 0.3457,
      "step": 2800
    },
    {
      "epoch": 25.22,
      "learning_rate": 5.3256e-05,
      "loss": 0.3356,
      "step": 2900
    },
    {
      "epoch": 26.09,
      "learning_rate": 5.1389333333333326e-05,
      "loss": 0.3218,
      "step": 3000
    },
    {
      "epoch": 26.09,
      "eval_loss": 0.4894775450229645,
      "eval_runtime": 81.4053,
      "eval_samples_per_second": 20.883,
      "eval_steps_per_second": 1.314,
      "eval_wer": 0.5219323303497746,
      "step": 3000
    },
    {
      "epoch": 26.96,
      "learning_rate": 4.9522666666666666e-05,
      "loss": 0.3072,
      "step": 3100
    },
    {
      "epoch": 27.83,
      "learning_rate": 4.765599999999999e-05,
      "loss": 0.3006,
      "step": 3200
    },
    {
      "epoch": 28.7,
      "learning_rate": 4.578933333333333e-05,
      "loss": 0.2954,
      "step": 3300
    },
    {
      "epoch": 28.7,
      "eval_loss": 0.5226009488105774,
      "eval_runtime": 82.2645,
      "eval_samples_per_second": 20.665,
      "eval_steps_per_second": 1.301,
      "eval_wer": 0.5192026915508157,
      "step": 3300
    },
    {
      "epoch": 29.57,
      "learning_rate": 4.392266666666666e-05,
      "loss": 0.2965,
      "step": 3400
    },
    {
      "epoch": 30.43,
      "learning_rate": 4.2056e-05,
      "loss": 0.286,
      "step": 3500
    },
    {
      "epoch": 31.3,
      "learning_rate": 4.018933333333333e-05,
      "loss": 0.287,
      "step": 3600
    },
    {
      "epoch": 31.3,
      "eval_loss": 0.495715469121933,
      "eval_runtime": 79.9357,
      "eval_samples_per_second": 21.267,
      "eval_steps_per_second": 1.339,
      "eval_wer": 0.5145686535897924,
      "step": 3600
    },
    {
      "epoch": 32.17,
      "learning_rate": 3.8322666666666665e-05,
      "loss": 0.2768,
      "step": 3700
    },
    {
      "epoch": 33.04,
      "learning_rate": 3.6456e-05,
      "loss": 0.2731,
      "step": 3800
    },
    {
      "epoch": 33.91,
      "learning_rate": 3.458933333333333e-05,
      "loss": 0.2587,
      "step": 3900
    },
    {
      "epoch": 33.91,
      "eval_loss": 0.49437007308006287,
      "eval_runtime": 82.64,
      "eval_samples_per_second": 20.571,
      "eval_steps_per_second": 1.295,
      "eval_wer": 0.48930362470640515,
      "step": 3900
    },
    {
      "epoch": 34.78,
      "learning_rate": 3.2722666666666664e-05,
      "loss": 0.2549,
      "step": 4000
    },
    {
      "epoch": 35.65,
      "learning_rate": 3.0856e-05,
      "loss": 0.2538,
      "step": 4100
    },
    {
      "epoch": 36.52,
      "learning_rate": 2.8989333333333334e-05,
      "loss": 0.2496,
      "step": 4200
    },
    {
      "epoch": 36.52,
      "eval_loss": 0.4975605010986328,
      "eval_runtime": 82.1209,
      "eval_samples_per_second": 20.701,
      "eval_steps_per_second": 1.303,
      "eval_wer": 0.4894940646226116,
      "step": 4200
    },
    {
      "epoch": 37.39,
      "learning_rate": 2.7122666666666667e-05,
      "loss": 0.2461,
      "step": 4300
    },
    {
      "epoch": 38.26,
      "learning_rate": 2.5256e-05,
      "loss": 0.2359,
      "step": 4400
    },
    {
      "epoch": 39.13,
      "learning_rate": 2.3389333333333333e-05,
      "loss": 0.2365,
      "step": 4500
    },
    {
      "epoch": 39.13,
      "eval_loss": 0.5185123085975647,
      "eval_runtime": 79.6732,
      "eval_samples_per_second": 21.337,
      "eval_steps_per_second": 1.343,
      "eval_wer": 0.4818764679743541,
      "step": 4500
    },
    {
      "epoch": 40.0,
      "learning_rate": 2.1522666666666666e-05,
      "loss": 0.2357,
      "step": 4600
    },
    {
      "epoch": 40.87,
      "learning_rate": 1.9656e-05,
      "loss": 0.2289,
      "step": 4700
    },
    {
      "epoch": 41.74,
      "learning_rate": 1.7789333333333333e-05,
      "loss": 0.2264,
      "step": 4800
    },
    {
      "epoch": 41.74,
      "eval_loss": 0.5152125954627991,
      "eval_runtime": 79.201,
      "eval_samples_per_second": 21.464,
      "eval_steps_per_second": 1.351,
      "eval_wer": 0.47755982987367485,
      "step": 4800
    },
    {
      "epoch": 42.61,
      "learning_rate": 1.5922666666666666e-05,
      "loss": 0.2211,
      "step": 4900
    },
    {
      "epoch": 43.48,
      "learning_rate": 1.4055999999999999e-05,
      "loss": 0.2186,
      "step": 5000
    },
    {
      "epoch": 44.35,
      "learning_rate": 1.2189333333333332e-05,
      "loss": 0.2224,
      "step": 5100
    },
    {
      "epoch": 44.35,
      "eval_loss": 0.5030579566955566,
      "eval_runtime": 80.9089,
      "eval_samples_per_second": 21.011,
      "eval_steps_per_second": 1.322,
      "eval_wer": 0.4745762711864407,
      "step": 5100
    },
    {
      "epoch": 45.22,
      "learning_rate": 1.0322666666666665e-05,
      "loss": 0.2162,
      "step": 5200
    },
    {
      "epoch": 46.09,
      "learning_rate": 8.456e-06,
      "loss": 0.2159,
      "step": 5300
    },
    {
      "epoch": 46.96,
      "learning_rate": 6.589333333333332e-06,
      "loss": 0.2096,
      "step": 5400
    },
    {
      "epoch": 46.96,
      "eval_loss": 0.5061585307121277,
      "eval_runtime": 81.3005,
      "eval_samples_per_second": 20.91,
      "eval_steps_per_second": 1.316,
      "eval_wer": 0.47076747286231196,
      "step": 5400
    },
    {
      "epoch": 47.83,
      "learning_rate": 4.7226666666666654e-06,
      "loss": 0.205,
      "step": 5500
    },
    {
      "epoch": 48.7,
      "learning_rate": 2.856e-06,
      "loss": 0.2038,
      "step": 5600
    },
    {
      "epoch": 49.57,
      "learning_rate": 9.893333333333332e-07,
      "loss": 0.2038,
      "step": 5700
    },
    {
      "epoch": 49.57,
      "eval_loss": 0.5217297077178955,
      "eval_runtime": 83.7172,
      "eval_samples_per_second": 20.306,
      "eval_steps_per_second": 1.278,
      "eval_wer": 0.46981527328127975,
      "step": 5700
    },
    {
      "epoch": 50.0,
      "step": 5750,
      "total_flos": 2.9609940258263142e+19,
      "train_loss": 1.123646092788033,
      "train_runtime": 13730.8326,
      "train_samples_per_second": 13.386,
      "train_steps_per_second": 0.419
    }
  ],
  "max_steps": 5750,
  "num_train_epochs": 50,
  "total_flos": 2.9609940258263142e+19,
  "trial_name": null,
  "trial_params": null
}