{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 400.0,
  "global_step": 8800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 4.55,
      "learning_rate": 3.7125e-06,
      "loss": 16.0016,
      "step": 100
    },
    {
      "epoch": 9.09,
      "learning_rate": 7.4625e-06,
      "loss": 8.8896,
      "step": 200
    },
    {
      "epoch": 13.64,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 5.8994,
      "step": 300
    },
    {
      "epoch": 18.18,
      "learning_rate": 1.49625e-05,
      "loss": 4.9866,
      "step": 400
    },
    {
      "epoch": 22.73,
      "learning_rate": 1.8712499999999997e-05,
      "loss": 4.2706,
      "step": 500
    },
    {
      "epoch": 22.73,
      "eval_loss": 4.017421722412109,
      "eval_runtime": 13.7125,
      "eval_samples_per_second": 22.315,
      "eval_steps_per_second": 2.844,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 27.27,
      "learning_rate": 2.2462499999999997e-05,
      "loss": 3.7391,
      "step": 600
    },
    {
      "epoch": 31.82,
      "learning_rate": 2.6212499999999997e-05,
      "loss": 3.4669,
      "step": 700
    },
    {
      "epoch": 36.36,
      "learning_rate": 2.99625e-05,
      "loss": 3.3769,
      "step": 800
    },
    {
      "epoch": 40.91,
      "learning_rate": 3.37125e-05,
      "loss": 3.304,
      "step": 900
    },
    {
      "epoch": 45.45,
      "learning_rate": 3.7462499999999996e-05,
      "loss": 3.2492,
      "step": 1000
    },
    {
      "epoch": 45.45,
      "eval_loss": 3.2308928966522217,
      "eval_runtime": 13.6766,
      "eval_samples_per_second": 22.374,
      "eval_steps_per_second": 2.852,
      "eval_wer": 0.9907741676694746,
      "step": 1000
    },
    {
      "epoch": 50.0,
      "learning_rate": 4.12125e-05,
      "loss": 3.1542,
      "step": 1100
    },
    {
      "epoch": 54.55,
      "learning_rate": 4.4962499999999995e-05,
      "loss": 2.9723,
      "step": 1200
    },
    {
      "epoch": 59.09,
      "learning_rate": 4.871249999999999e-05,
      "loss": 2.6187,
      "step": 1300
    },
    {
      "epoch": 63.64,
      "learning_rate": 5.2462499999999994e-05,
      "loss": 2.2201,
      "step": 1400
    },
    {
      "epoch": 68.18,
      "learning_rate": 5.62125e-05,
      "loss": 1.9709,
      "step": 1500
    },
    {
      "epoch": 68.18,
      "eval_loss": 1.0651201009750366,
      "eval_runtime": 13.4664,
      "eval_samples_per_second": 22.723,
      "eval_steps_per_second": 2.896,
      "eval_wer": 0.8439630966706779,
      "step": 1500
    },
    {
      "epoch": 72.73,
      "learning_rate": 5.9962499999999994e-05,
      "loss": 1.7822,
      "step": 1600
    },
    {
      "epoch": 77.27,
      "learning_rate": 6.37125e-05,
      "loss": 1.6511,
      "step": 1700
    },
    {
      "epoch": 81.82,
      "learning_rate": 6.746249999999999e-05,
      "loss": 1.5519,
      "step": 1800
    },
    {
      "epoch": 86.36,
      "learning_rate": 7.121249999999999e-05,
      "loss": 1.4612,
      "step": 1900
    },
    {
      "epoch": 90.91,
      "learning_rate": 7.49625e-05,
      "loss": 1.4088,
      "step": 2000
    },
    {
      "epoch": 90.91,
      "eval_loss": 0.576468288898468,
      "eval_runtime": 13.6104,
      "eval_samples_per_second": 22.483,
      "eval_steps_per_second": 2.865,
      "eval_wer": 0.6550340954673085,
      "step": 2000
    },
    {
      "epoch": 95.45,
      "learning_rate": 7.390808823529411e-05,
      "loss": 1.3302,
      "step": 2100
    },
    {
      "epoch": 100.0,
      "learning_rate": 7.280514705882352e-05,
      "loss": 1.2739,
      "step": 2200
    },
    {
      "epoch": 104.55,
      "learning_rate": 7.171323529411764e-05,
      "loss": 1.2181,
      "step": 2300
    },
    {
      "epoch": 109.09,
      "learning_rate": 7.062132352941176e-05,
      "loss": 1.1776,
      "step": 2400
    },
    {
      "epoch": 113.64,
      "learning_rate": 6.951838235294117e-05,
      "loss": 1.1326,
      "step": 2500
    },
    {
      "epoch": 113.64,
      "eval_loss": 0.48421531915664673,
      "eval_runtime": 13.6288,
      "eval_samples_per_second": 22.452,
      "eval_steps_per_second": 2.862,
      "eval_wer": 0.5760128359406338,
      "step": 2500
    },
    {
      "epoch": 118.18,
      "learning_rate": 6.841544117647059e-05,
      "loss": 1.0897,
      "step": 2600
    },
    {
      "epoch": 122.73,
      "learning_rate": 6.731249999999999e-05,
      "loss": 1.0634,
      "step": 2700
    },
    {
      "epoch": 127.27,
      "learning_rate": 6.62095588235294e-05,
      "loss": 1.0321,
      "step": 2800
    },
    {
      "epoch": 131.82,
      "learning_rate": 6.510661764705882e-05,
      "loss": 0.9962,
      "step": 2900
    },
    {
      "epoch": 136.36,
      "learning_rate": 6.400367647058824e-05,
      "loss": 0.9709,
      "step": 3000
    },
    {
      "epoch": 136.36,
      "eval_loss": 0.4784719944000244,
      "eval_runtime": 13.5638,
      "eval_samples_per_second": 22.56,
      "eval_steps_per_second": 2.875,
      "eval_wer": 0.6012835940633775,
      "step": 3000
    },
    {
      "epoch": 140.91,
      "learning_rate": 6.290073529411764e-05,
      "loss": 0.935,
      "step": 3100
    },
    {
      "epoch": 145.45,
      "learning_rate": 6.179779411764705e-05,
      "loss": 0.9179,
      "step": 3200
    },
    {
      "epoch": 150.0,
      "learning_rate": 6.069485294117646e-05,
      "loss": 0.8854,
      "step": 3300
    },
    {
      "epoch": 154.55,
      "learning_rate": 5.9591911764705876e-05,
      "loss": 0.87,
      "step": 3400
    },
    {
      "epoch": 159.09,
      "learning_rate": 5.848897058823529e-05,
      "loss": 0.8433,
      "step": 3500
    },
    {
      "epoch": 159.09,
      "eval_loss": 0.5048245191574097,
      "eval_runtime": 14.3488,
      "eval_samples_per_second": 21.326,
      "eval_steps_per_second": 2.718,
      "eval_wer": 0.5419173686321701,
      "step": 3500
    },
    {
      "epoch": 163.64,
      "learning_rate": 5.73860294117647e-05,
      "loss": 0.8145,
      "step": 3600
    },
    {
      "epoch": 168.18,
      "learning_rate": 5.6283088235294115e-05,
      "loss": 0.7986,
      "step": 3700
    },
    {
      "epoch": 172.73,
      "learning_rate": 5.5180147058823523e-05,
      "loss": 0.7804,
      "step": 3800
    },
    {
      "epoch": 177.27,
      "learning_rate": 5.407720588235294e-05,
      "loss": 0.7639,
      "step": 3900
    },
    {
      "epoch": 181.82,
      "learning_rate": 5.297426470588235e-05,
      "loss": 0.7404,
      "step": 4000
    },
    {
      "epoch": 181.82,
      "eval_loss": 0.5051876902580261,
      "eval_runtime": 14.251,
      "eval_samples_per_second": 21.472,
      "eval_steps_per_second": 2.737,
      "eval_wer": 0.533894905736061,
      "step": 4000
    },
    {
      "epoch": 186.36,
      "learning_rate": 5.187132352941176e-05,
      "loss": 0.7352,
      "step": 4100
    },
    {
      "epoch": 190.91,
      "learning_rate": 5.076838235294117e-05,
      "loss": 0.7073,
      "step": 4200
    },
    {
      "epoch": 195.45,
      "learning_rate": 4.9665441176470586e-05,
      "loss": 0.6933,
      "step": 4300
    },
    {
      "epoch": 200.0,
      "learning_rate": 4.8562499999999995e-05,
      "loss": 0.6729,
      "step": 4400
    },
    {
      "epoch": 204.55,
      "learning_rate": 4.747058823529411e-05,
      "loss": 0.6589,
      "step": 4500
    },
    {
      "epoch": 204.55,
      "eval_loss": 0.5237030982971191,
      "eval_runtime": 13.4774,
      "eval_samples_per_second": 22.705,
      "eval_steps_per_second": 2.894,
      "eval_wer": 0.5896510228640193,
      "step": 4500
    },
    {
      "epoch": 209.09,
      "learning_rate": 4.6367647058823526e-05,
      "loss": 0.644,
      "step": 4600
    },
    {
      "epoch": 213.64,
      "learning_rate": 4.5264705882352935e-05,
      "loss": 0.6221,
      "step": 4700
    },
    {
      "epoch": 218.18,
      "learning_rate": 4.416176470588235e-05,
      "loss": 0.6119,
      "step": 4800
    },
    {
      "epoch": 222.73,
      "learning_rate": 4.305882352941176e-05,
      "loss": 0.601,
      "step": 4900
    },
    {
      "epoch": 227.27,
      "learning_rate": 4.196691176470588e-05,
      "loss": 0.5831,
      "step": 5000
    },
    {
      "epoch": 227.27,
      "eval_loss": 0.5166385769844055,
      "eval_runtime": 13.5084,
      "eval_samples_per_second": 22.653,
      "eval_steps_per_second": 2.887,
      "eval_wer": 0.5447252306458082,
      "step": 5000
    },
    {
      "epoch": 231.82,
      "learning_rate": 4.086397058823529e-05,
      "loss": 0.579,
      "step": 5100
    },
    {
      "epoch": 236.36,
      "learning_rate": 3.9761029411764705e-05,
      "loss": 0.5577,
      "step": 5200
    },
    {
      "epoch": 240.91,
      "learning_rate": 3.8658088235294113e-05,
      "loss": 0.547,
      "step": 5300
    },
    {
      "epoch": 245.45,
      "learning_rate": 3.755514705882353e-05,
      "loss": 0.5452,
      "step": 5400
    },
    {
      "epoch": 250.0,
      "learning_rate": 3.645220588235294e-05,
      "loss": 0.5375,
      "step": 5500
    },
    {
      "epoch": 250.0,
      "eval_loss": 0.5291843414306641,
      "eval_runtime": 13.5842,
      "eval_samples_per_second": 22.526,
      "eval_steps_per_second": 2.871,
      "eval_wer": 0.5487364620938628,
      "step": 5500
    },
    {
      "epoch": 254.55,
      "learning_rate": 3.534926470588235e-05,
      "loss": 0.5233,
      "step": 5600
    },
    {
      "epoch": 259.09,
      "learning_rate": 3.424632352941176e-05,
      "loss": 0.5081,
      "step": 5700
    },
    {
      "epoch": 263.64,
      "learning_rate": 3.314338235294117e-05,
      "loss": 0.5033,
      "step": 5800
    },
    {
      "epoch": 268.18,
      "learning_rate": 3.2040441176470585e-05,
      "loss": 0.4857,
      "step": 5900
    },
    {
      "epoch": 272.73,
      "learning_rate": 3.093749999999999e-05,
      "loss": 0.4784,
      "step": 6000
    },
    {
      "epoch": 272.73,
      "eval_loss": 0.548018217086792,
      "eval_runtime": 13.4861,
      "eval_samples_per_second": 22.69,
      "eval_steps_per_second": 2.892,
      "eval_wer": 0.5595667870036101,
      "step": 6000
    },
    {
      "epoch": 277.27,
      "learning_rate": 2.9834558823529408e-05,
      "loss": 0.4693,
      "step": 6100
    },
    {
      "epoch": 281.82,
      "learning_rate": 2.8731617647058824e-05,
      "loss": 0.4673,
      "step": 6200
    },
    {
      "epoch": 286.36,
      "learning_rate": 2.7628676470588235e-05,
      "loss": 0.4582,
      "step": 6300
    },
    {
      "epoch": 290.91,
      "learning_rate": 2.6525735294117647e-05,
      "loss": 0.4397,
      "step": 6400
    },
    {
      "epoch": 295.45,
      "learning_rate": 2.542279411764706e-05,
      "loss": 0.4421,
      "step": 6500
    },
    {
      "epoch": 295.45,
      "eval_loss": 0.5682183504104614,
      "eval_runtime": 13.4936,
      "eval_samples_per_second": 22.677,
      "eval_steps_per_second": 2.89,
      "eval_wer": 0.5467308463698355,
      "step": 6500
    },
    {
      "epoch": 300.0,
      "learning_rate": 2.431985294117647e-05,
      "loss": 0.4361,
      "step": 6600
    },
    {
      "epoch": 304.55,
      "learning_rate": 2.321691176470588e-05,
      "loss": 0.4287,
      "step": 6700
    },
    {
      "epoch": 309.09,
      "learning_rate": 2.211397058823529e-05,
      "loss": 0.4181,
      "step": 6800
    },
    {
      "epoch": 313.64,
      "learning_rate": 2.1011029411764703e-05,
      "loss": 0.4156,
      "step": 6900
    },
    {
      "epoch": 318.18,
      "learning_rate": 1.9908088235294115e-05,
      "loss": 0.4047,
      "step": 7000
    },
    {
      "epoch": 318.18,
      "eval_loss": 0.5681235194206238,
      "eval_runtime": 13.3347,
      "eval_samples_per_second": 22.948,
      "eval_steps_per_second": 2.925,
      "eval_wer": 0.5447252306458082,
      "step": 7000
    },
    {
      "epoch": 322.73,
      "learning_rate": 1.8805147058823527e-05,
      "loss": 0.3978,
      "step": 7100
    },
    {
      "epoch": 327.27,
      "learning_rate": 1.770220588235294e-05,
      "loss": 0.3874,
      "step": 7200
    },
    {
      "epoch": 331.82,
      "learning_rate": 1.659926470588235e-05,
      "loss": 0.385,
      "step": 7300
    },
    {
      "epoch": 336.36,
      "learning_rate": 1.5496323529411763e-05,
      "loss": 0.3817,
      "step": 7400
    },
    {
      "epoch": 340.91,
      "learning_rate": 1.4393382352941176e-05,
      "loss": 0.3779,
      "step": 7500
    },
    {
      "epoch": 340.91,
      "eval_loss": 0.578274667263031,
      "eval_runtime": 13.5305,
      "eval_samples_per_second": 22.616,
      "eval_steps_per_second": 2.882,
      "eval_wer": 0.5346971520256719,
      "step": 7500
    },
    {
      "epoch": 345.45,
      "learning_rate": 1.3290441176470588e-05,
      "loss": 0.3702,
      "step": 7600
    },
    {
      "epoch": 350.0,
      "learning_rate": 1.21875e-05,
      "loss": 0.3723,
      "step": 7700
    },
    {
      "epoch": 354.55,
      "learning_rate": 1.1084558823529412e-05,
      "loss": 0.3646,
      "step": 7800
    },
    {
      "epoch": 359.09,
      "learning_rate": 9.981617647058822e-06,
      "loss": 0.3478,
      "step": 7900
    },
    {
      "epoch": 363.64,
      "learning_rate": 8.878676470588234e-06,
      "loss": 0.3525,
      "step": 8000
    },
    {
      "epoch": 363.64,
      "eval_loss": 0.5856263041496277,
      "eval_runtime": 13.4874,
      "eval_samples_per_second": 22.688,
      "eval_steps_per_second": 2.892,
      "eval_wer": 0.5367027677496992,
      "step": 8000
    },
    {
      "epoch": 368.18,
      "learning_rate": 7.775735294117647e-06,
      "loss": 0.3574,
      "step": 8100
    },
    {
      "epoch": 372.73,
      "learning_rate": 6.672794117647058e-06,
      "loss": 0.3495,
      "step": 8200
    },
    {
      "epoch": 377.27,
      "learning_rate": 5.56985294117647e-06,
      "loss": 0.3576,
      "step": 8300
    },
    {
      "epoch": 381.82,
      "learning_rate": 4.466911764705882e-06,
      "loss": 0.3441,
      "step": 8400
    },
    {
      "epoch": 386.36,
      "learning_rate": 3.363970588235294e-06,
      "loss": 0.3393,
      "step": 8500
    },
    {
      "epoch": 386.36,
      "eval_loss": 0.5959608554840088,
      "eval_runtime": 13.5943,
      "eval_samples_per_second": 22.509,
      "eval_steps_per_second": 2.869,
      "eval_wer": 0.5359005214600883,
      "step": 8500
    },
    {
      "epoch": 390.91,
      "learning_rate": 2.2610294117647057e-06,
      "loss": 0.3366,
      "step": 8600
    },
    {
      "epoch": 395.45,
      "learning_rate": 1.1580882352941175e-06,
      "loss": 0.3404,
      "step": 8700
    },
    {
      "epoch": 400.0,
      "learning_rate": 5.514705882352941e-08,
      "loss": 0.3312,
      "step": 8800
    },
    {
      "epoch": 400.0,
      "step": 8800,
      "total_flos": 4.6124470831742304e+19,
      "train_loss": 1.3667902833765204,
      "train_runtime": 19151.8999,
      "train_samples_per_second": 14.578,
      "train_steps_per_second": 0.459
    }
  ],
  "max_steps": 8800,
  "num_train_epochs": 400,
  "total_flos": 4.6124470831742304e+19,
  "trial_name": null,
  "trial_params": null
}