{
  "best_metric": 0.22928521037101746,
  "best_model_checkpoint": "./checkpoint-4000",
  "epoch": 70.17543859649123,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.75,
      "learning_rate": 1.7851749999999994e-05,
      "loss": 9.5756,
      "step": 100
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.517674999999999e-05,
      "loss": 4.1914,
      "step": 200
    },
    {
      "epoch": 5.26,
      "learning_rate": 5.250174999999999e-05,
      "loss": 3.3316,
      "step": 300
    },
    {
      "epoch": 7.02,
      "learning_rate": 6.982674999999999e-05,
      "loss": 3.1471,
      "step": 400
    },
    {
      "epoch": 7.02,
      "eval_cer": 1.0,
      "eval_loss": 3.1599209308624268,
      "eval_runtime": 8.4621,
      "eval_samples_per_second": 39.588,
      "eval_steps_per_second": 0.709,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 8.77,
      "learning_rate": 7e-05,
      "loss": 3.0957,
      "step": 500
    },
    {
      "epoch": 10.53,
      "learning_rate": 7e-05,
      "loss": 2.8518,
      "step": 600
    },
    {
      "epoch": 12.28,
      "learning_rate": 7e-05,
      "loss": 2.3182,
      "step": 700
    },
    {
      "epoch": 14.04,
      "learning_rate": 7e-05,
      "loss": 1.8691,
      "step": 800
    },
    {
      "epoch": 14.04,
      "eval_cer": 0.1685887708649469,
      "eval_loss": 0.767376184463501,
      "eval_runtime": 8.3651,
      "eval_samples_per_second": 40.048,
      "eval_steps_per_second": 0.717,
      "eval_wer": 0.7361436377829821,
      "step": 800
    },
    {
      "epoch": 15.79,
      "learning_rate": 7e-05,
      "loss": 1.6182,
      "step": 900
    },
    {
      "epoch": 17.54,
      "learning_rate": 7e-05,
      "loss": 1.4813,
      "step": 1000
    },
    {
      "epoch": 19.3,
      "learning_rate": 7e-05,
      "loss": 1.4096,
      "step": 1100
    },
    {
      "epoch": 21.05,
      "learning_rate": 7e-05,
      "loss": 1.3227,
      "step": 1200
    },
    {
      "epoch": 21.05,
      "eval_cer": 0.10065756196256954,
      "eval_loss": 0.38490724563598633,
      "eval_runtime": 8.5046,
      "eval_samples_per_second": 39.39,
      "eval_steps_per_second": 0.705,
      "eval_wer": 0.5335675253708041,
      "step": 1200
    },
    {
      "epoch": 22.81,
      "learning_rate": 7e-05,
      "loss": 1.2765,
      "step": 1300
    },
    {
      "epoch": 24.56,
      "learning_rate": 7e-05,
      "loss": 1.2296,
      "step": 1400
    },
    {
      "epoch": 26.32,
      "learning_rate": 7e-05,
      "loss": 1.1942,
      "step": 1500
    },
    {
      "epoch": 28.07,
      "learning_rate": 7e-05,
      "loss": 1.163,
      "step": 1600
    },
    {
      "epoch": 28.07,
      "eval_cer": 0.08229640870005059,
      "eval_loss": 0.30146270990371704,
      "eval_runtime": 8.4606,
      "eval_samples_per_second": 39.595,
      "eval_steps_per_second": 0.709,
      "eval_wer": 0.4558938329430133,
      "step": 1600
    },
    {
      "epoch": 29.82,
      "learning_rate": 7e-05,
      "loss": 1.1351,
      "step": 1700
    },
    {
      "epoch": 31.58,
      "learning_rate": 7e-05,
      "loss": 1.1201,
      "step": 1800
    },
    {
      "epoch": 33.33,
      "learning_rate": 7e-05,
      "loss": 1.1014,
      "step": 1900
    },
    {
      "epoch": 35.09,
      "learning_rate": 7e-05,
      "loss": 1.0768,
      "step": 2000
    },
    {
      "epoch": 35.09,
      "eval_cer": 0.07278705108750633,
      "eval_loss": 0.27209824323654175,
      "eval_runtime": 8.2339,
      "eval_samples_per_second": 40.685,
      "eval_steps_per_second": 0.729,
      "eval_wer": 0.4032006245120999,
      "step": 2000
    },
    {
      "epoch": 36.84,
      "learning_rate": 6.670824999999999e-05,
      "loss": 1.063,
      "step": 2100
    },
    {
      "epoch": 38.6,
      "learning_rate": 6.338324999999999e-05,
      "loss": 1.0538,
      "step": 2200
    },
    {
      "epoch": 40.35,
      "learning_rate": 6.005824999999999e-05,
      "loss": 1.0439,
      "step": 2300
    },
    {
      "epoch": 42.11,
      "learning_rate": 5.673325e-05,
      "loss": 1.0224,
      "step": 2400
    },
    {
      "epoch": 42.11,
      "eval_cer": 0.06914516944865959,
      "eval_loss": 0.2586216628551483,
      "eval_runtime": 8.3142,
      "eval_samples_per_second": 40.292,
      "eval_steps_per_second": 0.722,
      "eval_wer": 0.3825136612021858,
      "step": 2400
    },
    {
      "epoch": 43.86,
      "learning_rate": 5.340824999999999e-05,
      "loss": 1.0116,
      "step": 2500
    },
    {
      "epoch": 45.61,
      "learning_rate": 5.008325e-05,
      "loss": 0.9925,
      "step": 2600
    },
    {
      "epoch": 47.37,
      "learning_rate": 4.675824999999999e-05,
      "loss": 0.9877,
      "step": 2700
    },
    {
      "epoch": 49.12,
      "learning_rate": 4.343324999999999e-05,
      "loss": 0.9817,
      "step": 2800
    },
    {
      "epoch": 49.12,
      "eval_cer": 0.06525037936267071,
      "eval_loss": 0.24575529992580414,
      "eval_runtime": 8.5545,
      "eval_samples_per_second": 39.16,
      "eval_steps_per_second": 0.701,
      "eval_wer": 0.36533957845433257,
      "step": 2800
    },
    {
      "epoch": 50.88,
      "learning_rate": 4.010825e-05,
      "loss": 0.9781,
      "step": 2900
    },
    {
      "epoch": 52.63,
      "learning_rate": 3.6816499999999996e-05,
      "loss": 0.9625,
      "step": 3000
    },
    {
      "epoch": 54.39,
      "learning_rate": 3.34915e-05,
      "loss": 0.9499,
      "step": 3100
    },
    {
      "epoch": 56.14,
      "learning_rate": 3.01665e-05,
      "loss": 0.941,
      "step": 3200
    },
    {
      "epoch": 56.14,
      "eval_cer": 0.06054628224582701,
      "eval_loss": 0.23060913383960724,
      "eval_runtime": 8.3111,
      "eval_samples_per_second": 40.307,
      "eval_steps_per_second": 0.722,
      "eval_wer": 0.33879781420765026,
      "step": 3200
    },
    {
      "epoch": 57.89,
      "learning_rate": 2.6841499999999994e-05,
      "loss": 0.9375,
      "step": 3300
    },
    {
      "epoch": 59.65,
      "learning_rate": 2.3516499999999998e-05,
      "loss": 0.9297,
      "step": 3400
    },
    {
      "epoch": 61.4,
      "learning_rate": 2.0191499999999998e-05,
      "loss": 0.9218,
      "step": 3500
    },
    {
      "epoch": 63.16,
      "learning_rate": 1.68665e-05,
      "loss": 0.9235,
      "step": 3600
    },
    {
      "epoch": 63.16,
      "eval_cer": 0.06150733434496712,
      "eval_loss": 0.23152786493301392,
      "eval_runtime": 8.3332,
      "eval_samples_per_second": 40.201,
      "eval_steps_per_second": 0.72,
      "eval_wer": 0.33801717408274784,
      "step": 3600
    },
    {
      "epoch": 64.91,
      "learning_rate": 1.3541499999999999e-05,
      "loss": 0.9173,
      "step": 3700
    },
    {
      "epoch": 66.67,
      "learning_rate": 1.0216500000000001e-05,
      "loss": 0.9136,
      "step": 3800
    },
    {
      "epoch": 68.42,
      "learning_rate": 6.891500000000002e-06,
      "loss": 0.9107,
      "step": 3900
    },
    {
      "epoch": 70.18,
      "learning_rate": 3.566500000000003e-06,
      "loss": 0.9141,
      "step": 4000
    },
    {
      "epoch": 70.18,
      "eval_cer": 0.06019221041982802,
      "eval_loss": 0.22928521037101746,
      "eval_runtime": 8.619,
      "eval_samples_per_second": 38.868,
      "eval_steps_per_second": 0.696,
      "eval_wer": 0.3333333333333333,
      "step": 4000
    },
    {
      "epoch": 70.18,
      "step": 4000,
      "total_flos": 1.1026564721970925e+20,
      "train_loss": 1.6216355571746826,
      "train_runtime": 16397.7621,
      "train_samples_per_second": 31.224,
      "train_steps_per_second": 0.244
    }
  ],
  "max_steps": 4000,
  "num_train_epochs": 71,
  "total_flos": 1.1026564721970925e+20,
  "trial_name": null,
  "trial_params": null
}