|
{ |
|
"best_metric": 7.581081081081082, |
|
"best_model_checkpoint": "./checkpoint-9000", |
|
"epoch": 5.48847420417124, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.365, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.478947368421053e-06, |
|
"loss": 0.1912, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_cer": 7.035654676637185, |
|
"eval_loss": 0.18279600143432617, |
|
"eval_runtime": 7736.1561, |
|
"eval_samples_per_second": 0.595, |
|
"eval_steps_per_second": 0.074, |
|
"eval_wer": 11.23141891891892, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.95263157894737e-06, |
|
"loss": 0.1649, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.426315789473684e-06, |
|
"loss": 0.1329, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_cer": 5.9028005940255746, |
|
"eval_loss": 0.16182316839694977, |
|
"eval_runtime": 7650.4506, |
|
"eval_samples_per_second": 0.602, |
|
"eval_steps_per_second": 0.075, |
|
"eval_wer": 9.41722972972973, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.0954, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 7.373684210526316e-06, |
|
"loss": 0.0912, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_cer": 5.47108067971245, |
|
"eval_loss": 0.16157789528369904, |
|
"eval_runtime": 7517.9849, |
|
"eval_samples_per_second": 0.612, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 8.925675675675675, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 6.8473684210526325e-06, |
|
"loss": 0.0887, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 6.321052631578948e-06, |
|
"loss": 0.0576, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_cer": 5.305489479701936, |
|
"eval_loss": 0.1663784682750702, |
|
"eval_runtime": 7600.817, |
|
"eval_samples_per_second": 0.606, |
|
"eval_steps_per_second": 0.076, |
|
"eval_wer": 8.58614864864865, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.794736842105264e-06, |
|
"loss": 0.047, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.269473684210527e-06, |
|
"loss": 0.0449, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_cer": 5.2930044289074925, |
|
"eval_loss": 0.16421984136104584, |
|
"eval_runtime": 7476.5946, |
|
"eval_samples_per_second": 0.616, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 8.451013513513514, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.743157894736842e-06, |
|
"loss": 0.0431, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 4.217894736842106e-06, |
|
"loss": 0.02, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_cer": 5.035418117780027, |
|
"eval_loss": 0.17986369132995605, |
|
"eval_runtime": 7470.6199, |
|
"eval_samples_per_second": 0.616, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 8.153716216216218, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.6915789473684216e-06, |
|
"loss": 0.0207, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.1652631578947375e-06, |
|
"loss": 0.019, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_cer": 5.082729889211602, |
|
"eval_loss": 0.180050328373909, |
|
"eval_runtime": 7452.1074, |
|
"eval_samples_per_second": 0.618, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 8.125, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.6389473684210526e-06, |
|
"loss": 0.014, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.1126315789473685e-06, |
|
"loss": 0.0067, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_cer": 4.8133156352262425, |
|
"eval_loss": 0.20030897855758667, |
|
"eval_runtime": 7521.4475, |
|
"eval_samples_per_second": 0.612, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 7.841216216216217, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.5863157894736845e-06, |
|
"loss": 0.0062, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.06e-06, |
|
"loss": 0.006, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_cer": 4.70226439394935, |
|
"eval_loss": 0.2070685774087906, |
|
"eval_runtime": 7515.5986, |
|
"eval_samples_per_second": 0.613, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 7.581081081081082, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 5.336842105263159e-07, |
|
"loss": 0.0029, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 7.3684210526315796e-09, |
|
"loss": 0.0022, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_cer": 4.718692092363091, |
|
"eval_loss": 0.2283746749162674, |
|
"eval_runtime": 7466.269, |
|
"eval_samples_per_second": 0.617, |
|
"eval_steps_per_second": 0.077, |
|
"eval_wer": 7.64527027027027, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"step": 10000, |
|
"total_flos": 3.39643668197376e+20, |
|
"train_loss": 0.07097679934501648, |
|
"train_runtime": 114325.2073, |
|
"train_samples_per_second": 1.4, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 6, |
|
"total_flos": 3.39643668197376e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|