|
{ |
|
"best_metric": 0.3883848786354065, |
|
"best_model_checkpoint": "./model/checkpoint-4252", |
|
"epoch": 4.999412110523222, |
|
"eval_steps": 500, |
|
"global_step": 4252, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002850705882352941, |
|
"loss": 2.6138, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.27268008948545863, |
|
"eval_cer_best": 0.7273199105145414, |
|
"eval_loss": 0.7238138318061829, |
|
"eval_runtime": 95.5312, |
|
"eval_samples_per_second": 31.665, |
|
"eval_steps_per_second": 3.967, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002700705882352941, |
|
"loss": 1.0825, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.2253959731543624, |
|
"eval_cer_best": 0.7746040268456376, |
|
"eval_loss": 0.5289922952651978, |
|
"eval_runtime": 95.2118, |
|
"eval_samples_per_second": 31.771, |
|
"eval_steps_per_second": 3.981, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0002550882352941176, |
|
"loss": 0.9076, |
|
"step": 2551 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.20298881431767338, |
|
"eval_cer_best": 0.7970111856823267, |
|
"eval_loss": 0.46624556183815, |
|
"eval_runtime": 94.6631, |
|
"eval_samples_per_second": 31.955, |
|
"eval_steps_per_second": 4.004, |
|
"step": 2551 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00024008823529411761, |
|
"loss": 0.804, |
|
"step": 3402 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.18314093959731545, |
|
"eval_cer_best": 0.8168590604026845, |
|
"eval_loss": 0.42872071266174316, |
|
"eval_runtime": 94.4283, |
|
"eval_samples_per_second": 32.035, |
|
"eval_steps_per_second": 4.014, |
|
"step": 3402 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00022510588235294114, |
|
"loss": 0.7448, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.1720626398210291, |
|
"eval_cer_best": 0.8279373601789709, |
|
"eval_loss": 0.3883848786354065, |
|
"eval_runtime": 94.0362, |
|
"eval_samples_per_second": 32.168, |
|
"eval_steps_per_second": 4.03, |
|
"step": 4252 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 17000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 6.943311650462615e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|