|
{ |
|
"best_metric": 0.00030570320086553693, |
|
"best_model_checkpoint": "/media/aiteam/Data/LuminSoft/projects/ocr/results/checkpoint-212814", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 364824, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.07173027098178864, |
|
"learning_rate": 9.375e-05, |
|
"loss": 0.0796, |
|
"step": 30402 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.9691155403795684, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0006197646143846214, |
|
"eval_runtime": 2732.1107, |
|
"eval_samples_per_second": 11.117, |
|
"eval_steps_per_second": 1.39, |
|
"step": 30402 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.011926465667784214, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.0008, |
|
"step": 60804 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.976564760945452, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0004327517526689917, |
|
"eval_runtime": 2738.2395, |
|
"eval_samples_per_second": 11.092, |
|
"eval_steps_per_second": 1.387, |
|
"step": 60804 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.006019719876348972, |
|
"learning_rate": 8.125000000000001e-05, |
|
"loss": 0.0005, |
|
"step": 91206 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.9809933294323522, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0003870356595143676, |
|
"eval_runtime": 2734.2135, |
|
"eval_samples_per_second": 11.108, |
|
"eval_steps_per_second": 1.389, |
|
"step": 91206 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.0002722485805861652, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0004, |
|
"step": 121608 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.981663601147577, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0003594958398025483, |
|
"eval_runtime": 2730.1307, |
|
"eval_samples_per_second": 11.125, |
|
"eval_steps_per_second": 1.391, |
|
"step": 121608 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.00043152127182111144, |
|
"learning_rate": 6.875e-05, |
|
"loss": 0.0003, |
|
"step": 152010 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.9837872090873163, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.000314301869366318, |
|
"eval_runtime": 2695.597, |
|
"eval_samples_per_second": 11.268, |
|
"eval_steps_per_second": 1.409, |
|
"step": 152010 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.0002681140904314816, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0002, |
|
"step": 182412 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.9853297104414079, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.00031128423870541155, |
|
"eval_runtime": 2742.3832, |
|
"eval_samples_per_second": 11.075, |
|
"eval_steps_per_second": 1.385, |
|
"step": 182412 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.00042421137914061546, |
|
"learning_rate": 5.6250000000000005e-05, |
|
"loss": 0.0002, |
|
"step": 212814 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.9852415464502277, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.00030570320086553693, |
|
"eval_runtime": 2763.5679, |
|
"eval_samples_per_second": 10.991, |
|
"eval_steps_per_second": 1.374, |
|
"step": 212814 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.013042913749814034, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0002, |
|
"step": 243216 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.9855596017686066, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0003345392469782382, |
|
"eval_runtime": 2754.3836, |
|
"eval_samples_per_second": 11.027, |
|
"eval_steps_per_second": 1.379, |
|
"step": 243216 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.009026958607137203, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.0001, |
|
"step": 273618 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.9856968436402628, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.0003159026673529297, |
|
"eval_runtime": 2750.1172, |
|
"eval_samples_per_second": 11.044, |
|
"eval_steps_per_second": 1.381, |
|
"step": 273618 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.4567546713806223e-05, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0001, |
|
"step": 304020 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.9858578320415788, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.00036075865500606596, |
|
"eval_runtime": 2744.9657, |
|
"eval_samples_per_second": 11.065, |
|
"eval_steps_per_second": 1.383, |
|
"step": 304020 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.00015938151045702398, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.0001, |
|
"step": 334422 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.9860261333017161, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.00039157370338216424, |
|
"eval_runtime": 2750.6108, |
|
"eval_samples_per_second": 11.042, |
|
"eval_steps_per_second": 1.38, |
|
"step": 334422 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.00013136252528056502, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0001, |
|
"step": 364824 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.9866485075861869, |
|
"eval_cer": 0.0, |
|
"eval_loss": 0.00037165774847380817, |
|
"eval_runtime": 2749.3584, |
|
"eval_samples_per_second": 11.047, |
|
"eval_steps_per_second": 1.381, |
|
"step": 364824 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 486432, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.681400041091367e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|