|
{ |
|
"best_metric": 0.34165722131729126, |
|
"best_model_checkpoint": "camembert-model/checkpoint-1600", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.880101905419033, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.6244299411773682, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7459, |
|
"eval_samples_per_second": 456.492, |
|
"eval_steps_per_second": 28.638, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.949524972135237, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.5487059354782104, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7233, |
|
"eval_samples_per_second": 462.498, |
|
"eval_steps_per_second": 29.015, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9524972135236983, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.4897811710834503, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7082, |
|
"eval_samples_per_second": 466.564, |
|
"eval_steps_per_second": 29.27, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 1.9320571422576904, |
|
"learning_rate": 1.375e-05, |
|
"loss": 0.5894, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.44394731521606445, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7198, |
|
"eval_samples_per_second": 463.435, |
|
"eval_steps_per_second": 29.074, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.40905287861824036, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7243, |
|
"eval_samples_per_second": 462.226, |
|
"eval_steps_per_second": 28.998, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.38293978571891785, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 4.7777, |
|
"eval_samples_per_second": 166.818, |
|
"eval_steps_per_second": 10.465, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.3343875408172607, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.4292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.36409473419189453, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7395, |
|
"eval_samples_per_second": 458.191, |
|
"eval_steps_per_second": 28.745, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.35134562849998474, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.7277, |
|
"eval_samples_per_second": 461.316, |
|
"eval_steps_per_second": 28.941, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.34404492378234863, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.696, |
|
"eval_samples_per_second": 469.941, |
|
"eval_steps_per_second": 29.482, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 1.457685112953186, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.3631, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9527625922190966, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.34165722131729126, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 1.8323, |
|
"eval_samples_per_second": 434.963, |
|
"eval_steps_per_second": 27.288, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 884828390782752.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|