adel-cybral's picture
Training in progress, step 4390, checkpoint
c766308 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 4390,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5694760820045558,
"grad_norm": 1.5650427341461182,
"learning_rate": 1.7722095671981778e-05,
"loss": 0.5173,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9492747867253404,
"eval_f1": 0.7589075584259208,
"eval_loss": 0.21155649423599243,
"eval_precision": 0.7429275610801543,
"eval_recall": 0.7755901107506432,
"eval_runtime": 3.0816,
"eval_samples_per_second": 1054.64,
"eval_steps_per_second": 66.199,
"step": 878
},
{
"epoch": 1.1389521640091116,
"grad_norm": 3.1316566467285156,
"learning_rate": 1.5444191343963555e-05,
"loss": 0.2538,
"step": 1000
},
{
"epoch": 1.7084282460136673,
"grad_norm": 3.542625665664673,
"learning_rate": 1.3166287015945332e-05,
"loss": 0.196,
"step": 1500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9620474367324893,
"eval_f1": 0.8322505413959687,
"eval_loss": 0.15278467535972595,
"eval_precision": 0.8262403528114663,
"eval_recall": 0.838348808591565,
"eval_runtime": 4.0623,
"eval_samples_per_second": 800.036,
"eval_steps_per_second": 50.218,
"step": 1756
},
{
"epoch": 2.277904328018223,
"grad_norm": 5.83428955078125,
"learning_rate": 1.0888382687927108e-05,
"loss": 0.1682,
"step": 2000
},
{
"epoch": 2.847380410022779,
"grad_norm": 4.558180332183838,
"learning_rate": 8.610478359908885e-06,
"loss": 0.1444,
"step": 2500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9651611673312469,
"eval_f1": 0.8525989138867338,
"eval_loss": 0.13545359671115875,
"eval_precision": 0.8447348193697156,
"eval_recall": 0.860610806577917,
"eval_runtime": 2.9602,
"eval_samples_per_second": 1097.888,
"eval_steps_per_second": 68.914,
"step": 2634
},
{
"epoch": 3.416856492027335,
"grad_norm": 2.630763292312622,
"learning_rate": 6.3325740318906616e-06,
"loss": 0.1302,
"step": 3000
},
{
"epoch": 3.9863325740318905,
"grad_norm": 5.151127338409424,
"learning_rate": 4.054669703872437e-06,
"loss": 0.116,
"step": 3500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9662573275930545,
"eval_f1": 0.85545364128633,
"eval_loss": 0.12546123564243317,
"eval_precision": 0.845179604760345,
"eval_recall": 0.865980534735429,
"eval_runtime": 3.1188,
"eval_samples_per_second": 1042.072,
"eval_steps_per_second": 65.41,
"step": 3512
},
{
"epoch": 4.555808656036446,
"grad_norm": 3.605297803878784,
"learning_rate": 1.7767653758542143e-06,
"loss": 0.1116,
"step": 4000
}
],
"logging_steps": 500,
"max_steps": 4390,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 91500454459296.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}