|
{ |
|
"best_metric": 0.6912539515279241, |
|
"best_model_checkpoint": "../saved_model/tibetan-bert_tusa/checkpoint-3000", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.646, |
|
"eval_f1": 0.5241935483870969, |
|
"eval_loss": 0.7404859662055969, |
|
"eval_precision": 0.7991803278688525, |
|
"eval_recall": 0.39, |
|
"eval_runtime": 7.8525, |
|
"eval_samples_per_second": 127.348, |
|
"eval_steps_per_second": 4.075, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.344, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.669, |
|
"eval_f1": 0.6182237600922721, |
|
"eval_loss": 0.9954240322113037, |
|
"eval_precision": 0.7302452316076294, |
|
"eval_recall": 0.536, |
|
"eval_runtime": 7.8266, |
|
"eval_samples_per_second": 127.769, |
|
"eval_steps_per_second": 4.089, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.69, |
|
"eval_f1": 0.6784232365145229, |
|
"eval_loss": 0.9894322752952576, |
|
"eval_precision": 0.7047413793103449, |
|
"eval_recall": 0.654, |
|
"eval_runtime": 7.8356, |
|
"eval_samples_per_second": 127.623, |
|
"eval_steps_per_second": 4.084, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.157, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.657, |
|
"eval_f1": 0.5862484921592279, |
|
"eval_loss": 1.347570538520813, |
|
"eval_precision": 0.7386018237082067, |
|
"eval_recall": 0.486, |
|
"eval_runtime": 7.8871, |
|
"eval_samples_per_second": 126.789, |
|
"eval_steps_per_second": 4.057, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.643, |
|
"eval_f1": 0.5486725663716814, |
|
"eval_loss": 1.6362618207931519, |
|
"eval_precision": 0.7457044673539519, |
|
"eval_recall": 0.434, |
|
"eval_runtime": 7.8844, |
|
"eval_samples_per_second": 126.833, |
|
"eval_steps_per_second": 4.059, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0727, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.649, |
|
"eval_f1": 0.5755743651753326, |
|
"eval_loss": 1.8187189102172852, |
|
"eval_precision": 0.72782874617737, |
|
"eval_recall": 0.476, |
|
"eval_runtime": 7.8876, |
|
"eval_samples_per_second": 126.781, |
|
"eval_steps_per_second": 4.057, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.678, |
|
"eval_f1": 0.6373873873873873, |
|
"eval_loss": 1.8846988677978516, |
|
"eval_precision": 0.729381443298969, |
|
"eval_recall": 0.566, |
|
"eval_runtime": 7.8344, |
|
"eval_samples_per_second": 127.643, |
|
"eval_steps_per_second": 4.085, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0407, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.667, |
|
"eval_f1": 0.622876557191393, |
|
"eval_loss": 2.593871831893921, |
|
"eval_precision": 0.7180156657963447, |
|
"eval_recall": 0.55, |
|
"eval_runtime": 7.8379, |
|
"eval_samples_per_second": 127.584, |
|
"eval_steps_per_second": 4.083, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.663, |
|
"eval_f1": 0.6174801362088536, |
|
"eval_loss": 2.2667245864868164, |
|
"eval_precision": 0.7139107611548556, |
|
"eval_recall": 0.544, |
|
"eval_runtime": 7.8522, |
|
"eval_samples_per_second": 127.353, |
|
"eval_steps_per_second": 4.075, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0219, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.683, |
|
"eval_f1": 0.6458100558659218, |
|
"eval_loss": 2.565606117248535, |
|
"eval_precision": 0.7316455696202532, |
|
"eval_recall": 0.578, |
|
"eval_runtime": 7.8306, |
|
"eval_samples_per_second": 127.705, |
|
"eval_steps_per_second": 4.087, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.689, |
|
"eval_f1": 0.645381984036488, |
|
"eval_loss": 2.9553158283233643, |
|
"eval_precision": 0.7506631299734748, |
|
"eval_recall": 0.566, |
|
"eval_runtime": 7.8582, |
|
"eval_samples_per_second": 127.256, |
|
"eval_steps_per_second": 4.072, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0066, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.707, |
|
"eval_f1": 0.6912539515279241, |
|
"eval_loss": 2.7209701538085938, |
|
"eval_precision": 0.7305122494432071, |
|
"eval_recall": 0.656, |
|
"eval_runtime": 7.9302, |
|
"eval_samples_per_second": 126.101, |
|
"eval_steps_per_second": 4.035, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.69, |
|
"eval_f1": 0.6630434782608696, |
|
"eval_loss": 2.795156478881836, |
|
"eval_precision": 0.7261904761904762, |
|
"eval_recall": 0.61, |
|
"eval_runtime": 7.8563, |
|
"eval_samples_per_second": 127.287, |
|
"eval_steps_per_second": 4.073, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0054, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.701, |
|
"eval_f1": 0.6862539349422876, |
|
"eval_loss": 2.847877025604248, |
|
"eval_precision": 0.7218543046357616, |
|
"eval_recall": 0.654, |
|
"eval_runtime": 7.8163, |
|
"eval_samples_per_second": 127.938, |
|
"eval_steps_per_second": 4.094, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.696, |
|
"eval_f1": 0.68, |
|
"eval_loss": 2.996243715286255, |
|
"eval_precision": 0.7177777777777777, |
|
"eval_recall": 0.646, |
|
"eval_runtime": 7.8761, |
|
"eval_samples_per_second": 126.966, |
|
"eval_steps_per_second": 4.063, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0026, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.695, |
|
"eval_f1": 0.6709816612729235, |
|
"eval_loss": 3.150722026824951, |
|
"eval_precision": 0.7283372365339579, |
|
"eval_recall": 0.622, |
|
"eval_runtime": 7.8492, |
|
"eval_samples_per_second": 127.402, |
|
"eval_steps_per_second": 4.077, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.688, |
|
"eval_f1": 0.6578947368421052, |
|
"eval_loss": 3.2561535835266113, |
|
"eval_precision": 0.7281553398058253, |
|
"eval_recall": 0.6, |
|
"eval_runtime": 7.8888, |
|
"eval_samples_per_second": 126.762, |
|
"eval_steps_per_second": 4.056, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0022, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.682, |
|
"eval_f1": 0.671487603305785, |
|
"eval_loss": 3.1687259674072266, |
|
"eval_precision": 0.6944444444444444, |
|
"eval_recall": 0.65, |
|
"eval_runtime": 7.8583, |
|
"eval_samples_per_second": 127.254, |
|
"eval_steps_per_second": 4.072, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.685, |
|
"eval_f1": 0.672216441207076, |
|
"eval_loss": 3.162193775177002, |
|
"eval_precision": 0.7006507592190889, |
|
"eval_recall": 0.646, |
|
"eval_runtime": 7.8615, |
|
"eval_samples_per_second": 127.202, |
|
"eval_steps_per_second": 4.07, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0012, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.698, |
|
"eval_f1": 0.6766595289079229, |
|
"eval_loss": 3.1741974353790283, |
|
"eval_precision": 0.728110599078341, |
|
"eval_recall": 0.632, |
|
"eval_runtime": 7.8547, |
|
"eval_samples_per_second": 127.313, |
|
"eval_steps_per_second": 4.074, |
|
"step": 5000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.20977688576e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|