|
{ |
|
"best_metric": 0.8353803157806396, |
|
"best_model_checkpoint": "sbert-ru-huawei-gradient-accumulation/checkpoint-304", |
|
"epoch": 0.9991783073130649, |
|
"eval_steps": 500, |
|
"global_step": 304, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08216926869350863, |
|
"grad_norm": 20.81386947631836, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.7474, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.16433853738701726, |
|
"grad_norm": 24.849594116210938, |
|
"learning_rate": 2.554347826086957e-05, |
|
"loss": 0.7068, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2465078060805259, |
|
"grad_norm": 14.201815605163574, |
|
"learning_rate": 3.91304347826087e-05, |
|
"loss": 0.6739, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3286770747740345, |
|
"grad_norm": 19.431638717651367, |
|
"learning_rate": 4.9995413210794864e-05, |
|
"loss": 0.6802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4108463434675431, |
|
"grad_norm": 17.281723022460938, |
|
"learning_rate": 4.9835052243991874e-05, |
|
"loss": 0.6518, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4930156121610518, |
|
"grad_norm": 21.763912200927734, |
|
"learning_rate": 4.944703213375648e-05, |
|
"loss": 0.6609, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5751848808545604, |
|
"grad_norm": 35.13481903076172, |
|
"learning_rate": 4.8834909801373264e-05, |
|
"loss": 0.6544, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.657354149548069, |
|
"grad_norm": 24.737939834594727, |
|
"learning_rate": 4.800429647908354e-05, |
|
"loss": 0.6358, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7395234182415776, |
|
"grad_norm": 22.52487564086914, |
|
"learning_rate": 4.6962806272773564e-05, |
|
"loss": 0.6558, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.8216926869350862, |
|
"grad_norm": 18.9419002532959, |
|
"learning_rate": 4.5719986364624866e-05, |
|
"loss": 0.628, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.903861955628595, |
|
"grad_norm": 21.046478271484375, |
|
"learning_rate": 4.428722949554857e-05, |
|
"loss": 0.6414, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.9860312243221035, |
|
"grad_norm": 23.134496688842773, |
|
"learning_rate": 4.267766952966369e-05, |
|
"loss": 0.6489, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9991783073130649, |
|
"eval_accuracy": 0.6643378197883489, |
|
"eval_f1_macro": 0.48932255599208974, |
|
"eval_f1_micro": 0.6643378197883489, |
|
"eval_f1_weighted": 0.6590895238835833, |
|
"eval_loss": 0.8353803157806396, |
|
"eval_precision_macro": 0.5100466324017191, |
|
"eval_precision_micro": 0.6643378197883489, |
|
"eval_precision_weighted": 0.6607527574495663, |
|
"eval_recall_macro": 0.48717407338779406, |
|
"eval_recall_micro": 0.6643378197883489, |
|
"eval_recall_weighted": 0.6643378197883489, |
|
"eval_runtime": 61.8774, |
|
"eval_samples_per_second": 157.295, |
|
"eval_steps_per_second": 4.929, |
|
"step": 304 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 912, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8146751147933696e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|