{ "best_metric": 0.8353803157806396, "best_model_checkpoint": "sbert-ru-huawei-gradient-accumulation/checkpoint-304", "epoch": 0.9991783073130649, "eval_steps": 500, "global_step": 304, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08216926869350863, "grad_norm": 20.81386947631836, "learning_rate": 1.25e-05, "loss": 0.7474, "step": 25 }, { "epoch": 0.16433853738701726, "grad_norm": 24.849594116210938, "learning_rate": 2.554347826086957e-05, "loss": 0.7068, "step": 50 }, { "epoch": 0.2465078060805259, "grad_norm": 14.201815605163574, "learning_rate": 3.91304347826087e-05, "loss": 0.6739, "step": 75 }, { "epoch": 0.3286770747740345, "grad_norm": 19.431638717651367, "learning_rate": 4.9995413210794864e-05, "loss": 0.6802, "step": 100 }, { "epoch": 0.4108463434675431, "grad_norm": 17.281723022460938, "learning_rate": 4.9835052243991874e-05, "loss": 0.6518, "step": 125 }, { "epoch": 0.4930156121610518, "grad_norm": 21.763912200927734, "learning_rate": 4.944703213375648e-05, "loss": 0.6609, "step": 150 }, { "epoch": 0.5751848808545604, "grad_norm": 35.13481903076172, "learning_rate": 4.8834909801373264e-05, "loss": 0.6544, "step": 175 }, { "epoch": 0.657354149548069, "grad_norm": 24.737939834594727, "learning_rate": 4.800429647908354e-05, "loss": 0.6358, "step": 200 }, { "epoch": 0.7395234182415776, "grad_norm": 22.52487564086914, "learning_rate": 4.6962806272773564e-05, "loss": 0.6558, "step": 225 }, { "epoch": 0.8216926869350862, "grad_norm": 18.9419002532959, "learning_rate": 4.5719986364624866e-05, "loss": 0.628, "step": 250 }, { "epoch": 0.903861955628595, "grad_norm": 21.046478271484375, "learning_rate": 4.428722949554857e-05, "loss": 0.6414, "step": 275 }, { "epoch": 0.9860312243221035, "grad_norm": 23.134496688842773, "learning_rate": 4.267766952966369e-05, "loss": 0.6489, "step": 300 }, { "epoch": 0.9991783073130649, "eval_accuracy": 0.6643378197883489, "eval_f1_macro": 0.48932255599208974, "eval_f1_micro": 0.6643378197883489, "eval_f1_weighted": 0.6590895238835833, "eval_loss": 0.8353803157806396, "eval_precision_macro": 0.5100466324017191, "eval_precision_micro": 0.6643378197883489, "eval_precision_weighted": 0.6607527574495663, "eval_recall_macro": 0.48717407338779406, "eval_recall_micro": 0.6643378197883489, "eval_recall_weighted": 0.6643378197883489, "eval_runtime": 61.8774, "eval_samples_per_second": 157.295, "eval_steps_per_second": 4.929, "step": 304 } ], "logging_steps": 25, "max_steps": 912, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8146751147933696e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }