{
  "best_metric": 0.9008125554721984,
  "best_model_checkpoint": "rubert_classification/checkpoint-3500",
  "epoch": 1.674641148325359,
  "eval_steps": 500,
  "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24,
      "eval_accuracy": 0.7823053307284399,
      "eval_f1": 0.7752692727748154,
      "eval_loss": 0.6053181886672974,
      "eval_precision": 0.7849961270931392,
      "eval_recall": 0.7823053307284399,
      "eval_runtime": 38.9969,
      "eval_samples_per_second": 183.758,
      "eval_steps_per_second": 11.488,
      "step": 500
    },
    {
      "epoch": 0.29,
      "grad_norm": 22.682289123535156,
      "learning_rate": 4.975490196078432e-05,
      "loss": 0.9433,
      "step": 600
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.8368685459112476,
      "eval_f1": 0.8330214371969107,
      "eval_loss": 0.46620890498161316,
      "eval_precision": 0.840415327990016,
      "eval_recall": 0.8368685459112476,
      "eval_runtime": 36.1817,
      "eval_samples_per_second": 198.056,
      "eval_steps_per_second": 12.382,
      "step": 1000
    },
    {
      "epoch": 0.57,
      "grad_norm": 12.139711380004883,
      "learning_rate": 4.82843137254902e-05,
      "loss": 0.5182,
      "step": 1200
    },
    {
      "epoch": 0.72,
      "eval_accuracy": 0.8428691041027072,
      "eval_f1": 0.8319455570081821,
      "eval_loss": 0.4372938573360443,
      "eval_precision": 0.8395761804457677,
      "eval_recall": 0.8428691041027072,
      "eval_runtime": 35.6356,
      "eval_samples_per_second": 201.091,
      "eval_steps_per_second": 12.572,
      "step": 1500
    },
    {
      "epoch": 0.86,
      "grad_norm": 37.54426574707031,
      "learning_rate": 4.681372549019608e-05,
      "loss": 0.4508,
      "step": 1800
    },
    {
      "epoch": 0.96,
      "eval_accuracy": 0.8644990231649455,
      "eval_f1": 0.8633019943338417,
      "eval_loss": 0.39909306168556213,
      "eval_precision": 0.8707480255460968,
      "eval_recall": 0.8644990231649455,
      "eval_runtime": 35.6445,
      "eval_samples_per_second": 201.041,
      "eval_steps_per_second": 12.569,
      "step": 2000
    },
    {
      "epoch": 1.15,
      "grad_norm": 3.496920108795166,
      "learning_rate": 4.5343137254901966e-05,
      "loss": 0.3875,
      "step": 2400
    },
    {
      "epoch": 1.2,
      "eval_accuracy": 0.8898967345799609,
      "eval_f1": 0.8875919037418019,
      "eval_loss": 0.33941856026649475,
      "eval_precision": 0.8906320921346105,
      "eval_recall": 0.8898967345799609,
      "eval_runtime": 35.5422,
      "eval_samples_per_second": 201.619,
      "eval_steps_per_second": 12.605,
      "step": 2500
    },
    {
      "epoch": 1.44,
      "grad_norm": 16.364843368530273,
      "learning_rate": 4.387254901960784e-05,
      "loss": 0.3401,
      "step": 3000
    },
    {
      "epoch": 1.44,
      "eval_accuracy": 0.8869662294166899,
      "eval_f1": 0.8851872123624062,
      "eval_loss": 0.3447243273258209,
      "eval_precision": 0.8869707495957468,
      "eval_recall": 0.8869662294166899,
      "eval_runtime": 35.7225,
      "eval_samples_per_second": 200.602,
      "eval_steps_per_second": 12.541,
      "step": 3000
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.9016187552330449,
      "eval_f1": 0.9008125554721984,
      "eval_loss": 0.325568288564682,
      "eval_precision": 0.9021842242763984,
      "eval_recall": 0.9016187552330449,
      "eval_runtime": 35.7388,
      "eval_samples_per_second": 200.51,
      "eval_steps_per_second": 12.535,
      "step": 3500
    }
  ],
  "logging_steps": 600,
  "max_steps": 20900,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 1640277252096000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}