|
{ |
|
"best_metric": 0.7901234567901234, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-hc8i9nht/checkpoint-200", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.6317474842071533, |
|
"learning_rate": 8.174509670130923e-05, |
|
"loss": 0.543, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.941434860229492, |
|
"learning_rate": 8.091938865382126e-05, |
|
"loss": 0.4521, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.016721725463867, |
|
"learning_rate": 8.009368060633328e-05, |
|
"loss": 0.4908, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.4489011764526367, |
|
"learning_rate": 7.92679725588453e-05, |
|
"loss": 0.4119, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 3.7478549480438232, |
|
"learning_rate": 7.844226451135734e-05, |
|
"loss": 0.5418, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 20.588834762573242, |
|
"learning_rate": 7.761655646386936e-05, |
|
"loss": 0.3073, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.3618388175964355, |
|
"learning_rate": 7.67908484163814e-05, |
|
"loss": 0.4359, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 7.6314921379089355, |
|
"learning_rate": 7.596514036889342e-05, |
|
"loss": 0.5815, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.3096113204956055, |
|
"learning_rate": 7.513943232140545e-05, |
|
"loss": 0.4516, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.00711441040039, |
|
"learning_rate": 7.431372427391748e-05, |
|
"loss": 0.5594, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.1074318885803223, |
|
"learning_rate": 7.348801622642951e-05, |
|
"loss": 0.4839, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.2857903242111206, |
|
"learning_rate": 7.266230817894153e-05, |
|
"loss": 0.3655, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.47830930352211, |
|
"learning_rate": 7.183660013145356e-05, |
|
"loss": 0.4362, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.92142915725708, |
|
"learning_rate": 7.101089208396559e-05, |
|
"loss": 0.3585, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 12.701593399047852, |
|
"learning_rate": 7.018518403647762e-05, |
|
"loss": 0.4539, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.4179468154907227, |
|
"learning_rate": 6.935947598898965e-05, |
|
"loss": 0.291, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.713927745819092, |
|
"learning_rate": 6.853376794150168e-05, |
|
"loss": 0.533, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.076853275299072, |
|
"learning_rate": 6.77080598940137e-05, |
|
"loss": 0.4721, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 4.515071868896484, |
|
"learning_rate": 6.688235184652574e-05, |
|
"loss": 0.4085, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.7936811447143555, |
|
"learning_rate": 6.605664379903777e-05, |
|
"loss": 0.391, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8725, |
|
"eval_f1": 0.7901234567901234, |
|
"eval_loss": 0.337982714176178, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.7804878048780488, |
|
"eval_runtime": 1.5299, |
|
"eval_samples_per_second": 261.453, |
|
"eval_steps_per_second": 16.341, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 211815370450944.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 8.25708047487972e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 5, |
|
"per_device_train_batch_size": 8, |
|
"seed": 17 |
|
} |
|
} |
|
|