|
{ |
|
"best_metric": 0.7068273092369478, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-n64penuy/checkpoint-200", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.4925928115844727, |
|
"learning_rate": 6.477989356097638e-06, |
|
"loss": 0.6661, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.3556463718414307, |
|
"learning_rate": 6.254610412783927e-06, |
|
"loss": 0.6128, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.2110280990600586, |
|
"learning_rate": 6.031231469470215e-06, |
|
"loss": 0.5869, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7765781879425049, |
|
"learning_rate": 5.807852526156504e-06, |
|
"loss": 0.5578, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.7340786457061768, |
|
"learning_rate": 5.584473582842792e-06, |
|
"loss": 0.511, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.574438095092773, |
|
"learning_rate": 5.3610946395290804e-06, |
|
"loss": 0.4485, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.395756244659424, |
|
"learning_rate": 5.1377156962153684e-06, |
|
"loss": 0.5153, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.8996490240097046, |
|
"learning_rate": 4.9143367529016564e-06, |
|
"loss": 0.5166, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.7111058235168457, |
|
"learning_rate": 4.6909578095879444e-06, |
|
"loss": 0.4625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.7154110670089722, |
|
"learning_rate": 4.467578866274233e-06, |
|
"loss": 0.5053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7475, |
|
"eval_f1": 0.5809128630705395, |
|
"eval_loss": 0.48702481389045715, |
|
"eval_precision": 0.5932203389830508, |
|
"eval_recall": 0.5691056910569106, |
|
"eval_runtime": 1.5237, |
|
"eval_samples_per_second": 262.517, |
|
"eval_steps_per_second": 16.407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 3.091020107269287, |
|
"learning_rate": 4.244199922960521e-06, |
|
"loss": 0.4949, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.2689990997314453, |
|
"learning_rate": 4.02082097964681e-06, |
|
"loss": 0.4326, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 4.819199085235596, |
|
"learning_rate": 3.797442036333098e-06, |
|
"loss": 0.4239, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.2359201908111572, |
|
"learning_rate": 3.5740630930193865e-06, |
|
"loss": 0.3589, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.3298685550689697, |
|
"learning_rate": 3.350684149705675e-06, |
|
"loss": 0.4728, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 7.007627010345459, |
|
"learning_rate": 3.1273052063919634e-06, |
|
"loss": 0.3661, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 5.745215892791748, |
|
"learning_rate": 2.903926263078252e-06, |
|
"loss": 0.3744, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.657752990722656, |
|
"learning_rate": 2.6805473197645402e-06, |
|
"loss": 0.3764, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 5.390446662902832, |
|
"learning_rate": 2.4571683764508282e-06, |
|
"loss": 0.374, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.511307716369629, |
|
"learning_rate": 2.2337894331371166e-06, |
|
"loss": 0.4045, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8175, |
|
"eval_f1": 0.7068273092369478, |
|
"eval_loss": 0.3974764943122864, |
|
"eval_precision": 0.6984126984126984, |
|
"eval_recall": 0.7154471544715447, |
|
"eval_runtime": 1.5366, |
|
"eval_samples_per_second": 260.318, |
|
"eval_steps_per_second": 16.27, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 6.70136829941135e-06, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 3, |
|
"per_device_train_batch_size": 16, |
|
"seed": 5 |
|
} |
|
} |
|
|