|
{ |
|
"best_metric": 0.823045267489712, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-ud8wy1bz/checkpoint-200", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.0627684593200684, |
|
"learning_rate": 5.8944331187986266e-05, |
|
"loss": 0.5729, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 3.5791327953338623, |
|
"learning_rate": 5.7945274727172945e-05, |
|
"loss": 0.505, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.052925109863281, |
|
"learning_rate": 5.694621826635961e-05, |
|
"loss": 0.507, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8351855278015137, |
|
"learning_rate": 5.594716180554629e-05, |
|
"loss": 0.4524, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.351097583770752, |
|
"learning_rate": 5.494810534473296e-05, |
|
"loss": 0.3062, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.3773590326309204, |
|
"learning_rate": 5.394904888391964e-05, |
|
"loss": 0.3473, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 3.009101629257202, |
|
"learning_rate": 5.2949992423106304e-05, |
|
"loss": 0.3905, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.680708646774292, |
|
"learning_rate": 5.195093596229298e-05, |
|
"loss": 0.3812, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.7381820678710938, |
|
"learning_rate": 5.0951879501479655e-05, |
|
"loss": 0.3667, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.209010124206543, |
|
"learning_rate": 4.995282304066633e-05, |
|
"loss": 0.2821, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.845, |
|
"eval_f1": 0.693069306930693, |
|
"eval_loss": 0.41840919852256775, |
|
"eval_precision": 0.8860759493670886, |
|
"eval_recall": 0.5691056910569106, |
|
"eval_runtime": 1.5344, |
|
"eval_samples_per_second": 260.687, |
|
"eval_steps_per_second": 16.293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 12.662829399108887, |
|
"learning_rate": 4.8953766579853e-05, |
|
"loss": 0.2514, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 4.311421871185303, |
|
"learning_rate": 4.795471011903968e-05, |
|
"loss": 0.2339, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 7.38905668258667, |
|
"learning_rate": 4.695565365822635e-05, |
|
"loss": 0.2277, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.4141809940338135, |
|
"learning_rate": 4.595659719741303e-05, |
|
"loss": 0.306, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 2.5219364166259766, |
|
"learning_rate": 4.49575407365997e-05, |
|
"loss": 0.2245, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 2.283900499343872, |
|
"learning_rate": 4.3958484275786364e-05, |
|
"loss": 0.2222, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 7.031118392944336, |
|
"learning_rate": 4.295942781497304e-05, |
|
"loss": 0.3507, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.3153152465820312, |
|
"learning_rate": 4.1960371354159715e-05, |
|
"loss": 0.305, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 5.0271315574646, |
|
"learning_rate": 4.096131489334639e-05, |
|
"loss": 0.2648, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.9085686206817627, |
|
"learning_rate": 3.996225843253306e-05, |
|
"loss": 0.1931, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8925, |
|
"eval_f1": 0.823045267489712, |
|
"eval_loss": 0.30675894021987915, |
|
"eval_precision": 0.8333333333333334, |
|
"eval_recall": 0.8130081300813008, |
|
"eval_runtime": 1.5679, |
|
"eval_samples_per_second": 255.111, |
|
"eval_steps_per_second": 15.944, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 5.9943387648799594e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 6, |
|
"per_device_train_batch_size": 16, |
|
"seed": 31 |
|
} |
|
} |
|
|