|
{ |
|
"best_metric": 0.7865168539325843, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-ktqxy3xz/checkpoint-400", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.025, |
|
"grad_norm": 2.0802054405212402, |
|
"learning_rate": 4.896382897724454e-05, |
|
"loss": 0.6556, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.7415668964385986, |
|
"learning_rate": 4.7708346182956214e-05, |
|
"loss": 0.4257, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.075, |
|
"grad_norm": 6.465210437774658, |
|
"learning_rate": 4.64528633886679e-05, |
|
"loss": 0.8034, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.8712220191955566, |
|
"learning_rate": 4.5197380594379574e-05, |
|
"loss": 0.6423, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 2.292738676071167, |
|
"learning_rate": 4.3941897800091254e-05, |
|
"loss": 0.6402, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.6097824573516846, |
|
"learning_rate": 4.268641500580293e-05, |
|
"loss": 0.412, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.175, |
|
"grad_norm": 3.220292806625366, |
|
"learning_rate": 4.143093221151461e-05, |
|
"loss": 0.5088, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.5234849452972412, |
|
"learning_rate": 4.0175449417226294e-05, |
|
"loss": 0.3435, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.225, |
|
"grad_norm": 1.0715090036392212, |
|
"learning_rate": 3.891996662293797e-05, |
|
"loss": 0.7451, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.492457389831543, |
|
"learning_rate": 3.766448382864965e-05, |
|
"loss": 0.2199, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.275, |
|
"grad_norm": 1.9438965320587158, |
|
"learning_rate": 3.640900103436132e-05, |
|
"loss": 0.7001, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.4070729613304138, |
|
"learning_rate": 3.5153518240073e-05, |
|
"loss": 0.4277, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.325, |
|
"grad_norm": 0.3407312035560608, |
|
"learning_rate": 3.389803544578468e-05, |
|
"loss": 0.2982, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.44988295435905457, |
|
"learning_rate": 3.264255265149636e-05, |
|
"loss": 0.3097, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 0.9247645735740662, |
|
"learning_rate": 3.138706985720804e-05, |
|
"loss": 1.0367, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.748664140701294, |
|
"learning_rate": 3.0131587062919714e-05, |
|
"loss": 0.3645, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.425, |
|
"grad_norm": 7.6074910163879395, |
|
"learning_rate": 2.8876104268631394e-05, |
|
"loss": 0.5364, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 6.632686138153076, |
|
"learning_rate": 2.7620621474343077e-05, |
|
"loss": 0.4973, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.475, |
|
"grad_norm": 0.3361460268497467, |
|
"learning_rate": 2.6365138680054754e-05, |
|
"loss": 0.4819, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.405449390411377, |
|
"learning_rate": 2.510965588576643e-05, |
|
"loss": 0.3588, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.525, |
|
"grad_norm": 0.504410445690155, |
|
"learning_rate": 2.3854173091478107e-05, |
|
"loss": 0.5494, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 3.5160725116729736, |
|
"learning_rate": 2.2598690297189787e-05, |
|
"loss": 0.5517, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.575, |
|
"grad_norm": 10.914329528808594, |
|
"learning_rate": 2.1343207502901464e-05, |
|
"loss": 0.4456, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 13.415484428405762, |
|
"learning_rate": 2.0087724708613147e-05, |
|
"loss": 0.3435, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 0.3822052478790283, |
|
"learning_rate": 1.8832241914324824e-05, |
|
"loss": 0.4514, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 13.96898365020752, |
|
"learning_rate": 1.75767591200365e-05, |
|
"loss": 0.5878, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.675, |
|
"grad_norm": 14.09548568725586, |
|
"learning_rate": 1.632127632574818e-05, |
|
"loss": 0.4703, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 8.53896713256836, |
|
"learning_rate": 1.5065793531459857e-05, |
|
"loss": 0.3875, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.725, |
|
"grad_norm": 7.014000415802002, |
|
"learning_rate": 1.3810310737171539e-05, |
|
"loss": 0.2587, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 4.1433258056640625, |
|
"learning_rate": 1.2554827942883215e-05, |
|
"loss": 0.4717, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.775, |
|
"grad_norm": 0.7594526410102844, |
|
"learning_rate": 1.1299345148594894e-05, |
|
"loss": 0.6532, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.1434805393218994, |
|
"learning_rate": 1.0043862354306574e-05, |
|
"loss": 0.3508, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.825, |
|
"grad_norm": 18.727951049804688, |
|
"learning_rate": 8.78837956001825e-06, |
|
"loss": 0.3343, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.23914909362793, |
|
"learning_rate": 7.5328967657299285e-06, |
|
"loss": 0.2194, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 14.330482482910156, |
|
"learning_rate": 6.277413971441608e-06, |
|
"loss": 0.4582, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.7658752202987671, |
|
"learning_rate": 5.021931177153287e-06, |
|
"loss": 0.4202, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 0.7938013672828674, |
|
"learning_rate": 3.7664483828649642e-06, |
|
"loss": 0.2567, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 1.5704269409179688, |
|
"learning_rate": 2.5109655885766434e-06, |
|
"loss": 0.6044, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 5.166118621826172, |
|
"learning_rate": 1.2554827942883217e-06, |
|
"loss": 0.3807, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3811684250831604, |
|
"learning_rate": 0.0, |
|
"loss": 0.4158, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8575, |
|
"eval_f1": 0.7865168539325843, |
|
"eval_loss": 0.42347240447998047, |
|
"eval_precision": 0.7291666666666666, |
|
"eval_recall": 0.8536585365853658, |
|
"eval_runtime": 1.5735, |
|
"eval_samples_per_second": 254.213, |
|
"eval_steps_per_second": 15.888, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 211815370450944.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 5.021931177153286e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 1, |
|
"per_device_train_batch_size": 4, |
|
"seed": 26 |
|
} |
|
} |
|
|