{
  "best_metric": 0.7954545454545454,
  "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-vdjsiuit/checkpoint-600",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 600,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 3.0975422859191895,
      "learning_rate": 5.939561377324845e-05,
      "loss": 0.5513,
      "step": 10
    },
    {
      "epoch": 0.1,
      "grad_norm": 8.386995315551758,
      "learning_rate": 5.83889084550578e-05,
      "loss": 0.5725,
      "step": 20
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.8792715072631836,
      "learning_rate": 5.738220313686715e-05,
      "loss": 0.5265,
      "step": 30
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.608365535736084,
      "learning_rate": 5.63754978186765e-05,
      "loss": 0.5106,
      "step": 40
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.161665201187134,
      "learning_rate": 5.5368792500485845e-05,
      "loss": 0.4356,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.6395349502563477,
      "learning_rate": 5.4362087182295196e-05,
      "loss": 0.5053,
      "step": 60
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.152228832244873,
      "learning_rate": 5.335538186410454e-05,
      "loss": 0.3511,
      "step": 70
    },
    {
      "epoch": 0.4,
      "grad_norm": 22.35204315185547,
      "learning_rate": 5.234867654591389e-05,
      "loss": 0.5189,
      "step": 80
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.6376490592956543,
      "learning_rate": 5.134197122772324e-05,
      "loss": 0.3371,
      "step": 90
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.2198097705841064,
      "learning_rate": 5.033526590953259e-05,
      "loss": 0.4037,
      "step": 100
    },
    {
      "epoch": 0.55,
      "grad_norm": 7.86613130569458,
      "learning_rate": 4.9328560591341935e-05,
      "loss": 0.4833,
      "step": 110
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.6074087619781494,
      "learning_rate": 4.8321855273151286e-05,
      "loss": 0.2857,
      "step": 120
    },
    {
      "epoch": 0.65,
      "grad_norm": 6.2979230880737305,
      "learning_rate": 4.731514995496063e-05,
      "loss": 0.4716,
      "step": 130
    },
    {
      "epoch": 0.7,
      "grad_norm": 8.780591011047363,
      "learning_rate": 4.630844463676999e-05,
      "loss": 0.4018,
      "step": 140
    },
    {
      "epoch": 0.75,
      "grad_norm": 21.96924591064453,
      "learning_rate": 4.530173931857933e-05,
      "loss": 0.412,
      "step": 150
    },
    {
      "epoch": 0.8,
      "grad_norm": 5.481058597564697,
      "learning_rate": 4.4295034000388675e-05,
      "loss": 0.5499,
      "step": 160
    },
    {
      "epoch": 0.85,
      "grad_norm": 6.030953884124756,
      "learning_rate": 4.3288328682198025e-05,
      "loss": 0.4422,
      "step": 170
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.1474953889846802,
      "learning_rate": 4.228162336400737e-05,
      "loss": 0.2512,
      "step": 180
    },
    {
      "epoch": 0.95,
      "grad_norm": 13.46764087677002,
      "learning_rate": 4.127491804581672e-05,
      "loss": 0.4896,
      "step": 190
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.764571666717529,
      "learning_rate": 4.026821272762607e-05,
      "loss": 0.3959,
      "step": 200
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8775,
      "eval_f1": 0.776255707762557,
      "eval_loss": 0.42286473512649536,
      "eval_precision": 0.8854166666666666,
      "eval_recall": 0.6910569105691057,
      "eval_runtime": 1.5336,
      "eval_samples_per_second": 260.823,
      "eval_steps_per_second": 16.301,
      "step": 200
    },
    {
      "epoch": 1.05,
      "grad_norm": 13.774749755859375,
      "learning_rate": 3.926150740943542e-05,
      "loss": 0.2466,
      "step": 210
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.9035859704017639,
      "learning_rate": 3.8254802091244765e-05,
      "loss": 0.258,
      "step": 220
    },
    {
      "epoch": 1.15,
      "grad_norm": 10.26409912109375,
      "learning_rate": 3.7248096773054115e-05,
      "loss": 0.2796,
      "step": 230
    },
    {
      "epoch": 1.2,
      "grad_norm": 16.81245994567871,
      "learning_rate": 3.624139145486346e-05,
      "loss": 0.2748,
      "step": 240
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.9752582311630249,
      "learning_rate": 3.5234686136672817e-05,
      "loss": 0.3794,
      "step": 250
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.8559211492538452,
      "learning_rate": 3.422798081848216e-05,
      "loss": 0.3013,
      "step": 260
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.20962999761104584,
      "learning_rate": 3.322127550029151e-05,
      "loss": 0.3029,
      "step": 270
    },
    {
      "epoch": 1.4,
      "grad_norm": 9.075166702270508,
      "learning_rate": 3.2214570182100855e-05,
      "loss": 0.3187,
      "step": 280
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.8023856282234192,
      "learning_rate": 3.1207864863910205e-05,
      "loss": 0.4383,
      "step": 290
    },
    {
      "epoch": 1.5,
      "grad_norm": 8.567304611206055,
      "learning_rate": 3.0201159545719553e-05,
      "loss": 0.1829,
      "step": 300
    },
    {
      "epoch": 1.55,
      "grad_norm": 8.392529487609863,
      "learning_rate": 2.91944542275289e-05,
      "loss": 0.3928,
      "step": 310
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.7387467622756958,
      "learning_rate": 2.818774890933825e-05,
      "loss": 0.1683,
      "step": 320
    },
    {
      "epoch": 1.65,
      "grad_norm": 8.625945091247559,
      "learning_rate": 2.7181043591147598e-05,
      "loss": 0.2039,
      "step": 330
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.30814623832702637,
      "learning_rate": 2.6174338272956945e-05,
      "loss": 0.1959,
      "step": 340
    },
    {
      "epoch": 1.75,
      "grad_norm": 7.88859748840332,
      "learning_rate": 2.5167632954766296e-05,
      "loss": 0.4723,
      "step": 350
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.31379058957099915,
      "learning_rate": 2.4160927636575643e-05,
      "loss": 0.3442,
      "step": 360
    },
    {
      "epoch": 1.85,
      "grad_norm": 9.962308883666992,
      "learning_rate": 2.3154222318384993e-05,
      "loss": 0.4158,
      "step": 370
    },
    {
      "epoch": 1.9,
      "grad_norm": 3.3801684379577637,
      "learning_rate": 2.2147517000194337e-05,
      "loss": 0.2539,
      "step": 380
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.923280954360962,
      "learning_rate": 2.1140811682003685e-05,
      "loss": 0.1824,
      "step": 390
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.60498046875,
      "learning_rate": 2.0134106363813035e-05,
      "loss": 0.4522,
      "step": 400
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8725,
      "eval_f1": 0.7792207792207793,
      "eval_loss": 0.318487286567688,
      "eval_precision": 0.8333333333333334,
      "eval_recall": 0.7317073170731707,
      "eval_runtime": 1.5288,
      "eval_samples_per_second": 261.647,
      "eval_steps_per_second": 16.353,
      "step": 400
    },
    {
      "epoch": 2.05,
      "grad_norm": 0.41749486327171326,
      "learning_rate": 1.9127401045622382e-05,
      "loss": 0.1156,
      "step": 410
    },
    {
      "epoch": 2.1,
      "grad_norm": 0.3070080876350403,
      "learning_rate": 1.812069572743173e-05,
      "loss": 0.0349,
      "step": 420
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.974935293197632,
      "learning_rate": 1.711399040924108e-05,
      "loss": 0.133,
      "step": 430
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.4423005282878876,
      "learning_rate": 1.6107285091050427e-05,
      "loss": 0.0806,
      "step": 440
    },
    {
      "epoch": 2.25,
      "grad_norm": 0.2689347267150879,
      "learning_rate": 1.5100579772859776e-05,
      "loss": 0.1243,
      "step": 450
    },
    {
      "epoch": 2.3,
      "grad_norm": 0.1341865360736847,
      "learning_rate": 1.4093874454669125e-05,
      "loss": 0.1945,
      "step": 460
    },
    {
      "epoch": 2.35,
      "grad_norm": 0.14216558635234833,
      "learning_rate": 1.3087169136478472e-05,
      "loss": 0.2593,
      "step": 470
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.250720977783203,
      "learning_rate": 1.2080463818287821e-05,
      "loss": 0.3948,
      "step": 480
    },
    {
      "epoch": 2.45,
      "grad_norm": 18.877355575561523,
      "learning_rate": 1.1073758500097169e-05,
      "loss": 0.2856,
      "step": 490
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.6348363161087036,
      "learning_rate": 1.0067053181906518e-05,
      "loss": 0.4102,
      "step": 500
    },
    {
      "epoch": 2.55,
      "grad_norm": 0.16438539326190948,
      "learning_rate": 9.060347863715865e-06,
      "loss": 0.2346,
      "step": 510
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.23815393447875977,
      "learning_rate": 8.053642545525214e-06,
      "loss": 0.1046,
      "step": 520
    },
    {
      "epoch": 2.65,
      "grad_norm": 25.9287166595459,
      "learning_rate": 7.046937227334563e-06,
      "loss": 0.0799,
      "step": 530
    },
    {
      "epoch": 2.7,
      "grad_norm": 0.2302125245332718,
      "learning_rate": 6.040231909143911e-06,
      "loss": 0.1963,
      "step": 540
    },
    {
      "epoch": 2.75,
      "grad_norm": 0.14851589500904083,
      "learning_rate": 5.033526590953259e-06,
      "loss": 0.1668,
      "step": 550
    },
    {
      "epoch": 2.8,
      "grad_norm": 13.211697578430176,
      "learning_rate": 4.026821272762607e-06,
      "loss": 0.3321,
      "step": 560
    },
    {
      "epoch": 2.85,
      "grad_norm": 0.20074860751628876,
      "learning_rate": 3.0201159545719554e-06,
      "loss": 0.1587,
      "step": 570
    },
    {
      "epoch": 2.9,
      "grad_norm": 1.0804275274276733,
      "learning_rate": 2.0134106363813034e-06,
      "loss": 0.1044,
      "step": 580
    },
    {
      "epoch": 2.95,
      "grad_norm": 0.2024952471256256,
      "learning_rate": 1.0067053181906517e-06,
      "loss": 0.3374,
      "step": 590
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.07060264050960541,
      "learning_rate": 0.0,
      "loss": 0.0739,
      "step": 600
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.865,
      "eval_f1": 0.7954545454545454,
      "eval_loss": 0.45124685764312744,
      "eval_precision": 0.7446808510638298,
      "eval_recall": 0.8536585365853658,
      "eval_runtime": 1.5828,
      "eval_samples_per_second": 252.719,
      "eval_steps_per_second": 15.795,
      "step": 600
    }
  ],
  "logging_steps": 10,
  "max_steps": 600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 635446111352832.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
    "_wandb": {},
    "assignments": {},
    "learning_rate": 6.0402319091439105e-05,
    "metric": "eval/loss",
    "num_train_epochs": 3,
    "per_device_train_batch_size": 8,
    "seed": 33
  }
}