|
{ |
|
"best_metric": 0.791124713083397, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-2/checkpoint-492", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 492, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06097560975609756, |
|
"grad_norm": 3.2577075958251953, |
|
"learning_rate": 1.7097812500543722e-06, |
|
"loss": 0.7201, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12195121951219512, |
|
"grad_norm": 1.7486741542816162, |
|
"learning_rate": 1.6743086100117504e-06, |
|
"loss": 0.6884, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18292682926829268, |
|
"grad_norm": 2.3384933471679688, |
|
"learning_rate": 1.6388359699691285e-06, |
|
"loss": 0.6482, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 2.0673463344573975, |
|
"learning_rate": 1.6033633299265067e-06, |
|
"loss": 0.6254, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3048780487804878, |
|
"grad_norm": 1.9012755155563354, |
|
"learning_rate": 1.5678906898838849e-06, |
|
"loss": 0.6049, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36585365853658536, |
|
"grad_norm": 1.1691405773162842, |
|
"learning_rate": 1.532418049841263e-06, |
|
"loss": 0.5601, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4268292682926829, |
|
"grad_norm": 2.259779691696167, |
|
"learning_rate": 1.4969454097986412e-06, |
|
"loss": 0.5567, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 1.7687610387802124, |
|
"learning_rate": 1.4614727697560194e-06, |
|
"loss": 0.5499, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5487804878048781, |
|
"grad_norm": 1.2938015460968018, |
|
"learning_rate": 1.4260001297133975e-06, |
|
"loss": 0.5247, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 1.534727692604065, |
|
"learning_rate": 1.3905274896707757e-06, |
|
"loss": 0.5265, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6707317073170732, |
|
"grad_norm": 1.6018732786178589, |
|
"learning_rate": 1.3550548496281539e-06, |
|
"loss": 0.4982, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 1.9590264558792114, |
|
"learning_rate": 1.319582209585532e-06, |
|
"loss": 0.4593, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7926829268292683, |
|
"grad_norm": 1.6383683681488037, |
|
"learning_rate": 1.2841095695429102e-06, |
|
"loss": 0.5132, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 1.6192731857299805, |
|
"learning_rate": 1.2486369295002884e-06, |
|
"loss": 0.4783, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9146341463414634, |
|
"grad_norm": 1.8902620077133179, |
|
"learning_rate": 1.2131642894576666e-06, |
|
"loss": 0.4747, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 2.1500866413116455, |
|
"learning_rate": 1.1776916494150447e-06, |
|
"loss": 0.5078, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7697016067329763, |
|
"eval_f1": 0.0960960960960961, |
|
"eval_loss": 0.4581395089626312, |
|
"eval_precision": 0.7619047619047619, |
|
"eval_recall": 0.05128205128205128, |
|
"eval_runtime": 4.8871, |
|
"eval_samples_per_second": 267.439, |
|
"eval_steps_per_second": 16.779, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.0365853658536586, |
|
"grad_norm": 2.7179384231567383, |
|
"learning_rate": 1.1422190093724229e-06, |
|
"loss": 0.4381, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0975609756097562, |
|
"grad_norm": 1.8275049924850464, |
|
"learning_rate": 1.106746369329801e-06, |
|
"loss": 0.446, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1585365853658536, |
|
"grad_norm": 1.6741889715194702, |
|
"learning_rate": 1.0712737292871792e-06, |
|
"loss": 0.4451, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 1.8023988008499146, |
|
"learning_rate": 1.0358010892445574e-06, |
|
"loss": 0.4538, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2804878048780488, |
|
"grad_norm": 2.3258135318756104, |
|
"learning_rate": 1.0003284492019356e-06, |
|
"loss": 0.4531, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.3414634146341464, |
|
"grad_norm": 2.654583215713501, |
|
"learning_rate": 9.648558091593137e-07, |
|
"loss": 0.426, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4024390243902438, |
|
"grad_norm": 2.7538747787475586, |
|
"learning_rate": 9.293831691166919e-07, |
|
"loss": 0.4056, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4634146341463414, |
|
"grad_norm": 1.887471079826355, |
|
"learning_rate": 8.939105290740702e-07, |
|
"loss": 0.448, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.524390243902439, |
|
"grad_norm": 1.7449885606765747, |
|
"learning_rate": 8.584378890314483e-07, |
|
"loss": 0.4132, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.5853658536585367, |
|
"grad_norm": 1.4998096227645874, |
|
"learning_rate": 8.229652489888265e-07, |
|
"loss": 0.4164, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.6463414634146343, |
|
"grad_norm": 4.520744800567627, |
|
"learning_rate": 7.874926089462047e-07, |
|
"loss": 0.4662, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7073170731707317, |
|
"grad_norm": 1.9681720733642578, |
|
"learning_rate": 7.520199689035828e-07, |
|
"loss": 0.3977, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.7682926829268293, |
|
"grad_norm": 3.49233341217041, |
|
"learning_rate": 7.16547328860961e-07, |
|
"loss": 0.4133, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 2.177455425262451, |
|
"learning_rate": 6.810746888183392e-07, |
|
"loss": 0.4403, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8902439024390243, |
|
"grad_norm": 3.338192939758301, |
|
"learning_rate": 6.456020487757174e-07, |
|
"loss": 0.4154, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.951219512195122, |
|
"grad_norm": 2.005662441253662, |
|
"learning_rate": 6.101294087330955e-07, |
|
"loss": 0.4137, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7888293802601377, |
|
"eval_f1": 0.31, |
|
"eval_loss": 0.399868905544281, |
|
"eval_precision": 0.7045454545454546, |
|
"eval_recall": 0.1987179487179487, |
|
"eval_runtime": 4.9013, |
|
"eval_samples_per_second": 266.666, |
|
"eval_steps_per_second": 16.73, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.0121951219512195, |
|
"grad_norm": 1.8187142610549927, |
|
"learning_rate": 5.746567686904737e-07, |
|
"loss": 0.4231, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.073170731707317, |
|
"grad_norm": 2.4677720069885254, |
|
"learning_rate": 5.391841286478519e-07, |
|
"loss": 0.3832, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.1341463414634148, |
|
"grad_norm": 2.2957379817962646, |
|
"learning_rate": 5.0371148860523e-07, |
|
"loss": 0.3671, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1951219512195124, |
|
"grad_norm": 1.6115466356277466, |
|
"learning_rate": 4.682388485626082e-07, |
|
"loss": 0.3776, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.2560975609756095, |
|
"grad_norm": 2.2633934020996094, |
|
"learning_rate": 4.327662085199863e-07, |
|
"loss": 0.4199, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.317073170731707, |
|
"grad_norm": 3.485372304916382, |
|
"learning_rate": 3.972935684773645e-07, |
|
"loss": 0.3784, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.3780487804878048, |
|
"grad_norm": 1.9504722356796265, |
|
"learning_rate": 3.6182092843474265e-07, |
|
"loss": 0.3538, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 2.1430771350860596, |
|
"learning_rate": 3.263482883921208e-07, |
|
"loss": 0.4277, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.9325039386749268, |
|
"learning_rate": 2.90875648349499e-07, |
|
"loss": 0.4083, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.5609756097560976, |
|
"grad_norm": 1.617710828781128, |
|
"learning_rate": 2.5540300830687715e-07, |
|
"loss": 0.4297, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.6219512195121952, |
|
"grad_norm": 3.280532121658325, |
|
"learning_rate": 2.1993036826425532e-07, |
|
"loss": 0.4056, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.682926829268293, |
|
"grad_norm": 2.3135297298431396, |
|
"learning_rate": 1.8445772822163352e-07, |
|
"loss": 0.438, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.7439024390243905, |
|
"grad_norm": 2.195956230163574, |
|
"learning_rate": 1.489850881790117e-07, |
|
"loss": 0.4187, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.8048780487804876, |
|
"grad_norm": 2.371246099472046, |
|
"learning_rate": 1.1351244813638987e-07, |
|
"loss": 0.3459, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.8658536585365852, |
|
"grad_norm": 3.243722915649414, |
|
"learning_rate": 7.803980809376802e-08, |
|
"loss": 0.3754, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.926829268292683, |
|
"grad_norm": 2.472179412841797, |
|
"learning_rate": 4.25671680511462e-08, |
|
"loss": 0.4097, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.9878048780487805, |
|
"grad_norm": 2.2248072624206543, |
|
"learning_rate": 7.094528008524367e-09, |
|
"loss": 0.3784, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.791124713083397, |
|
"eval_f1": 0.3357664233576642, |
|
"eval_loss": 0.386940062046051, |
|
"eval_precision": 0.696969696969697, |
|
"eval_recall": 0.22115384615384615, |
|
"eval_runtime": 4.9814, |
|
"eval_samples_per_second": 262.374, |
|
"eval_steps_per_second": 16.461, |
|
"step": 492 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 492, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2071790114979840.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 1.745253890096994e-06, |
|
"num_train_epochs": 3, |
|
"per_device_train_batch_size": 32, |
|
"seed": 32 |
|
} |
|
} |
|
|