cvapict's picture
End of training
9edb9ff verified
{
"best_metric": 0.791124713083397,
"best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-2/checkpoint-492",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 492,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06097560975609756,
"grad_norm": 3.2577075958251953,
"learning_rate": 1.7097812500543722e-06,
"loss": 0.7201,
"step": 10
},
{
"epoch": 0.12195121951219512,
"grad_norm": 1.7486741542816162,
"learning_rate": 1.6743086100117504e-06,
"loss": 0.6884,
"step": 20
},
{
"epoch": 0.18292682926829268,
"grad_norm": 2.3384933471679688,
"learning_rate": 1.6388359699691285e-06,
"loss": 0.6482,
"step": 30
},
{
"epoch": 0.24390243902439024,
"grad_norm": 2.0673463344573975,
"learning_rate": 1.6033633299265067e-06,
"loss": 0.6254,
"step": 40
},
{
"epoch": 0.3048780487804878,
"grad_norm": 1.9012755155563354,
"learning_rate": 1.5678906898838849e-06,
"loss": 0.6049,
"step": 50
},
{
"epoch": 0.36585365853658536,
"grad_norm": 1.1691405773162842,
"learning_rate": 1.532418049841263e-06,
"loss": 0.5601,
"step": 60
},
{
"epoch": 0.4268292682926829,
"grad_norm": 2.259779691696167,
"learning_rate": 1.4969454097986412e-06,
"loss": 0.5567,
"step": 70
},
{
"epoch": 0.4878048780487805,
"grad_norm": 1.7687610387802124,
"learning_rate": 1.4614727697560194e-06,
"loss": 0.5499,
"step": 80
},
{
"epoch": 0.5487804878048781,
"grad_norm": 1.2938015460968018,
"learning_rate": 1.4260001297133975e-06,
"loss": 0.5247,
"step": 90
},
{
"epoch": 0.6097560975609756,
"grad_norm": 1.534727692604065,
"learning_rate": 1.3905274896707757e-06,
"loss": 0.5265,
"step": 100
},
{
"epoch": 0.6707317073170732,
"grad_norm": 1.6018732786178589,
"learning_rate": 1.3550548496281539e-06,
"loss": 0.4982,
"step": 110
},
{
"epoch": 0.7317073170731707,
"grad_norm": 1.9590264558792114,
"learning_rate": 1.319582209585532e-06,
"loss": 0.4593,
"step": 120
},
{
"epoch": 0.7926829268292683,
"grad_norm": 1.6383683681488037,
"learning_rate": 1.2841095695429102e-06,
"loss": 0.5132,
"step": 130
},
{
"epoch": 0.8536585365853658,
"grad_norm": 1.6192731857299805,
"learning_rate": 1.2486369295002884e-06,
"loss": 0.4783,
"step": 140
},
{
"epoch": 0.9146341463414634,
"grad_norm": 1.8902620077133179,
"learning_rate": 1.2131642894576666e-06,
"loss": 0.4747,
"step": 150
},
{
"epoch": 0.975609756097561,
"grad_norm": 2.1500866413116455,
"learning_rate": 1.1776916494150447e-06,
"loss": 0.5078,
"step": 160
},
{
"epoch": 1.0,
"eval_accuracy": 0.7697016067329763,
"eval_f1": 0.0960960960960961,
"eval_loss": 0.4581395089626312,
"eval_precision": 0.7619047619047619,
"eval_recall": 0.05128205128205128,
"eval_runtime": 4.8871,
"eval_samples_per_second": 267.439,
"eval_steps_per_second": 16.779,
"step": 164
},
{
"epoch": 1.0365853658536586,
"grad_norm": 2.7179384231567383,
"learning_rate": 1.1422190093724229e-06,
"loss": 0.4381,
"step": 170
},
{
"epoch": 1.0975609756097562,
"grad_norm": 1.8275049924850464,
"learning_rate": 1.106746369329801e-06,
"loss": 0.446,
"step": 180
},
{
"epoch": 1.1585365853658536,
"grad_norm": 1.6741889715194702,
"learning_rate": 1.0712737292871792e-06,
"loss": 0.4451,
"step": 190
},
{
"epoch": 1.2195121951219512,
"grad_norm": 1.8023988008499146,
"learning_rate": 1.0358010892445574e-06,
"loss": 0.4538,
"step": 200
},
{
"epoch": 1.2804878048780488,
"grad_norm": 2.3258135318756104,
"learning_rate": 1.0003284492019356e-06,
"loss": 0.4531,
"step": 210
},
{
"epoch": 1.3414634146341464,
"grad_norm": 2.654583215713501,
"learning_rate": 9.648558091593137e-07,
"loss": 0.426,
"step": 220
},
{
"epoch": 1.4024390243902438,
"grad_norm": 2.7538747787475586,
"learning_rate": 9.293831691166919e-07,
"loss": 0.4056,
"step": 230
},
{
"epoch": 1.4634146341463414,
"grad_norm": 1.887471079826355,
"learning_rate": 8.939105290740702e-07,
"loss": 0.448,
"step": 240
},
{
"epoch": 1.524390243902439,
"grad_norm": 1.7449885606765747,
"learning_rate": 8.584378890314483e-07,
"loss": 0.4132,
"step": 250
},
{
"epoch": 1.5853658536585367,
"grad_norm": 1.4998096227645874,
"learning_rate": 8.229652489888265e-07,
"loss": 0.4164,
"step": 260
},
{
"epoch": 1.6463414634146343,
"grad_norm": 4.520744800567627,
"learning_rate": 7.874926089462047e-07,
"loss": 0.4662,
"step": 270
},
{
"epoch": 1.7073170731707317,
"grad_norm": 1.9681720733642578,
"learning_rate": 7.520199689035828e-07,
"loss": 0.3977,
"step": 280
},
{
"epoch": 1.7682926829268293,
"grad_norm": 3.49233341217041,
"learning_rate": 7.16547328860961e-07,
"loss": 0.4133,
"step": 290
},
{
"epoch": 1.8292682926829267,
"grad_norm": 2.177455425262451,
"learning_rate": 6.810746888183392e-07,
"loss": 0.4403,
"step": 300
},
{
"epoch": 1.8902439024390243,
"grad_norm": 3.338192939758301,
"learning_rate": 6.456020487757174e-07,
"loss": 0.4154,
"step": 310
},
{
"epoch": 1.951219512195122,
"grad_norm": 2.005662441253662,
"learning_rate": 6.101294087330955e-07,
"loss": 0.4137,
"step": 320
},
{
"epoch": 2.0,
"eval_accuracy": 0.7888293802601377,
"eval_f1": 0.31,
"eval_loss": 0.399868905544281,
"eval_precision": 0.7045454545454546,
"eval_recall": 0.1987179487179487,
"eval_runtime": 4.9013,
"eval_samples_per_second": 266.666,
"eval_steps_per_second": 16.73,
"step": 328
},
{
"epoch": 2.0121951219512195,
"grad_norm": 1.8187142610549927,
"learning_rate": 5.746567686904737e-07,
"loss": 0.4231,
"step": 330
},
{
"epoch": 2.073170731707317,
"grad_norm": 2.4677720069885254,
"learning_rate": 5.391841286478519e-07,
"loss": 0.3832,
"step": 340
},
{
"epoch": 2.1341463414634148,
"grad_norm": 2.2957379817962646,
"learning_rate": 5.0371148860523e-07,
"loss": 0.3671,
"step": 350
},
{
"epoch": 2.1951219512195124,
"grad_norm": 1.6115466356277466,
"learning_rate": 4.682388485626082e-07,
"loss": 0.3776,
"step": 360
},
{
"epoch": 2.2560975609756095,
"grad_norm": 2.2633934020996094,
"learning_rate": 4.327662085199863e-07,
"loss": 0.4199,
"step": 370
},
{
"epoch": 2.317073170731707,
"grad_norm": 3.485372304916382,
"learning_rate": 3.972935684773645e-07,
"loss": 0.3784,
"step": 380
},
{
"epoch": 2.3780487804878048,
"grad_norm": 1.9504722356796265,
"learning_rate": 3.6182092843474265e-07,
"loss": 0.3538,
"step": 390
},
{
"epoch": 2.4390243902439024,
"grad_norm": 2.1430771350860596,
"learning_rate": 3.263482883921208e-07,
"loss": 0.4277,
"step": 400
},
{
"epoch": 2.5,
"grad_norm": 1.9325039386749268,
"learning_rate": 2.90875648349499e-07,
"loss": 0.4083,
"step": 410
},
{
"epoch": 2.5609756097560976,
"grad_norm": 1.617710828781128,
"learning_rate": 2.5540300830687715e-07,
"loss": 0.4297,
"step": 420
},
{
"epoch": 2.6219512195121952,
"grad_norm": 3.280532121658325,
"learning_rate": 2.1993036826425532e-07,
"loss": 0.4056,
"step": 430
},
{
"epoch": 2.682926829268293,
"grad_norm": 2.3135297298431396,
"learning_rate": 1.8445772822163352e-07,
"loss": 0.438,
"step": 440
},
{
"epoch": 2.7439024390243905,
"grad_norm": 2.195956230163574,
"learning_rate": 1.489850881790117e-07,
"loss": 0.4187,
"step": 450
},
{
"epoch": 2.8048780487804876,
"grad_norm": 2.371246099472046,
"learning_rate": 1.1351244813638987e-07,
"loss": 0.3459,
"step": 460
},
{
"epoch": 2.8658536585365852,
"grad_norm": 3.243722915649414,
"learning_rate": 7.803980809376802e-08,
"loss": 0.3754,
"step": 470
},
{
"epoch": 2.926829268292683,
"grad_norm": 2.472179412841797,
"learning_rate": 4.25671680511462e-08,
"loss": 0.4097,
"step": 480
},
{
"epoch": 2.9878048780487805,
"grad_norm": 2.2248072624206543,
"learning_rate": 7.094528008524367e-09,
"loss": 0.3784,
"step": 490
},
{
"epoch": 3.0,
"eval_accuracy": 0.791124713083397,
"eval_f1": 0.3357664233576642,
"eval_loss": 0.386940062046051,
"eval_precision": 0.696969696969697,
"eval_recall": 0.22115384615384615,
"eval_runtime": 4.9814,
"eval_samples_per_second": 262.374,
"eval_steps_per_second": 16.461,
"step": 492
}
],
"logging_steps": 10,
"max_steps": 492,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2071790114979840.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"learning_rate": 1.745253890096994e-06,
"num_train_epochs": 3,
"per_device_train_batch_size": 32,
"seed": 32
}
}