PaulTran's picture
Upload model files
8f206cc
raw
history blame
45 kB
{
"best_metric": 0.7120954003407156,
"best_model_checkpoint": "Distil4\\checkpoint-40348",
"epoch": 100.0,
"global_step": 52400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 4.950095419847329e-06,
"loss": 0.4749,
"step": 523
},
{
"epoch": 1.0,
"eval_accuracy": 0.4026503567787971,
"eval_f1": 0.5392528424472117,
"eval_loss": 0.4015094041824341,
"eval_roc_auc": 0.6891349750981157,
"eval_runtime": 9.4449,
"eval_samples_per_second": 103.866,
"eval_steps_per_second": 6.988,
"step": 524
},
{
"epoch": 2.0,
"learning_rate": 4.900190839694656e-06,
"loss": 0.3845,
"step": 1046
},
{
"epoch": 2.0,
"eval_accuracy": 0.5168195718654435,
"eval_f1": 0.6353754940711462,
"eval_loss": 0.36452510952949524,
"eval_roc_auc": 0.747088659888458,
"eval_runtime": 9.9987,
"eval_samples_per_second": 98.113,
"eval_steps_per_second": 6.601,
"step": 1048
},
{
"epoch": 2.99,
"learning_rate": 4.850286259541985e-06,
"loss": 0.3475,
"step": 1569
},
{
"epoch": 3.0,
"eval_accuracy": 0.5310907237512742,
"eval_f1": 0.6470871449205585,
"eval_loss": 0.3549170196056366,
"eval_roc_auc": 0.7563264556700557,
"eval_runtime": 10.07,
"eval_samples_per_second": 97.418,
"eval_steps_per_second": 6.554,
"step": 1572
},
{
"epoch": 3.99,
"learning_rate": 4.800381679389313e-06,
"loss": 0.326,
"step": 2092
},
{
"epoch": 4.0,
"eval_accuracy": 0.5545361875637105,
"eval_f1": 0.674766355140187,
"eval_loss": 0.3448670506477356,
"eval_roc_auc": 0.7760477152234284,
"eval_runtime": 10.1448,
"eval_samples_per_second": 96.7,
"eval_steps_per_second": 6.506,
"step": 2096
},
{
"epoch": 4.99,
"learning_rate": 4.750477099236642e-06,
"loss": 0.3083,
"step": 2615
},
{
"epoch": 5.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.6812674743709226,
"eval_loss": 0.34419235587120056,
"eval_roc_auc": 0.7803114456863509,
"eval_runtime": 10.1716,
"eval_samples_per_second": 96.445,
"eval_steps_per_second": 6.489,
"step": 2620
},
{
"epoch": 5.99,
"learning_rate": 4.700572519083969e-06,
"loss": 0.2952,
"step": 3138
},
{
"epoch": 6.0,
"eval_accuracy": 0.5718654434250765,
"eval_f1": 0.6965452847805789,
"eval_loss": 0.3325030505657196,
"eval_roc_auc": 0.789289320878566,
"eval_runtime": 10.2795,
"eval_samples_per_second": 95.433,
"eval_steps_per_second": 6.421,
"step": 3144
},
{
"epoch": 6.99,
"learning_rate": 4.650667938931298e-06,
"loss": 0.2799,
"step": 3661
},
{
"epoch": 7.0,
"eval_accuracy": 0.5728848114169215,
"eval_f1": 0.6865116279069767,
"eval_loss": 0.34394344687461853,
"eval_roc_auc": 0.7837168070505612,
"eval_runtime": 10.3609,
"eval_samples_per_second": 94.683,
"eval_steps_per_second": 6.37,
"step": 3668
},
{
"epoch": 7.98,
"learning_rate": 4.600763358778627e-06,
"loss": 0.2626,
"step": 4184
},
{
"epoch": 8.0,
"eval_accuracy": 0.5800203873598369,
"eval_f1": 0.692876965772433,
"eval_loss": 0.33631590008735657,
"eval_roc_auc": 0.7883041472539074,
"eval_runtime": 9.1916,
"eval_samples_per_second": 106.728,
"eval_steps_per_second": 7.18,
"step": 4192
},
{
"epoch": 8.98,
"learning_rate": 4.550858778625955e-06,
"loss": 0.2529,
"step": 4707
},
{
"epoch": 9.0,
"eval_accuracy": 0.5749235474006116,
"eval_f1": 0.6857670979667283,
"eval_loss": 0.34780624508857727,
"eval_roc_auc": 0.7840966468522641,
"eval_runtime": 8.7207,
"eval_samples_per_second": 112.491,
"eval_steps_per_second": 7.568,
"step": 4716
},
{
"epoch": 9.98,
"learning_rate": 4.500954198473283e-06,
"loss": 0.237,
"step": 5230
},
{
"epoch": 10.0,
"eval_accuracy": 0.5667686034658511,
"eval_f1": 0.68721251149954,
"eval_loss": 0.34721097350120544,
"eval_roc_auc": 0.7855741204011842,
"eval_runtime": 8.7533,
"eval_samples_per_second": 112.072,
"eval_steps_per_second": 7.54,
"step": 5240
},
{
"epoch": 10.98,
"learning_rate": 4.451049618320611e-06,
"loss": 0.2223,
"step": 5753
},
{
"epoch": 11.0,
"eval_accuracy": 0.5657492354740061,
"eval_f1": 0.6845698680018207,
"eval_loss": 0.3609465956687927,
"eval_roc_auc": 0.785313626035666,
"eval_runtime": 8.7365,
"eval_samples_per_second": 112.288,
"eval_steps_per_second": 7.555,
"step": 5764
},
{
"epoch": 11.98,
"learning_rate": 4.40114503816794e-06,
"loss": 0.2117,
"step": 6276
},
{
"epoch": 12.0,
"eval_accuracy": 0.581039755351682,
"eval_f1": 0.6981818181818182,
"eval_loss": 0.36286601424217224,
"eval_roc_auc": 0.7939185467398039,
"eval_runtime": 8.7731,
"eval_samples_per_second": 111.819,
"eval_steps_per_second": 7.523,
"step": 6288
},
{
"epoch": 12.98,
"learning_rate": 4.351240458015267e-06,
"loss": 0.2,
"step": 6799
},
{
"epoch": 13.0,
"eval_accuracy": 0.5851172273190621,
"eval_f1": 0.7062999112688554,
"eval_loss": 0.3647877275943756,
"eval_roc_auc": 0.8024597782929012,
"eval_runtime": 8.7204,
"eval_samples_per_second": 112.495,
"eval_steps_per_second": 7.568,
"step": 6812
},
{
"epoch": 13.97,
"learning_rate": 4.301335877862596e-06,
"loss": 0.1909,
"step": 7322
},
{
"epoch": 14.0,
"eval_accuracy": 0.5698267074413863,
"eval_f1": 0.7012058954890575,
"eval_loss": 0.3725411891937256,
"eval_roc_auc": 0.7982735076082716,
"eval_runtime": 8.798,
"eval_samples_per_second": 111.503,
"eval_steps_per_second": 7.502,
"step": 7336
},
{
"epoch": 14.97,
"learning_rate": 4.2514312977099246e-06,
"loss": 0.1782,
"step": 7845
},
{
"epoch": 15.0,
"eval_accuracy": 0.5800203873598369,
"eval_f1": 0.7042128603104212,
"eval_loss": 0.37900835275650024,
"eval_roc_auc": 0.8012003396754722,
"eval_runtime": 8.7328,
"eval_samples_per_second": 112.335,
"eval_steps_per_second": 7.558,
"step": 7860
},
{
"epoch": 15.97,
"learning_rate": 4.2015267175572526e-06,
"loss": 0.1653,
"step": 8368
},
{
"epoch": 16.0,
"eval_accuracy": 0.5739041794087666,
"eval_f1": 0.7040141155712396,
"eval_loss": 0.3948748707771301,
"eval_roc_auc": 0.8018475591563196,
"eval_runtime": 8.6978,
"eval_samples_per_second": 112.788,
"eval_steps_per_second": 7.588,
"step": 8384
},
{
"epoch": 16.97,
"learning_rate": 4.1516221374045806e-06,
"loss": 0.156,
"step": 8891
},
{
"epoch": 17.0,
"eval_accuracy": 0.5769622833843018,
"eval_f1": 0.7074468085106383,
"eval_loss": 0.39961278438568115,
"eval_roc_auc": 0.8033181473916137,
"eval_runtime": 8.6958,
"eval_samples_per_second": 112.813,
"eval_steps_per_second": 7.59,
"step": 8908
},
{
"epoch": 17.97,
"learning_rate": 4.1017175572519085e-06,
"loss": 0.1478,
"step": 9414
},
{
"epoch": 18.0,
"eval_accuracy": 0.5575942915392457,
"eval_f1": 0.6918402777777779,
"eval_loss": 0.41351279616355896,
"eval_roc_auc": 0.79633816070322,
"eval_runtime": 8.7585,
"eval_samples_per_second": 112.006,
"eval_steps_per_second": 7.536,
"step": 9432
},
{
"epoch": 18.96,
"learning_rate": 4.051812977099237e-06,
"loss": 0.1362,
"step": 9937
},
{
"epoch": 19.0,
"eval_accuracy": 0.5616717635066258,
"eval_f1": 0.7035263387026557,
"eval_loss": 0.4201168119907379,
"eval_roc_auc": 0.8034656078584379,
"eval_runtime": 8.7893,
"eval_samples_per_second": 111.613,
"eval_steps_per_second": 7.509,
"step": 9956
},
{
"epoch": 19.96,
"learning_rate": 4.001908396946565e-06,
"loss": 0.1316,
"step": 10460
},
{
"epoch": 20.0,
"eval_accuracy": 0.54638124362895,
"eval_f1": 0.6954270923209663,
"eval_loss": 0.4302760362625122,
"eval_roc_auc": 0.7995323724495651,
"eval_runtime": 8.7776,
"eval_samples_per_second": 111.762,
"eval_steps_per_second": 7.519,
"step": 10480
},
{
"epoch": 20.96,
"learning_rate": 3.952003816793893e-06,
"loss": 0.1189,
"step": 10983
},
{
"epoch": 21.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.6955767562879446,
"eval_loss": 0.4442458152770996,
"eval_roc_auc": 0.7988851529687178,
"eval_runtime": 9.0585,
"eval_samples_per_second": 108.296,
"eval_steps_per_second": 7.286,
"step": 11004
},
{
"epoch": 21.96,
"learning_rate": 3.902099236641222e-06,
"loss": 0.1153,
"step": 11506
},
{
"epoch": 22.0,
"eval_accuracy": 0.5800203873598369,
"eval_f1": 0.7025121198765976,
"eval_loss": 0.4538831412792206,
"eval_roc_auc": 0.8010173050882469,
"eval_runtime": 8.6472,
"eval_samples_per_second": 113.447,
"eval_steps_per_second": 7.633,
"step": 11528
},
{
"epoch": 22.96,
"learning_rate": 3.85219465648855e-06,
"loss": 0.1037,
"step": 12029
},
{
"epoch": 23.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.7012430347192455,
"eval_loss": 0.4749497175216675,
"eval_roc_auc": 0.804281517523123,
"eval_runtime": 8.6607,
"eval_samples_per_second": 113.27,
"eval_steps_per_second": 7.621,
"step": 12052
},
{
"epoch": 23.95,
"learning_rate": 3.802290076335878e-06,
"loss": 0.0968,
"step": 12552
},
{
"epoch": 24.0,
"eval_accuracy": 0.5606523955147809,
"eval_f1": 0.7047124945957631,
"eval_loss": 0.479137659072876,
"eval_roc_auc": 0.8052666911477818,
"eval_runtime": 8.7017,
"eval_samples_per_second": 112.737,
"eval_steps_per_second": 7.585,
"step": 12576
},
{
"epoch": 24.95,
"learning_rate": 3.752385496183206e-06,
"loss": 0.0923,
"step": 13075
},
{
"epoch": 25.0,
"eval_accuracy": 0.5555555555555556,
"eval_f1": 0.6873362445414847,
"eval_loss": 0.50052809715271,
"eval_roc_auc": 0.7925810745679466,
"eval_runtime": 8.625,
"eval_samples_per_second": 113.739,
"eval_steps_per_second": 7.652,
"step": 13100
},
{
"epoch": 25.95,
"learning_rate": 3.7024809160305346e-06,
"loss": 0.0846,
"step": 13598
},
{
"epoch": 26.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.6971770744225834,
"eval_loss": 0.5201558470726013,
"eval_roc_auc": 0.801924445158477,
"eval_runtime": 8.6427,
"eval_samples_per_second": 113.506,
"eval_steps_per_second": 7.636,
"step": 13624
},
{
"epoch": 26.95,
"learning_rate": 3.6525763358778626e-06,
"loss": 0.0807,
"step": 14121
},
{
"epoch": 27.0,
"eval_accuracy": 0.5688073394495413,
"eval_f1": 0.7090358841331603,
"eval_loss": 0.52068030834198,
"eval_roc_auc": 0.8080810630924239,
"eval_runtime": 8.6305,
"eval_samples_per_second": 113.667,
"eval_steps_per_second": 7.647,
"step": 14148
},
{
"epoch": 27.95,
"learning_rate": 3.602671755725191e-06,
"loss": 0.0748,
"step": 14644
},
{
"epoch": 28.0,
"eval_accuracy": 0.5728848114169215,
"eval_f1": 0.7102321582115221,
"eval_loss": 0.5269947052001953,
"eval_roc_auc": 0.8097203415115557,
"eval_runtime": 8.6597,
"eval_samples_per_second": 113.284,
"eval_steps_per_second": 7.622,
"step": 14672
},
{
"epoch": 28.94,
"learning_rate": 3.5527671755725195e-06,
"loss": 0.0727,
"step": 15167
},
{
"epoch": 29.0,
"eval_accuracy": 0.528032619775739,
"eval_f1": 0.6827004219409282,
"eval_loss": 0.5663571953773499,
"eval_roc_auc": 0.7942691239585964,
"eval_runtime": 8.6961,
"eval_samples_per_second": 112.809,
"eval_steps_per_second": 7.59,
"step": 15196
},
{
"epoch": 29.94,
"learning_rate": 3.5028625954198474e-06,
"loss": 0.0685,
"step": 15690
},
{
"epoch": 30.0,
"eval_accuracy": 0.5586136595310908,
"eval_f1": 0.696027633851468,
"eval_loss": 0.5751686096191406,
"eval_roc_auc": 0.7997997521287095,
"eval_runtime": 8.6393,
"eval_samples_per_second": 113.551,
"eval_steps_per_second": 7.639,
"step": 15720
},
{
"epoch": 30.94,
"learning_rate": 3.452958015267176e-06,
"loss": 0.0629,
"step": 16213
},
{
"epoch": 31.0,
"eval_accuracy": 0.5596330275229358,
"eval_f1": 0.7002606429192008,
"eval_loss": 0.5864209532737732,
"eval_roc_auc": 0.8016714098827202,
"eval_runtime": 8.6544,
"eval_samples_per_second": 113.352,
"eval_steps_per_second": 7.626,
"step": 16244
},
{
"epoch": 31.94,
"learning_rate": 3.403053435114504e-06,
"loss": 0.0586,
"step": 16736
},
{
"epoch": 32.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7002992731936725,
"eval_loss": 0.5988386273384094,
"eval_roc_auc": 0.8040422528746184,
"eval_runtime": 8.6401,
"eval_samples_per_second": 113.54,
"eval_steps_per_second": 7.639,
"step": 16768
},
{
"epoch": 32.94,
"learning_rate": 3.3531488549618323e-06,
"loss": 0.0573,
"step": 17259
},
{
"epoch": 33.0,
"eval_accuracy": 0.5596330275229358,
"eval_f1": 0.7038148306900984,
"eval_loss": 0.5976923108100891,
"eval_roc_auc": 0.8059701406899085,
"eval_runtime": 10.028,
"eval_samples_per_second": 97.827,
"eval_steps_per_second": 6.582,
"step": 17292
},
{
"epoch": 33.94,
"learning_rate": 3.3032442748091603e-06,
"loss": 0.0549,
"step": 17782
},
{
"epoch": 34.0,
"eval_accuracy": 0.5759429153924567,
"eval_f1": 0.7030091583078936,
"eval_loss": 0.6293498873710632,
"eval_roc_auc": 0.8028746184388699,
"eval_runtime": 9.2788,
"eval_samples_per_second": 105.725,
"eval_steps_per_second": 7.113,
"step": 17816
},
{
"epoch": 34.93,
"learning_rate": 3.2533396946564887e-06,
"loss": 0.0503,
"step": 18305
},
{
"epoch": 35.0,
"eval_accuracy": 0.5667686034658511,
"eval_f1": 0.7081545064377682,
"eval_loss": 0.630769670009613,
"eval_roc_auc": 0.8086227077643385,
"eval_runtime": 9.6294,
"eval_samples_per_second": 101.876,
"eval_steps_per_second": 6.854,
"step": 18340
},
{
"epoch": 35.93,
"learning_rate": 3.203435114503817e-06,
"loss": 0.0477,
"step": 18828
},
{
"epoch": 36.0,
"eval_accuracy": 0.527013251783894,
"eval_f1": 0.6998341625207297,
"eval_loss": 0.6515944004058838,
"eval_roc_auc": 0.8083547543090588,
"eval_runtime": 10.0399,
"eval_samples_per_second": 97.71,
"eval_steps_per_second": 6.574,
"step": 18864
},
{
"epoch": 36.93,
"learning_rate": 3.153530534351145e-06,
"loss": 0.0464,
"step": 19351
},
{
"epoch": 37.0,
"eval_accuracy": 0.5484199796126402,
"eval_f1": 0.7017984107068171,
"eval_loss": 0.6635262370109558,
"eval_roc_auc": 0.8083478689954327,
"eval_runtime": 9.7697,
"eval_samples_per_second": 100.412,
"eval_steps_per_second": 6.756,
"step": 19388
},
{
"epoch": 37.93,
"learning_rate": 3.1036259541984735e-06,
"loss": 0.0436,
"step": 19874
},
{
"epoch": 38.0,
"eval_accuracy": 0.563710499490316,
"eval_f1": 0.7022834984920293,
"eval_loss": 0.6706992387771606,
"eval_roc_auc": 0.8041971724312043,
"eval_runtime": 9.7315,
"eval_samples_per_second": 100.807,
"eval_steps_per_second": 6.782,
"step": 19912
},
{
"epoch": 38.93,
"learning_rate": 3.0537213740458015e-06,
"loss": 0.0394,
"step": 20397
},
{
"epoch": 39.0,
"eval_accuracy": 0.527013251783894,
"eval_f1": 0.6885798567214496,
"eval_loss": 0.7015945911407471,
"eval_roc_auc": 0.7983710495513071,
"eval_runtime": 9.8867,
"eval_samples_per_second": 99.224,
"eval_steps_per_second": 6.676,
"step": 20436
},
{
"epoch": 39.92,
"learning_rate": 3.00381679389313e-06,
"loss": 0.038,
"step": 20920
},
{
"epoch": 40.0,
"eval_accuracy": 0.5372069317023446,
"eval_f1": 0.6997894736842106,
"eval_loss": 0.7073464393615723,
"eval_roc_auc": 0.8059839113171605,
"eval_runtime": 9.9063,
"eval_samples_per_second": 99.028,
"eval_steps_per_second": 6.662,
"step": 20960
},
{
"epoch": 40.92,
"learning_rate": 2.953912213740458e-06,
"loss": 0.036,
"step": 21443
},
{
"epoch": 41.0,
"eval_accuracy": 0.5474006116207951,
"eval_f1": 0.6956521739130435,
"eval_loss": 0.7225540280342102,
"eval_roc_auc": 0.7999896720295611,
"eval_runtime": 9.951,
"eval_samples_per_second": 98.583,
"eval_steps_per_second": 6.632,
"step": 21484
},
{
"epoch": 41.92,
"learning_rate": 2.9040076335877863e-06,
"loss": 0.0341,
"step": 21966
},
{
"epoch": 42.0,
"eval_accuracy": 0.564729867482161,
"eval_f1": 0.7089262613195343,
"eval_loss": 0.7202900052070618,
"eval_roc_auc": 0.8084046728328477,
"eval_runtime": 9.6425,
"eval_samples_per_second": 101.737,
"eval_steps_per_second": 6.845,
"step": 22008
},
{
"epoch": 42.92,
"learning_rate": 2.8541030534351148e-06,
"loss": 0.0323,
"step": 22489
},
{
"epoch": 43.0,
"eval_accuracy": 0.5606523955147809,
"eval_f1": 0.7040552200172563,
"eval_loss": 0.7239031791687012,
"eval_roc_auc": 0.8051611163388492,
"eval_runtime": 9.9111,
"eval_samples_per_second": 98.98,
"eval_steps_per_second": 6.659,
"step": 22532
},
{
"epoch": 43.92,
"learning_rate": 2.8041984732824428e-06,
"loss": 0.0302,
"step": 23012
},
{
"epoch": 44.0,
"eval_accuracy": 0.5535168195718655,
"eval_f1": 0.6988466467321658,
"eval_loss": 0.7510971426963806,
"eval_roc_auc": 0.8032119988065456,
"eval_runtime": 9.9666,
"eval_samples_per_second": 98.429,
"eval_steps_per_second": 6.622,
"step": 23056
},
{
"epoch": 44.91,
"learning_rate": 2.754293893129771e-06,
"loss": 0.0286,
"step": 23535
},
{
"epoch": 45.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7002132196162046,
"eval_loss": 0.7605226635932922,
"eval_roc_auc": 0.8043658626150422,
"eval_runtime": 9.9921,
"eval_samples_per_second": 98.178,
"eval_steps_per_second": 6.605,
"step": 23580
},
{
"epoch": 45.91,
"learning_rate": 2.704389312977099e-06,
"loss": 0.0275,
"step": 24058
},
{
"epoch": 46.0,
"eval_accuracy": 0.5484199796126402,
"eval_f1": 0.6999573196756296,
"eval_loss": 0.774695634841919,
"eval_roc_auc": 0.8040703679052581,
"eval_runtime": 9.9221,
"eval_samples_per_second": 98.87,
"eval_steps_per_second": 6.652,
"step": 24104
},
{
"epoch": 46.91,
"learning_rate": 2.6544847328244276e-06,
"loss": 0.026,
"step": 24581
},
{
"epoch": 47.0,
"eval_accuracy": 0.5372069317023446,
"eval_f1": 0.6884827879303017,
"eval_loss": 0.7950236797332764,
"eval_roc_auc": 0.7971047256202519,
"eval_runtime": 10.0992,
"eval_samples_per_second": 97.137,
"eval_steps_per_second": 6.535,
"step": 24628
},
{
"epoch": 47.91,
"learning_rate": 2.6045801526717556e-06,
"loss": 0.0247,
"step": 25104
},
{
"epoch": 48.0,
"eval_accuracy": 0.5474006116207951,
"eval_f1": 0.6989293361884368,
"eval_loss": 0.8052034974098206,
"eval_roc_auc": 0.802888389066122,
"eval_runtime": 10.0437,
"eval_samples_per_second": 97.673,
"eval_steps_per_second": 6.571,
"step": 25152
},
{
"epoch": 48.91,
"learning_rate": 2.554675572519084e-06,
"loss": 0.0223,
"step": 25627
},
{
"epoch": 49.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7021276595744681,
"eval_loss": 0.8275096416473389,
"eval_roc_auc": 0.8059489109728948,
"eval_runtime": 9.9609,
"eval_samples_per_second": 98.485,
"eval_steps_per_second": 6.626,
"step": 25676
},
{
"epoch": 49.9,
"learning_rate": 2.5047709923664124e-06,
"loss": 0.0239,
"step": 26150
},
{
"epoch": 50.0,
"eval_accuracy": 0.5382262996941896,
"eval_f1": 0.6999168744804655,
"eval_loss": 0.8239336609840393,
"eval_roc_auc": 0.8080311445686351,
"eval_runtime": 9.9615,
"eval_samples_per_second": 98.479,
"eval_steps_per_second": 6.626,
"step": 26200
},
{
"epoch": 50.9,
"learning_rate": 2.4548664122137404e-06,
"loss": 0.023,
"step": 26673
},
{
"epoch": 51.0,
"eval_accuracy": 0.5484199796126402,
"eval_f1": 0.7054823629409264,
"eval_loss": 0.8209096193313599,
"eval_roc_auc": 0.8083622133988203,
"eval_runtime": 10.0023,
"eval_samples_per_second": 98.078,
"eval_steps_per_second": 6.598,
"step": 26724
},
{
"epoch": 51.9,
"learning_rate": 2.404961832061069e-06,
"loss": 0.0199,
"step": 27196
},
{
"epoch": 52.0,
"eval_accuracy": 0.5484199796126402,
"eval_f1": 0.7072438919845693,
"eval_loss": 0.828449010848999,
"eval_roc_auc": 0.8082216382456221,
"eval_runtime": 10.055,
"eval_samples_per_second": 97.563,
"eval_steps_per_second": 6.564,
"step": 27248
},
{
"epoch": 52.9,
"learning_rate": 2.3550572519083973e-06,
"loss": 0.0197,
"step": 27719
},
{
"epoch": 53.0,
"eval_accuracy": 0.563710499490316,
"eval_f1": 0.7059333044608056,
"eval_loss": 0.8516786098480225,
"eval_roc_auc": 0.8058014505060705,
"eval_runtime": 10.0044,
"eval_samples_per_second": 98.057,
"eval_steps_per_second": 6.597,
"step": 27772
},
{
"epoch": 53.9,
"learning_rate": 2.3051526717557252e-06,
"loss": 0.0168,
"step": 28242
},
{
"epoch": 54.0,
"eval_accuracy": 0.5351681957186545,
"eval_f1": 0.699581589958159,
"eval_loss": 0.8833754658699036,
"eval_roc_auc": 0.8067929356682197,
"eval_runtime": 10.0433,
"eval_samples_per_second": 97.678,
"eval_steps_per_second": 6.572,
"step": 28296
},
{
"epoch": 54.9,
"learning_rate": 2.2552480916030537e-06,
"loss": 0.018,
"step": 28765
},
{
"epoch": 55.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7060839760068551,
"eval_loss": 0.8679118156433105,
"eval_roc_auc": 0.8075250740171215,
"eval_runtime": 9.9308,
"eval_samples_per_second": 98.784,
"eval_steps_per_second": 6.646,
"step": 28820
},
{
"epoch": 55.89,
"learning_rate": 2.2053435114503817e-06,
"loss": 0.0169,
"step": 29288
},
{
"epoch": 56.0,
"eval_accuracy": 0.5575942915392457,
"eval_f1": 0.7040417209908736,
"eval_loss": 0.8795809149742126,
"eval_roc_auc": 0.8040565972780059,
"eval_runtime": 9.9865,
"eval_samples_per_second": 98.232,
"eval_steps_per_second": 6.609,
"step": 29344
},
{
"epoch": 56.89,
"learning_rate": 2.15543893129771e-06,
"loss": 0.0168,
"step": 29811
},
{
"epoch": 57.0,
"eval_accuracy": 0.5433231396534148,
"eval_f1": 0.6988879384088965,
"eval_loss": 0.9083885550498962,
"eval_roc_auc": 0.8030501939363338,
"eval_runtime": 10.0982,
"eval_samples_per_second": 97.146,
"eval_steps_per_second": 6.536,
"step": 29868
},
{
"epoch": 57.89,
"learning_rate": 2.105534351145038e-06,
"loss": 0.0156,
"step": 30334
},
{
"epoch": 58.0,
"eval_accuracy": 0.5606523955147809,
"eval_f1": 0.710651142733937,
"eval_loss": 0.9000456929206848,
"eval_roc_auc": 0.8095304216107044,
"eval_runtime": 10.0091,
"eval_samples_per_second": 98.011,
"eval_steps_per_second": 6.594,
"step": 30392
},
{
"epoch": 58.89,
"learning_rate": 2.055629770992367e-06,
"loss": 0.0138,
"step": 30857
},
{
"epoch": 59.0,
"eval_accuracy": 0.5382262996941896,
"eval_f1": 0.6970849176172369,
"eval_loss": 0.9262450337409973,
"eval_roc_auc": 0.8036761837001675,
"eval_runtime": 9.8719,
"eval_samples_per_second": 99.373,
"eval_steps_per_second": 6.686,
"step": 30916
},
{
"epoch": 59.89,
"learning_rate": 2.005725190839695e-06,
"loss": 0.0139,
"step": 31380
},
{
"epoch": 60.0,
"eval_accuracy": 0.5596330275229358,
"eval_f1": 0.7065868263473054,
"eval_loss": 0.923125684261322,
"eval_roc_auc": 0.8081160634366895,
"eval_runtime": 10.0204,
"eval_samples_per_second": 97.901,
"eval_steps_per_second": 6.587,
"step": 31440
},
{
"epoch": 60.88,
"learning_rate": 1.955820610687023e-06,
"loss": 0.0155,
"step": 31903
},
{
"epoch": 61.0,
"eval_accuracy": 0.5596330275229358,
"eval_f1": 0.7081545064377682,
"eval_loss": 0.9300869107246399,
"eval_roc_auc": 0.8086227077643385,
"eval_runtime": 9.9625,
"eval_samples_per_second": 98.47,
"eval_steps_per_second": 6.625,
"step": 31964
},
{
"epoch": 61.88,
"learning_rate": 1.9059160305343513e-06,
"loss": 0.0149,
"step": 32426
},
{
"epoch": 62.0,
"eval_accuracy": 0.5504587155963303,
"eval_f1": 0.7104930467762326,
"eval_loss": 0.9461256861686707,
"eval_roc_auc": 0.8130057836634459,
"eval_runtime": 10.0052,
"eval_samples_per_second": 98.049,
"eval_steps_per_second": 6.597,
"step": 32488
},
{
"epoch": 62.88,
"learning_rate": 1.8560114503816795e-06,
"loss": 0.0124,
"step": 32949
},
{
"epoch": 63.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.7081380485726461,
"eval_loss": 0.9584424495697021,
"eval_roc_auc": 0.809727226825182,
"eval_runtime": 9.989,
"eval_samples_per_second": 98.208,
"eval_steps_per_second": 6.607,
"step": 33012
},
{
"epoch": 63.88,
"learning_rate": 1.8061068702290077e-06,
"loss": 0.011,
"step": 33472
},
{
"epoch": 64.0,
"eval_accuracy": 0.5545361875637105,
"eval_f1": 0.7051226861816616,
"eval_loss": 0.9570773243904114,
"eval_roc_auc": 0.8061812903077735,
"eval_runtime": 10.0472,
"eval_samples_per_second": 97.639,
"eval_steps_per_second": 6.569,
"step": 33536
},
{
"epoch": 64.88,
"learning_rate": 1.756202290076336e-06,
"loss": 0.0114,
"step": 33995
},
{
"epoch": 65.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.7043701799485862,
"eval_loss": 0.9560405015945435,
"eval_roc_auc": 0.8063993252392647,
"eval_runtime": 10.0628,
"eval_samples_per_second": 97.488,
"eval_steps_per_second": 6.559,
"step": 34060
},
{
"epoch": 65.87,
"learning_rate": 1.7062977099236644e-06,
"loss": 0.011,
"step": 34518
},
{
"epoch": 66.0,
"eval_accuracy": 0.5504587155963303,
"eval_f1": 0.7023809523809524,
"eval_loss": 0.9797949194908142,
"eval_roc_auc": 0.8062444056826787,
"eval_runtime": 10.0681,
"eval_samples_per_second": 97.436,
"eval_steps_per_second": 6.555,
"step": 34584
},
{
"epoch": 66.87,
"learning_rate": 1.6563931297709926e-06,
"loss": 0.0107,
"step": 35041
},
{
"epoch": 67.0,
"eval_accuracy": 0.545361875637105,
"eval_f1": 0.6986301369863014,
"eval_loss": 0.9825329780578613,
"eval_roc_auc": 0.8027546992265496,
"eval_runtime": 10.0516,
"eval_samples_per_second": 97.596,
"eval_steps_per_second": 6.566,
"step": 35108
},
{
"epoch": 67.87,
"learning_rate": 1.6064885496183208e-06,
"loss": 0.0091,
"step": 35564
},
{
"epoch": 68.0,
"eval_accuracy": 0.5606523955147809,
"eval_f1": 0.7040552200172563,
"eval_loss": 0.9886102676391602,
"eval_roc_auc": 0.8051611163388492,
"eval_runtime": 10.0044,
"eval_samples_per_second": 98.056,
"eval_steps_per_second": 6.597,
"step": 35632
},
{
"epoch": 68.87,
"learning_rate": 1.556583969465649e-06,
"loss": 0.0095,
"step": 36087
},
{
"epoch": 69.0,
"eval_accuracy": 0.5392456676860347,
"eval_f1": 0.7075,
"eval_loss": 1.007102131843567,
"eval_roc_auc": 0.8127734043285672,
"eval_runtime": 10.0072,
"eval_samples_per_second": 98.029,
"eval_steps_per_second": 6.595,
"step": 36156
},
{
"epoch": 69.87,
"learning_rate": 1.5066793893129772e-06,
"loss": 0.0088,
"step": 36610
},
{
"epoch": 70.0,
"eval_accuracy": 0.5443425076452599,
"eval_f1": 0.7051336444633007,
"eval_loss": 1.004128098487854,
"eval_roc_auc": 0.8083903284294599,
"eval_runtime": 10.0021,
"eval_samples_per_second": 98.079,
"eval_steps_per_second": 6.599,
"step": 36680
},
{
"epoch": 70.86,
"learning_rate": 1.4567748091603054e-06,
"loss": 0.0102,
"step": 37133
},
{
"epoch": 71.0,
"eval_accuracy": 0.5474006116207951,
"eval_f1": 0.7023354564755838,
"eval_loss": 1.023705244064331,
"eval_roc_auc": 0.8064062105528907,
"eval_runtime": 10.0301,
"eval_samples_per_second": 97.806,
"eval_steps_per_second": 6.58,
"step": 37204
},
{
"epoch": 71.86,
"learning_rate": 1.4068702290076336e-06,
"loss": 0.0086,
"step": 37656
},
{
"epoch": 72.0,
"eval_accuracy": 0.5606523955147809,
"eval_f1": 0.7097887020267357,
"eval_loss": 1.0078336000442505,
"eval_roc_auc": 0.808967547221776,
"eval_runtime": 10.0138,
"eval_samples_per_second": 97.964,
"eval_steps_per_second": 6.591,
"step": 37728
},
{
"epoch": 72.86,
"learning_rate": 1.356965648854962e-06,
"loss": 0.0084,
"step": 38179
},
{
"epoch": 73.0,
"eval_accuracy": 0.5504587155963303,
"eval_f1": 0.7039249146757679,
"eval_loss": 1.0251305103302002,
"eval_roc_auc": 0.8067510500103279,
"eval_runtime": 9.9979,
"eval_samples_per_second": 98.12,
"eval_steps_per_second": 6.601,
"step": 38252
},
{
"epoch": 73.86,
"learning_rate": 1.3070610687022902e-06,
"loss": 0.0084,
"step": 38702
},
{
"epoch": 74.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.7060839760068551,
"eval_loss": 1.0233700275421143,
"eval_roc_auc": 0.8075250740171215,
"eval_runtime": 10.0267,
"eval_samples_per_second": 97.839,
"eval_steps_per_second": 6.582,
"step": 38776
},
{
"epoch": 74.86,
"learning_rate": 1.2571564885496184e-06,
"loss": 0.0076,
"step": 39225
},
{
"epoch": 75.0,
"eval_accuracy": 0.5433231396534148,
"eval_f1": 0.7029787234042555,
"eval_loss": 1.0505975484848022,
"eval_roc_auc": 0.8065117853618233,
"eval_runtime": 9.9951,
"eval_samples_per_second": 98.148,
"eval_steps_per_second": 6.603,
"step": 39300
},
{
"epoch": 75.85,
"learning_rate": 1.2072519083969466e-06,
"loss": 0.0089,
"step": 39748
},
{
"epoch": 76.0,
"eval_accuracy": 0.5575942915392457,
"eval_f1": 0.7084398976982098,
"eval_loss": 1.0305790901184082,
"eval_roc_auc": 0.8098609166647541,
"eval_runtime": 10.0015,
"eval_samples_per_second": 98.086,
"eval_steps_per_second": 6.599,
"step": 39824
},
{
"epoch": 76.85,
"learning_rate": 1.1573473282442748e-06,
"loss": 0.0074,
"step": 40271
},
{
"epoch": 77.0,
"eval_accuracy": 0.5514780835881753,
"eval_f1": 0.7120954003407156,
"eval_loss": 1.037413477897644,
"eval_roc_auc": 0.8124079089302518,
"eval_runtime": 10.0466,
"eval_samples_per_second": 97.645,
"eval_steps_per_second": 6.569,
"step": 40348
},
{
"epoch": 77.85,
"learning_rate": 1.1074427480916033e-06,
"loss": 0.0064,
"step": 40794
},
{
"epoch": 78.0,
"eval_accuracy": 0.563710499490316,
"eval_f1": 0.7064418504107222,
"eval_loss": 1.0435516834259033,
"eval_roc_auc": 0.8063924399256387,
"eval_runtime": 9.9844,
"eval_samples_per_second": 98.254,
"eval_steps_per_second": 6.61,
"step": 40872
},
{
"epoch": 78.85,
"learning_rate": 1.0575381679389315e-06,
"loss": 0.0079,
"step": 41317
},
{
"epoch": 79.0,
"eval_accuracy": 0.5535168195718655,
"eval_f1": 0.7080479452054794,
"eval_loss": 1.0525715351104736,
"eval_roc_auc": 0.8089463175047622,
"eval_runtime": 10.0182,
"eval_samples_per_second": 97.922,
"eval_steps_per_second": 6.588,
"step": 41396
},
{
"epoch": 79.85,
"learning_rate": 1.0076335877862597e-06,
"loss": 0.0059,
"step": 41840
},
{
"epoch": 80.0,
"eval_accuracy": 0.545361875637105,
"eval_f1": 0.7050359712230215,
"eval_loss": 1.0556447505950928,
"eval_roc_auc": 0.8087139381698836,
"eval_runtime": 9.9919,
"eval_samples_per_second": 98.18,
"eval_steps_per_second": 6.605,
"step": 41920
},
{
"epoch": 80.85,
"learning_rate": 9.577290076335879e-07,
"loss": 0.0063,
"step": 42363
},
{
"epoch": 81.0,
"eval_accuracy": 0.5412844036697247,
"eval_f1": 0.7034834324553951,
"eval_loss": 1.0627212524414062,
"eval_roc_auc": 0.8071027747813913,
"eval_runtime": 10.1039,
"eval_samples_per_second": 97.091,
"eval_steps_per_second": 6.532,
"step": 42444
},
{
"epoch": 81.84,
"learning_rate": 9.078244274809162e-07,
"loss": 0.0064,
"step": 42886
},
{
"epoch": 82.0,
"eval_accuracy": 0.5382262996941896,
"eval_f1": 0.7048903878583473,
"eval_loss": 1.0669602155685425,
"eval_roc_auc": 0.8091993527805191,
"eval_runtime": 10.2231,
"eval_samples_per_second": 95.96,
"eval_steps_per_second": 6.456,
"step": 42968
},
{
"epoch": 82.84,
"learning_rate": 8.579198473282444e-07,
"loss": 0.0064,
"step": 43409
},
{
"epoch": 83.0,
"eval_accuracy": 0.5575942915392457,
"eval_f1": 0.70824434002563,
"eval_loss": 1.0584640502929688,
"eval_roc_auc": 0.8094036170847583,
"eval_runtime": 10.0727,
"eval_samples_per_second": 97.392,
"eval_steps_per_second": 6.552,
"step": 43492
},
{
"epoch": 83.84,
"learning_rate": 8.080152671755725e-07,
"loss": 0.006,
"step": 43932
},
{
"epoch": 84.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7100340136054422,
"eval_loss": 1.0683461427688599,
"eval_roc_auc": 0.8113102751830346,
"eval_runtime": 10.0298,
"eval_samples_per_second": 97.809,
"eval_steps_per_second": 6.58,
"step": 44016
},
{
"epoch": 84.84,
"learning_rate": 7.581106870229009e-07,
"loss": 0.0056,
"step": 44455
},
{
"epoch": 85.0,
"eval_accuracy": 0.5484199796126402,
"eval_f1": 0.7041294167730949,
"eval_loss": 1.0729014873504639,
"eval_roc_auc": 0.8072083495903238,
"eval_runtime": 10.0817,
"eval_samples_per_second": 97.305,
"eval_steps_per_second": 6.547,
"step": 44540
},
{
"epoch": 85.84,
"learning_rate": 7.082061068702291e-07,
"loss": 0.0063,
"step": 44978
},
{
"epoch": 86.0,
"eval_accuracy": 0.564729867482161,
"eval_f1": 0.7094274644855791,
"eval_loss": 1.0694997310638428,
"eval_roc_auc": 0.8089956622524155,
"eval_runtime": 10.0647,
"eval_samples_per_second": 97.469,
"eval_steps_per_second": 6.558,
"step": 45064
},
{
"epoch": 86.83,
"learning_rate": 6.583015267175573e-07,
"loss": 0.0051,
"step": 45501
},
{
"epoch": 87.0,
"eval_accuracy": 0.5555555555555556,
"eval_f1": 0.7070967741935484,
"eval_loss": 1.0698884725570679,
"eval_roc_auc": 0.8076025337954145,
"eval_runtime": 10.0544,
"eval_samples_per_second": 97.569,
"eval_steps_per_second": 6.564,
"step": 45588
},
{
"epoch": 87.83,
"learning_rate": 6.083969465648855e-07,
"loss": 0.0059,
"step": 46024
},
{
"epoch": 88.0,
"eval_accuracy": 0.5494393476044852,
"eval_f1": 0.7032119914346895,
"eval_loss": 1.073889970779419,
"eval_roc_auc": 0.8057027610107641,
"eval_runtime": 10.0443,
"eval_samples_per_second": 97.668,
"eval_steps_per_second": 6.571,
"step": 46112
},
{
"epoch": 88.83,
"learning_rate": 5.584923664122137e-07,
"loss": 0.0067,
"step": 46547
},
{
"epoch": 89.0,
"eval_accuracy": 0.5535168195718655,
"eval_f1": 0.7074422583404619,
"eval_loss": 1.0765037536621094,
"eval_roc_auc": 0.808678937825618,
"eval_runtime": 10.0208,
"eval_samples_per_second": 97.897,
"eval_steps_per_second": 6.586,
"step": 46636
},
{
"epoch": 89.83,
"learning_rate": 5.085877862595421e-07,
"loss": 0.0055,
"step": 47070
},
{
"epoch": 90.0,
"eval_accuracy": 0.5423037716615698,
"eval_f1": 0.705531914893617,
"eval_loss": 1.0747418403625488,
"eval_roc_auc": 0.8082004085286085,
"eval_runtime": 10.0179,
"eval_samples_per_second": 97.924,
"eval_steps_per_second": 6.588,
"step": 47160
},
{
"epoch": 90.83,
"learning_rate": 4.586832061068703e-07,
"loss": 0.0041,
"step": 47593
},
{
"epoch": 91.0,
"eval_accuracy": 0.5524974515800204,
"eval_f1": 0.7061855670103093,
"eval_loss": 1.077362060546875,
"eval_roc_auc": 0.8072014642766979,
"eval_runtime": 9.974,
"eval_samples_per_second": 98.356,
"eval_steps_per_second": 6.617,
"step": 47684
},
{
"epoch": 91.82,
"learning_rate": 4.0877862595419847e-07,
"loss": 0.0051,
"step": 48116
},
{
"epoch": 92.0,
"eval_accuracy": 0.5565749235474006,
"eval_f1": 0.706792777300086,
"eval_loss": 1.0821139812469482,
"eval_roc_auc": 0.8074688439558423,
"eval_runtime": 9.9576,
"eval_samples_per_second": 98.517,
"eval_steps_per_second": 6.628,
"step": 48208
},
{
"epoch": 92.82,
"learning_rate": 3.588740458015268e-07,
"loss": 0.0053,
"step": 48639
},
{
"epoch": 93.0,
"eval_accuracy": 0.5535168195718655,
"eval_f1": 0.7076526225279449,
"eval_loss": 1.0813453197479248,
"eval_roc_auc": 0.8080317183447706,
"eval_runtime": 10.0101,
"eval_samples_per_second": 98.001,
"eval_steps_per_second": 6.593,
"step": 48732
},
{
"epoch": 93.82,
"learning_rate": 3.08969465648855e-07,
"loss": 0.0052,
"step": 49162
},
{
"epoch": 94.0,
"eval_accuracy": 0.5494393476044852,
"eval_f1": 0.7078364565587735,
"eval_loss": 1.0872832536697388,
"eval_roc_auc": 0.8095935369856098,
"eval_runtime": 10.0385,
"eval_samples_per_second": 97.724,
"eval_steps_per_second": 6.575,
"step": 49256
},
{
"epoch": 94.82,
"learning_rate": 2.5906488549618325e-07,
"loss": 0.0049,
"step": 49685
},
{
"epoch": 95.0,
"eval_accuracy": 0.5504587155963303,
"eval_f1": 0.7095681625740897,
"eval_loss": 1.0950355529785156,
"eval_roc_auc": 0.8116619999540979,
"eval_runtime": 10.0471,
"eval_samples_per_second": 97.64,
"eval_steps_per_second": 6.569,
"step": 49780
},
{
"epoch": 95.82,
"learning_rate": 2.0916030534351148e-07,
"loss": 0.0055,
"step": 50208
},
{
"epoch": 96.0,
"eval_accuracy": 0.54638124362895,
"eval_f1": 0.7064846416382252,
"eval_loss": 1.091185450553894,
"eval_roc_auc": 0.8084396731771133,
"eval_runtime": 10.0192,
"eval_samples_per_second": 97.912,
"eval_steps_per_second": 6.587,
"step": 50304
},
{
"epoch": 96.81,
"learning_rate": 1.5925572519083971e-07,
"loss": 0.0048,
"step": 50731
},
{
"epoch": 97.0,
"eval_accuracy": 0.5555555555555556,
"eval_f1": 0.70926243567753,
"eval_loss": 1.0918930768966675,
"eval_roc_auc": 0.809481076863051,
"eval_runtime": 10.1024,
"eval_samples_per_second": 97.106,
"eval_steps_per_second": 6.533,
"step": 50828
},
{
"epoch": 97.81,
"learning_rate": 1.0935114503816793e-07,
"loss": 0.0052,
"step": 51254
},
{
"epoch": 98.0,
"eval_accuracy": 0.5514780835881753,
"eval_f1": 0.7080479452054794,
"eval_loss": 1.0924346446990967,
"eval_roc_auc": 0.8089463175047622,
"eval_runtime": 10.042,
"eval_samples_per_second": 97.689,
"eval_steps_per_second": 6.572,
"step": 51352
},
{
"epoch": 98.81,
"learning_rate": 5.9446564885496193e-08,
"loss": 0.0041,
"step": 51777
},
{
"epoch": 99.0,
"eval_accuracy": 0.5504587155963303,
"eval_f1": 0.70824434002563,
"eval_loss": 1.0939857959747314,
"eval_roc_auc": 0.8094036170847583,
"eval_runtime": 10.0515,
"eval_samples_per_second": 97.598,
"eval_steps_per_second": 6.566,
"step": 51876
},
{
"epoch": 99.81,
"learning_rate": 9.541984732824428e-09,
"loss": 0.0043,
"step": 52300
},
{
"epoch": 100.0,
"eval_accuracy": 0.5514780835881753,
"eval_f1": 0.7073378839590444,
"eval_loss": 1.0944114923477173,
"eval_roc_auc": 0.8090025475660416,
"eval_runtime": 10.0289,
"eval_samples_per_second": 97.818,
"eval_steps_per_second": 6.581,
"step": 52400
}
],
"max_steps": 52400,
"num_train_epochs": 100,
"total_flos": 1.03939481527296e+17,
"trial_name": null,
"trial_params": null
}