{
  "best_metric": 0.834983498349835,
  "best_model_checkpoint": "vit-base-patch16-224-finetuned-food101/checkpoint-1596",
  "epoch": 2.995776630689817,
  "eval_steps": 500,
  "global_step": 1596,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018770530267480056,
      "grad_norm": 2.9933698177337646,
      "learning_rate": 3.125e-06,
      "loss": 4.739,
      "step": 10
    },
    {
      "epoch": 0.03754106053496011,
      "grad_norm": 3.0962929725646973,
      "learning_rate": 6.25e-06,
      "loss": 4.7159,
      "step": 20
    },
    {
      "epoch": 0.05631159080244017,
      "grad_norm": 3.2671728134155273,
      "learning_rate": 9.375000000000001e-06,
      "loss": 4.6674,
      "step": 30
    },
    {
      "epoch": 0.07508212106992022,
      "grad_norm": 3.073523759841919,
      "learning_rate": 1.25e-05,
      "loss": 4.6293,
      "step": 40
    },
    {
      "epoch": 0.09385265133740028,
      "grad_norm": 3.1302390098571777,
      "learning_rate": 1.5625e-05,
      "loss": 4.5709,
      "step": 50
    },
    {
      "epoch": 0.11262318160488034,
      "grad_norm": 2.9043614864349365,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 4.4634,
      "step": 60
    },
    {
      "epoch": 0.1313937118723604,
      "grad_norm": 3.4175333976745605,
      "learning_rate": 2.1875e-05,
      "loss": 4.3879,
      "step": 70
    },
    {
      "epoch": 0.15016424213984045,
      "grad_norm": 3.323888063430786,
      "learning_rate": 2.5e-05,
      "loss": 4.2587,
      "step": 80
    },
    {
      "epoch": 0.1689347724073205,
      "grad_norm": 3.0213475227355957,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 4.0885,
      "step": 90
    },
    {
      "epoch": 0.18770530267480057,
      "grad_norm": 3.02178955078125,
      "learning_rate": 3.125e-05,
      "loss": 3.9028,
      "step": 100
    },
    {
      "epoch": 0.2064758329422806,
      "grad_norm": 3.3495538234710693,
      "learning_rate": 3.4375e-05,
      "loss": 3.7353,
      "step": 110
    },
    {
      "epoch": 0.22524636320976069,
      "grad_norm": 3.1515848636627197,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 3.4697,
      "step": 120
    },
    {
      "epoch": 0.24401689347724073,
      "grad_norm": 3.284679889678955,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 3.2815,
      "step": 130
    },
    {
      "epoch": 0.2627874237447208,
      "grad_norm": 3.226576805114746,
      "learning_rate": 4.375e-05,
      "loss": 2.9895,
      "step": 140
    },
    {
      "epoch": 0.28155795401220085,
      "grad_norm": 3.181521415710449,
      "learning_rate": 4.6875e-05,
      "loss": 2.7421,
      "step": 150
    },
    {
      "epoch": 0.3003284842796809,
      "grad_norm": 3.1563026905059814,
      "learning_rate": 5e-05,
      "loss": 2.587,
      "step": 160
    },
    {
      "epoch": 0.31909901454716094,
      "grad_norm": 3.2322466373443604,
      "learning_rate": 4.965181058495822e-05,
      "loss": 2.3671,
      "step": 170
    },
    {
      "epoch": 0.337869544814641,
      "grad_norm": 3.2258055210113525,
      "learning_rate": 4.930362116991643e-05,
      "loss": 2.1425,
      "step": 180
    },
    {
      "epoch": 0.3566400750821211,
      "grad_norm": 3.141265392303467,
      "learning_rate": 4.895543175487465e-05,
      "loss": 2.0185,
      "step": 190
    },
    {
      "epoch": 0.37541060534960113,
      "grad_norm": 3.157705307006836,
      "learning_rate": 4.860724233983287e-05,
      "loss": 1.9035,
      "step": 200
    },
    {
      "epoch": 0.3941811356170812,
      "grad_norm": 3.300532102584839,
      "learning_rate": 4.825905292479109e-05,
      "loss": 1.8364,
      "step": 210
    },
    {
      "epoch": 0.4129516658845612,
      "grad_norm": 3.4612457752227783,
      "learning_rate": 4.79108635097493e-05,
      "loss": 1.693,
      "step": 220
    },
    {
      "epoch": 0.43172219615204127,
      "grad_norm": 3.306436777114868,
      "learning_rate": 4.756267409470752e-05,
      "loss": 1.5683,
      "step": 230
    },
    {
      "epoch": 0.45049272641952137,
      "grad_norm": 3.522141695022583,
      "learning_rate": 4.721448467966574e-05,
      "loss": 1.5139,
      "step": 240
    },
    {
      "epoch": 0.4692632566870014,
      "grad_norm": 3.511997938156128,
      "learning_rate": 4.686629526462396e-05,
      "loss": 1.4348,
      "step": 250
    },
    {
      "epoch": 0.48803378695448146,
      "grad_norm": 3.343367576599121,
      "learning_rate": 4.6518105849582176e-05,
      "loss": 1.3879,
      "step": 260
    },
    {
      "epoch": 0.5068043172219615,
      "grad_norm": 3.2004270553588867,
      "learning_rate": 4.6169916434540394e-05,
      "loss": 1.3787,
      "step": 270
    },
    {
      "epoch": 0.5255748474894416,
      "grad_norm": 3.113529682159424,
      "learning_rate": 4.582172701949861e-05,
      "loss": 1.3503,
      "step": 280
    },
    {
      "epoch": 0.5443453777569216,
      "grad_norm": 3.2955970764160156,
      "learning_rate": 4.547353760445683e-05,
      "loss": 1.3139,
      "step": 290
    },
    {
      "epoch": 0.5631159080244017,
      "grad_norm": 3.1911182403564453,
      "learning_rate": 4.5125348189415044e-05,
      "loss": 1.2995,
      "step": 300
    },
    {
      "epoch": 0.5818864382918817,
      "grad_norm": 3.416416645050049,
      "learning_rate": 4.477715877437326e-05,
      "loss": 1.2014,
      "step": 310
    },
    {
      "epoch": 0.6006569685593618,
      "grad_norm": 4.6215410232543945,
      "learning_rate": 4.442896935933148e-05,
      "loss": 1.1872,
      "step": 320
    },
    {
      "epoch": 0.6194274988268419,
      "grad_norm": 3.202340841293335,
      "learning_rate": 4.40807799442897e-05,
      "loss": 1.1448,
      "step": 330
    },
    {
      "epoch": 0.6381980290943219,
      "grad_norm": 3.1017255783081055,
      "learning_rate": 4.373259052924791e-05,
      "loss": 1.2542,
      "step": 340
    },
    {
      "epoch": 0.656968559361802,
      "grad_norm": 3.4146809577941895,
      "learning_rate": 4.338440111420613e-05,
      "loss": 1.143,
      "step": 350
    },
    {
      "epoch": 0.675739089629282,
      "grad_norm": 3.4860126972198486,
      "learning_rate": 4.303621169916435e-05,
      "loss": 1.1936,
      "step": 360
    },
    {
      "epoch": 0.6945096198967621,
      "grad_norm": 3.000502109527588,
      "learning_rate": 4.268802228412256e-05,
      "loss": 1.0896,
      "step": 370
    },
    {
      "epoch": 0.7132801501642422,
      "grad_norm": 3.3682737350463867,
      "learning_rate": 4.233983286908078e-05,
      "loss": 0.9863,
      "step": 380
    },
    {
      "epoch": 0.7320506804317222,
      "grad_norm": 2.903545379638672,
      "learning_rate": 4.1991643454039e-05,
      "loss": 1.0206,
      "step": 390
    },
    {
      "epoch": 0.7508212106992023,
      "grad_norm": 3.5237250328063965,
      "learning_rate": 4.164345403899722e-05,
      "loss": 1.0655,
      "step": 400
    },
    {
      "epoch": 0.7695917409666823,
      "grad_norm": 3.5761749744415283,
      "learning_rate": 4.129526462395543e-05,
      "loss": 1.0016,
      "step": 410
    },
    {
      "epoch": 0.7883622712341624,
      "grad_norm": 3.3111650943756104,
      "learning_rate": 4.094707520891365e-05,
      "loss": 1.0225,
      "step": 420
    },
    {
      "epoch": 0.8071328015016425,
      "grad_norm": 2.9664759635925293,
      "learning_rate": 4.0598885793871866e-05,
      "loss": 1.0624,
      "step": 430
    },
    {
      "epoch": 0.8259033317691225,
      "grad_norm": 3.750908136367798,
      "learning_rate": 4.0250696378830085e-05,
      "loss": 1.0223,
      "step": 440
    },
    {
      "epoch": 0.8446738620366026,
      "grad_norm": 3.176248788833618,
      "learning_rate": 3.9902506963788303e-05,
      "loss": 0.9834,
      "step": 450
    },
    {
      "epoch": 0.8634443923040825,
      "grad_norm": 3.428868293762207,
      "learning_rate": 3.955431754874652e-05,
      "loss": 1.0338,
      "step": 460
    },
    {
      "epoch": 0.8822149225715626,
      "grad_norm": 3.6257553100585938,
      "learning_rate": 3.920612813370474e-05,
      "loss": 1.0156,
      "step": 470
    },
    {
      "epoch": 0.9009854528390427,
      "grad_norm": 3.195758104324341,
      "learning_rate": 3.885793871866296e-05,
      "loss": 1.0171,
      "step": 480
    },
    {
      "epoch": 0.9197559831065227,
      "grad_norm": 2.6419994831085205,
      "learning_rate": 3.850974930362117e-05,
      "loss": 0.9735,
      "step": 490
    },
    {
      "epoch": 0.9385265133740028,
      "grad_norm": 3.462597608566284,
      "learning_rate": 3.816155988857939e-05,
      "loss": 1.0049,
      "step": 500
    },
    {
      "epoch": 0.9572970436414828,
      "grad_norm": 2.8139870166778564,
      "learning_rate": 3.781337047353761e-05,
      "loss": 0.9696,
      "step": 510
    },
    {
      "epoch": 0.9760675739089629,
      "grad_norm": 3.027811288833618,
      "learning_rate": 3.746518105849583e-05,
      "loss": 0.9242,
      "step": 520
    },
    {
      "epoch": 0.994838104176443,
      "grad_norm": 3.6760294437408447,
      "learning_rate": 3.711699164345404e-05,
      "loss": 0.912,
      "step": 530
    },
    {
      "epoch": 0.998592210229939,
      "eval_accuracy": 0.7968316831683169,
      "eval_loss": 0.8397366404533386,
      "eval_runtime": 120.4032,
      "eval_samples_per_second": 62.914,
      "eval_steps_per_second": 1.968,
      "step": 532
    },
    {
      "epoch": 1.013608634443923,
      "grad_norm": 2.9862356185913086,
      "learning_rate": 3.676880222841226e-05,
      "loss": 0.8398,
      "step": 540
    },
    {
      "epoch": 1.0323791647114031,
      "grad_norm": 3.0735647678375244,
      "learning_rate": 3.642061281337048e-05,
      "loss": 0.8379,
      "step": 550
    },
    {
      "epoch": 1.0511496949788832,
      "grad_norm": 3.5392494201660156,
      "learning_rate": 3.607242339832869e-05,
      "loss": 0.8264,
      "step": 560
    },
    {
      "epoch": 1.069920225246363,
      "grad_norm": 3.511627435684204,
      "learning_rate": 3.572423398328691e-05,
      "loss": 0.8204,
      "step": 570
    },
    {
      "epoch": 1.0886907555138432,
      "grad_norm": 3.393699884414673,
      "learning_rate": 3.5376044568245126e-05,
      "loss": 0.8963,
      "step": 580
    },
    {
      "epoch": 1.1074612857813233,
      "grad_norm": 4.332097053527832,
      "learning_rate": 3.5027855153203345e-05,
      "loss": 0.8623,
      "step": 590
    },
    {
      "epoch": 1.1262318160488034,
      "grad_norm": 3.1262927055358887,
      "learning_rate": 3.4679665738161556e-05,
      "loss": 0.8025,
      "step": 600
    },
    {
      "epoch": 1.1450023463162835,
      "grad_norm": 3.1130638122558594,
      "learning_rate": 3.4331476323119775e-05,
      "loss": 0.8388,
      "step": 610
    },
    {
      "epoch": 1.1637728765837636,
      "grad_norm": 3.509665012359619,
      "learning_rate": 3.3983286908077994e-05,
      "loss": 0.8306,
      "step": 620
    },
    {
      "epoch": 1.1825434068512435,
      "grad_norm": 3.595564842224121,
      "learning_rate": 3.363509749303621e-05,
      "loss": 0.8241,
      "step": 630
    },
    {
      "epoch": 1.2013139371187236,
      "grad_norm": 2.9736037254333496,
      "learning_rate": 3.328690807799443e-05,
      "loss": 0.8333,
      "step": 640
    },
    {
      "epoch": 1.2200844673862037,
      "grad_norm": 3.5477538108825684,
      "learning_rate": 3.293871866295265e-05,
      "loss": 0.84,
      "step": 650
    },
    {
      "epoch": 1.2388549976536838,
      "grad_norm": 3.617574453353882,
      "learning_rate": 3.259052924791087e-05,
      "loss": 0.7652,
      "step": 660
    },
    {
      "epoch": 1.2576255279211637,
      "grad_norm": 3.621431589126587,
      "learning_rate": 3.224233983286909e-05,
      "loss": 0.7634,
      "step": 670
    },
    {
      "epoch": 1.2763960581886438,
      "grad_norm": 3.5540874004364014,
      "learning_rate": 3.18941504178273e-05,
      "loss": 0.7951,
      "step": 680
    },
    {
      "epoch": 1.2951665884561239,
      "grad_norm": 3.1876299381256104,
      "learning_rate": 3.154596100278552e-05,
      "loss": 0.787,
      "step": 690
    },
    {
      "epoch": 1.313937118723604,
      "grad_norm": 3.8441293239593506,
      "learning_rate": 3.1197771587743737e-05,
      "loss": 0.7799,
      "step": 700
    },
    {
      "epoch": 1.332707648991084,
      "grad_norm": 3.2278223037719727,
      "learning_rate": 3.0849582172701955e-05,
      "loss": 0.845,
      "step": 710
    },
    {
      "epoch": 1.3514781792585642,
      "grad_norm": 3.1881773471832275,
      "learning_rate": 3.050139275766017e-05,
      "loss": 0.7415,
      "step": 720
    },
    {
      "epoch": 1.370248709526044,
      "grad_norm": 3.34291672706604,
      "learning_rate": 3.0153203342618386e-05,
      "loss": 0.7424,
      "step": 730
    },
    {
      "epoch": 1.3890192397935242,
      "grad_norm": 5.416168689727783,
      "learning_rate": 2.98050139275766e-05,
      "loss": 0.8025,
      "step": 740
    },
    {
      "epoch": 1.4077897700610043,
      "grad_norm": 3.111943244934082,
      "learning_rate": 2.945682451253482e-05,
      "loss": 0.8537,
      "step": 750
    },
    {
      "epoch": 1.4265603003284844,
      "grad_norm": 3.118394374847412,
      "learning_rate": 2.9108635097493035e-05,
      "loss": 0.7796,
      "step": 760
    },
    {
      "epoch": 1.4453308305959642,
      "grad_norm": 3.8577425479888916,
      "learning_rate": 2.8760445682451254e-05,
      "loss": 0.7926,
      "step": 770
    },
    {
      "epoch": 1.4641013608634443,
      "grad_norm": 4.177425384521484,
      "learning_rate": 2.841225626740947e-05,
      "loss": 0.7863,
      "step": 780
    },
    {
      "epoch": 1.4828718911309244,
      "grad_norm": 3.465301275253296,
      "learning_rate": 2.8064066852367688e-05,
      "loss": 0.7687,
      "step": 790
    },
    {
      "epoch": 1.5016424213984045,
      "grad_norm": 3.2810630798339844,
      "learning_rate": 2.7715877437325903e-05,
      "loss": 0.8167,
      "step": 800
    },
    {
      "epoch": 1.5204129516658846,
      "grad_norm": 3.05501127243042,
      "learning_rate": 2.736768802228412e-05,
      "loss": 0.7299,
      "step": 810
    },
    {
      "epoch": 1.5391834819333647,
      "grad_norm": 2.9829261302948,
      "learning_rate": 2.7019498607242337e-05,
      "loss": 0.7677,
      "step": 820
    },
    {
      "epoch": 1.5579540122008448,
      "grad_norm": 3.561396360397339,
      "learning_rate": 2.6671309192200562e-05,
      "loss": 0.7813,
      "step": 830
    },
    {
      "epoch": 1.5767245424683247,
      "grad_norm": 2.8982436656951904,
      "learning_rate": 2.6323119777158778e-05,
      "loss": 0.731,
      "step": 840
    },
    {
      "epoch": 1.5954950727358048,
      "grad_norm": 3.406423330307007,
      "learning_rate": 2.5974930362116996e-05,
      "loss": 0.73,
      "step": 850
    },
    {
      "epoch": 1.6142656030032847,
      "grad_norm": 3.0049679279327393,
      "learning_rate": 2.562674094707521e-05,
      "loss": 0.7826,
      "step": 860
    },
    {
      "epoch": 1.6330361332707648,
      "grad_norm": 3.500458002090454,
      "learning_rate": 2.527855153203343e-05,
      "loss": 0.7499,
      "step": 870
    },
    {
      "epoch": 1.651806663538245,
      "grad_norm": 3.0891637802124023,
      "learning_rate": 2.4930362116991646e-05,
      "loss": 0.7587,
      "step": 880
    },
    {
      "epoch": 1.670577193805725,
      "grad_norm": 3.149742841720581,
      "learning_rate": 2.4582172701949864e-05,
      "loss": 0.7374,
      "step": 890
    },
    {
      "epoch": 1.689347724073205,
      "grad_norm": 3.1518661975860596,
      "learning_rate": 2.423398328690808e-05,
      "loss": 0.7893,
      "step": 900
    },
    {
      "epoch": 1.7081182543406852,
      "grad_norm": 3.08024525642395,
      "learning_rate": 2.3885793871866298e-05,
      "loss": 0.7853,
      "step": 910
    },
    {
      "epoch": 1.7268887846081653,
      "grad_norm": 3.158390522003174,
      "learning_rate": 2.3537604456824514e-05,
      "loss": 0.7694,
      "step": 920
    },
    {
      "epoch": 1.7456593148756452,
      "grad_norm": 3.0718069076538086,
      "learning_rate": 2.318941504178273e-05,
      "loss": 0.7843,
      "step": 930
    },
    {
      "epoch": 1.7644298451431253,
      "grad_norm": 3.4767684936523438,
      "learning_rate": 2.2841225626740948e-05,
      "loss": 0.7483,
      "step": 940
    },
    {
      "epoch": 1.7832003754106054,
      "grad_norm": 3.350541353225708,
      "learning_rate": 2.2493036211699163e-05,
      "loss": 0.7457,
      "step": 950
    },
    {
      "epoch": 1.8019709056780853,
      "grad_norm": 3.4785940647125244,
      "learning_rate": 2.214484679665738e-05,
      "loss": 0.7586,
      "step": 960
    },
    {
      "epoch": 1.8207414359455654,
      "grad_norm": 3.245809316635132,
      "learning_rate": 2.17966573816156e-05,
      "loss": 0.7035,
      "step": 970
    },
    {
      "epoch": 1.8395119662130455,
      "grad_norm": 3.292048692703247,
      "learning_rate": 2.144846796657382e-05,
      "loss": 0.7366,
      "step": 980
    },
    {
      "epoch": 1.8582824964805256,
      "grad_norm": 4.539200305938721,
      "learning_rate": 2.1100278551532034e-05,
      "loss": 0.7377,
      "step": 990
    },
    {
      "epoch": 1.8770530267480057,
      "grad_norm": 2.6051247119903564,
      "learning_rate": 2.0752089136490253e-05,
      "loss": 0.8442,
      "step": 1000
    },
    {
      "epoch": 1.8958235570154858,
      "grad_norm": 3.7407584190368652,
      "learning_rate": 2.0403899721448468e-05,
      "loss": 0.732,
      "step": 1010
    },
    {
      "epoch": 1.9145940872829659,
      "grad_norm": 3.855624198913574,
      "learning_rate": 2.0055710306406687e-05,
      "loss": 0.8147,
      "step": 1020
    },
    {
      "epoch": 1.9333646175504458,
      "grad_norm": 3.7936253547668457,
      "learning_rate": 1.9707520891364902e-05,
      "loss": 0.6852,
      "step": 1030
    },
    {
      "epoch": 1.9521351478179259,
      "grad_norm": 3.4030821323394775,
      "learning_rate": 1.935933147632312e-05,
      "loss": 0.7311,
      "step": 1040
    },
    {
      "epoch": 1.970905678085406,
      "grad_norm": 3.5526273250579834,
      "learning_rate": 1.9011142061281336e-05,
      "loss": 0.7846,
      "step": 1050
    },
    {
      "epoch": 1.9896762083528858,
      "grad_norm": 3.1862409114837646,
      "learning_rate": 1.8662952646239558e-05,
      "loss": 0.7233,
      "step": 1060
    },
    {
      "epoch": 1.999061473486626,
      "eval_accuracy": 0.8294389438943894,
      "eval_loss": 0.6780887842178345,
      "eval_runtime": 125.2435,
      "eval_samples_per_second": 60.482,
      "eval_steps_per_second": 1.892,
      "step": 1065
    },
    {
      "epoch": 2.008446738620366,
      "grad_norm": 3.521763563156128,
      "learning_rate": 1.8314763231197773e-05,
      "loss": 0.6907,
      "step": 1070
    },
    {
      "epoch": 2.027217268887846,
      "grad_norm": 3.0328750610351562,
      "learning_rate": 1.7966573816155992e-05,
      "loss": 0.6829,
      "step": 1080
    },
    {
      "epoch": 2.045987799155326,
      "grad_norm": 4.274998188018799,
      "learning_rate": 1.7618384401114207e-05,
      "loss": 0.635,
      "step": 1090
    },
    {
      "epoch": 2.0647583294228062,
      "grad_norm": 3.0744423866271973,
      "learning_rate": 1.7270194986072426e-05,
      "loss": 0.7163,
      "step": 1100
    },
    {
      "epoch": 2.0835288596902863,
      "grad_norm": 3.0927815437316895,
      "learning_rate": 1.692200557103064e-05,
      "loss": 0.6817,
      "step": 1110
    },
    {
      "epoch": 2.1022993899577664,
      "grad_norm": 2.5418238639831543,
      "learning_rate": 1.6573816155988857e-05,
      "loss": 0.6235,
      "step": 1120
    },
    {
      "epoch": 2.1210699202252465,
      "grad_norm": 3.1327621936798096,
      "learning_rate": 1.6225626740947075e-05,
      "loss": 0.6351,
      "step": 1130
    },
    {
      "epoch": 2.139840450492726,
      "grad_norm": 4.186623573303223,
      "learning_rate": 1.587743732590529e-05,
      "loss": 0.6383,
      "step": 1140
    },
    {
      "epoch": 2.1586109807602063,
      "grad_norm": 3.007977247238159,
      "learning_rate": 1.552924791086351e-05,
      "loss": 0.6994,
      "step": 1150
    },
    {
      "epoch": 2.1773815110276864,
      "grad_norm": 3.232109308242798,
      "learning_rate": 1.518105849582173e-05,
      "loss": 0.6615,
      "step": 1160
    },
    {
      "epoch": 2.1961520412951665,
      "grad_norm": 2.8354580402374268,
      "learning_rate": 1.4832869080779947e-05,
      "loss": 0.6686,
      "step": 1170
    },
    {
      "epoch": 2.2149225715626466,
      "grad_norm": 3.6194567680358887,
      "learning_rate": 1.4484679665738164e-05,
      "loss": 0.6649,
      "step": 1180
    },
    {
      "epoch": 2.2336931018301267,
      "grad_norm": 4.279024600982666,
      "learning_rate": 1.413649025069638e-05,
      "loss": 0.6529,
      "step": 1190
    },
    {
      "epoch": 2.252463632097607,
      "grad_norm": 3.035931348800659,
      "learning_rate": 1.3788300835654596e-05,
      "loss": 0.6796,
      "step": 1200
    },
    {
      "epoch": 2.271234162365087,
      "grad_norm": 3.2257440090179443,
      "learning_rate": 1.3440111420612813e-05,
      "loss": 0.6501,
      "step": 1210
    },
    {
      "epoch": 2.290004692632567,
      "grad_norm": 3.176237106323242,
      "learning_rate": 1.309192200557103e-05,
      "loss": 0.7252,
      "step": 1220
    },
    {
      "epoch": 2.308775222900047,
      "grad_norm": 3.575956344604492,
      "learning_rate": 1.2743732590529247e-05,
      "loss": 0.6798,
      "step": 1230
    },
    {
      "epoch": 2.327545753167527,
      "grad_norm": 3.1033012866973877,
      "learning_rate": 1.2395543175487466e-05,
      "loss": 0.6291,
      "step": 1240
    },
    {
      "epoch": 2.346316283435007,
      "grad_norm": 3.797513961791992,
      "learning_rate": 1.2047353760445683e-05,
      "loss": 0.6908,
      "step": 1250
    },
    {
      "epoch": 2.365086813702487,
      "grad_norm": 3.399435043334961,
      "learning_rate": 1.16991643454039e-05,
      "loss": 0.5915,
      "step": 1260
    },
    {
      "epoch": 2.383857343969967,
      "grad_norm": 2.9869837760925293,
      "learning_rate": 1.1350974930362116e-05,
      "loss": 0.6665,
      "step": 1270
    },
    {
      "epoch": 2.402627874237447,
      "grad_norm": 3.171600580215454,
      "learning_rate": 1.1002785515320335e-05,
      "loss": 0.6202,
      "step": 1280
    },
    {
      "epoch": 2.4213984045049273,
      "grad_norm": 2.0792176723480225,
      "learning_rate": 1.0654596100278552e-05,
      "loss": 0.5773,
      "step": 1290
    },
    {
      "epoch": 2.4401689347724074,
      "grad_norm": 3.339883804321289,
      "learning_rate": 1.0306406685236769e-05,
      "loss": 0.6435,
      "step": 1300
    },
    {
      "epoch": 2.4589394650398875,
      "grad_norm": 3.2008817195892334,
      "learning_rate": 9.958217270194986e-06,
      "loss": 0.6448,
      "step": 1310
    },
    {
      "epoch": 2.4777099953073676,
      "grad_norm": 2.828279733657837,
      "learning_rate": 9.610027855153205e-06,
      "loss": 0.6121,
      "step": 1320
    },
    {
      "epoch": 2.4964805255748477,
      "grad_norm": 2.920414924621582,
      "learning_rate": 9.261838440111422e-06,
      "loss": 0.6569,
      "step": 1330
    },
    {
      "epoch": 2.5152510558423273,
      "grad_norm": 3.4083826541900635,
      "learning_rate": 8.913649025069639e-06,
      "loss": 0.6819,
      "step": 1340
    },
    {
      "epoch": 2.534021586109808,
      "grad_norm": 2.8921279907226562,
      "learning_rate": 8.565459610027856e-06,
      "loss": 0.6257,
      "step": 1350
    },
    {
      "epoch": 2.5527921163772875,
      "grad_norm": 3.33734130859375,
      "learning_rate": 8.217270194986073e-06,
      "loss": 0.6439,
      "step": 1360
    },
    {
      "epoch": 2.5715626466447676,
      "grad_norm": 4.184520244598389,
      "learning_rate": 7.869080779944291e-06,
      "loss": 0.6742,
      "step": 1370
    },
    {
      "epoch": 2.5903331769122477,
      "grad_norm": 3.5494306087493896,
      "learning_rate": 7.5208913649025075e-06,
      "loss": 0.6485,
      "step": 1380
    },
    {
      "epoch": 2.609103707179728,
      "grad_norm": 2.791755437850952,
      "learning_rate": 7.1727019498607245e-06,
      "loss": 0.6061,
      "step": 1390
    },
    {
      "epoch": 2.627874237447208,
      "grad_norm": 3.616508960723877,
      "learning_rate": 6.8245125348189415e-06,
      "loss": 0.7124,
      "step": 1400
    },
    {
      "epoch": 2.646644767714688,
      "grad_norm": 3.5052719116210938,
      "learning_rate": 6.4763231197771585e-06,
      "loss": 0.6545,
      "step": 1410
    },
    {
      "epoch": 2.665415297982168,
      "grad_norm": 3.463571786880493,
      "learning_rate": 6.128133704735376e-06,
      "loss": 0.5905,
      "step": 1420
    },
    {
      "epoch": 2.684185828249648,
      "grad_norm": 3.2912559509277344,
      "learning_rate": 5.779944289693594e-06,
      "loss": 0.5468,
      "step": 1430
    },
    {
      "epoch": 2.7029563585171283,
      "grad_norm": 3.0410854816436768,
      "learning_rate": 5.43175487465181e-06,
      "loss": 0.6286,
      "step": 1440
    },
    {
      "epoch": 2.721726888784608,
      "grad_norm": 3.565953016281128,
      "learning_rate": 5.083565459610028e-06,
      "loss": 0.6567,
      "step": 1450
    },
    {
      "epoch": 2.740497419052088,
      "grad_norm": 3.0948688983917236,
      "learning_rate": 4.735376044568245e-06,
      "loss": 0.644,
      "step": 1460
    },
    {
      "epoch": 2.759267949319568,
      "grad_norm": 3.734225034713745,
      "learning_rate": 4.387186629526462e-06,
      "loss": 0.65,
      "step": 1470
    },
    {
      "epoch": 2.7780384795870483,
      "grad_norm": 3.245823860168457,
      "learning_rate": 4.03899721448468e-06,
      "loss": 0.613,
      "step": 1480
    },
    {
      "epoch": 2.7968090098545284,
      "grad_norm": 2.6476266384124756,
      "learning_rate": 3.690807799442897e-06,
      "loss": 0.6562,
      "step": 1490
    },
    {
      "epoch": 2.8155795401220085,
      "grad_norm": 3.1951568126678467,
      "learning_rate": 3.3426183844011147e-06,
      "loss": 0.5897,
      "step": 1500
    },
    {
      "epoch": 2.8343500703894886,
      "grad_norm": 3.754561424255371,
      "learning_rate": 2.9944289693593313e-06,
      "loss": 0.5686,
      "step": 1510
    },
    {
      "epoch": 2.8531206006569687,
      "grad_norm": 2.6758432388305664,
      "learning_rate": 2.6462395543175487e-06,
      "loss": 0.6791,
      "step": 1520
    },
    {
      "epoch": 2.871891130924449,
      "grad_norm": 2.6981005668640137,
      "learning_rate": 2.298050139275766e-06,
      "loss": 0.5544,
      "step": 1530
    },
    {
      "epoch": 2.8906616611919285,
      "grad_norm": 3.503272294998169,
      "learning_rate": 1.9498607242339835e-06,
      "loss": 0.6921,
      "step": 1540
    },
    {
      "epoch": 2.9094321914594086,
      "grad_norm": 3.2283263206481934,
      "learning_rate": 1.6016713091922007e-06,
      "loss": 0.6307,
      "step": 1550
    },
    {
      "epoch": 2.9282027217268887,
      "grad_norm": 3.2601311206817627,
      "learning_rate": 1.253481894150418e-06,
      "loss": 0.653,
      "step": 1560
    },
    {
      "epoch": 2.9469732519943688,
      "grad_norm": 3.5451338291168213,
      "learning_rate": 9.052924791086352e-07,
      "loss": 0.5953,
      "step": 1570
    },
    {
      "epoch": 2.965743782261849,
      "grad_norm": 3.605889081954956,
      "learning_rate": 5.571030640668524e-07,
      "loss": 0.6338,
      "step": 1580
    },
    {
      "epoch": 2.984514312529329,
      "grad_norm": 4.055668830871582,
      "learning_rate": 2.0891364902506967e-07,
      "loss": 0.6047,
      "step": 1590
    },
    {
      "epoch": 2.995776630689817,
      "eval_accuracy": 0.834983498349835,
      "eval_loss": 0.6400949358940125,
      "eval_runtime": 126.916,
      "eval_samples_per_second": 59.685,
      "eval_steps_per_second": 1.867,
      "step": 1596
    },
    {
      "epoch": 2.995776630689817,
      "step": 1596,
      "total_flos": 1.584209316447959e+19,
      "train_loss": 1.1699181137825911,
      "train_runtime": 8274.5103,
      "train_samples_per_second": 24.717,
      "train_steps_per_second": 0.193
    }
  ],
  "logging_steps": 10,
  "max_steps": 1596,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.584209316447959e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}