{
  "best_metric": 0.7937743190661478,
  "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-7naa0m57/checkpoint-1200",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1200,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025,
      "grad_norm": 4.0042338371276855,
      "learning_rate": 4.843201071989088e-05,
      "loss": 0.6448,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 9.521341323852539,
      "learning_rate": 4.8025019033169106e-05,
      "loss": 0.5938,
      "step": 20
    },
    {
      "epoch": 0.075,
      "grad_norm": 3.919041156768799,
      "learning_rate": 4.7618027346447335e-05,
      "loss": 0.6308,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.4371910095214844,
      "learning_rate": 4.7211035659725565e-05,
      "loss": 0.5535,
      "step": 40
    },
    {
      "epoch": 0.125,
      "grad_norm": 4.366234302520752,
      "learning_rate": 4.6804043973003794e-05,
      "loss": 0.4918,
      "step": 50
    },
    {
      "epoch": 0.15,
      "grad_norm": 8.843981742858887,
      "learning_rate": 4.639705228628202e-05,
      "loss": 0.4615,
      "step": 60
    },
    {
      "epoch": 0.175,
      "grad_norm": 17.52216911315918,
      "learning_rate": 4.5990060599560246e-05,
      "loss": 0.2985,
      "step": 70
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.17047187685966492,
      "learning_rate": 4.5583068912838476e-05,
      "loss": 0.4669,
      "step": 80
    },
    {
      "epoch": 0.225,
      "grad_norm": 8.667387008666992,
      "learning_rate": 4.5176077226116705e-05,
      "loss": 0.2397,
      "step": 90
    },
    {
      "epoch": 0.25,
      "grad_norm": 8.633122444152832,
      "learning_rate": 4.476908553939493e-05,
      "loss": 0.8068,
      "step": 100
    },
    {
      "epoch": 0.275,
      "grad_norm": 26.120525360107422,
      "learning_rate": 4.436209385267316e-05,
      "loss": 0.3711,
      "step": 110
    },
    {
      "epoch": 0.3,
      "grad_norm": 12.639058113098145,
      "learning_rate": 4.395510216595139e-05,
      "loss": 0.6239,
      "step": 120
    },
    {
      "epoch": 0.325,
      "grad_norm": 0.3218459188938141,
      "learning_rate": 4.3548110479229617e-05,
      "loss": 0.4321,
      "step": 130
    },
    {
      "epoch": 0.35,
      "grad_norm": 10.446372985839844,
      "learning_rate": 4.3141118792507846e-05,
      "loss": 0.6092,
      "step": 140
    },
    {
      "epoch": 0.375,
      "grad_norm": 2.5795059204101562,
      "learning_rate": 4.2734127105786075e-05,
      "loss": 0.5735,
      "step": 150
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.5706645250320435,
      "learning_rate": 4.23271354190643e-05,
      "loss": 0.3974,
      "step": 160
    },
    {
      "epoch": 0.425,
      "grad_norm": 28.565317153930664,
      "learning_rate": 4.192014373234253e-05,
      "loss": 0.5174,
      "step": 170
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.3528432548046112,
      "learning_rate": 4.151315204562076e-05,
      "loss": 0.3349,
      "step": 180
    },
    {
      "epoch": 0.475,
      "grad_norm": 0.09431620687246323,
      "learning_rate": 4.1106160358898987e-05,
      "loss": 0.522,
      "step": 190
    },
    {
      "epoch": 0.5,
      "grad_norm": 17.289031982421875,
      "learning_rate": 4.0699168672177216e-05,
      "loss": 0.4926,
      "step": 200
    },
    {
      "epoch": 0.525,
      "grad_norm": 4.251012802124023,
      "learning_rate": 4.029217698545544e-05,
      "loss": 0.3561,
      "step": 210
    },
    {
      "epoch": 0.55,
      "grad_norm": 7.21864652633667,
      "learning_rate": 3.988518529873367e-05,
      "loss": 0.4018,
      "step": 220
    },
    {
      "epoch": 0.575,
      "grad_norm": 18.046688079833984,
      "learning_rate": 3.94781936120119e-05,
      "loss": 0.4736,
      "step": 230
    },
    {
      "epoch": 0.6,
      "grad_norm": 8.16905403137207,
      "learning_rate": 3.907120192529013e-05,
      "loss": 0.5017,
      "step": 240
    },
    {
      "epoch": 0.625,
      "grad_norm": 26.532289505004883,
      "learning_rate": 3.866421023856835e-05,
      "loss": 0.3272,
      "step": 250
    },
    {
      "epoch": 0.65,
      "grad_norm": 42.285945892333984,
      "learning_rate": 3.825721855184658e-05,
      "loss": 0.3567,
      "step": 260
    },
    {
      "epoch": 0.675,
      "grad_norm": 10.441671371459961,
      "learning_rate": 3.785022686512481e-05,
      "loss": 0.8528,
      "step": 270
    },
    {
      "epoch": 0.7,
      "grad_norm": 10.992928504943848,
      "learning_rate": 3.744323517840304e-05,
      "loss": 0.5057,
      "step": 280
    },
    {
      "epoch": 0.725,
      "grad_norm": 0.5464246273040771,
      "learning_rate": 3.703624349168126e-05,
      "loss": 0.4161,
      "step": 290
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2611326277256012,
      "learning_rate": 3.662925180495949e-05,
      "loss": 0.0977,
      "step": 300
    },
    {
      "epoch": 0.775,
      "grad_norm": 0.4221794009208679,
      "learning_rate": 3.622226011823772e-05,
      "loss": 0.2858,
      "step": 310
    },
    {
      "epoch": 0.8,
      "grad_norm": 20.115922927856445,
      "learning_rate": 3.581526843151594e-05,
      "loss": 0.5096,
      "step": 320
    },
    {
      "epoch": 0.825,
      "grad_norm": 5.4764275550842285,
      "learning_rate": 3.540827674479417e-05,
      "loss": 0.5093,
      "step": 330
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.22513504326343536,
      "learning_rate": 3.50012850580724e-05,
      "loss": 0.2849,
      "step": 340
    },
    {
      "epoch": 0.875,
      "grad_norm": 9.947456359863281,
      "learning_rate": 3.459429337135063e-05,
      "loss": 0.4448,
      "step": 350
    },
    {
      "epoch": 0.9,
      "grad_norm": 17.894901275634766,
      "learning_rate": 3.4187301684628854e-05,
      "loss": 1.1117,
      "step": 360
    },
    {
      "epoch": 0.925,
      "grad_norm": 0.4906502962112427,
      "learning_rate": 3.378030999790708e-05,
      "loss": 0.2753,
      "step": 370
    },
    {
      "epoch": 0.95,
      "grad_norm": 5.391515731811523,
      "learning_rate": 3.337331831118531e-05,
      "loss": 0.1935,
      "step": 380
    },
    {
      "epoch": 0.975,
      "grad_norm": 2.0084915161132812,
      "learning_rate": 3.296632662446354e-05,
      "loss": 0.3626,
      "step": 390
    },
    {
      "epoch": 1.0,
      "grad_norm": 12.514921188354492,
      "learning_rate": 3.2559334937741765e-05,
      "loss": 0.735,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7875,
      "eval_f1": 0.7301587301587301,
      "eval_loss": 0.7141464948654175,
      "eval_precision": 0.5989583333333334,
      "eval_recall": 0.9349593495934959,
      "eval_runtime": 1.5243,
      "eval_samples_per_second": 262.418,
      "eval_steps_per_second": 16.401,
      "step": 400
    },
    {
      "epoch": 1.025,
      "grad_norm": 11.676839828491211,
      "learning_rate": 3.2152343251019994e-05,
      "loss": 0.3937,
      "step": 410
    },
    {
      "epoch": 1.05,
      "grad_norm": 6.971975803375244,
      "learning_rate": 3.1745351564298224e-05,
      "loss": 0.4352,
      "step": 420
    },
    {
      "epoch": 1.075,
      "grad_norm": 0.2390863448381424,
      "learning_rate": 3.133835987757645e-05,
      "loss": 0.2793,
      "step": 430
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.1350153088569641,
      "learning_rate": 3.093136819085468e-05,
      "loss": 0.5645,
      "step": 440
    },
    {
      "epoch": 1.125,
      "grad_norm": 0.14674435555934906,
      "learning_rate": 3.0524376504132905e-05,
      "loss": 0.0916,
      "step": 450
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.0798887237906456,
      "learning_rate": 3.0117384817411138e-05,
      "loss": 0.1877,
      "step": 460
    },
    {
      "epoch": 1.175,
      "grad_norm": 0.7960764169692993,
      "learning_rate": 2.971039313068936e-05,
      "loss": 0.306,
      "step": 470
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.27005836367607117,
      "learning_rate": 2.930340144396759e-05,
      "loss": 0.5818,
      "step": 480
    },
    {
      "epoch": 1.225,
      "grad_norm": 6.420020580291748,
      "learning_rate": 2.889640975724582e-05,
      "loss": 0.3856,
      "step": 490
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.0868489220738411,
      "learning_rate": 2.848941807052405e-05,
      "loss": 0.4574,
      "step": 500
    },
    {
      "epoch": 1.275,
      "grad_norm": 0.12724563479423523,
      "learning_rate": 2.8082426383802275e-05,
      "loss": 0.5441,
      "step": 510
    },
    {
      "epoch": 1.3,
      "grad_norm": 16.045637130737305,
      "learning_rate": 2.7675434697080505e-05,
      "loss": 0.4005,
      "step": 520
    },
    {
      "epoch": 1.325,
      "grad_norm": 0.8872644901275635,
      "learning_rate": 2.726844301035873e-05,
      "loss": 0.1532,
      "step": 530
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1785900890827179,
      "learning_rate": 2.686145132363696e-05,
      "loss": 0.1264,
      "step": 540
    },
    {
      "epoch": 1.375,
      "grad_norm": 0.10678562521934509,
      "learning_rate": 2.6454459636915186e-05,
      "loss": 0.1142,
      "step": 550
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.07380059361457825,
      "learning_rate": 2.6047467950193416e-05,
      "loss": 0.4046,
      "step": 560
    },
    {
      "epoch": 1.425,
      "grad_norm": 5.716458797454834,
      "learning_rate": 2.5640476263471645e-05,
      "loss": 0.0074,
      "step": 570
    },
    {
      "epoch": 1.45,
      "grad_norm": 64.4542007446289,
      "learning_rate": 2.5233484576749875e-05,
      "loss": 0.7207,
      "step": 580
    },
    {
      "epoch": 1.475,
      "grad_norm": 0.7955445647239685,
      "learning_rate": 2.4826492890028097e-05,
      "loss": 0.3275,
      "step": 590
    },
    {
      "epoch": 1.5,
      "grad_norm": 20.74042320251465,
      "learning_rate": 2.4419501203306327e-05,
      "loss": 0.4701,
      "step": 600
    },
    {
      "epoch": 1.525,
      "grad_norm": 0.36268430948257446,
      "learning_rate": 2.4012509516584553e-05,
      "loss": 0.1863,
      "step": 610
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.19778229296207428,
      "learning_rate": 2.3605517829862782e-05,
      "loss": 0.2283,
      "step": 620
    },
    {
      "epoch": 1.575,
      "grad_norm": 0.08780460059642792,
      "learning_rate": 2.319852614314101e-05,
      "loss": 0.1166,
      "step": 630
    },
    {
      "epoch": 1.6,
      "grad_norm": 12.093305587768555,
      "learning_rate": 2.2791534456419238e-05,
      "loss": 0.1709,
      "step": 640
    },
    {
      "epoch": 1.625,
      "grad_norm": 5.422878265380859,
      "learning_rate": 2.2384542769697464e-05,
      "loss": 0.667,
      "step": 650
    },
    {
      "epoch": 1.65,
      "grad_norm": 13.302699089050293,
      "learning_rate": 2.1977551082975694e-05,
      "loss": 0.3448,
      "step": 660
    },
    {
      "epoch": 1.675,
      "grad_norm": 0.15557953715324402,
      "learning_rate": 2.1570559396253923e-05,
      "loss": 0.4754,
      "step": 670
    },
    {
      "epoch": 1.7,
      "grad_norm": 47.38977813720703,
      "learning_rate": 2.116356770953215e-05,
      "loss": 0.3435,
      "step": 680
    },
    {
      "epoch": 1.725,
      "grad_norm": 0.6711559891700745,
      "learning_rate": 2.075657602281038e-05,
      "loss": 0.1915,
      "step": 690
    },
    {
      "epoch": 1.75,
      "grad_norm": 49.03521728515625,
      "learning_rate": 2.0349584336088608e-05,
      "loss": 0.1532,
      "step": 700
    },
    {
      "epoch": 1.775,
      "grad_norm": 15.385904312133789,
      "learning_rate": 1.9942592649366834e-05,
      "loss": 0.6809,
      "step": 710
    },
    {
      "epoch": 1.8,
      "grad_norm": 30.673913955688477,
      "learning_rate": 1.9535600962645064e-05,
      "loss": 0.2397,
      "step": 720
    },
    {
      "epoch": 1.825,
      "grad_norm": 0.08131251484155655,
      "learning_rate": 1.912860927592329e-05,
      "loss": 0.3932,
      "step": 730
    },
    {
      "epoch": 1.85,
      "grad_norm": 16.99766731262207,
      "learning_rate": 1.872161758920152e-05,
      "loss": 0.2055,
      "step": 740
    },
    {
      "epoch": 1.875,
      "grad_norm": 0.846882700920105,
      "learning_rate": 1.8314625902479745e-05,
      "loss": 0.4532,
      "step": 750
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.139489084482193,
      "learning_rate": 1.790763421575797e-05,
      "loss": 0.0558,
      "step": 760
    },
    {
      "epoch": 1.925,
      "grad_norm": 10.560323715209961,
      "learning_rate": 1.75006425290362e-05,
      "loss": 0.6155,
      "step": 770
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.5508285760879517,
      "learning_rate": 1.7093650842314427e-05,
      "loss": 0.1549,
      "step": 780
    },
    {
      "epoch": 1.975,
      "grad_norm": 36.786476135253906,
      "learning_rate": 1.6686659155592656e-05,
      "loss": 0.4735,
      "step": 790
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.08513189852237701,
      "learning_rate": 1.6279667468870882e-05,
      "loss": 0.0196,
      "step": 800
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8625,
      "eval_f1": 0.7859922178988327,
      "eval_loss": 0.48169881105422974,
      "eval_precision": 0.753731343283582,
      "eval_recall": 0.8211382113821138,
      "eval_runtime": 1.522,
      "eval_samples_per_second": 262.817,
      "eval_steps_per_second": 16.426,
      "step": 800
    },
    {
      "epoch": 2.025,
      "grad_norm": 2.163971424102783,
      "learning_rate": 1.5872675782149112e-05,
      "loss": 0.211,
      "step": 810
    },
    {
      "epoch": 2.05,
      "grad_norm": 3.05389404296875,
      "learning_rate": 1.546568409542734e-05,
      "loss": 0.2051,
      "step": 820
    },
    {
      "epoch": 2.075,
      "grad_norm": 66.39513397216797,
      "learning_rate": 1.5058692408705569e-05,
      "loss": 0.1411,
      "step": 830
    },
    {
      "epoch": 2.1,
      "grad_norm": 0.11405447870492935,
      "learning_rate": 1.4651700721983795e-05,
      "loss": 0.0306,
      "step": 840
    },
    {
      "epoch": 2.125,
      "grad_norm": 0.03801630809903145,
      "learning_rate": 1.4244709035262025e-05,
      "loss": 0.0271,
      "step": 850
    },
    {
      "epoch": 2.15,
      "grad_norm": 0.3133273422718048,
      "learning_rate": 1.3837717348540252e-05,
      "loss": 0.2901,
      "step": 860
    },
    {
      "epoch": 2.175,
      "grad_norm": 0.07565152645111084,
      "learning_rate": 1.343072566181848e-05,
      "loss": 0.2415,
      "step": 870
    },
    {
      "epoch": 2.2,
      "grad_norm": 84.18476867675781,
      "learning_rate": 1.3023733975096708e-05,
      "loss": 0.2952,
      "step": 880
    },
    {
      "epoch": 2.225,
      "grad_norm": 0.11311139911413193,
      "learning_rate": 1.2616742288374937e-05,
      "loss": 0.5811,
      "step": 890
    },
    {
      "epoch": 2.25,
      "grad_norm": 14.530881881713867,
      "learning_rate": 1.2209750601653163e-05,
      "loss": 0.3296,
      "step": 900
    },
    {
      "epoch": 2.275,
      "grad_norm": 24.885290145874023,
      "learning_rate": 1.1802758914931391e-05,
      "loss": 0.1924,
      "step": 910
    },
    {
      "epoch": 2.3,
      "grad_norm": 0.055552396923303604,
      "learning_rate": 1.1395767228209619e-05,
      "loss": 0.2252,
      "step": 920
    },
    {
      "epoch": 2.325,
      "grad_norm": 0.8402379751205444,
      "learning_rate": 1.0988775541487847e-05,
      "loss": 0.4053,
      "step": 930
    },
    {
      "epoch": 2.35,
      "grad_norm": 32.10145568847656,
      "learning_rate": 1.0581783854766075e-05,
      "loss": 0.1891,
      "step": 940
    },
    {
      "epoch": 2.375,
      "grad_norm": 45.39471435546875,
      "learning_rate": 1.0174792168044304e-05,
      "loss": 0.0353,
      "step": 950
    },
    {
      "epoch": 2.4,
      "grad_norm": 0.07263578474521637,
      "learning_rate": 9.767800481322532e-06,
      "loss": 0.1106,
      "step": 960
    },
    {
      "epoch": 2.425,
      "grad_norm": 0.032821133732795715,
      "learning_rate": 9.36080879460076e-06,
      "loss": 0.3095,
      "step": 970
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.025584425777196884,
      "learning_rate": 8.953817107878986e-06,
      "loss": 0.1076,
      "step": 980
    },
    {
      "epoch": 2.475,
      "grad_norm": 0.0450764037668705,
      "learning_rate": 8.546825421157213e-06,
      "loss": 0.0046,
      "step": 990
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.0470331646502018,
      "learning_rate": 8.139833734435441e-06,
      "loss": 0.1165,
      "step": 1000
    },
    {
      "epoch": 2.525,
      "grad_norm": 0.04697234183549881,
      "learning_rate": 7.73284204771367e-06,
      "loss": 0.0749,
      "step": 1010
    },
    {
      "epoch": 2.55,
      "grad_norm": 0.04398488625884056,
      "learning_rate": 7.3258503609918976e-06,
      "loss": 0.2088,
      "step": 1020
    },
    {
      "epoch": 2.575,
      "grad_norm": 0.046139203011989594,
      "learning_rate": 6.918858674270126e-06,
      "loss": 0.206,
      "step": 1030
    },
    {
      "epoch": 2.6,
      "grad_norm": 0.0412064827978611,
      "learning_rate": 6.511866987548354e-06,
      "loss": 0.3262,
      "step": 1040
    },
    {
      "epoch": 2.625,
      "grad_norm": 13.24852466583252,
      "learning_rate": 6.104875300826582e-06,
      "loss": 0.3108,
      "step": 1050
    },
    {
      "epoch": 2.65,
      "grad_norm": 0.028375335037708282,
      "learning_rate": 5.6978836141048095e-06,
      "loss": 0.0038,
      "step": 1060
    },
    {
      "epoch": 2.675,
      "grad_norm": 0.202249214053154,
      "learning_rate": 5.290891927383037e-06,
      "loss": 0.1516,
      "step": 1070
    },
    {
      "epoch": 2.7,
      "grad_norm": 0.02920701913535595,
      "learning_rate": 4.883900240661266e-06,
      "loss": 0.1316,
      "step": 1080
    },
    {
      "epoch": 2.725,
      "grad_norm": 0.08516351878643036,
      "learning_rate": 4.476908553939493e-06,
      "loss": 0.0915,
      "step": 1090
    },
    {
      "epoch": 2.75,
      "grad_norm": 0.03471991792321205,
      "learning_rate": 4.069916867217721e-06,
      "loss": 0.0177,
      "step": 1100
    },
    {
      "epoch": 2.775,
      "grad_norm": 0.06554219126701355,
      "learning_rate": 3.6629251804959488e-06,
      "loss": 0.2439,
      "step": 1110
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.2342367321252823,
      "learning_rate": 3.255933493774177e-06,
      "loss": 0.1958,
      "step": 1120
    },
    {
      "epoch": 2.825,
      "grad_norm": 0.02388150431215763,
      "learning_rate": 2.8489418070524047e-06,
      "loss": 0.0264,
      "step": 1130
    },
    {
      "epoch": 2.85,
      "grad_norm": 0.10175933688879013,
      "learning_rate": 2.441950120330633e-06,
      "loss": 0.2652,
      "step": 1140
    },
    {
      "epoch": 2.875,
      "grad_norm": 50.01603317260742,
      "learning_rate": 2.0349584336088603e-06,
      "loss": 0.3156,
      "step": 1150
    },
    {
      "epoch": 2.9,
      "grad_norm": 0.07987692207098007,
      "learning_rate": 1.6279667468870885e-06,
      "loss": 0.0203,
      "step": 1160
    },
    {
      "epoch": 2.925,
      "grad_norm": 20.490392684936523,
      "learning_rate": 1.2209750601653165e-06,
      "loss": 0.1305,
      "step": 1170
    },
    {
      "epoch": 2.95,
      "grad_norm": 2.933145046234131,
      "learning_rate": 8.139833734435442e-07,
      "loss": 0.0051,
      "step": 1180
    },
    {
      "epoch": 2.975,
      "grad_norm": 0.023663857951760292,
      "learning_rate": 4.069916867217721e-07,
      "loss": 0.0157,
      "step": 1190
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.89351487159729,
      "learning_rate": 0.0,
      "loss": 0.0026,
      "step": 1200
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8675,
      "eval_f1": 0.7937743190661478,
      "eval_loss": 0.5679348111152649,
      "eval_precision": 0.7611940298507462,
      "eval_recall": 0.8292682926829268,
      "eval_runtime": 1.5586,
      "eval_samples_per_second": 256.645,
      "eval_steps_per_second": 16.04,
      "step": 1200
    }
  ],
  "logging_steps": 10,
  "max_steps": 1200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 635446111352832.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": {
    "_wandb": {},
    "assignments": {},
    "learning_rate": 4.8839002406612654e-05,
    "metric": "eval/loss",
    "num_train_epochs": 3,
    "per_device_train_batch_size": 4,
    "seed": 1
  }
}