|
{ |
|
"best_metric": 0.784, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-ovq62xze/checkpoint-1000", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.8580408096313477, |
|
"learning_rate": 7.324907510001364e-05, |
|
"loss": 0.5558, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.629230976104736, |
|
"learning_rate": 7.250918545253876e-05, |
|
"loss": 0.5753, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.6798982620239258, |
|
"learning_rate": 7.176929580506387e-05, |
|
"loss": 0.5007, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 4.710047245025635, |
|
"learning_rate": 7.1029406157589e-05, |
|
"loss": 0.5568, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 16.165040969848633, |
|
"learning_rate": 7.02895165101141e-05, |
|
"loss": 0.5409, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.3471007347106934, |
|
"learning_rate": 6.954962686263921e-05, |
|
"loss": 0.3631, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.770080089569092, |
|
"learning_rate": 6.880973721516433e-05, |
|
"loss": 0.1851, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 10.271796226501465, |
|
"learning_rate": 6.806984756768946e-05, |
|
"loss": 0.7488, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.5753231048583984, |
|
"learning_rate": 6.732995792021456e-05, |
|
"loss": 0.5854, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.666292905807495, |
|
"learning_rate": 6.659006827273969e-05, |
|
"loss": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.2799642086029053, |
|
"learning_rate": 6.58501786252648e-05, |
|
"loss": 0.557, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7298201322555542, |
|
"learning_rate": 6.51102889777899e-05, |
|
"loss": 0.4918, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.6464192271232605, |
|
"learning_rate": 6.437039933031503e-05, |
|
"loss": 0.4231, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 4.867911338806152, |
|
"learning_rate": 6.363050968284013e-05, |
|
"loss": 0.4482, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 6.947429656982422, |
|
"learning_rate": 6.289062003536524e-05, |
|
"loss": 0.3369, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.325223445892334, |
|
"learning_rate": 6.215073038789037e-05, |
|
"loss": 0.6414, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.6585001945495605, |
|
"learning_rate": 6.141084074041547e-05, |
|
"loss": 0.488, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 8.477282524108887, |
|
"learning_rate": 6.067095109294059e-05, |
|
"loss": 0.2685, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.3661060333251953, |
|
"learning_rate": 5.993106144546571e-05, |
|
"loss": 0.4817, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.6013305187225342, |
|
"learning_rate": 5.919117179799083e-05, |
|
"loss": 0.351, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.805, |
|
"eval_f1": 0.743421052631579, |
|
"eval_loss": 0.5347685813903809, |
|
"eval_precision": 0.6243093922651933, |
|
"eval_recall": 0.9186991869918699, |
|
"eval_runtime": 1.5641, |
|
"eval_samples_per_second": 255.745, |
|
"eval_steps_per_second": 15.984, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 3.003253698348999, |
|
"learning_rate": 5.8451282150515943e-05, |
|
"loss": 0.5513, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.9436538219451904, |
|
"learning_rate": 5.771139250304106e-05, |
|
"loss": 0.4827, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 4.9929656982421875, |
|
"learning_rate": 5.697150285556617e-05, |
|
"loss": 0.3016, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 10.904691696166992, |
|
"learning_rate": 5.623161320809128e-05, |
|
"loss": 0.2161, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 5.562251567840576, |
|
"learning_rate": 5.54917235606164e-05, |
|
"loss": 0.2296, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 21.09694480895996, |
|
"learning_rate": 5.4751833913141514e-05, |
|
"loss": 0.3892, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.3990909457206726, |
|
"learning_rate": 5.401194426566663e-05, |
|
"loss": 0.3668, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.7531650066375732, |
|
"learning_rate": 5.327205461819174e-05, |
|
"loss": 0.2828, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.6674296259880066, |
|
"learning_rate": 5.2532164970716854e-05, |
|
"loss": 0.3773, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 16.001766204833984, |
|
"learning_rate": 5.179227532324197e-05, |
|
"loss": 0.2175, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 6.789841651916504, |
|
"learning_rate": 5.1052385675767085e-05, |
|
"loss": 0.3477, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.3401732444763184, |
|
"learning_rate": 5.031249602829221e-05, |
|
"loss": 0.2424, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 7.187565326690674, |
|
"learning_rate": 4.9572606380817316e-05, |
|
"loss": 0.2964, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.6464786529541016, |
|
"learning_rate": 4.883271673334243e-05, |
|
"loss": 0.3348, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 7.238956451416016, |
|
"learning_rate": 4.809282708586755e-05, |
|
"loss": 0.3278, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.914743423461914, |
|
"learning_rate": 4.735293743839266e-05, |
|
"loss": 0.2056, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 13.46723747253418, |
|
"learning_rate": 4.661304779091778e-05, |
|
"loss": 0.5176, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 6.557673931121826, |
|
"learning_rate": 4.5873158143442887e-05, |
|
"loss": 0.3953, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 2.1576859951019287, |
|
"learning_rate": 4.5133268495968e-05, |
|
"loss": 0.2685, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.825181007385254, |
|
"learning_rate": 4.439337884849312e-05, |
|
"loss": 0.4442, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.83, |
|
"eval_f1": 0.7536231884057971, |
|
"eval_loss": 0.4959351420402527, |
|
"eval_precision": 0.6797385620915033, |
|
"eval_recall": 0.8455284552845529, |
|
"eval_runtime": 1.509, |
|
"eval_samples_per_second": 265.069, |
|
"eval_steps_per_second": 16.567, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 0.3849884271621704, |
|
"learning_rate": 4.365348920101823e-05, |
|
"loss": 0.0339, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 15.426344871520996, |
|
"learning_rate": 4.291359955354335e-05, |
|
"loss": 0.3828, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 3.521732807159424, |
|
"learning_rate": 4.217370990606846e-05, |
|
"loss": 0.2092, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.6053170561790466, |
|
"learning_rate": 4.143382025859358e-05, |
|
"loss": 0.15, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 2.9728167057037354, |
|
"learning_rate": 4.0693930611118695e-05, |
|
"loss": 0.1983, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.158972978591919, |
|
"learning_rate": 3.995404096364381e-05, |
|
"loss": 0.1547, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.08352199196815491, |
|
"learning_rate": 3.9214151316168926e-05, |
|
"loss": 0.3832, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 0.16624656319618225, |
|
"learning_rate": 3.8474261668694035e-05, |
|
"loss": 0.1125, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 13.895761489868164, |
|
"learning_rate": 3.773437202121915e-05, |
|
"loss": 0.1891, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.07783038169145584, |
|
"learning_rate": 3.6994482373744266e-05, |
|
"loss": 0.1845, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 5.39270544052124, |
|
"learning_rate": 3.625459272626938e-05, |
|
"loss": 0.3712, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 8.580484390258789, |
|
"learning_rate": 3.55147030787945e-05, |
|
"loss": 0.2922, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 5.49284553527832, |
|
"learning_rate": 3.4774813431319605e-05, |
|
"loss": 0.0888, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 2.509190559387207, |
|
"learning_rate": 3.403492378384473e-05, |
|
"loss": 0.2952, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.1146562099456787, |
|
"learning_rate": 3.329503413636984e-05, |
|
"loss": 0.2025, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.5424324870109558, |
|
"learning_rate": 3.255514448889495e-05, |
|
"loss": 0.3394, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 1.1997493505477905, |
|
"learning_rate": 3.181525484142007e-05, |
|
"loss": 0.2041, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 18.82257080078125, |
|
"learning_rate": 3.107536519394518e-05, |
|
"loss": 0.5699, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 11.223532676696777, |
|
"learning_rate": 3.0335475546470295e-05, |
|
"loss": 0.2722, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.048981666564941, |
|
"learning_rate": 2.9595585898995414e-05, |
|
"loss": 0.1852, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8625, |
|
"eval_f1": 0.7773279352226721, |
|
"eval_loss": 0.45909401774406433, |
|
"eval_precision": 0.7741935483870968, |
|
"eval_recall": 0.7804878048780488, |
|
"eval_runtime": 1.5277, |
|
"eval_samples_per_second": 261.826, |
|
"eval_steps_per_second": 16.364, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 2.716031074523926, |
|
"learning_rate": 2.885569625152053e-05, |
|
"loss": 0.0911, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 0.11326009035110474, |
|
"learning_rate": 2.811580660404564e-05, |
|
"loss": 0.0525, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 67.94048309326172, |
|
"learning_rate": 2.7375916956570757e-05, |
|
"loss": 0.1708, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.16798238456249237, |
|
"learning_rate": 2.663602730909587e-05, |
|
"loss": 0.1027, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 8.024996757507324, |
|
"learning_rate": 2.5896137661620985e-05, |
|
"loss": 0.3075, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 0.16531293094158173, |
|
"learning_rate": 2.5156248014146104e-05, |
|
"loss": 0.1509, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 0.06556364893913269, |
|
"learning_rate": 2.4416358366671216e-05, |
|
"loss": 0.2373, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 0.1460653692483902, |
|
"learning_rate": 2.367646871919633e-05, |
|
"loss": 0.0401, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.07078050822019577, |
|
"learning_rate": 2.2936579071721443e-05, |
|
"loss": 0.0538, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 5.139517307281494, |
|
"learning_rate": 2.219668942424656e-05, |
|
"loss": 0.3995, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"grad_norm": 0.08660732954740524, |
|
"learning_rate": 2.1456799776771674e-05, |
|
"loss": 0.0592, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 0.20419315993785858, |
|
"learning_rate": 2.071691012929679e-05, |
|
"loss": 0.1184, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 0.03444228321313858, |
|
"learning_rate": 1.9977020481821905e-05, |
|
"loss": 0.0935, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.4479585587978363, |
|
"learning_rate": 1.9237130834347017e-05, |
|
"loss": 0.0884, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.24001504480838776, |
|
"learning_rate": 1.8497241186872133e-05, |
|
"loss": 0.0098, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 0.12233974784612656, |
|
"learning_rate": 1.775735153939725e-05, |
|
"loss": 0.0209, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 3.510254383087158, |
|
"learning_rate": 1.7017461891922364e-05, |
|
"loss": 0.3062, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.15961907804012299, |
|
"learning_rate": 1.6277572244447476e-05, |
|
"loss": 0.1364, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 0.13841329514980316, |
|
"learning_rate": 1.553768259697259e-05, |
|
"loss": 0.0952, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.308049440383911, |
|
"learning_rate": 1.4797792949497707e-05, |
|
"loss": 0.1577, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.84, |
|
"eval_f1": 0.7697841726618705, |
|
"eval_loss": 0.6505396366119385, |
|
"eval_precision": 0.6903225806451613, |
|
"eval_recall": 0.8699186991869918, |
|
"eval_runtime": 1.5135, |
|
"eval_samples_per_second": 264.29, |
|
"eval_steps_per_second": 16.518, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"grad_norm": 0.12980560958385468, |
|
"learning_rate": 1.405790330202282e-05, |
|
"loss": 0.0537, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"grad_norm": 15.213448524475098, |
|
"learning_rate": 1.3318013654547935e-05, |
|
"loss": 0.0635, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 0.13449443876743317, |
|
"learning_rate": 1.2578124007073052e-05, |
|
"loss": 0.0357, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.11500319093465805, |
|
"learning_rate": 1.1838234359598166e-05, |
|
"loss": 0.2038, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"grad_norm": 0.05510816350579262, |
|
"learning_rate": 1.109834471212328e-05, |
|
"loss": 0.0265, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"grad_norm": 0.11758866161108017, |
|
"learning_rate": 1.0358455064648395e-05, |
|
"loss": 0.0069, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"grad_norm": 5.604867935180664, |
|
"learning_rate": 9.618565417173509e-06, |
|
"loss": 0.1583, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 0.17312869429588318, |
|
"learning_rate": 8.878675769698624e-06, |
|
"loss": 0.2303, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"grad_norm": 0.10631933808326721, |
|
"learning_rate": 8.138786122223738e-06, |
|
"loss": 0.005, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 0.11875050514936447, |
|
"learning_rate": 7.3988964747488535e-06, |
|
"loss": 0.0861, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.25447019934654236, |
|
"learning_rate": 6.659006827273967e-06, |
|
"loss": 0.0067, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 0.15403024852275848, |
|
"learning_rate": 5.919117179799083e-06, |
|
"loss": 0.1064, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"grad_norm": 0.10930292308330536, |
|
"learning_rate": 5.1792275323241974e-06, |
|
"loss": 0.0978, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"grad_norm": 2.734524726867676, |
|
"learning_rate": 4.439337884849312e-06, |
|
"loss": 0.1193, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 3.565016746520996, |
|
"learning_rate": 3.6994482373744267e-06, |
|
"loss": 0.1922, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 2.8288533687591553, |
|
"learning_rate": 2.9595585898995414e-06, |
|
"loss": 0.058, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"grad_norm": 3.0187721252441406, |
|
"learning_rate": 2.219668942424656e-06, |
|
"loss": 0.16, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"grad_norm": 0.1212492510676384, |
|
"learning_rate": 1.4797792949497707e-06, |
|
"loss": 0.076, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.08606864511966705, |
|
"learning_rate": 7.398896474748853e-07, |
|
"loss": 0.0058, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0966118648648262, |
|
"learning_rate": 0.0, |
|
"loss": 0.0051, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.865, |
|
"eval_f1": 0.784, |
|
"eval_loss": 0.5539087057113647, |
|
"eval_precision": 0.7716535433070866, |
|
"eval_recall": 0.7967479674796748, |
|
"eval_runtime": 1.5704, |
|
"eval_samples_per_second": 254.715, |
|
"eval_steps_per_second": 15.92, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1059076852254720.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 7.398896474748853e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 5, |
|
"per_device_train_batch_size": 8, |
|
"seed": 14 |
|
} |
|
} |
|
|