|
{ |
|
"best_metric": 0.9373365167161658, |
|
"best_model_checkpoint": "vivit-surf-analytics-runpod/checkpoint-11115", |
|
"epoch": 15.001349527665317, |
|
"eval_steps": 500, |
|
"global_step": 11116, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006747638326585695, |
|
"grad_norm": 32.80915451049805, |
|
"learning_rate": 3.373819163292848e-07, |
|
"loss": 1.5069, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001349527665317139, |
|
"grad_norm": 22.916248321533203, |
|
"learning_rate": 6.747638326585696e-07, |
|
"loss": 1.354, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0020242914979757085, |
|
"grad_norm": 34.89827346801758, |
|
"learning_rate": 1.0121457489878542e-06, |
|
"loss": 1.5187, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002699055330634278, |
|
"grad_norm": 22.8042049407959, |
|
"learning_rate": 1.3495276653171391e-06, |
|
"loss": 1.478, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0033738191632928477, |
|
"grad_norm": 27.662748336791992, |
|
"learning_rate": 1.6869095816464238e-06, |
|
"loss": 1.2862, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.004048582995951417, |
|
"grad_norm": 24.901159286499023, |
|
"learning_rate": 2.0242914979757085e-06, |
|
"loss": 1.2586, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004723346828609987, |
|
"grad_norm": 25.327184677124023, |
|
"learning_rate": 2.3616734143049934e-06, |
|
"loss": 1.2728, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005398110661268556, |
|
"grad_norm": 18.19566535949707, |
|
"learning_rate": 2.6990553306342783e-06, |
|
"loss": 1.0159, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006072874493927126, |
|
"grad_norm": 20.370386123657227, |
|
"learning_rate": 3.0364372469635627e-06, |
|
"loss": 1.2504, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006747638326585695, |
|
"grad_norm": 12.196557998657227, |
|
"learning_rate": 3.3738191632928476e-06, |
|
"loss": 2.0246, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007422402159244264, |
|
"grad_norm": 12.822103500366211, |
|
"learning_rate": 3.711201079622133e-06, |
|
"loss": 0.8519, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008097165991902834, |
|
"grad_norm": 6.872288227081299, |
|
"learning_rate": 4.048582995951417e-06, |
|
"loss": 0.7749, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008771929824561403, |
|
"grad_norm": 40.45072937011719, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 1.3494, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009446693657219974, |
|
"grad_norm": 3.996568441390991, |
|
"learning_rate": 4.723346828609987e-06, |
|
"loss": 0.9678, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.010121457489878543, |
|
"grad_norm": 2.117781400680542, |
|
"learning_rate": 5.060728744939271e-06, |
|
"loss": 1.8095, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010796221322537112, |
|
"grad_norm": 1.1970853805541992, |
|
"learning_rate": 5.3981106612685565e-06, |
|
"loss": 1.3044, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.011470985155195682, |
|
"grad_norm": 56.31877136230469, |
|
"learning_rate": 5.735492577597841e-06, |
|
"loss": 3.0015, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.012145748987854251, |
|
"grad_norm": 1.2758257389068604, |
|
"learning_rate": 6.0728744939271254e-06, |
|
"loss": 1.7654, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01282051282051282, |
|
"grad_norm": 49.485626220703125, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 1.9578, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.01349527665317139, |
|
"grad_norm": 1.0414538383483887, |
|
"learning_rate": 6.747638326585695e-06, |
|
"loss": 2.0202, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01417004048582996, |
|
"grad_norm": 46.221031188964844, |
|
"learning_rate": 7.0850202429149805e-06, |
|
"loss": 2.0222, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.014844804318488529, |
|
"grad_norm": 5.171656131744385, |
|
"learning_rate": 7.422402159244266e-06, |
|
"loss": 2.3988, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0155195681511471, |
|
"grad_norm": 40.51677703857422, |
|
"learning_rate": 7.75978407557355e-06, |
|
"loss": 1.1011, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.016194331983805668, |
|
"grad_norm": 0.3821451961994171, |
|
"learning_rate": 8.097165991902834e-06, |
|
"loss": 1.7582, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.016869095816464237, |
|
"grad_norm": 56.244895935058594, |
|
"learning_rate": 8.43454790823212e-06, |
|
"loss": 1.628, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.017543859649122806, |
|
"grad_norm": 2.9704697132110596, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 2.4795, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.018218623481781375, |
|
"grad_norm": 2.420311689376831, |
|
"learning_rate": 9.109311740890689e-06, |
|
"loss": 1.2225, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.018893387314439947, |
|
"grad_norm": 3.02461314201355, |
|
"learning_rate": 9.446693657219973e-06, |
|
"loss": 1.813, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.019568151147098516, |
|
"grad_norm": 1.8302630186080933, |
|
"learning_rate": 9.784075573549258e-06, |
|
"loss": 1.6011, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.020242914979757085, |
|
"grad_norm": 62.22663497924805, |
|
"learning_rate": 1.0121457489878542e-05, |
|
"loss": 2.1712, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.020917678812415654, |
|
"grad_norm": 4.137598037719727, |
|
"learning_rate": 1.0458839406207829e-05, |
|
"loss": 1.691, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.021592442645074223, |
|
"grad_norm": 1.1848357915878296, |
|
"learning_rate": 1.0796221322537113e-05, |
|
"loss": 0.8611, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.022267206477732792, |
|
"grad_norm": 48.48101043701172, |
|
"learning_rate": 1.1133603238866398e-05, |
|
"loss": 2.4268, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.022941970310391364, |
|
"grad_norm": 1.995662808418274, |
|
"learning_rate": 1.1470985155195682e-05, |
|
"loss": 1.6822, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.023616734143049933, |
|
"grad_norm": 4.30789041519165, |
|
"learning_rate": 1.1808367071524966e-05, |
|
"loss": 1.5158, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.024291497975708502, |
|
"grad_norm": 2.7685494422912598, |
|
"learning_rate": 1.2145748987854251e-05, |
|
"loss": 0.9964, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.02496626180836707, |
|
"grad_norm": 47.719268798828125, |
|
"learning_rate": 1.2483130904183535e-05, |
|
"loss": 2.2256, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 48.7852783203125, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 1.7263, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02631578947368421, |
|
"grad_norm": 57.43927001953125, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 1.9673, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.02699055330634278, |
|
"grad_norm": 44.42695617675781, |
|
"learning_rate": 1.349527665317139e-05, |
|
"loss": 1.6821, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02766531713900135, |
|
"grad_norm": 26.7230167388916, |
|
"learning_rate": 1.3832658569500675e-05, |
|
"loss": 1.4562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.02834008097165992, |
|
"grad_norm": 39.75962448120117, |
|
"learning_rate": 1.4170040485829961e-05, |
|
"loss": 0.7446, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.029014844804318488, |
|
"grad_norm": 45.954254150390625, |
|
"learning_rate": 1.4507422402159246e-05, |
|
"loss": 1.1802, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.029689608636977057, |
|
"grad_norm": 25.454557418823242, |
|
"learning_rate": 1.4844804318488532e-05, |
|
"loss": 1.1458, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.030364372469635626, |
|
"grad_norm": 39.98874282836914, |
|
"learning_rate": 1.5182186234817813e-05, |
|
"loss": 0.512, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0310391363022942, |
|
"grad_norm": 22.448896408081055, |
|
"learning_rate": 1.55195681511471e-05, |
|
"loss": 1.5049, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.03171390013495277, |
|
"grad_norm": 26.93549346923828, |
|
"learning_rate": 1.5856950067476383e-05, |
|
"loss": 0.7132, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.032388663967611336, |
|
"grad_norm": 21.29535675048828, |
|
"learning_rate": 1.6194331983805668e-05, |
|
"loss": 1.3895, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.033063427800269905, |
|
"grad_norm": 0.4730716645717621, |
|
"learning_rate": 1.6531713900134956e-05, |
|
"loss": 0.1323, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.033738191632928474, |
|
"grad_norm": 6.3616862297058105, |
|
"learning_rate": 1.686909581646424e-05, |
|
"loss": 0.9272, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03441295546558704, |
|
"grad_norm": 0.5018609762191772, |
|
"learning_rate": 1.720647773279352e-05, |
|
"loss": 1.3049, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.03508771929824561, |
|
"grad_norm": 62.65403747558594, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 2.8324, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.03576248313090418, |
|
"grad_norm": 12.991227149963379, |
|
"learning_rate": 1.7881241565452094e-05, |
|
"loss": 0.5474, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.03643724696356275, |
|
"grad_norm": 0.5492640733718872, |
|
"learning_rate": 1.8218623481781378e-05, |
|
"loss": 1.2093, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.037112010796221326, |
|
"grad_norm": 0.07551870495080948, |
|
"learning_rate": 1.8556005398110663e-05, |
|
"loss": 1.2592, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.037786774628879895, |
|
"grad_norm": 1.5378609895706177, |
|
"learning_rate": 1.8893387314439947e-05, |
|
"loss": 0.3359, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 0.23666121065616608, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 1.3151, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.03913630229419703, |
|
"grad_norm": 7.869609832763672, |
|
"learning_rate": 1.9568151147098516e-05, |
|
"loss": 0.6125, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0398110661268556, |
|
"grad_norm": 13.923602104187012, |
|
"learning_rate": 1.99055330634278e-05, |
|
"loss": 1.1772, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.04048582995951417, |
|
"grad_norm": 29.88282585144043, |
|
"learning_rate": 2.0242914979757085e-05, |
|
"loss": 0.1969, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04116059379217274, |
|
"grad_norm": 24.888872146606445, |
|
"learning_rate": 2.058029689608637e-05, |
|
"loss": 0.7039, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.04183535762483131, |
|
"grad_norm": 0.40080440044403076, |
|
"learning_rate": 2.0917678812415657e-05, |
|
"loss": 1.008, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.04251012145748988, |
|
"grad_norm": 4.866868495941162, |
|
"learning_rate": 2.125506072874494e-05, |
|
"loss": 1.0502, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.043184885290148446, |
|
"grad_norm": 1.2502915859222412, |
|
"learning_rate": 2.1592442645074226e-05, |
|
"loss": 0.7422, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.043859649122807015, |
|
"grad_norm": 0.6650214791297913, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.6072, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.044534412955465584, |
|
"grad_norm": 16.356952667236328, |
|
"learning_rate": 2.2267206477732795e-05, |
|
"loss": 0.8198, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.04520917678812416, |
|
"grad_norm": 169.0858154296875, |
|
"learning_rate": 2.260458839406208e-05, |
|
"loss": 1.2371, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.04588394062078273, |
|
"grad_norm": 98.78671264648438, |
|
"learning_rate": 2.2941970310391364e-05, |
|
"loss": 1.1303, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.0465587044534413, |
|
"grad_norm": 99.31029510498047, |
|
"learning_rate": 2.327935222672065e-05, |
|
"loss": 0.7183, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.04723346828609987, |
|
"grad_norm": 30.58230209350586, |
|
"learning_rate": 2.3616734143049933e-05, |
|
"loss": 1.1701, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.047908232118758436, |
|
"grad_norm": 0.05228818207979202, |
|
"learning_rate": 2.395411605937922e-05, |
|
"loss": 0.1544, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.048582995951417005, |
|
"grad_norm": 1.974684715270996, |
|
"learning_rate": 2.4291497975708502e-05, |
|
"loss": 1.4774, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.049257759784075573, |
|
"grad_norm": 0.12068396061658859, |
|
"learning_rate": 2.4628879892037786e-05, |
|
"loss": 0.6994, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.04993252361673414, |
|
"grad_norm": 76.24126434326172, |
|
"learning_rate": 2.496626180836707e-05, |
|
"loss": 1.3533, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.8392857142857143, |
|
"eval_f1": 0.8382276099228692, |
|
"eval_loss": 0.7030884623527527, |
|
"eval_runtime": 74.2993, |
|
"eval_samples_per_second": 1.507, |
|
"eval_steps_per_second": 1.507, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.0006072874493928, |
|
"grad_norm": 0.14856617152690887, |
|
"learning_rate": 2.530364372469636e-05, |
|
"loss": 0.4158, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0012820512820513, |
|
"grad_norm": 0.07458806782960892, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0042, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.0019568151147098, |
|
"grad_norm": 0.08816417306661606, |
|
"learning_rate": 2.5978407557354928e-05, |
|
"loss": 0.0175, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.0026315789473683, |
|
"grad_norm": 0.07340700924396515, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.0039, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.003306342780027, |
|
"grad_norm": 0.08517087250947952, |
|
"learning_rate": 2.66531713900135e-05, |
|
"loss": 0.0075, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.0039811066126856, |
|
"grad_norm": 0.07905049622058868, |
|
"learning_rate": 2.699055330634278e-05, |
|
"loss": 0.0021, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.004655870445344, |
|
"grad_norm": 0.13749797642230988, |
|
"learning_rate": 2.732793522267207e-05, |
|
"loss": 0.7603, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.0053306342780026, |
|
"grad_norm": 0.04107066988945007, |
|
"learning_rate": 2.766531713900135e-05, |
|
"loss": 0.0033, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.0060053981106614, |
|
"grad_norm": 0.05302370712161064, |
|
"learning_rate": 2.8002699055330634e-05, |
|
"loss": 0.0487, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.0066801619433199, |
|
"grad_norm": 0.050035424530506134, |
|
"learning_rate": 2.8340080971659922e-05, |
|
"loss": 0.0166, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.0073549257759784, |
|
"grad_norm": 0.17594772577285767, |
|
"learning_rate": 2.8677462887989203e-05, |
|
"loss": 0.9013, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.008029689608637, |
|
"grad_norm": 5.9323811531066895, |
|
"learning_rate": 2.901484480431849e-05, |
|
"loss": 0.8083, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.0087044534412954, |
|
"grad_norm": 0.2871362566947937, |
|
"learning_rate": 2.9352226720647776e-05, |
|
"loss": 0.6024, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.0093792172739542, |
|
"grad_norm": 0.3136674463748932, |
|
"learning_rate": 2.9689608636977063e-05, |
|
"loss": 0.5028, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.0100539811066127, |
|
"grad_norm": 0.05438687652349472, |
|
"learning_rate": 3.0026990553306344e-05, |
|
"loss": 0.1368, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.0107287449392712, |
|
"grad_norm": 0.05301366746425629, |
|
"learning_rate": 3.0364372469635626e-05, |
|
"loss": 0.1373, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0114035087719297, |
|
"grad_norm": 0.015999358147382736, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.0301, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.0120782726045885, |
|
"grad_norm": 0.014771537855267525, |
|
"learning_rate": 3.10391363022942e-05, |
|
"loss": 0.0195, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.012753036437247, |
|
"grad_norm": 31.934608459472656, |
|
"learning_rate": 3.137651821862348e-05, |
|
"loss": 1.6569, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0134278002699055, |
|
"grad_norm": 0.031412914395332336, |
|
"learning_rate": 3.171390013495277e-05, |
|
"loss": 0.0009, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.014102564102564, |
|
"grad_norm": 0.028489330783486366, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.1698, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.0147773279352226, |
|
"grad_norm": 105.16389465332031, |
|
"learning_rate": 3.2388663967611336e-05, |
|
"loss": 0.1704, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0154520917678813, |
|
"grad_norm": 0.024373585358262062, |
|
"learning_rate": 3.272604588394062e-05, |
|
"loss": 0.6264, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.0161268556005398, |
|
"grad_norm": 0.024133900180459023, |
|
"learning_rate": 3.306342780026991e-05, |
|
"loss": 0.8147, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.0168016194331984, |
|
"grad_norm": 92.91847229003906, |
|
"learning_rate": 3.340080971659919e-05, |
|
"loss": 1.1899, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.0174763832658569, |
|
"grad_norm": 0.3631739616394043, |
|
"learning_rate": 3.373819163292848e-05, |
|
"loss": 1.2713, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0181511470985156, |
|
"grad_norm": 7.820636749267578, |
|
"learning_rate": 3.407557354925776e-05, |
|
"loss": 0.5495, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.0188259109311741, |
|
"grad_norm": 55.31717300415039, |
|
"learning_rate": 3.441295546558704e-05, |
|
"loss": 0.4036, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.0195006747638327, |
|
"grad_norm": 0.013262225314974785, |
|
"learning_rate": 3.4750337381916334e-05, |
|
"loss": 0.0089, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.0201754385964912, |
|
"grad_norm": 0.8780525922775269, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.3444, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.0208502024291497, |
|
"grad_norm": 2.9044814109802246, |
|
"learning_rate": 3.54251012145749e-05, |
|
"loss": 0.0081, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.0215249662618084, |
|
"grad_norm": 0.020421041175723076, |
|
"learning_rate": 3.576248313090419e-05, |
|
"loss": 1.0599, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.022199730094467, |
|
"grad_norm": 0.012594003230333328, |
|
"learning_rate": 3.609986504723347e-05, |
|
"loss": 0.0616, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.0228744939271255, |
|
"grad_norm": 0.018383637070655823, |
|
"learning_rate": 3.6437246963562756e-05, |
|
"loss": 0.9516, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.023549257759784, |
|
"grad_norm": 0.04205102473497391, |
|
"learning_rate": 3.6774628879892034e-05, |
|
"loss": 0.4867, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.0242240215924427, |
|
"grad_norm": 0.022214779630303383, |
|
"learning_rate": 3.7112010796221325e-05, |
|
"loss": 0.0101, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.0248987854251013, |
|
"grad_norm": 0.026110410690307617, |
|
"learning_rate": 3.744939271255061e-05, |
|
"loss": 0.3327, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.0255735492577598, |
|
"grad_norm": 0.16947214305400848, |
|
"learning_rate": 3.7786774628879894e-05, |
|
"loss": 0.6535, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.0262483130904183, |
|
"grad_norm": 0.019961325451731682, |
|
"learning_rate": 3.812415654520918e-05, |
|
"loss": 0.0014, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.0269230769230768, |
|
"grad_norm": 213.16741943359375, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.348, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.0275978407557356, |
|
"grad_norm": 0.26998648047447205, |
|
"learning_rate": 3.879892037786775e-05, |
|
"loss": 1.8138, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.028272604588394, |
|
"grad_norm": 16.201974868774414, |
|
"learning_rate": 3.913630229419703e-05, |
|
"loss": 1.1767, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.0289473684210526, |
|
"grad_norm": 0.46378159523010254, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.3981, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.0296221322537111, |
|
"grad_norm": 0.16117815673351288, |
|
"learning_rate": 3.98110661268556e-05, |
|
"loss": 0.6503, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.0302968960863699, |
|
"grad_norm": 0.09139110147953033, |
|
"learning_rate": 4.014844804318489e-05, |
|
"loss": 0.0009, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.0309716599190284, |
|
"grad_norm": 59.96378707885742, |
|
"learning_rate": 4.048582995951417e-05, |
|
"loss": 2.7966, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.031646423751687, |
|
"grad_norm": 0.1793028563261032, |
|
"learning_rate": 4.082321187584346e-05, |
|
"loss": 0.7813, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.0323211875843454, |
|
"grad_norm": 0.04233807325363159, |
|
"learning_rate": 4.116059379217274e-05, |
|
"loss": 0.0117, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.032995951417004, |
|
"grad_norm": 0.10781926661729813, |
|
"learning_rate": 4.149797570850202e-05, |
|
"loss": 0.0029, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.0336707152496627, |
|
"grad_norm": 0.04887605831027031, |
|
"learning_rate": 4.1835357624831314e-05, |
|
"loss": 0.0023, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0343454790823212, |
|
"grad_norm": 0.0070233517326414585, |
|
"learning_rate": 4.217273954116059e-05, |
|
"loss": 0.5274, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0350202429149797, |
|
"grad_norm": 0.009842370636761189, |
|
"learning_rate": 4.251012145748988e-05, |
|
"loss": 1.1471, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.0356950067476383, |
|
"grad_norm": 117.02069091796875, |
|
"learning_rate": 4.284750337381917e-05, |
|
"loss": 0.0518, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.036369770580297, |
|
"grad_norm": 0.011584924533963203, |
|
"learning_rate": 4.318488529014845e-05, |
|
"loss": 0.0088, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.0370445344129555, |
|
"grad_norm": 0.04845478758215904, |
|
"learning_rate": 4.3522267206477737e-05, |
|
"loss": 0.937, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.037719298245614, |
|
"grad_norm": 12.870345115661621, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.1796, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.0383940620782726, |
|
"grad_norm": 0.18226304650306702, |
|
"learning_rate": 4.4197031039136306e-05, |
|
"loss": 0.7725, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.039068825910931, |
|
"grad_norm": 0.038409680128097534, |
|
"learning_rate": 4.453441295546559e-05, |
|
"loss": 0.5331, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.0397435897435898, |
|
"grad_norm": 1.686890721321106, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.6265, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.0404183535762483, |
|
"grad_norm": 0.009872148744761944, |
|
"learning_rate": 4.520917678812416e-05, |
|
"loss": 1.2371, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.0410931174089069, |
|
"grad_norm": 0.016034213826060295, |
|
"learning_rate": 4.5546558704453443e-05, |
|
"loss": 0.8008, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0417678812415654, |
|
"grad_norm": 161.0729217529297, |
|
"learning_rate": 4.588394062078273e-05, |
|
"loss": 1.6563, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.0424426450742241, |
|
"grad_norm": 0.039535123854875565, |
|
"learning_rate": 4.622132253711201e-05, |
|
"loss": 0.698, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.0431174089068826, |
|
"grad_norm": 0.02719847857952118, |
|
"learning_rate": 4.65587044534413e-05, |
|
"loss": 0.1234, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0437921727395412, |
|
"grad_norm": 79.83929443359375, |
|
"learning_rate": 4.689608636977058e-05, |
|
"loss": 1.275, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0444669365721997, |
|
"grad_norm": 0.2730661928653717, |
|
"learning_rate": 4.7233468286099866e-05, |
|
"loss": 0.4828, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0451417004048582, |
|
"grad_norm": 0.025355026125907898, |
|
"learning_rate": 4.757085020242915e-05, |
|
"loss": 0.1393, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.045816464237517, |
|
"grad_norm": 8.70992374420166, |
|
"learning_rate": 4.790823211875844e-05, |
|
"loss": 0.0709, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0464912280701755, |
|
"grad_norm": 37.11697006225586, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 2.5881, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.047165991902834, |
|
"grad_norm": 52.71913528442383, |
|
"learning_rate": 4.8582995951417004e-05, |
|
"loss": 2.0305, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0478407557354925, |
|
"grad_norm": 1.0200884342193604, |
|
"learning_rate": 4.8920377867746295e-05, |
|
"loss": 0.0119, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0485155195681513, |
|
"grad_norm": 0.16433711349964142, |
|
"learning_rate": 4.925775978407557e-05, |
|
"loss": 1.4951, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.0491902834008098, |
|
"grad_norm": 0.04836498573422432, |
|
"learning_rate": 4.9595141700404864e-05, |
|
"loss": 0.5514, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0498650472334683, |
|
"grad_norm": 0.021334873512387276, |
|
"learning_rate": 4.993252361673414e-05, |
|
"loss": 0.0028, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.8482142857142857, |
|
"eval_f1": 0.8460469703429654, |
|
"eval_loss": 0.7499637603759766, |
|
"eval_runtime": 75.3886, |
|
"eval_samples_per_second": 1.486, |
|
"eval_steps_per_second": 1.486, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 2.000539811066127, |
|
"grad_norm": 0.006392825860530138, |
|
"learning_rate": 4.9970010496326286e-05, |
|
"loss": 0.0052, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.0012145748987855, |
|
"grad_norm": 0.1666487753391266, |
|
"learning_rate": 4.993252361673414e-05, |
|
"loss": 0.0088, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.001889338731444, |
|
"grad_norm": 0.27092501521110535, |
|
"learning_rate": 4.9895036737142004e-05, |
|
"loss": 0.0005, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.0025641025641026, |
|
"grad_norm": 0.01244429126381874, |
|
"learning_rate": 4.985754985754986e-05, |
|
"loss": 1.2941, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.003238866396761, |
|
"grad_norm": 0.07986637949943542, |
|
"learning_rate": 4.9820062977957716e-05, |
|
"loss": 1.3347, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.0039136302294196, |
|
"grad_norm": 0.010807895101606846, |
|
"learning_rate": 4.978257609836557e-05, |
|
"loss": 0.4777, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.004588394062078, |
|
"grad_norm": 0.010884225368499756, |
|
"learning_rate": 4.9745089218773434e-05, |
|
"loss": 1.7184, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.0052631578947366, |
|
"grad_norm": 0.17375628650188446, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.0067, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.0059379217273956, |
|
"grad_norm": 0.006022674031555653, |
|
"learning_rate": 4.9670115459589145e-05, |
|
"loss": 0.0014, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.006612685560054, |
|
"grad_norm": 0.07748937606811523, |
|
"learning_rate": 4.9632628579997e-05, |
|
"loss": 0.2612, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.0072874493927126, |
|
"grad_norm": 0.2620987296104431, |
|
"learning_rate": 4.9595141700404864e-05, |
|
"loss": 0.6517, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.007962213225371, |
|
"grad_norm": 148.11007690429688, |
|
"learning_rate": 4.955765482081271e-05, |
|
"loss": 0.5783, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0086369770580297, |
|
"grad_norm": 0.0034163114614784718, |
|
"learning_rate": 4.9520167941220575e-05, |
|
"loss": 0.0304, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.009311740890688, |
|
"grad_norm": 0.02201319858431816, |
|
"learning_rate": 4.948268106162843e-05, |
|
"loss": 0.3777, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.0099865047233467, |
|
"grad_norm": 0.01761261560022831, |
|
"learning_rate": 4.9445194182036294e-05, |
|
"loss": 0.6914, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.0106612685560052, |
|
"grad_norm": 0.02757342904806137, |
|
"learning_rate": 4.940770730244414e-05, |
|
"loss": 0.001, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.0113360323886638, |
|
"grad_norm": 0.016815010458230972, |
|
"learning_rate": 4.9370220422852005e-05, |
|
"loss": 0.0006, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.0120107962213227, |
|
"grad_norm": 0.7724957466125488, |
|
"learning_rate": 4.933273354325986e-05, |
|
"loss": 0.4395, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.0126855600539812, |
|
"grad_norm": 0.003277893178164959, |
|
"learning_rate": 4.9295246663667724e-05, |
|
"loss": 0.0023, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.0133603238866398, |
|
"grad_norm": 0.010450620204210281, |
|
"learning_rate": 4.925775978407557e-05, |
|
"loss": 0.0003, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.0140350877192983, |
|
"grad_norm": 0.008632444776594639, |
|
"learning_rate": 4.9220272904483435e-05, |
|
"loss": 0.0588, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.014709851551957, |
|
"grad_norm": 0.2135269045829773, |
|
"learning_rate": 4.918278602489129e-05, |
|
"loss": 0.8012, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.0153846153846153, |
|
"grad_norm": 0.006235187407582998, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.0007, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.016059379217274, |
|
"grad_norm": 0.013167057186365128, |
|
"learning_rate": 4.9107812265707e-05, |
|
"loss": 0.0004, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.0167341430499324, |
|
"grad_norm": 0.008585361763834953, |
|
"learning_rate": 4.9070325386114865e-05, |
|
"loss": 0.0006, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.017408906882591, |
|
"grad_norm": 55.19523620605469, |
|
"learning_rate": 4.903283850652272e-05, |
|
"loss": 0.8423, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.01808367071525, |
|
"grad_norm": 0.005840742029249668, |
|
"learning_rate": 4.8995351626930576e-05, |
|
"loss": 0.432, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.0187584345479084, |
|
"grad_norm": 0.007270222995430231, |
|
"learning_rate": 4.895786474733843e-05, |
|
"loss": 0.5094, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.019433198380567, |
|
"grad_norm": 0.013795904815196991, |
|
"learning_rate": 4.8920377867746295e-05, |
|
"loss": 0.6975, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.0201079622132254, |
|
"grad_norm": 0.44005972146987915, |
|
"learning_rate": 4.888289098815415e-05, |
|
"loss": 0.0006, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.020782726045884, |
|
"grad_norm": 0.020803041756153107, |
|
"learning_rate": 4.8845404108562006e-05, |
|
"loss": 0.0003, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.0214574898785425, |
|
"grad_norm": 0.004395525902509689, |
|
"learning_rate": 4.880791722896986e-05, |
|
"loss": 0.0007, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.022132253711201, |
|
"grad_norm": 0.07428783923387527, |
|
"learning_rate": 4.8770430349377725e-05, |
|
"loss": 0.0006, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.0228070175438595, |
|
"grad_norm": 0.007445579394698143, |
|
"learning_rate": 4.8732943469785574e-05, |
|
"loss": 0.0002, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.023481781376518, |
|
"grad_norm": 0.02664661407470703, |
|
"learning_rate": 4.8695456590193436e-05, |
|
"loss": 0.0002, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.024156545209177, |
|
"grad_norm": 0.08112671971321106, |
|
"learning_rate": 4.865796971060129e-05, |
|
"loss": 0.0003, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.0248313090418355, |
|
"grad_norm": 0.002486151410266757, |
|
"learning_rate": 4.862048283100915e-05, |
|
"loss": 0.0018, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.025506072874494, |
|
"grad_norm": 0.00320970406755805, |
|
"learning_rate": 4.8582995951417004e-05, |
|
"loss": 0.2271, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.0261808367071525, |
|
"grad_norm": 0.1994234174489975, |
|
"learning_rate": 4.8545509071824866e-05, |
|
"loss": 0.5385, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.026855600539811, |
|
"grad_norm": 0.0024550287052989006, |
|
"learning_rate": 4.850802219223272e-05, |
|
"loss": 0.0759, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.0275303643724696, |
|
"grad_norm": 0.004535624757409096, |
|
"learning_rate": 4.847053531264058e-05, |
|
"loss": 0.0006, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.028205128205128, |
|
"grad_norm": 0.07630165666341782, |
|
"learning_rate": 4.8433048433048433e-05, |
|
"loss": 0.0002, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.0288798920377866, |
|
"grad_norm": 0.005508648231625557, |
|
"learning_rate": 4.839556155345629e-05, |
|
"loss": 0.0048, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.029554655870445, |
|
"grad_norm": 0.00268650334328413, |
|
"learning_rate": 4.835807467386415e-05, |
|
"loss": 0.021, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.030229419703104, |
|
"grad_norm": 0.6032857894897461, |
|
"learning_rate": 4.832058779427201e-05, |
|
"loss": 0.9845, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0309041835357626, |
|
"grad_norm": 0.0025021624751389027, |
|
"learning_rate": 4.828310091467986e-05, |
|
"loss": 0.0005, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.031578947368421, |
|
"grad_norm": 0.0031213329639285803, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 0.1197, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.0322537112010797, |
|
"grad_norm": 0.011701357550919056, |
|
"learning_rate": 4.820812715549558e-05, |
|
"loss": 0.0004, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.032928475033738, |
|
"grad_norm": 0.002749168314039707, |
|
"learning_rate": 4.817064027590343e-05, |
|
"loss": 0.0001, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.0336032388663967, |
|
"grad_norm": 0.003767299233004451, |
|
"learning_rate": 4.813315339631129e-05, |
|
"loss": 0.0002, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.0342780026990552, |
|
"grad_norm": 0.005788211710751057, |
|
"learning_rate": 4.809566651671915e-05, |
|
"loss": 0.0012, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.0349527665317138, |
|
"grad_norm": 329.865966796875, |
|
"learning_rate": 4.805817963712701e-05, |
|
"loss": 1.4817, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0356275303643723, |
|
"grad_norm": 0.011975220404565334, |
|
"learning_rate": 4.802069275753486e-05, |
|
"loss": 1.3289, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.0363022941970312, |
|
"grad_norm": 0.0021649515256285667, |
|
"learning_rate": 4.798320587794272e-05, |
|
"loss": 0.0055, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.0369770580296898, |
|
"grad_norm": 0.0019632915500551462, |
|
"learning_rate": 4.794571899835058e-05, |
|
"loss": 0.0002, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.0376518218623483, |
|
"grad_norm": 0.005742478650063276, |
|
"learning_rate": 4.790823211875844e-05, |
|
"loss": 0.0011, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.038326585695007, |
|
"grad_norm": 0.009554996155202389, |
|
"learning_rate": 4.787074523916629e-05, |
|
"loss": 0.8594, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.0390013495276653, |
|
"grad_norm": 0.0015004322631284595, |
|
"learning_rate": 4.783325835957415e-05, |
|
"loss": 0.0138, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.039676113360324, |
|
"grad_norm": 0.005102177150547504, |
|
"learning_rate": 4.779577147998201e-05, |
|
"loss": 0.7251, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.0403508771929824, |
|
"grad_norm": 0.0036967694759368896, |
|
"learning_rate": 4.7758284600389865e-05, |
|
"loss": 0.004, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.041025641025641, |
|
"grad_norm": 0.0025739429984241724, |
|
"learning_rate": 4.772079772079772e-05, |
|
"loss": 0.9565, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.0417004048582994, |
|
"grad_norm": 0.006292873062193394, |
|
"learning_rate": 4.768331084120558e-05, |
|
"loss": 0.0182, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.0423751686909584, |
|
"grad_norm": 0.007768746931105852, |
|
"learning_rate": 4.764582396161344e-05, |
|
"loss": 0.4385, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.043049932523617, |
|
"grad_norm": 0.005842685699462891, |
|
"learning_rate": 4.7608337082021294e-05, |
|
"loss": 0.6865, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.0437246963562754, |
|
"grad_norm": 0.003818152705207467, |
|
"learning_rate": 4.757085020242915e-05, |
|
"loss": 0.1049, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.044399460188934, |
|
"grad_norm": 0.0034294927027076483, |
|
"learning_rate": 4.753336332283701e-05, |
|
"loss": 0.0004, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.0450742240215924, |
|
"grad_norm": 0.005487513262778521, |
|
"learning_rate": 4.749587644324487e-05, |
|
"loss": 0.2808, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.045748987854251, |
|
"grad_norm": 0.004234930034726858, |
|
"learning_rate": 4.7458389563652724e-05, |
|
"loss": 0.0096, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.0464237516869095, |
|
"grad_norm": 0.004304991569370031, |
|
"learning_rate": 4.742090268406058e-05, |
|
"loss": 1.0463, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.047098515519568, |
|
"grad_norm": 0.06642390042543411, |
|
"learning_rate": 4.738341580446844e-05, |
|
"loss": 0.0003, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.0477732793522265, |
|
"grad_norm": 21.008607864379883, |
|
"learning_rate": 4.73459289248763e-05, |
|
"loss": 0.0073, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.0484480431848855, |
|
"grad_norm": 0.003075533313676715, |
|
"learning_rate": 4.7308442045284154e-05, |
|
"loss": 0.6295, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.049122807017544, |
|
"grad_norm": 0.002309370320290327, |
|
"learning_rate": 4.727095516569201e-05, |
|
"loss": 0.1005, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.0497975708502025, |
|
"grad_norm": 0.0027971486561000347, |
|
"learning_rate": 4.7233468286099866e-05, |
|
"loss": 0.0021, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_accuracy": 0.8839285714285714, |
|
"eval_f1": 0.882116388637625, |
|
"eval_loss": 0.5603616833686829, |
|
"eval_runtime": 74.4272, |
|
"eval_samples_per_second": 1.505, |
|
"eval_steps_per_second": 1.505, |
|
"step": 2223 |
|
}, |
|
{ |
|
"epoch": 3.000472334682861, |
|
"grad_norm": 0.01166750118136406, |
|
"learning_rate": 4.719598140650772e-05, |
|
"loss": 0.0163, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.0011470985155198, |
|
"grad_norm": 96.65850067138672, |
|
"learning_rate": 4.7158494526915584e-05, |
|
"loss": 1.6819, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.0018218623481783, |
|
"grad_norm": 0.05787191540002823, |
|
"learning_rate": 4.712100764732344e-05, |
|
"loss": 0.6805, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.002496626180837, |
|
"grad_norm": 0.0013539530336856842, |
|
"learning_rate": 4.7083520767731296e-05, |
|
"loss": 0.0011, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.0031713900134953, |
|
"grad_norm": 0.010776277631521225, |
|
"learning_rate": 4.704603388813915e-05, |
|
"loss": 0.0148, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.003846153846154, |
|
"grad_norm": 0.024213161319494247, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.0008, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.0045209176788124, |
|
"grad_norm": 0.0691986232995987, |
|
"learning_rate": 4.697106012895487e-05, |
|
"loss": 0.006, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.005195681511471, |
|
"grad_norm": 0.009089670144021511, |
|
"learning_rate": 4.6933573249362725e-05, |
|
"loss": 0.6432, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.0058704453441294, |
|
"grad_norm": 0.005548300687223673, |
|
"learning_rate": 4.689608636977058e-05, |
|
"loss": 0.0018, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.006545209176788, |
|
"grad_norm": 0.006319984793663025, |
|
"learning_rate": 4.685859949017844e-05, |
|
"loss": 0.0001, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.007219973009447, |
|
"grad_norm": 0.007898062467575073, |
|
"learning_rate": 4.68211126105863e-05, |
|
"loss": 0.0008, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.0078947368421054, |
|
"grad_norm": 0.003347884165123105, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.0002, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.008569500674764, |
|
"grad_norm": 0.009431365877389908, |
|
"learning_rate": 4.674613885140201e-05, |
|
"loss": 0.0001, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.0092442645074224, |
|
"grad_norm": 0.006901255808770657, |
|
"learning_rate": 4.670865197180987e-05, |
|
"loss": 0.0001, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.009919028340081, |
|
"grad_norm": 0.00315679213963449, |
|
"learning_rate": 4.667116509221773e-05, |
|
"loss": 0.0002, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.0105937921727395, |
|
"grad_norm": 0.21266283094882965, |
|
"learning_rate": 4.663367821262558e-05, |
|
"loss": 0.0006, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.011268556005398, |
|
"grad_norm": 0.004384478088468313, |
|
"learning_rate": 4.659619133303344e-05, |
|
"loss": 0.0006, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.0119433198380565, |
|
"grad_norm": 0.013708599843084812, |
|
"learning_rate": 4.65587044534413e-05, |
|
"loss": 0.2589, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.012618083670715, |
|
"grad_norm": 308.8554992675781, |
|
"learning_rate": 4.652121757384916e-05, |
|
"loss": 1.1215, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.013292847503374, |
|
"grad_norm": 0.0031652101315557957, |
|
"learning_rate": 4.648373069425701e-05, |
|
"loss": 0.5235, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.0139676113360325, |
|
"grad_norm": 0.00223003257997334, |
|
"learning_rate": 4.644624381466487e-05, |
|
"loss": 0.0001, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.014642375168691, |
|
"grad_norm": 0.0067682513035833836, |
|
"learning_rate": 4.640875693507273e-05, |
|
"loss": 0.0001, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.0153171390013496, |
|
"grad_norm": 0.0025887356605380774, |
|
"learning_rate": 4.637127005548059e-05, |
|
"loss": 0.0003, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.015991902834008, |
|
"grad_norm": 0.0077194697223603725, |
|
"learning_rate": 4.633378317588844e-05, |
|
"loss": 0.6217, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.0166666666666666, |
|
"grad_norm": 0.03473236411809921, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.8179, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.017341430499325, |
|
"grad_norm": 0.014423678629100323, |
|
"learning_rate": 4.6258809416704157e-05, |
|
"loss": 0.2263, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.0180161943319836, |
|
"grad_norm": 0.006188780535012484, |
|
"learning_rate": 4.622132253711201e-05, |
|
"loss": 0.6701, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.018690958164642, |
|
"grad_norm": 0.35851019620895386, |
|
"learning_rate": 4.618383565751987e-05, |
|
"loss": 0.0008, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.019365721997301, |
|
"grad_norm": 0.0032032101880759, |
|
"learning_rate": 4.614634877792773e-05, |
|
"loss": 0.0137, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.0200404858299597, |
|
"grad_norm": 0.006460473407059908, |
|
"learning_rate": 4.6108861898335586e-05, |
|
"loss": 0.0016, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.020715249662618, |
|
"grad_norm": 0.0026447370182722807, |
|
"learning_rate": 4.607137501874344e-05, |
|
"loss": 0.0014, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.0213900134952767, |
|
"grad_norm": 0.0030527382623404264, |
|
"learning_rate": 4.60338881391513e-05, |
|
"loss": 0.0001, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.022064777327935, |
|
"grad_norm": 0.007262419909238815, |
|
"learning_rate": 4.599640125955916e-05, |
|
"loss": 0.0001, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.0227395411605937, |
|
"grad_norm": 0.0038091035094112158, |
|
"learning_rate": 4.5958914379967016e-05, |
|
"loss": 0.2222, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.0234143049932523, |
|
"grad_norm": 0.0035387033130973577, |
|
"learning_rate": 4.592142750037487e-05, |
|
"loss": 0.8073, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.0240890688259108, |
|
"grad_norm": 0.0033677646424621344, |
|
"learning_rate": 4.588394062078273e-05, |
|
"loss": 0.0006, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.0247638326585693, |
|
"grad_norm": 0.006484442390501499, |
|
"learning_rate": 4.5846453741190584e-05, |
|
"loss": 0.0004, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.0254385964912283, |
|
"grad_norm": 0.010489704087376595, |
|
"learning_rate": 4.580896686159844e-05, |
|
"loss": 0.0009, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.026113360323887, |
|
"grad_norm": 0.0032699282746762037, |
|
"learning_rate": 4.57714799820063e-05, |
|
"loss": 0.0039, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.0267881241565453, |
|
"grad_norm": 97.59777069091797, |
|
"learning_rate": 4.573399310241416e-05, |
|
"loss": 1.8702, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.027462887989204, |
|
"grad_norm": 0.05291153863072395, |
|
"learning_rate": 4.5696506222822014e-05, |
|
"loss": 0.9784, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.0281376518218623, |
|
"grad_norm": 0.0015122004551813006, |
|
"learning_rate": 4.565901934322987e-05, |
|
"loss": 0.0008, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.028812415654521, |
|
"grad_norm": 0.10103687644004822, |
|
"learning_rate": 4.5621532463637725e-05, |
|
"loss": 0.0013, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.0294871794871794, |
|
"grad_norm": 0.002090906724333763, |
|
"learning_rate": 4.558404558404559e-05, |
|
"loss": 0.0005, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.030161943319838, |
|
"grad_norm": 0.0011990441707894206, |
|
"learning_rate": 4.5546558704453443e-05, |
|
"loss": 0.772, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.0308367071524964, |
|
"grad_norm": 0.0113350385800004, |
|
"learning_rate": 4.55090718248613e-05, |
|
"loss": 0.0039, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.0315114709851554, |
|
"grad_norm": 379.9673156738281, |
|
"learning_rate": 4.5471584945269155e-05, |
|
"loss": 1.1636, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.032186234817814, |
|
"grad_norm": 0.03890157490968704, |
|
"learning_rate": 4.543409806567702e-05, |
|
"loss": 1.1822, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.0328609986504724, |
|
"grad_norm": 0.0033322779927402735, |
|
"learning_rate": 4.5396611186084866e-05, |
|
"loss": 0.0014, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.033535762483131, |
|
"grad_norm": 0.6530995965003967, |
|
"learning_rate": 4.535912430649273e-05, |
|
"loss": 0.6605, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.0342105263157895, |
|
"grad_norm": 0.03727166727185249, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 0.9511, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.034885290148448, |
|
"grad_norm": 0.0015920967562124133, |
|
"learning_rate": 4.528415054730845e-05, |
|
"loss": 0.0008, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.0355600539811065, |
|
"grad_norm": 0.08293965458869934, |
|
"learning_rate": 4.5246663667716296e-05, |
|
"loss": 0.0007, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.036234817813765, |
|
"grad_norm": 0.04548066109418869, |
|
"learning_rate": 4.520917678812416e-05, |
|
"loss": 0.0015, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.0369095816464236, |
|
"grad_norm": 0.011057593859732151, |
|
"learning_rate": 4.5171689908532015e-05, |
|
"loss": 0.6973, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.0375843454790825, |
|
"grad_norm": 113.07095336914062, |
|
"learning_rate": 4.513420302893988e-05, |
|
"loss": 0.3667, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.038259109311741, |
|
"grad_norm": 0.0016718521947041154, |
|
"learning_rate": 4.5096716149347726e-05, |
|
"loss": 0.6746, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.0389338731443996, |
|
"grad_norm": 0.006011700723320246, |
|
"learning_rate": 4.505922926975559e-05, |
|
"loss": 1.4009, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.039608636977058, |
|
"grad_norm": 0.0032539258245378733, |
|
"learning_rate": 4.5021742390163445e-05, |
|
"loss": 0.005, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.0402834008097166, |
|
"grad_norm": 0.024762948974967003, |
|
"learning_rate": 4.498425551057131e-05, |
|
"loss": 0.0007, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.040958164642375, |
|
"grad_norm": 0.008271398954093456, |
|
"learning_rate": 4.4946768630979156e-05, |
|
"loss": 0.0004, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.0416329284750336, |
|
"grad_norm": 0.0073724472895264626, |
|
"learning_rate": 4.490928175138702e-05, |
|
"loss": 0.7153, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.042307692307692, |
|
"grad_norm": 0.01329676155000925, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.1339, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.0429824561403507, |
|
"grad_norm": 0.00492237601429224, |
|
"learning_rate": 4.483430799220273e-05, |
|
"loss": 0.7432, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.0436572199730096, |
|
"grad_norm": 0.006463408935815096, |
|
"learning_rate": 4.4796821112610586e-05, |
|
"loss": 0.0007, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.044331983805668, |
|
"grad_norm": 0.0007826614892110229, |
|
"learning_rate": 4.475933423301845e-05, |
|
"loss": 0.5263, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.0450067476383267, |
|
"grad_norm": 0.0012907817726954818, |
|
"learning_rate": 4.4721847353426304e-05, |
|
"loss": 0.0017, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.045681511470985, |
|
"grad_norm": 0.0011142657604068518, |
|
"learning_rate": 4.468436047383416e-05, |
|
"loss": 0.0004, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0463562753036437, |
|
"grad_norm": 0.0039123659953475, |
|
"learning_rate": 4.4646873594242016e-05, |
|
"loss": 0.025, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.0470310391363022, |
|
"grad_norm": 0.006876886822283268, |
|
"learning_rate": 4.460938671464988e-05, |
|
"loss": 0.5972, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.0477058029689608, |
|
"grad_norm": 0.0013078979682177305, |
|
"learning_rate": 4.4571899835057734e-05, |
|
"loss": 0.0216, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.0483805668016193, |
|
"grad_norm": 0.01804491877555847, |
|
"learning_rate": 4.453441295546559e-05, |
|
"loss": 0.0025, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.049055330634278, |
|
"grad_norm": 0.0017017913050949574, |
|
"learning_rate": 4.4496926075873446e-05, |
|
"loss": 0.1553, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.0497300944669368, |
|
"grad_norm": 0.004222176969051361, |
|
"learning_rate": 4.445943919628131e-05, |
|
"loss": 0.0002, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_accuracy": 0.9017857142857143, |
|
"eval_f1": 0.900079642364192, |
|
"eval_loss": 0.3880017399787903, |
|
"eval_runtime": 72.9967, |
|
"eval_samples_per_second": 1.534, |
|
"eval_steps_per_second": 1.534, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 4.0004048582995955, |
|
"grad_norm": 0.0011517743114382029, |
|
"learning_rate": 4.442195231668916e-05, |
|
"loss": 0.4772, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.001079622132254, |
|
"grad_norm": 0.0008661440806463361, |
|
"learning_rate": 4.438446543709702e-05, |
|
"loss": 0.0001, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.0017543859649125, |
|
"grad_norm": 0.005399093497544527, |
|
"learning_rate": 4.4346978557504876e-05, |
|
"loss": 0.0033, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.002429149797571, |
|
"grad_norm": 0.0038267234340310097, |
|
"learning_rate": 4.430949167791273e-05, |
|
"loss": 0.0005, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.0031039136302295, |
|
"grad_norm": 0.0029461942613124847, |
|
"learning_rate": 4.427200479832059e-05, |
|
"loss": 0.0002, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.003778677462888, |
|
"grad_norm": 0.0006391266360878944, |
|
"learning_rate": 4.423451791872845e-05, |
|
"loss": 0.0001, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.004453441295547, |
|
"grad_norm": 0.004189279396086931, |
|
"learning_rate": 4.4197031039136306e-05, |
|
"loss": 0.0001, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.005128205128205, |
|
"grad_norm": 0.0011289932299405336, |
|
"learning_rate": 4.415954415954416e-05, |
|
"loss": 0.0001, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.005802968960864, |
|
"grad_norm": 0.0023520805407315493, |
|
"learning_rate": 4.412205727995202e-05, |
|
"loss": 0.0001, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.006477732793522, |
|
"grad_norm": 0.0018153834389522672, |
|
"learning_rate": 4.408457040035987e-05, |
|
"loss": 0.8745, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.007152496626181, |
|
"grad_norm": 0.001743017346598208, |
|
"learning_rate": 4.4047083520767735e-05, |
|
"loss": 0.0003, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.007827260458839, |
|
"grad_norm": 0.002831714926287532, |
|
"learning_rate": 4.400959664117559e-05, |
|
"loss": 0.0066, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.008502024291498, |
|
"grad_norm": 0.005015307106077671, |
|
"learning_rate": 4.397210976158345e-05, |
|
"loss": 0.1127, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 4.009176788124156, |
|
"grad_norm": 0.0019009409006685019, |
|
"learning_rate": 4.39346228819913e-05, |
|
"loss": 0.001, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.009851551956815, |
|
"grad_norm": 0.0011994624510407448, |
|
"learning_rate": 4.3897136002399165e-05, |
|
"loss": 0.8256, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 4.010526315789473, |
|
"grad_norm": 0.002758684800937772, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.0002, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.011201079622133, |
|
"grad_norm": 0.014079189859330654, |
|
"learning_rate": 4.382216224321488e-05, |
|
"loss": 0.0001, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.011875843454791, |
|
"grad_norm": 0.001694743288680911, |
|
"learning_rate": 4.378467536362273e-05, |
|
"loss": 0.0001, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.01255060728745, |
|
"grad_norm": 0.005108845420181751, |
|
"learning_rate": 4.3747188484030595e-05, |
|
"loss": 0.0001, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.013225371120108, |
|
"grad_norm": 0.0009567590313963592, |
|
"learning_rate": 4.3709701604438444e-05, |
|
"loss": 0.0003, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.013900134952767, |
|
"grad_norm": 0.005206429865211248, |
|
"learning_rate": 4.367221472484631e-05, |
|
"loss": 0.0139, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.014574898785425, |
|
"grad_norm": 0.0010895140003412962, |
|
"learning_rate": 4.363472784525416e-05, |
|
"loss": 0.0001, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.015249662618084, |
|
"grad_norm": 0.0026008691638708115, |
|
"learning_rate": 4.3597240965662025e-05, |
|
"loss": 0.0002, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.015924426450742, |
|
"grad_norm": 0.00945541262626648, |
|
"learning_rate": 4.3559754086069874e-05, |
|
"loss": 0.0001, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.016599190283401, |
|
"grad_norm": 0.002652823692187667, |
|
"learning_rate": 4.3522267206477737e-05, |
|
"loss": 0.0003, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.017273954116059, |
|
"grad_norm": 0.011731209233403206, |
|
"learning_rate": 4.348478032688559e-05, |
|
"loss": 0.0001, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.017948717948718, |
|
"grad_norm": 0.002854161197319627, |
|
"learning_rate": 4.344729344729345e-05, |
|
"loss": 0.0001, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.018623481781376, |
|
"grad_norm": 0.0006263653049245477, |
|
"learning_rate": 4.3409806567701304e-05, |
|
"loss": 0.0002, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.019298245614035, |
|
"grad_norm": 0.008615193888545036, |
|
"learning_rate": 4.3372319688109166e-05, |
|
"loss": 0.7675, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.0199730094466934, |
|
"grad_norm": 0.0012555683497339487, |
|
"learning_rate": 4.333483280851702e-05, |
|
"loss": 0.0001, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.020647773279352, |
|
"grad_norm": 0.0026209617499262094, |
|
"learning_rate": 4.329734592892488e-05, |
|
"loss": 0.0001, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.0213225371120105, |
|
"grad_norm": 0.0008131062495522201, |
|
"learning_rate": 4.3259859049332734e-05, |
|
"loss": 0.495, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.021997300944669, |
|
"grad_norm": 0.004160483367741108, |
|
"learning_rate": 4.3222372169740596e-05, |
|
"loss": 0.0001, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.0226720647773275, |
|
"grad_norm": 0.00135552987921983, |
|
"learning_rate": 4.318488529014845e-05, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.023346828609987, |
|
"grad_norm": 0.0020715997088700533, |
|
"learning_rate": 4.314739841055631e-05, |
|
"loss": 0.0001, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.0240215924426455, |
|
"grad_norm": 0.0006134248687885702, |
|
"learning_rate": 4.3109911530964164e-05, |
|
"loss": 0.0003, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 4.024696356275304, |
|
"grad_norm": 0.005337740760296583, |
|
"learning_rate": 4.3072424651372026e-05, |
|
"loss": 0.0002, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 4.0253711201079625, |
|
"grad_norm": 0.002447796519845724, |
|
"learning_rate": 4.303493777177988e-05, |
|
"loss": 0.0013, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.026045883940621, |
|
"grad_norm": 0.0020753496792167425, |
|
"learning_rate": 4.299745089218774e-05, |
|
"loss": 0.0001, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.0267206477732795, |
|
"grad_norm": 0.001169373164884746, |
|
"learning_rate": 4.2959964012595594e-05, |
|
"loss": 0.4363, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.027395411605938, |
|
"grad_norm": 0.0031577907502651215, |
|
"learning_rate": 4.292247713300345e-05, |
|
"loss": 0.4359, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.028070175438597, |
|
"grad_norm": 0.0011828079586848617, |
|
"learning_rate": 4.2884990253411305e-05, |
|
"loss": 0.0001, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.028744939271255, |
|
"grad_norm": 0.0016030353726819158, |
|
"learning_rate": 4.284750337381917e-05, |
|
"loss": 0.0001, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.029419703103914, |
|
"grad_norm": 0.014403590932488441, |
|
"learning_rate": 4.2810016494227023e-05, |
|
"loss": 0.7807, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.030094466936572, |
|
"grad_norm": 0.005019639153033495, |
|
"learning_rate": 4.277252961463488e-05, |
|
"loss": 0.4727, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.030769230769231, |
|
"grad_norm": 0.002246898366138339, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.0499, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.031443994601889, |
|
"grad_norm": 0.013324781320989132, |
|
"learning_rate": 4.269755585545059e-05, |
|
"loss": 0.5992, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.032118758434548, |
|
"grad_norm": 0.0579649917781353, |
|
"learning_rate": 4.266006897585845e-05, |
|
"loss": 0.0039, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.032793522267206, |
|
"grad_norm": 1.7032642364501953, |
|
"learning_rate": 4.262258209626631e-05, |
|
"loss": 0.6145, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.033468286099865, |
|
"grad_norm": 0.013759407214820385, |
|
"learning_rate": 4.2585095216674165e-05, |
|
"loss": 0.0002, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 4.034143049932523, |
|
"grad_norm": 0.00753359729424119, |
|
"learning_rate": 4.254760833708202e-05, |
|
"loss": 0.0071, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 4.034817813765182, |
|
"grad_norm": 0.0020441561937332153, |
|
"learning_rate": 4.251012145748988e-05, |
|
"loss": 0.001, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 4.035492577597841, |
|
"grad_norm": 0.001379093388095498, |
|
"learning_rate": 4.247263457789773e-05, |
|
"loss": 0.0013, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 4.0361673414305, |
|
"grad_norm": 0.002510966034606099, |
|
"learning_rate": 4.2435147698305595e-05, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.036842105263158, |
|
"grad_norm": 0.0011007965076714754, |
|
"learning_rate": 4.239766081871345e-05, |
|
"loss": 1.0836, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 4.037516869095817, |
|
"grad_norm": 0.022373057901859283, |
|
"learning_rate": 4.236017393912131e-05, |
|
"loss": 0.0838, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 4.038191632928475, |
|
"grad_norm": 0.0008921432308852673, |
|
"learning_rate": 4.232268705952916e-05, |
|
"loss": 0.0001, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 4.038866396761134, |
|
"grad_norm": 0.0007166191353462636, |
|
"learning_rate": 4.2285200179937025e-05, |
|
"loss": 0.0076, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 4.039541160593792, |
|
"grad_norm": 8.101381301879883, |
|
"learning_rate": 4.224771330034488e-05, |
|
"loss": 0.0233, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 4.040215924426451, |
|
"grad_norm": 0.0007625047001056373, |
|
"learning_rate": 4.221022642075274e-05, |
|
"loss": 0.0003, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 4.040890688259109, |
|
"grad_norm": 0.00398569880053401, |
|
"learning_rate": 4.217273954116059e-05, |
|
"loss": 0.0001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 4.041565452091768, |
|
"grad_norm": 0.0010361782042309642, |
|
"learning_rate": 4.2135252661568455e-05, |
|
"loss": 0.0001, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 4.042240215924426, |
|
"grad_norm": 0.001946108415722847, |
|
"learning_rate": 4.209776578197631e-05, |
|
"loss": 0.0004, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 4.042914979757085, |
|
"grad_norm": 0.003806932596489787, |
|
"learning_rate": 4.2060278902384166e-05, |
|
"loss": 0.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.043589743589743, |
|
"grad_norm": 0.0009996455628424883, |
|
"learning_rate": 4.202279202279202e-05, |
|
"loss": 0.0002, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 4.044264507422402, |
|
"grad_norm": 0.0016769858775660396, |
|
"learning_rate": 4.1985305143199884e-05, |
|
"loss": 0.0001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 4.0449392712550605, |
|
"grad_norm": 0.00047590630128979683, |
|
"learning_rate": 4.194781826360774e-05, |
|
"loss": 0.0001, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 4.045614035087719, |
|
"grad_norm": 0.0010459835175424814, |
|
"learning_rate": 4.1910331384015596e-05, |
|
"loss": 0.3976, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 4.0462887989203775, |
|
"grad_norm": 0.003536689095199108, |
|
"learning_rate": 4.187284450442345e-05, |
|
"loss": 0.5592, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 4.046963562753036, |
|
"grad_norm": 0.004078584257513285, |
|
"learning_rate": 4.1835357624831314e-05, |
|
"loss": 0.2639, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 4.0476383265856954, |
|
"grad_norm": 0.01091256644576788, |
|
"learning_rate": 4.179787074523917e-05, |
|
"loss": 0.0001, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 4.048313090418354, |
|
"grad_norm": 0.0032140237744897604, |
|
"learning_rate": 4.1760383865647026e-05, |
|
"loss": 0.2047, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 4.0489878542510125, |
|
"grad_norm": 0.003986234311014414, |
|
"learning_rate": 4.172289698605488e-05, |
|
"loss": 0.0019, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 4.049662618083671, |
|
"grad_norm": 0.0013649433385580778, |
|
"learning_rate": 4.1685410106462744e-05, |
|
"loss": 0.0001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_f1": 0.9284473859473861, |
|
"eval_loss": 0.43087735772132874, |
|
"eval_runtime": 74.3247, |
|
"eval_samples_per_second": 1.507, |
|
"eval_steps_per_second": 1.507, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 5.00033738191633, |
|
"grad_norm": 0.0009709022124297917, |
|
"learning_rate": 4.16479232268706e-05, |
|
"loss": 0.0001, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 5.001012145748988, |
|
"grad_norm": 0.00450406176969409, |
|
"learning_rate": 4.1610436347278456e-05, |
|
"loss": 0.0001, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 5.001686909581647, |
|
"grad_norm": 490.396240234375, |
|
"learning_rate": 4.157294946768631e-05, |
|
"loss": 0.3041, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 5.002361673414305, |
|
"grad_norm": 0.00026446336414664984, |
|
"learning_rate": 4.153546258809417e-05, |
|
"loss": 0.0001, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 5.003036437246964, |
|
"grad_norm": 0.0011977544054389, |
|
"learning_rate": 4.149797570850202e-05, |
|
"loss": 0.0001, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 5.003711201079622, |
|
"grad_norm": 0.0008563337032683194, |
|
"learning_rate": 4.1460488828909886e-05, |
|
"loss": 0.6888, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 5.004385964912281, |
|
"grad_norm": 0.0008433638722635806, |
|
"learning_rate": 4.142300194931774e-05, |
|
"loss": 0.0003, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 5.005060728744939, |
|
"grad_norm": 0.0007336140261031687, |
|
"learning_rate": 4.13855150697256e-05, |
|
"loss": 0.5238, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 5.005735492577598, |
|
"grad_norm": 0.0012576148146763444, |
|
"learning_rate": 4.134802819013345e-05, |
|
"loss": 0.0023, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 5.006410256410256, |
|
"grad_norm": 0.0009189122938551009, |
|
"learning_rate": 4.131054131054131e-05, |
|
"loss": 0.0131, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.007085020242915, |
|
"grad_norm": 0.008739179000258446, |
|
"learning_rate": 4.127305443094917e-05, |
|
"loss": 0.0003, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 5.007759784075573, |
|
"grad_norm": 0.0012460118159651756, |
|
"learning_rate": 4.123556755135703e-05, |
|
"loss": 0.0001, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 5.008434547908232, |
|
"grad_norm": 0.002039340790361166, |
|
"learning_rate": 4.119808067176488e-05, |
|
"loss": 0.0003, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 5.0091093117408905, |
|
"grad_norm": 0.0009501971653662622, |
|
"learning_rate": 4.116059379217274e-05, |
|
"loss": 0.0052, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 5.009784075573549, |
|
"grad_norm": 0.07869889587163925, |
|
"learning_rate": 4.11231069125806e-05, |
|
"loss": 0.0002, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 5.0104588394062075, |
|
"grad_norm": 0.0006638221675530076, |
|
"learning_rate": 4.108562003298845e-05, |
|
"loss": 0.0005, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 5.011133603238866, |
|
"grad_norm": 0.0008539034170098603, |
|
"learning_rate": 4.104813315339631e-05, |
|
"loss": 0.0001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 5.0118083670715246, |
|
"grad_norm": 0.0006605815142393112, |
|
"learning_rate": 4.101064627380417e-05, |
|
"loss": 0.0004, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 5.012483130904184, |
|
"grad_norm": 0.0008256967412307858, |
|
"learning_rate": 4.097315939421203e-05, |
|
"loss": 0.0001, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 5.0131578947368425, |
|
"grad_norm": 0.008075601421296597, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.0018, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.013832658569501, |
|
"grad_norm": 0.0012110425159335136, |
|
"learning_rate": 4.089818563502774e-05, |
|
"loss": 0.0011, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 5.0145074224021595, |
|
"grad_norm": 0.0048310281708836555, |
|
"learning_rate": 4.08606987554356e-05, |
|
"loss": 0.0001, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 5.015182186234818, |
|
"grad_norm": 0.0012771515175700188, |
|
"learning_rate": 4.082321187584346e-05, |
|
"loss": 0.0003, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 5.015856950067477, |
|
"grad_norm": 0.0013642838457599282, |
|
"learning_rate": 4.078572499625131e-05, |
|
"loss": 0.0001, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 5.016531713900135, |
|
"grad_norm": 311.0769348144531, |
|
"learning_rate": 4.074823811665917e-05, |
|
"loss": 0.7081, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 5.017206477732794, |
|
"grad_norm": 0.002835233462974429, |
|
"learning_rate": 4.071075123706703e-05, |
|
"loss": 0.0003, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 5.017881241565452, |
|
"grad_norm": 0.0006811009370721877, |
|
"learning_rate": 4.067326435747489e-05, |
|
"loss": 0.4166, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 5.018556005398111, |
|
"grad_norm": 0.0010262362193316221, |
|
"learning_rate": 4.063577747788274e-05, |
|
"loss": 0.0001, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 5.019230769230769, |
|
"grad_norm": 0.11619503796100616, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.0002, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 5.019905533063428, |
|
"grad_norm": 0.011183816939592361, |
|
"learning_rate": 4.056080371869846e-05, |
|
"loss": 0.0006, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.020580296896086, |
|
"grad_norm": 0.0007078946800902486, |
|
"learning_rate": 4.0523316839106314e-05, |
|
"loss": 0.0004, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 5.021255060728745, |
|
"grad_norm": 0.008296789601445198, |
|
"learning_rate": 4.048582995951417e-05, |
|
"loss": 0.0134, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 5.021929824561403, |
|
"grad_norm": 0.013501118868589401, |
|
"learning_rate": 4.044834307992203e-05, |
|
"loss": 0.0003, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 5.022604588394062, |
|
"grad_norm": 0.15977753698825836, |
|
"learning_rate": 4.041085620032989e-05, |
|
"loss": 0.0001, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 5.02327935222672, |
|
"grad_norm": 0.004472650587558746, |
|
"learning_rate": 4.0373369320737744e-05, |
|
"loss": 0.0032, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 5.023954116059379, |
|
"grad_norm": 0.0012224495876580477, |
|
"learning_rate": 4.03358824411456e-05, |
|
"loss": 0.0, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 5.024628879892038, |
|
"grad_norm": 0.0016181441023945808, |
|
"learning_rate": 4.029839556155346e-05, |
|
"loss": 0.7806, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 5.025303643724697, |
|
"grad_norm": 0.004258355125784874, |
|
"learning_rate": 4.026090868196132e-05, |
|
"loss": 0.0, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 5.025978407557355, |
|
"grad_norm": 0.0011408330174162984, |
|
"learning_rate": 4.0223421802369174e-05, |
|
"loss": 0.0001, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 5.026653171390014, |
|
"grad_norm": 0.010054398328065872, |
|
"learning_rate": 4.018593492277703e-05, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.027327935222672, |
|
"grad_norm": 0.0009806094458326697, |
|
"learning_rate": 4.014844804318489e-05, |
|
"loss": 0.0001, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 5.028002699055331, |
|
"grad_norm": 0.0007722462760284543, |
|
"learning_rate": 4.011096116359274e-05, |
|
"loss": 0.0003, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 5.028677462887989, |
|
"grad_norm": 0.01538068987429142, |
|
"learning_rate": 4.0073474284000604e-05, |
|
"loss": 0.6961, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 5.029352226720648, |
|
"grad_norm": 0.00021896508405916393, |
|
"learning_rate": 4.003598740440846e-05, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 5.030026990553306, |
|
"grad_norm": 0.0006867019692435861, |
|
"learning_rate": 3.9998500524816315e-05, |
|
"loss": 0.0, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 5.030701754385965, |
|
"grad_norm": 0.0021174189168959856, |
|
"learning_rate": 3.996101364522417e-05, |
|
"loss": 0.0, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 5.031376518218623, |
|
"grad_norm": 0.0005668731173500419, |
|
"learning_rate": 3.992352676563203e-05, |
|
"loss": 0.0, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 5.032051282051282, |
|
"grad_norm": 0.0007015119190327823, |
|
"learning_rate": 3.988603988603989e-05, |
|
"loss": 0.4088, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.0327260458839405, |
|
"grad_norm": 0.007248507812619209, |
|
"learning_rate": 3.9848553006447745e-05, |
|
"loss": 0.0212, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 5.033400809716599, |
|
"grad_norm": 0.0023328044917434454, |
|
"learning_rate": 3.98110661268556e-05, |
|
"loss": 0.0001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 5.0340755735492575, |
|
"grad_norm": 0.0011781149078160524, |
|
"learning_rate": 3.9773579247263456e-05, |
|
"loss": 0.0001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 5.034750337381916, |
|
"grad_norm": 0.000842131907120347, |
|
"learning_rate": 3.973609236767132e-05, |
|
"loss": 0.0001, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 5.0354251012145745, |
|
"grad_norm": 0.0013578764628618956, |
|
"learning_rate": 3.9698605488079175e-05, |
|
"loss": 0.0001, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 5.036099865047233, |
|
"grad_norm": 0.0005201473250053823, |
|
"learning_rate": 3.966111860848703e-05, |
|
"loss": 0.0001, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 5.0367746288798925, |
|
"grad_norm": 0.0011828228598460555, |
|
"learning_rate": 3.9623631728894886e-05, |
|
"loss": 0.0065, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 5.037449392712551, |
|
"grad_norm": 0.000755178218241781, |
|
"learning_rate": 3.958614484930275e-05, |
|
"loss": 0.207, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 5.0381241565452095, |
|
"grad_norm": 0.0009751113248057663, |
|
"learning_rate": 3.95486579697106e-05, |
|
"loss": 0.0001, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 5.038798920377868, |
|
"grad_norm": 0.00031620432855561376, |
|
"learning_rate": 3.951117109011846e-05, |
|
"loss": 0.337, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 5.0394736842105265, |
|
"grad_norm": 0.0007090018480084836, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.0006, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 5.040148448043185, |
|
"grad_norm": 0.0010267384350299835, |
|
"learning_rate": 3.943619733093418e-05, |
|
"loss": 0.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.040823211875844, |
|
"grad_norm": 0.014587147161364555, |
|
"learning_rate": 3.939871045134203e-05, |
|
"loss": 0.0001, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 5.041497975708502, |
|
"grad_norm": 0.000788258679676801, |
|
"learning_rate": 3.936122357174989e-05, |
|
"loss": 0.0, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 5.042172739541161, |
|
"grad_norm": 0.0006495325942523777, |
|
"learning_rate": 3.9323736692157746e-05, |
|
"loss": 0.0, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 5.042847503373819, |
|
"grad_norm": 0.0006167737883515656, |
|
"learning_rate": 3.928624981256561e-05, |
|
"loss": 0.1018, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 5.043522267206478, |
|
"grad_norm": 0.0014920184621587396, |
|
"learning_rate": 3.924876293297346e-05, |
|
"loss": 0.0, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 5.044197031039136, |
|
"grad_norm": 0.0015535310376435518, |
|
"learning_rate": 3.921127605338132e-05, |
|
"loss": 0.0007, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 5.044871794871795, |
|
"grad_norm": 0.0006431335350498557, |
|
"learning_rate": 3.9173789173789176e-05, |
|
"loss": 0.0001, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 5.045546558704453, |
|
"grad_norm": 0.005366568453609943, |
|
"learning_rate": 3.913630229419703e-05, |
|
"loss": 0.0, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 5.046221322537112, |
|
"grad_norm": 0.0013297253753989935, |
|
"learning_rate": 3.909881541460489e-05, |
|
"loss": 0.0, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 5.04689608636977, |
|
"grad_norm": 0.0004990586312487721, |
|
"learning_rate": 3.906132853501275e-05, |
|
"loss": 0.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.047570850202429, |
|
"grad_norm": 0.0013985860859975219, |
|
"learning_rate": 3.9023841655420606e-05, |
|
"loss": 0.0, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 5.048245614035087, |
|
"grad_norm": 0.0006711781024932861, |
|
"learning_rate": 3.898635477582846e-05, |
|
"loss": 0.0, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 5.048920377867747, |
|
"grad_norm": 0.0006565306102856994, |
|
"learning_rate": 3.894886789623632e-05, |
|
"loss": 0.0, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 5.049595141700405, |
|
"grad_norm": 0.0009195157326757908, |
|
"learning_rate": 3.891138101664418e-05, |
|
"loss": 0.0001, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_accuracy": 0.9107142857142857, |
|
"eval_f1": 0.9105137981578073, |
|
"eval_loss": 0.7364658117294312, |
|
"eval_runtime": 73.1769, |
|
"eval_samples_per_second": 1.531, |
|
"eval_steps_per_second": 1.531, |
|
"step": 4446 |
|
}, |
|
{ |
|
"epoch": 6.000269905533063, |
|
"grad_norm": 0.0008725410443730652, |
|
"learning_rate": 3.8873894137052036e-05, |
|
"loss": 0.0, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 6.000944669365722, |
|
"grad_norm": 0.0006686112028546631, |
|
"learning_rate": 3.883640725745989e-05, |
|
"loss": 0.0, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 6.001619433198381, |
|
"grad_norm": 0.000973099609836936, |
|
"learning_rate": 3.879892037786775e-05, |
|
"loss": 0.0, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 6.0022941970310395, |
|
"grad_norm": 0.0036273570731282234, |
|
"learning_rate": 3.876143349827561e-05, |
|
"loss": 0.0, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 6.002968960863698, |
|
"grad_norm": 0.0030524057801812887, |
|
"learning_rate": 3.8723946618683466e-05, |
|
"loss": 0.9891, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 6.0036437246963565, |
|
"grad_norm": 0.0005925680161453784, |
|
"learning_rate": 3.868645973909132e-05, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.004318488529015, |
|
"grad_norm": 0.0012102797627449036, |
|
"learning_rate": 3.864897285949918e-05, |
|
"loss": 0.0004, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 6.004993252361674, |
|
"grad_norm": 0.001870299456641078, |
|
"learning_rate": 3.861148597990703e-05, |
|
"loss": 0.0001, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 6.005668016194332, |
|
"grad_norm": 0.0008334846352227032, |
|
"learning_rate": 3.857399910031489e-05, |
|
"loss": 0.0, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 6.006342780026991, |
|
"grad_norm": 0.0909259095788002, |
|
"learning_rate": 3.853651222072275e-05, |
|
"loss": 0.0033, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 6.007017543859649, |
|
"grad_norm": 0.08534003794193268, |
|
"learning_rate": 3.849902534113061e-05, |
|
"loss": 0.0001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 6.007692307692308, |
|
"grad_norm": 0.009015407413244247, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.0001, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 6.008367071524966, |
|
"grad_norm": 0.0005771831492893398, |
|
"learning_rate": 3.842405158194632e-05, |
|
"loss": 0.0, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 6.009041835357625, |
|
"grad_norm": 0.00015217051259241998, |
|
"learning_rate": 3.8386564702354174e-05, |
|
"loss": 0.0, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 6.009716599190283, |
|
"grad_norm": 0.001618007430806756, |
|
"learning_rate": 3.834907782276204e-05, |
|
"loss": 0.0001, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 6.010391363022942, |
|
"grad_norm": 0.0008747613755986094, |
|
"learning_rate": 3.831159094316989e-05, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.0110661268556, |
|
"grad_norm": 0.0011886496795341372, |
|
"learning_rate": 3.827410406357775e-05, |
|
"loss": 0.0001, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 6.011740890688259, |
|
"grad_norm": 0.0006136572919785976, |
|
"learning_rate": 3.8236617183985604e-05, |
|
"loss": 0.0, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 6.012415654520917, |
|
"grad_norm": 0.0002797636261675507, |
|
"learning_rate": 3.819913030439347e-05, |
|
"loss": 0.0, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 6.013090418353576, |
|
"grad_norm": 0.0005924575380049646, |
|
"learning_rate": 3.8161643424801316e-05, |
|
"loss": 0.0, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 6.013765182186235, |
|
"grad_norm": 381.5912170410156, |
|
"learning_rate": 3.812415654520918e-05, |
|
"loss": 0.6612, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 6.014439946018894, |
|
"grad_norm": 0.0007501631625927985, |
|
"learning_rate": 3.8086669665617034e-05, |
|
"loss": 0.057, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 6.015114709851552, |
|
"grad_norm": 0.00048053194768726826, |
|
"learning_rate": 3.80491827860249e-05, |
|
"loss": 0.7472, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 6.015789473684211, |
|
"grad_norm": 0.0008806756814010441, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.0, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 6.016464237516869, |
|
"grad_norm": 0.0007039654301479459, |
|
"learning_rate": 3.797420902684061e-05, |
|
"loss": 0.0002, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 6.017139001349528, |
|
"grad_norm": 0.0005677440203726292, |
|
"learning_rate": 3.7936722147248464e-05, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 6.017813765182186, |
|
"grad_norm": 0.0006246105185709894, |
|
"learning_rate": 3.7899235267656327e-05, |
|
"loss": 0.0002, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 6.018488529014845, |
|
"grad_norm": 0.0003905866760760546, |
|
"learning_rate": 3.7861748388064176e-05, |
|
"loss": 0.0, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 6.019163292847503, |
|
"grad_norm": 0.0004027994582429528, |
|
"learning_rate": 3.782426150847204e-05, |
|
"loss": 0.0002, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 6.019838056680162, |
|
"grad_norm": 0.0017455661436542869, |
|
"learning_rate": 3.7786774628879894e-05, |
|
"loss": 0.0001, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 6.02051282051282, |
|
"grad_norm": 0.0022832180839031935, |
|
"learning_rate": 3.774928774928775e-05, |
|
"loss": 0.0001, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 6.021187584345479, |
|
"grad_norm": 295.60693359375, |
|
"learning_rate": 3.7711800869695605e-05, |
|
"loss": 0.7359, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 6.0218623481781375, |
|
"grad_norm": 0.0004823520721402019, |
|
"learning_rate": 3.767431399010347e-05, |
|
"loss": 0.0, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 6.022537112010796, |
|
"grad_norm": 0.003145309165120125, |
|
"learning_rate": 3.7636827110511324e-05, |
|
"loss": 0.0, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 6.0232118758434545, |
|
"grad_norm": 0.00026828868431039155, |
|
"learning_rate": 3.759934023091918e-05, |
|
"loss": 0.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 6.023886639676113, |
|
"grad_norm": 0.000310034112771973, |
|
"learning_rate": 3.7561853351327035e-05, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.024561403508772, |
|
"grad_norm": 0.00041966387652792037, |
|
"learning_rate": 3.75243664717349e-05, |
|
"loss": 0.0, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 6.02523616734143, |
|
"grad_norm": 0.0011529176263138652, |
|
"learning_rate": 3.7486879592142754e-05, |
|
"loss": 0.5445, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 6.0259109311740895, |
|
"grad_norm": 0.02147838845849037, |
|
"learning_rate": 3.744939271255061e-05, |
|
"loss": 1.0205, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 6.026585695006748, |
|
"grad_norm": 0.000508416909724474, |
|
"learning_rate": 3.7411905832958465e-05, |
|
"loss": 0.001, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 6.0272604588394065, |
|
"grad_norm": 0.008615111000835896, |
|
"learning_rate": 3.737441895336633e-05, |
|
"loss": 0.0001, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 6.027935222672065, |
|
"grad_norm": 0.444153755903244, |
|
"learning_rate": 3.7336932073774184e-05, |
|
"loss": 0.9325, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 6.028609986504724, |
|
"grad_norm": 0.0013290736824274063, |
|
"learning_rate": 3.729944519418204e-05, |
|
"loss": 0.0001, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 6.029284750337382, |
|
"grad_norm": 0.000803654664196074, |
|
"learning_rate": 3.7261958314589895e-05, |
|
"loss": 0.0044, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 6.029959514170041, |
|
"grad_norm": 0.0021947200875729322, |
|
"learning_rate": 3.722447143499775e-05, |
|
"loss": 0.9785, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 6.030634278002699, |
|
"grad_norm": 0.0023971525952219963, |
|
"learning_rate": 3.718698455540561e-05, |
|
"loss": 0.0001, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 6.031309041835358, |
|
"grad_norm": 0.00609954446554184, |
|
"learning_rate": 3.714949767581347e-05, |
|
"loss": 0.0002, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 6.031983805668016, |
|
"grad_norm": 0.0020932150073349476, |
|
"learning_rate": 3.7112010796221325e-05, |
|
"loss": 0.0002, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 6.032658569500675, |
|
"grad_norm": 0.0034460346214473248, |
|
"learning_rate": 3.707452391662918e-05, |
|
"loss": 0.0004, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 6.033333333333333, |
|
"grad_norm": 0.0021088484209030867, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0001, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 6.034008097165992, |
|
"grad_norm": 0.002742623910307884, |
|
"learning_rate": 3.699955015744489e-05, |
|
"loss": 0.0006, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 6.03468286099865, |
|
"grad_norm": 0.002541649155318737, |
|
"learning_rate": 3.6962063277852755e-05, |
|
"loss": 0.0001, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 6.035357624831309, |
|
"grad_norm": 0.000678271462675184, |
|
"learning_rate": 3.692457639826061e-05, |
|
"loss": 0.0, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 6.036032388663967, |
|
"grad_norm": 0.0022359860595315695, |
|
"learning_rate": 3.6887089518668466e-05, |
|
"loss": 0.0002, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 6.036707152496626, |
|
"grad_norm": 0.003631311934441328, |
|
"learning_rate": 3.684960263907632e-05, |
|
"loss": 0.0139, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 6.037381916329284, |
|
"grad_norm": 408.66119384765625, |
|
"learning_rate": 3.6812115759484185e-05, |
|
"loss": 0.3617, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.038056680161944, |
|
"grad_norm": 0.001363090705126524, |
|
"learning_rate": 3.6774628879892034e-05, |
|
"loss": 0.7014, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 6.038731443994602, |
|
"grad_norm": 0.0028585607651621103, |
|
"learning_rate": 3.6737142000299896e-05, |
|
"loss": 0.0209, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 6.039406207827261, |
|
"grad_norm": 0.0029073706828057766, |
|
"learning_rate": 3.669965512070775e-05, |
|
"loss": 0.0007, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 6.040080971659919, |
|
"grad_norm": 0.021762054413557053, |
|
"learning_rate": 3.6662168241115615e-05, |
|
"loss": 3.0967, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 6.040755735492578, |
|
"grad_norm": 1.7035624980926514, |
|
"learning_rate": 3.6624681361523464e-05, |
|
"loss": 1.3983, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 6.041430499325236, |
|
"grad_norm": 0.07881853729486465, |
|
"learning_rate": 3.6587194481931326e-05, |
|
"loss": 0.8778, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 6.042105263157895, |
|
"grad_norm": 49.91697311401367, |
|
"learning_rate": 3.654970760233918e-05, |
|
"loss": 0.0293, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 6.042780026990553, |
|
"grad_norm": 0.01630672998726368, |
|
"learning_rate": 3.6512220722747045e-05, |
|
"loss": 1.1156, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 6.043454790823212, |
|
"grad_norm": 0.007935232482850552, |
|
"learning_rate": 3.6474733843154894e-05, |
|
"loss": 0.0031, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 6.04412955465587, |
|
"grad_norm": 299.0083923339844, |
|
"learning_rate": 3.6437246963562756e-05, |
|
"loss": 0.6953, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 6.044804318488529, |
|
"grad_norm": 0.014369282871484756, |
|
"learning_rate": 3.639976008397061e-05, |
|
"loss": 0.0002, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 6.0454790823211875, |
|
"grad_norm": 0.0033456783276051283, |
|
"learning_rate": 3.6362273204378474e-05, |
|
"loss": 0.0009, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 6.046153846153846, |
|
"grad_norm": 0.0012127397349104285, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.0001, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 6.0468286099865045, |
|
"grad_norm": 0.003025912446901202, |
|
"learning_rate": 3.6287299445194186e-05, |
|
"loss": 0.0001, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 6.047503373819163, |
|
"grad_norm": 0.006771762855350971, |
|
"learning_rate": 3.624981256560204e-05, |
|
"loss": 0.0003, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 6.0481781376518216, |
|
"grad_norm": 0.006291988305747509, |
|
"learning_rate": 3.62123256860099e-05, |
|
"loss": 0.6232, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 6.04885290148448, |
|
"grad_norm": 0.010942903347313404, |
|
"learning_rate": 3.617483880641775e-05, |
|
"loss": 0.9909, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 6.049527665317139, |
|
"grad_norm": 0.0050459960475564, |
|
"learning_rate": 3.6137351926825616e-05, |
|
"loss": 0.8987, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_accuracy": 0.8392857142857143, |
|
"eval_f1": 0.8294011707968183, |
|
"eval_loss": 0.930968701839447, |
|
"eval_runtime": 74.4165, |
|
"eval_samples_per_second": 1.505, |
|
"eval_steps_per_second": 1.505, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 7.000202429149797, |
|
"grad_norm": 0.012029584497213364, |
|
"learning_rate": 3.609986504723347e-05, |
|
"loss": 0.0003, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 7.000877192982456, |
|
"grad_norm": 0.002462017349898815, |
|
"learning_rate": 3.606237816764133e-05, |
|
"loss": 0.0005, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.001551956815114, |
|
"grad_norm": 0.0375690832734108, |
|
"learning_rate": 3.602489128804918e-05, |
|
"loss": 0.1058, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 7.002226720647773, |
|
"grad_norm": 0.026218879967927933, |
|
"learning_rate": 3.5987404408457046e-05, |
|
"loss": 0.0083, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 7.002901484480432, |
|
"grad_norm": 0.0031192379537969828, |
|
"learning_rate": 3.59499175288649e-05, |
|
"loss": 0.0342, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 7.003576248313091, |
|
"grad_norm": 0.002261426765471697, |
|
"learning_rate": 3.591243064927276e-05, |
|
"loss": 0.8758, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 7.004251012145749, |
|
"grad_norm": 0.7252321839332581, |
|
"learning_rate": 3.587494376968061e-05, |
|
"loss": 0.0008, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 7.004925775978408, |
|
"grad_norm": 0.002154165878891945, |
|
"learning_rate": 3.583745689008847e-05, |
|
"loss": 0.0002, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 7.005600539811066, |
|
"grad_norm": 0.0012370734475553036, |
|
"learning_rate": 3.5799970010496325e-05, |
|
"loss": 1.0515, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 7.006275303643725, |
|
"grad_norm": 0.0021348996087908745, |
|
"learning_rate": 3.576248313090419e-05, |
|
"loss": 0.0001, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 7.006950067476383, |
|
"grad_norm": 0.006049524061381817, |
|
"learning_rate": 3.572499625131204e-05, |
|
"loss": 0.0003, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 7.007624831309042, |
|
"grad_norm": 0.01275632157921791, |
|
"learning_rate": 3.56875093717199e-05, |
|
"loss": 0.0021, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.0082995951417, |
|
"grad_norm": 0.0016850440297275782, |
|
"learning_rate": 3.5650022492127754e-05, |
|
"loss": 0.0001, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 7.008974358974359, |
|
"grad_norm": 0.0009741022950038314, |
|
"learning_rate": 3.561253561253561e-05, |
|
"loss": 0.0005, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 7.0096491228070175, |
|
"grad_norm": 0.000799846719019115, |
|
"learning_rate": 3.557504873294347e-05, |
|
"loss": 0.0002, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 7.010323886639676, |
|
"grad_norm": 0.0008095399825833738, |
|
"learning_rate": 3.553756185335133e-05, |
|
"loss": 0.0024, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 7.0109986504723345, |
|
"grad_norm": 0.0016390876844525337, |
|
"learning_rate": 3.5500074973759184e-05, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 7.011673414304993, |
|
"grad_norm": 0.0013130076695233583, |
|
"learning_rate": 3.546258809416704e-05, |
|
"loss": 0.8843, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 7.0123481781376515, |
|
"grad_norm": 0.015013671480119228, |
|
"learning_rate": 3.54251012145749e-05, |
|
"loss": 0.7296, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 7.01302294197031, |
|
"grad_norm": 0.003729419782757759, |
|
"learning_rate": 3.538761433498276e-05, |
|
"loss": 0.004, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 7.013697705802969, |
|
"grad_norm": 0.007766401395201683, |
|
"learning_rate": 3.5350127455390614e-05, |
|
"loss": 0.0001, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 7.014372469635627, |
|
"grad_norm": 0.03760051354765892, |
|
"learning_rate": 3.531264057579847e-05, |
|
"loss": 0.0002, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.0150472334682865, |
|
"grad_norm": 0.003396588610485196, |
|
"learning_rate": 3.527515369620633e-05, |
|
"loss": 0.0001, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 7.015721997300945, |
|
"grad_norm": 0.005965414922684431, |
|
"learning_rate": 3.523766681661418e-05, |
|
"loss": 0.0001, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 7.0163967611336036, |
|
"grad_norm": 0.002591415075585246, |
|
"learning_rate": 3.5200179937022044e-05, |
|
"loss": 0.0001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 7.017071524966262, |
|
"grad_norm": 0.0007187007577158511, |
|
"learning_rate": 3.51626930574299e-05, |
|
"loss": 0.6273, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 7.017746288798921, |
|
"grad_norm": 0.0018147805240005255, |
|
"learning_rate": 3.512520617783776e-05, |
|
"loss": 0.0001, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 7.018421052631579, |
|
"grad_norm": 0.0007241186103783548, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.0002, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 7.019095816464238, |
|
"grad_norm": 0.002352670766413212, |
|
"learning_rate": 3.5050232418653474e-05, |
|
"loss": 0.0001, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 7.019770580296896, |
|
"grad_norm": 0.0018704934045672417, |
|
"learning_rate": 3.501274553906133e-05, |
|
"loss": 0.0004, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 7.020445344129555, |
|
"grad_norm": 0.002092360518872738, |
|
"learning_rate": 3.497525865946919e-05, |
|
"loss": 0.0001, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 7.021120107962213, |
|
"grad_norm": 0.001126096467487514, |
|
"learning_rate": 3.493777177987704e-05, |
|
"loss": 0.6823, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.021794871794872, |
|
"grad_norm": 0.0008661505416966975, |
|
"learning_rate": 3.4900284900284904e-05, |
|
"loss": 0.0001, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 7.02246963562753, |
|
"grad_norm": 0.02058524824678898, |
|
"learning_rate": 3.486279802069276e-05, |
|
"loss": 0.0001, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 7.023144399460189, |
|
"grad_norm": 0.002387122018262744, |
|
"learning_rate": 3.4825311141100615e-05, |
|
"loss": 0.0002, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 7.023819163292847, |
|
"grad_norm": 0.0011330017587170005, |
|
"learning_rate": 3.478782426150847e-05, |
|
"loss": 0.0001, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 7.024493927125506, |
|
"grad_norm": 0.0005625615012831986, |
|
"learning_rate": 3.4750337381916334e-05, |
|
"loss": 0.0002, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 7.025168690958164, |
|
"grad_norm": 0.0008695796132087708, |
|
"learning_rate": 3.471285050232419e-05, |
|
"loss": 0.0, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 7.025843454790823, |
|
"grad_norm": 0.0016092468285933137, |
|
"learning_rate": 3.4675363622732045e-05, |
|
"loss": 0.0001, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 7.026518218623481, |
|
"grad_norm": 0.0011349094565957785, |
|
"learning_rate": 3.46378767431399e-05, |
|
"loss": 0.0003, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 7.027192982456141, |
|
"grad_norm": 0.0005459162639454007, |
|
"learning_rate": 3.4600389863547764e-05, |
|
"loss": 0.0001, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 7.027867746288799, |
|
"grad_norm": 0.0009417292312718928, |
|
"learning_rate": 3.456290298395562e-05, |
|
"loss": 0.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.028542510121458, |
|
"grad_norm": 0.0005761535139754415, |
|
"learning_rate": 3.4525416104363475e-05, |
|
"loss": 0.0, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 7.029217273954116, |
|
"grad_norm": 0.0007409591344185174, |
|
"learning_rate": 3.448792922477133e-05, |
|
"loss": 0.0001, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 7.029892037786775, |
|
"grad_norm": 0.004374117590487003, |
|
"learning_rate": 3.4450442345179194e-05, |
|
"loss": 0.0, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 7.030566801619433, |
|
"grad_norm": 0.017210789024829865, |
|
"learning_rate": 3.441295546558704e-05, |
|
"loss": 0.0001, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 7.031241565452092, |
|
"grad_norm": 0.0008836057968437672, |
|
"learning_rate": 3.4375468585994905e-05, |
|
"loss": 0.0012, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 7.03191632928475, |
|
"grad_norm": 0.0015315774362534285, |
|
"learning_rate": 3.433798170640276e-05, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 7.032591093117409, |
|
"grad_norm": 0.0006376684177666903, |
|
"learning_rate": 3.4300494826810617e-05, |
|
"loss": 0.0, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 7.0332658569500675, |
|
"grad_norm": 0.0005232661496847868, |
|
"learning_rate": 3.426300794721847e-05, |
|
"loss": 0.0, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 7.033940620782726, |
|
"grad_norm": 0.0008468987653031945, |
|
"learning_rate": 3.4225521067626335e-05, |
|
"loss": 0.0003, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 7.0346153846153845, |
|
"grad_norm": 0.000993360416032374, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 7.035290148448043, |
|
"grad_norm": 0.0020066085271537304, |
|
"learning_rate": 3.4150547308442046e-05, |
|
"loss": 0.0001, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 7.0359649122807015, |
|
"grad_norm": 0.00036297430051490664, |
|
"learning_rate": 3.41130604288499e-05, |
|
"loss": 0.0, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 7.03663967611336, |
|
"grad_norm": 0.0009432988590560853, |
|
"learning_rate": 3.407557354925776e-05, |
|
"loss": 0.0001, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 7.037314439946019, |
|
"grad_norm": 0.0018047185149043798, |
|
"learning_rate": 3.403808666966562e-05, |
|
"loss": 0.8627, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 7.037989203778677, |
|
"grad_norm": 0.0037690841127187014, |
|
"learning_rate": 3.4000599790073476e-05, |
|
"loss": 0.0781, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 7.038663967611336, |
|
"grad_norm": 0.023057781159877777, |
|
"learning_rate": 3.396311291048133e-05, |
|
"loss": 0.0001, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 7.039338731443995, |
|
"grad_norm": 0.004012484569102526, |
|
"learning_rate": 3.392562603088919e-05, |
|
"loss": 0.0001, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 7.0400134952766535, |
|
"grad_norm": 0.0012608218239620328, |
|
"learning_rate": 3.388813915129705e-05, |
|
"loss": 0.0001, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 7.040688259109312, |
|
"grad_norm": 0.002351221162825823, |
|
"learning_rate": 3.38506522717049e-05, |
|
"loss": 0.0001, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 7.041363022941971, |
|
"grad_norm": 0.000716827402357012, |
|
"learning_rate": 3.381316539211276e-05, |
|
"loss": 0.0005, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 7.042037786774629, |
|
"grad_norm": 0.0029892646707594395, |
|
"learning_rate": 3.377567851252062e-05, |
|
"loss": 0.0, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 7.042712550607288, |
|
"grad_norm": 372.2917175292969, |
|
"learning_rate": 3.373819163292848e-05, |
|
"loss": 0.5735, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 7.043387314439946, |
|
"grad_norm": 0.0010425182990729809, |
|
"learning_rate": 3.370070475333633e-05, |
|
"loss": 0.0, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 7.044062078272605, |
|
"grad_norm": 43.60670852661133, |
|
"learning_rate": 3.366321787374419e-05, |
|
"loss": 1.7134, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 7.044736842105263, |
|
"grad_norm": 44.16180419921875, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 0.4081, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 7.045411605937922, |
|
"grad_norm": 0.002386684063822031, |
|
"learning_rate": 3.358824411455991e-05, |
|
"loss": 0.0014, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 7.04608636977058, |
|
"grad_norm": 0.000626052962616086, |
|
"learning_rate": 3.355075723496776e-05, |
|
"loss": 0.6338, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 7.046761133603239, |
|
"grad_norm": 0.0048158965073525906, |
|
"learning_rate": 3.351327035537562e-05, |
|
"loss": 0.0002, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 7.047435897435897, |
|
"grad_norm": 102.3766860961914, |
|
"learning_rate": 3.347578347578348e-05, |
|
"loss": 0.7, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 7.048110661268556, |
|
"grad_norm": 0.0005689793615601957, |
|
"learning_rate": 3.343829659619133e-05, |
|
"loss": 0.7073, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 7.048785425101214, |
|
"grad_norm": 0.013288695365190506, |
|
"learning_rate": 3.340080971659919e-05, |
|
"loss": 0.0572, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 7.049460188933873, |
|
"grad_norm": 0.0011189569486305118, |
|
"learning_rate": 3.336332283700705e-05, |
|
"loss": 0.4888, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_accuracy": 0.875, |
|
"eval_f1": 0.8702947845804988, |
|
"eval_loss": 0.856253445148468, |
|
"eval_runtime": 74.1698, |
|
"eval_samples_per_second": 1.51, |
|
"eval_steps_per_second": 1.51, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 8.000134952766532, |
|
"grad_norm": 0.0010889604454860091, |
|
"learning_rate": 3.332583595741491e-05, |
|
"loss": 0.0492, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 8.000809716599191, |
|
"grad_norm": 0.0005811200244352221, |
|
"learning_rate": 3.328834907782276e-05, |
|
"loss": 0.0003, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 8.001484480431849, |
|
"grad_norm": 0.0028562431689351797, |
|
"learning_rate": 3.325086219823062e-05, |
|
"loss": 0.0003, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 8.002159244264508, |
|
"grad_norm": 0.0011086298618465662, |
|
"learning_rate": 3.321337531863848e-05, |
|
"loss": 0.0001, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 8.002834008097166, |
|
"grad_norm": 0.0018863864243030548, |
|
"learning_rate": 3.317588843904634e-05, |
|
"loss": 0.0001, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 8.003508771929825, |
|
"grad_norm": 0.0009740761015564203, |
|
"learning_rate": 3.313840155945419e-05, |
|
"loss": 0.0002, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 8.004183535762483, |
|
"grad_norm": 0.0005378098576329648, |
|
"learning_rate": 3.310091467986205e-05, |
|
"loss": 0.0001, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 8.004858299595142, |
|
"grad_norm": 0.001058222958818078, |
|
"learning_rate": 3.306342780026991e-05, |
|
"loss": 0.0001, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.0055330634278, |
|
"grad_norm": 0.0010611525503918529, |
|
"learning_rate": 3.302594092067777e-05, |
|
"loss": 0.0001, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 8.006207827260459, |
|
"grad_norm": 0.002727857790887356, |
|
"learning_rate": 3.298845404108562e-05, |
|
"loss": 0.0001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 8.006882591093117, |
|
"grad_norm": 0.0007821051403880119, |
|
"learning_rate": 3.295096716149348e-05, |
|
"loss": 0.0017, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 8.007557354925776, |
|
"grad_norm": 0.001169922179542482, |
|
"learning_rate": 3.2913480281901335e-05, |
|
"loss": 0.0001, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 8.008232118758434, |
|
"grad_norm": 0.0011363876983523369, |
|
"learning_rate": 3.287599340230919e-05, |
|
"loss": 0.0001, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 8.008906882591093, |
|
"grad_norm": 0.0005207853973843157, |
|
"learning_rate": 3.283850652271705e-05, |
|
"loss": 0.6813, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 8.00958164642375, |
|
"grad_norm": 0.0005264964420348406, |
|
"learning_rate": 3.280101964312491e-05, |
|
"loss": 0.0001, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 8.01025641025641, |
|
"grad_norm": 0.0005870209424756467, |
|
"learning_rate": 3.2763532763532764e-05, |
|
"loss": 0.0001, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 8.01093117408907, |
|
"grad_norm": 0.0016355343395844102, |
|
"learning_rate": 3.272604588394062e-05, |
|
"loss": 0.0004, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 8.011605937921727, |
|
"grad_norm": 0.004568756558001041, |
|
"learning_rate": 3.2688559004348476e-05, |
|
"loss": 0.0004, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 8.012280701754387, |
|
"grad_norm": 0.0005888245650567114, |
|
"learning_rate": 3.265107212475634e-05, |
|
"loss": 0.0001, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 8.012955465587044, |
|
"grad_norm": 0.0023943374399095774, |
|
"learning_rate": 3.2613585245164194e-05, |
|
"loss": 0.0, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 8.013630229419704, |
|
"grad_norm": 0.0004357252037152648, |
|
"learning_rate": 3.257609836557205e-05, |
|
"loss": 0.0002, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 8.014304993252361, |
|
"grad_norm": 0.0006332327611744404, |
|
"learning_rate": 3.2538611485979906e-05, |
|
"loss": 0.0001, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 8.01497975708502, |
|
"grad_norm": 0.0006531701656058431, |
|
"learning_rate": 3.250112460638777e-05, |
|
"loss": 0.0001, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 8.015654520917678, |
|
"grad_norm": 0.0005107235629111528, |
|
"learning_rate": 3.246363772679562e-05, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 8.016329284750338, |
|
"grad_norm": 0.012723034247756004, |
|
"learning_rate": 3.242615084720348e-05, |
|
"loss": 0.0001, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 8.017004048582995, |
|
"grad_norm": 0.0427851527929306, |
|
"learning_rate": 3.2388663967611336e-05, |
|
"loss": 0.0002, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 8.017678812415655, |
|
"grad_norm": 0.001141960732638836, |
|
"learning_rate": 3.23511770880192e-05, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 8.018353576248312, |
|
"grad_norm": 0.0015029623173177242, |
|
"learning_rate": 3.231369020842705e-05, |
|
"loss": 0.0001, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.019028340080972, |
|
"grad_norm": 0.0005648156511597335, |
|
"learning_rate": 3.227620332883491e-05, |
|
"loss": 0.0001, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 8.01970310391363, |
|
"grad_norm": 0.0006971880211494863, |
|
"learning_rate": 3.2238716449242766e-05, |
|
"loss": 0.0, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 8.020377867746289, |
|
"grad_norm": 0.0005205124034546316, |
|
"learning_rate": 3.220122956965063e-05, |
|
"loss": 0.0001, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 8.021052631578947, |
|
"grad_norm": 0.0007245125016197562, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.0001, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 8.021727395411606, |
|
"grad_norm": 0.0005247213994152844, |
|
"learning_rate": 3.212625581046634e-05, |
|
"loss": 0.0001, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 8.022402159244265, |
|
"grad_norm": 0.0005060233525000513, |
|
"learning_rate": 3.2088768930874195e-05, |
|
"loss": 0.0014, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 8.023076923076923, |
|
"grad_norm": 0.01399776991456747, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.0001, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 8.023751686909582, |
|
"grad_norm": 0.0013257871614769101, |
|
"learning_rate": 3.201379517168991e-05, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 8.02442645074224, |
|
"grad_norm": 0.00038729843799956143, |
|
"learning_rate": 3.197630829209777e-05, |
|
"loss": 0.0, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 8.0251012145749, |
|
"grad_norm": 0.0013562028761953115, |
|
"learning_rate": 3.1938821412505625e-05, |
|
"loss": 0.0002, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 8.025775978407557, |
|
"grad_norm": 0.0023358569014817476, |
|
"learning_rate": 3.190133453291348e-05, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 8.026450742240216, |
|
"grad_norm": 0.0007051244028843939, |
|
"learning_rate": 3.186384765332134e-05, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 8.027125506072874, |
|
"grad_norm": 0.00045763421803712845, |
|
"learning_rate": 3.18263607737292e-05, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 8.027800269905534, |
|
"grad_norm": 0.0003405519819352776, |
|
"learning_rate": 3.1788873894137055e-05, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 8.028475033738191, |
|
"grad_norm": 0.0009031207882799208, |
|
"learning_rate": 3.175138701454491e-05, |
|
"loss": 0.3007, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 8.02914979757085, |
|
"grad_norm": 0.00048344547394663095, |
|
"learning_rate": 3.171390013495277e-05, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 8.029824561403508, |
|
"grad_norm": 0.005110772326588631, |
|
"learning_rate": 3.167641325536063e-05, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 8.030499325236168, |
|
"grad_norm": 0.0005193505785427988, |
|
"learning_rate": 3.1638926375768485e-05, |
|
"loss": 1.2393, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 8.031174089068825, |
|
"grad_norm": 0.001544152619317174, |
|
"learning_rate": 3.160143949617634e-05, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 8.031848852901485, |
|
"grad_norm": 0.004015884827822447, |
|
"learning_rate": 3.15639526165842e-05, |
|
"loss": 0.0001, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 8.032523616734142, |
|
"grad_norm": 0.005030054599046707, |
|
"learning_rate": 3.152646573699205e-05, |
|
"loss": 0.0002, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 8.033198380566802, |
|
"grad_norm": 0.08386117219924927, |
|
"learning_rate": 3.148897885739991e-05, |
|
"loss": 0.0002, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 8.03387314439946, |
|
"grad_norm": 0.004819917026907206, |
|
"learning_rate": 3.145149197780777e-05, |
|
"loss": 0.1815, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 8.034547908232119, |
|
"grad_norm": 0.0022033504210412502, |
|
"learning_rate": 3.1414005098215627e-05, |
|
"loss": 0.0001, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 8.035222672064778, |
|
"grad_norm": 0.0040964060463011265, |
|
"learning_rate": 3.137651821862348e-05, |
|
"loss": 0.0001, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 8.035897435897436, |
|
"grad_norm": 0.004042464308440685, |
|
"learning_rate": 3.133903133903134e-05, |
|
"loss": 0.0001, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 8.036572199730095, |
|
"grad_norm": 0.0027346210554242134, |
|
"learning_rate": 3.1301544459439194e-05, |
|
"loss": 0.0001, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 8.037246963562753, |
|
"grad_norm": 0.0005888897576369345, |
|
"learning_rate": 3.1264057579847056e-05, |
|
"loss": 0.0001, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 8.037921727395412, |
|
"grad_norm": 0.004620389547199011, |
|
"learning_rate": 3.122657070025491e-05, |
|
"loss": 0.0001, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 8.03859649122807, |
|
"grad_norm": 0.0017953782808035612, |
|
"learning_rate": 3.118908382066277e-05, |
|
"loss": 0.0001, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.03927125506073, |
|
"grad_norm": 0.0019287167815491557, |
|
"learning_rate": 3.1151596941070624e-05, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 8.039946018893387, |
|
"grad_norm": 0.017189156264066696, |
|
"learning_rate": 3.1114110061478486e-05, |
|
"loss": 0.0001, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 8.040620782726046, |
|
"grad_norm": 0.0002868880983442068, |
|
"learning_rate": 3.107662318188634e-05, |
|
"loss": 0.0001, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 8.041295546558704, |
|
"grad_norm": 0.003237192053347826, |
|
"learning_rate": 3.10391363022942e-05, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 8.041970310391363, |
|
"grad_norm": 0.010104048997163773, |
|
"learning_rate": 3.1001649422702054e-05, |
|
"loss": 0.0001, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 8.042645074224021, |
|
"grad_norm": 0.0012962371110916138, |
|
"learning_rate": 3.0964162543109916e-05, |
|
"loss": 0.0001, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 8.04331983805668, |
|
"grad_norm": 0.0021973999682813883, |
|
"learning_rate": 3.0926675663517765e-05, |
|
"loss": 0.0001, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 8.043994601889338, |
|
"grad_norm": 0.004213243722915649, |
|
"learning_rate": 3.088918878392563e-05, |
|
"loss": 0.0001, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 8.044669365721997, |
|
"grad_norm": 0.0007371046231128275, |
|
"learning_rate": 3.0851701904333484e-05, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 8.045344129554655, |
|
"grad_norm": 0.0029181931167840958, |
|
"learning_rate": 3.0814215024741346e-05, |
|
"loss": 0.0001, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 8.046018893387314, |
|
"grad_norm": 0.001932345563545823, |
|
"learning_rate": 3.0776728145149195e-05, |
|
"loss": 0.0001, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 8.046693657219974, |
|
"grad_norm": 0.0020557758398354053, |
|
"learning_rate": 3.073924126555706e-05, |
|
"loss": 1.2559, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 8.047368421052632, |
|
"grad_norm": 0.005378492642194033, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.084, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 8.048043184885291, |
|
"grad_norm": 0.0023935220669955015, |
|
"learning_rate": 3.0664267506372776e-05, |
|
"loss": 0.2057, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 8.048717948717949, |
|
"grad_norm": 0.0029694943223148584, |
|
"learning_rate": 3.0626780626780625e-05, |
|
"loss": 0.0001, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 8.049392712550608, |
|
"grad_norm": 0.0006969795795157552, |
|
"learning_rate": 3.058929374718849e-05, |
|
"loss": 0.0001, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_accuracy": 0.8928571428571429, |
|
"eval_f1": 0.8894495468057416, |
|
"eval_loss": 0.6908820867538452, |
|
"eval_runtime": 70.8817, |
|
"eval_samples_per_second": 1.58, |
|
"eval_steps_per_second": 1.58, |
|
"step": 6669 |
|
}, |
|
{ |
|
"epoch": 9.000067476383267, |
|
"grad_norm": 0.007031037472188473, |
|
"learning_rate": 3.055180686759634e-05, |
|
"loss": 0.0001, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 9.000742240215924, |
|
"grad_norm": 0.0013843988999724388, |
|
"learning_rate": 3.05143199880042e-05, |
|
"loss": 0.0001, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 9.001417004048584, |
|
"grad_norm": 0.07400429248809814, |
|
"learning_rate": 3.0476833108412055e-05, |
|
"loss": 0.0002, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 9.002091767881241, |
|
"grad_norm": 0.001845911960117519, |
|
"learning_rate": 3.0439346228819914e-05, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 9.0027665317139, |
|
"grad_norm": 0.00020295576541684568, |
|
"learning_rate": 3.0401859349227773e-05, |
|
"loss": 0.0014, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 9.003441295546558, |
|
"grad_norm": 0.001036637695506215, |
|
"learning_rate": 3.0364372469635626e-05, |
|
"loss": 0.0003, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 9.004116059379218, |
|
"grad_norm": 0.001262377598322928, |
|
"learning_rate": 3.0326885590043485e-05, |
|
"loss": 0.0001, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 9.004790823211875, |
|
"grad_norm": 0.0012167665408924222, |
|
"learning_rate": 3.0289398710451344e-05, |
|
"loss": 0.6511, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 9.005465587044535, |
|
"grad_norm": 0.0019521707436069846, |
|
"learning_rate": 3.0251911830859203e-05, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 9.006140350877192, |
|
"grad_norm": 0.0013618938392028213, |
|
"learning_rate": 3.0214424951267055e-05, |
|
"loss": 0.0001, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 9.006815114709852, |
|
"grad_norm": 0.0009306151187047362, |
|
"learning_rate": 3.0176938071674915e-05, |
|
"loss": 0.0001, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 9.00748987854251, |
|
"grad_norm": 0.0007624576683156192, |
|
"learning_rate": 3.0139451192082774e-05, |
|
"loss": 0.0001, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 9.008164642375169, |
|
"grad_norm": 0.0007957870257087052, |
|
"learning_rate": 3.0101964312490626e-05, |
|
"loss": 0.4765, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 9.008839406207827, |
|
"grad_norm": 0.47597193717956543, |
|
"learning_rate": 3.0064477432898485e-05, |
|
"loss": 0.0019, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 9.009514170040486, |
|
"grad_norm": 0.0003396008105482906, |
|
"learning_rate": 3.0026990553306344e-05, |
|
"loss": 0.0001, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 9.010188933873144, |
|
"grad_norm": 0.0011485237628221512, |
|
"learning_rate": 2.9989503673714204e-05, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 9.010863697705803, |
|
"grad_norm": 0.0008013169863261282, |
|
"learning_rate": 2.9952016794122056e-05, |
|
"loss": 0.0001, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 9.011538461538462, |
|
"grad_norm": 0.00038786802906543016, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 9.01221322537112, |
|
"grad_norm": 0.003582603298127651, |
|
"learning_rate": 2.9877043034937774e-05, |
|
"loss": 0.9191, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 9.01288798920378, |
|
"grad_norm": 0.0014808046398684382, |
|
"learning_rate": 2.9839556155345634e-05, |
|
"loss": 0.0001, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 9.013562753036437, |
|
"grad_norm": 0.01157829724252224, |
|
"learning_rate": 2.9802069275753486e-05, |
|
"loss": 0.0001, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 9.014237516869096, |
|
"grad_norm": 0.007076776586472988, |
|
"learning_rate": 2.9764582396161345e-05, |
|
"loss": 0.0012, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 9.014912280701754, |
|
"grad_norm": 0.003984262701123953, |
|
"learning_rate": 2.9727095516569204e-05, |
|
"loss": 0.0001, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 9.015587044534414, |
|
"grad_norm": 0.00039073076914064586, |
|
"learning_rate": 2.9689608636977063e-05, |
|
"loss": 0.0001, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.016261808367071, |
|
"grad_norm": 0.005625125020742416, |
|
"learning_rate": 2.9652121757384916e-05, |
|
"loss": 0.0001, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 9.01693657219973, |
|
"grad_norm": 0.0015515730483457446, |
|
"learning_rate": 2.9614634877792775e-05, |
|
"loss": 0.0, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 9.017611336032388, |
|
"grad_norm": 0.0017237714491784573, |
|
"learning_rate": 2.9577147998200634e-05, |
|
"loss": 0.0, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 9.018286099865048, |
|
"grad_norm": 0.008184783160686493, |
|
"learning_rate": 2.9539661118608486e-05, |
|
"loss": 0.0001, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 9.018960863697705, |
|
"grad_norm": 0.002028749557211995, |
|
"learning_rate": 2.9502174239016346e-05, |
|
"loss": 0.0001, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 9.019635627530365, |
|
"grad_norm": 0.0036216990556567907, |
|
"learning_rate": 2.9464687359424205e-05, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 9.020310391363022, |
|
"grad_norm": 0.0013016269076615572, |
|
"learning_rate": 2.942720047983206e-05, |
|
"loss": 0.0001, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 9.020985155195682, |
|
"grad_norm": 0.00772570027038455, |
|
"learning_rate": 2.9389713600239916e-05, |
|
"loss": 0.0001, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 9.02165991902834, |
|
"grad_norm": 0.0003020280273631215, |
|
"learning_rate": 2.9352226720647776e-05, |
|
"loss": 0.0, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 9.022334682860999, |
|
"grad_norm": 0.0012822924181818962, |
|
"learning_rate": 2.931473984105563e-05, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.023009446693656, |
|
"grad_norm": 0.0010099551873281598, |
|
"learning_rate": 2.927725296146349e-05, |
|
"loss": 0.0001, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 9.023684210526316, |
|
"grad_norm": 0.0024363386910408735, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.0001, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 9.024358974358975, |
|
"grad_norm": 0.0023049945011734962, |
|
"learning_rate": 2.9202279202279202e-05, |
|
"loss": 0.0001, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 9.025033738191633, |
|
"grad_norm": 0.0029273051768541336, |
|
"learning_rate": 2.916479232268706e-05, |
|
"loss": 0.0, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 9.025708502024292, |
|
"grad_norm": 0.003555365838110447, |
|
"learning_rate": 2.9127305443094917e-05, |
|
"loss": 0.0001, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 9.02638326585695, |
|
"grad_norm": 0.0033711865544319153, |
|
"learning_rate": 2.9089818563502773e-05, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 9.02705802968961, |
|
"grad_norm": 0.00046359331463463604, |
|
"learning_rate": 2.9052331683910632e-05, |
|
"loss": 0.0001, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 9.027732793522267, |
|
"grad_norm": 0.0003137718595098704, |
|
"learning_rate": 2.901484480431849e-05, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 9.028407557354926, |
|
"grad_norm": 0.0016707087634131312, |
|
"learning_rate": 2.8977357924726343e-05, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 9.029082321187584, |
|
"grad_norm": 0.0012837687972933054, |
|
"learning_rate": 2.8939871045134203e-05, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 9.029757085020243, |
|
"grad_norm": 0.00030405522556975484, |
|
"learning_rate": 2.8902384165542062e-05, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 9.030431848852901, |
|
"grad_norm": 0.000334856566041708, |
|
"learning_rate": 2.886489728594992e-05, |
|
"loss": 0.0, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 9.03110661268556, |
|
"grad_norm": 0.00024141219910234213, |
|
"learning_rate": 2.8827410406357773e-05, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 9.031781376518218, |
|
"grad_norm": 0.0014251351822167635, |
|
"learning_rate": 2.8789923526765633e-05, |
|
"loss": 0.0001, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 9.032456140350877, |
|
"grad_norm": 0.0001798996381694451, |
|
"learning_rate": 2.875243664717349e-05, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 9.033130904183535, |
|
"grad_norm": 0.00026806764071807265, |
|
"learning_rate": 2.871494976758135e-05, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 9.033805668016194, |
|
"grad_norm": 0.001039984286762774, |
|
"learning_rate": 2.8677462887989203e-05, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 9.034480431848852, |
|
"grad_norm": 0.00029442558297887444, |
|
"learning_rate": 2.8639976008397062e-05, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 9.035155195681511, |
|
"grad_norm": 0.0010803727200254798, |
|
"learning_rate": 2.860248912880492e-05, |
|
"loss": 0.0, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 9.035829959514171, |
|
"grad_norm": 0.0009579784818924963, |
|
"learning_rate": 2.8565002249212774e-05, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 9.036504723346829, |
|
"grad_norm": 0.00148207473102957, |
|
"learning_rate": 2.8527515369620633e-05, |
|
"loss": 0.5707, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 9.037179487179488, |
|
"grad_norm": 0.0010521633084863424, |
|
"learning_rate": 2.8490028490028492e-05, |
|
"loss": 0.0627, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 9.037854251012146, |
|
"grad_norm": 0.0016639038221910596, |
|
"learning_rate": 2.845254161043635e-05, |
|
"loss": 0.0205, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 9.038529014844805, |
|
"grad_norm": 0.0019760627765208483, |
|
"learning_rate": 2.8415054730844204e-05, |
|
"loss": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 9.039203778677463, |
|
"grad_norm": 0.0023020838852971792, |
|
"learning_rate": 2.8377567851252063e-05, |
|
"loss": 0.0001, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 9.039878542510122, |
|
"grad_norm": 0.9819605946540833, |
|
"learning_rate": 2.8340080971659922e-05, |
|
"loss": 0.0009, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 9.04055330634278, |
|
"grad_norm": 0.002409159205853939, |
|
"learning_rate": 2.830259409206778e-05, |
|
"loss": 0.6277, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 9.041228070175439, |
|
"grad_norm": 298.6535339355469, |
|
"learning_rate": 2.8265107212475634e-05, |
|
"loss": 0.947, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 9.041902834008097, |
|
"grad_norm": 0.034443099051713943, |
|
"learning_rate": 2.8227620332883493e-05, |
|
"loss": 0.0001, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 9.042577597840756, |
|
"grad_norm": 0.040302518755197525, |
|
"learning_rate": 2.8190133453291352e-05, |
|
"loss": 0.003, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 9.043252361673414, |
|
"grad_norm": 0.0009369853651151061, |
|
"learning_rate": 2.8152646573699204e-05, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 9.043927125506073, |
|
"grad_norm": 0.0013028283137828112, |
|
"learning_rate": 2.8115159694107064e-05, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 9.04460188933873, |
|
"grad_norm": 0.001541333505883813, |
|
"learning_rate": 2.8077672814514923e-05, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 9.04527665317139, |
|
"grad_norm": 0.000400466175051406, |
|
"learning_rate": 2.8040185934922782e-05, |
|
"loss": 0.0006, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 9.045951417004048, |
|
"grad_norm": 0.001137162558734417, |
|
"learning_rate": 2.8002699055330634e-05, |
|
"loss": 0.0002, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 9.046626180836707, |
|
"grad_norm": 0.0009733253973536193, |
|
"learning_rate": 2.7965212175738493e-05, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 9.047300944669365, |
|
"grad_norm": 0.0002777110203169286, |
|
"learning_rate": 2.792772529614635e-05, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 9.047975708502024, |
|
"grad_norm": 0.0009547212393954396, |
|
"learning_rate": 2.789023841655421e-05, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 9.048650472334684, |
|
"grad_norm": 0.0003457583661656827, |
|
"learning_rate": 2.7852751536962064e-05, |
|
"loss": 0.0008, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 9.049325236167341, |
|
"grad_norm": 0.0019107568077743053, |
|
"learning_rate": 2.781526465736992e-05, |
|
"loss": 0.0009, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"grad_norm": 0.0008839426445774734, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0018, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_accuracy": 0.8928571428571429, |
|
"eval_f1": 0.8916871416871418, |
|
"eval_loss": 0.9169295430183411, |
|
"eval_runtime": 70.5688, |
|
"eval_samples_per_second": 1.587, |
|
"eval_steps_per_second": 1.587, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 10.00067476383266, |
|
"grad_norm": 0.0002696131123229861, |
|
"learning_rate": 2.7740290898185638e-05, |
|
"loss": 0.002, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 10.001349527665317, |
|
"grad_norm": 0.00017847323033493012, |
|
"learning_rate": 2.770280401859349e-05, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 10.002024291497976, |
|
"grad_norm": 0.0010017943568527699, |
|
"learning_rate": 2.766531713900135e-05, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 10.002699055330634, |
|
"grad_norm": 0.0006036867271177471, |
|
"learning_rate": 2.762783025940921e-05, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 10.003373819163293, |
|
"grad_norm": 0.00019583333050832152, |
|
"learning_rate": 2.759034337981706e-05, |
|
"loss": 0.1066, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 10.004048582995951, |
|
"grad_norm": 0.08632688224315643, |
|
"learning_rate": 2.755285650022492e-05, |
|
"loss": 0.0003, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 10.00472334682861, |
|
"grad_norm": 0.00013941490033175796, |
|
"learning_rate": 2.751536962063278e-05, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 10.005398110661268, |
|
"grad_norm": 0.0003023550088983029, |
|
"learning_rate": 2.747788274104064e-05, |
|
"loss": 0.0001, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 10.006072874493928, |
|
"grad_norm": 0.0005739156622439623, |
|
"learning_rate": 2.744039586144849e-05, |
|
"loss": 0.3069, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.006747638326585, |
|
"grad_norm": 0.0005304102669470012, |
|
"learning_rate": 2.740290898185635e-05, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 10.007422402159245, |
|
"grad_norm": 0.0009174313163384795, |
|
"learning_rate": 2.736542210226421e-05, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 10.008097165991902, |
|
"grad_norm": 0.0004933126620016992, |
|
"learning_rate": 2.732793522267207e-05, |
|
"loss": 0.0, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 10.008771929824562, |
|
"grad_norm": 0.002700564218685031, |
|
"learning_rate": 2.729044834307992e-05, |
|
"loss": 0.0, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 10.00944669365722, |
|
"grad_norm": 0.0008284652722068131, |
|
"learning_rate": 2.725296146348778e-05, |
|
"loss": 0.7602, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 10.010121457489879, |
|
"grad_norm": 0.0005742429639212787, |
|
"learning_rate": 2.721547458389564e-05, |
|
"loss": 0.0013, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 10.010796221322536, |
|
"grad_norm": 0.0001865791855379939, |
|
"learning_rate": 2.7177987704303492e-05, |
|
"loss": 1.0409, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 10.011470985155196, |
|
"grad_norm": 0.0005401599337346852, |
|
"learning_rate": 2.714050082471135e-05, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 10.012145748987853, |
|
"grad_norm": 10.862272262573242, |
|
"learning_rate": 2.710301394511921e-05, |
|
"loss": 0.6573, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 10.012820512820513, |
|
"grad_norm": 37.7309455871582, |
|
"learning_rate": 2.706552706552707e-05, |
|
"loss": 0.7899, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 10.013495276653172, |
|
"grad_norm": 0.0009414503001607955, |
|
"learning_rate": 2.7028040185934922e-05, |
|
"loss": 0.0011, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 10.01417004048583, |
|
"grad_norm": 0.0004630287585314363, |
|
"learning_rate": 2.699055330634278e-05, |
|
"loss": 0.0001, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 10.01484480431849, |
|
"grad_norm": 0.0013565809931606054, |
|
"learning_rate": 2.695306642675064e-05, |
|
"loss": 0.0001, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 10.015519568151147, |
|
"grad_norm": 0.0022902884520590305, |
|
"learning_rate": 2.69155795471585e-05, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 10.016194331983806, |
|
"grad_norm": 0.0009432418155483902, |
|
"learning_rate": 2.687809266756635e-05, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 10.016869095816464, |
|
"grad_norm": 0.0009669333812780678, |
|
"learning_rate": 2.684060578797421e-05, |
|
"loss": 0.0001, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 10.017543859649123, |
|
"grad_norm": 0.0011604432947933674, |
|
"learning_rate": 2.680311890838207e-05, |
|
"loss": 0.0011, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 10.018218623481781, |
|
"grad_norm": 0.0037133977748453617, |
|
"learning_rate": 2.6765632028789922e-05, |
|
"loss": 0.0089, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 10.01889338731444, |
|
"grad_norm": 0.0019840672612190247, |
|
"learning_rate": 2.672814514919778e-05, |
|
"loss": 0.0001, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 10.019568151147098, |
|
"grad_norm": 0.0010515927569940686, |
|
"learning_rate": 2.669065826960564e-05, |
|
"loss": 0.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 10.020242914979757, |
|
"grad_norm": 0.00031027224031277, |
|
"learning_rate": 2.66531713900135e-05, |
|
"loss": 0.0001, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 10.020917678812415, |
|
"grad_norm": 0.0026109693571925163, |
|
"learning_rate": 2.6615684510421352e-05, |
|
"loss": 0.0001, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 10.021592442645074, |
|
"grad_norm": 0.001366731128655374, |
|
"learning_rate": 2.657819763082921e-05, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 10.022267206477732, |
|
"grad_norm": 0.0010099642677232623, |
|
"learning_rate": 2.654071075123707e-05, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 10.022941970310391, |
|
"grad_norm": 0.0007431610720232129, |
|
"learning_rate": 2.6503223871644926e-05, |
|
"loss": 0.3974, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 10.023616734143049, |
|
"grad_norm": 0.0005235990975052118, |
|
"learning_rate": 2.6465736992052782e-05, |
|
"loss": 0.0001, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 10.024291497975709, |
|
"grad_norm": 0.002703143283724785, |
|
"learning_rate": 2.642825011246064e-05, |
|
"loss": 0.9305, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 10.024966261808368, |
|
"grad_norm": 0.0013169089797884226, |
|
"learning_rate": 2.6390763232868497e-05, |
|
"loss": 0.0061, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 10.025641025641026, |
|
"grad_norm": 0.0006970075191929936, |
|
"learning_rate": 2.6353276353276356e-05, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 10.026315789473685, |
|
"grad_norm": 0.0022921450436115265, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 2.1218, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 10.026990553306343, |
|
"grad_norm": 0.015075190924108028, |
|
"learning_rate": 2.6278302594092068e-05, |
|
"loss": 0.0001, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 10.027665317139002, |
|
"grad_norm": 0.0003634750028140843, |
|
"learning_rate": 2.6240815714499927e-05, |
|
"loss": 0.0003, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 10.02834008097166, |
|
"grad_norm": 0.005189963150769472, |
|
"learning_rate": 2.6203328834907783e-05, |
|
"loss": 0.0002, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 10.029014844804319, |
|
"grad_norm": 0.0013347219210118055, |
|
"learning_rate": 2.616584195531564e-05, |
|
"loss": 0.0004, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 10.029689608636977, |
|
"grad_norm": 0.011999278329312801, |
|
"learning_rate": 2.6128355075723498e-05, |
|
"loss": 0.0001, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 10.030364372469636, |
|
"grad_norm": 0.0007896720780991018, |
|
"learning_rate": 2.6090868196131357e-05, |
|
"loss": 0.0001, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 10.031039136302294, |
|
"grad_norm": 0.004586980678141117, |
|
"learning_rate": 2.605338131653921e-05, |
|
"loss": 0.0001, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 10.031713900134953, |
|
"grad_norm": 0.001417971565388143, |
|
"learning_rate": 2.601589443694707e-05, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 10.03238866396761, |
|
"grad_norm": 0.0019554668106138706, |
|
"learning_rate": 2.5978407557354928e-05, |
|
"loss": 0.0001, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 10.03306342780027, |
|
"grad_norm": 0.028743397444486618, |
|
"learning_rate": 2.5940920677762787e-05, |
|
"loss": 0.0001, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 10.033738191632928, |
|
"grad_norm": 0.0008731328416615725, |
|
"learning_rate": 2.590343379817064e-05, |
|
"loss": 0.0001, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 10.034412955465587, |
|
"grad_norm": 0.0012366612209007144, |
|
"learning_rate": 2.5865946918578498e-05, |
|
"loss": 0.0001, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 10.035087719298245, |
|
"grad_norm": 0.0026165838353335857, |
|
"learning_rate": 2.5828460038986357e-05, |
|
"loss": 0.0001, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 10.035762483130904, |
|
"grad_norm": 0.014659812673926353, |
|
"learning_rate": 2.579097315939421e-05, |
|
"loss": 0.0002, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 10.036437246963562, |
|
"grad_norm": 0.00143991329241544, |
|
"learning_rate": 2.575348627980207e-05, |
|
"loss": 0.0, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 10.037112010796221, |
|
"grad_norm": 0.00752654206007719, |
|
"learning_rate": 2.5715999400209928e-05, |
|
"loss": 0.0001, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 10.03778677462888, |
|
"grad_norm": 0.0011906948639079928, |
|
"learning_rate": 2.5678512520617787e-05, |
|
"loss": 0.0001, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 10.038461538461538, |
|
"grad_norm": 0.004429694265127182, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.0001, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 10.039136302294198, |
|
"grad_norm": 0.00023650593357160687, |
|
"learning_rate": 2.56035387614335e-05, |
|
"loss": 0.0001, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 10.039811066126855, |
|
"grad_norm": 0.0007866804371587932, |
|
"learning_rate": 2.5566051881841358e-05, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.040485829959515, |
|
"grad_norm": 0.0013989802682772279, |
|
"learning_rate": 2.5528565002249217e-05, |
|
"loss": 0.0001, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 10.041160593792172, |
|
"grad_norm": 0.0008867586147971451, |
|
"learning_rate": 2.549107812265707e-05, |
|
"loss": 0.2682, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 10.041835357624832, |
|
"grad_norm": 0.001083207200281322, |
|
"learning_rate": 2.545359124306493e-05, |
|
"loss": 0.0, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 10.04251012145749, |
|
"grad_norm": 0.0010164374252781272, |
|
"learning_rate": 2.5416104363472788e-05, |
|
"loss": 0.0014, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 10.043184885290149, |
|
"grad_norm": 0.0032585004810243845, |
|
"learning_rate": 2.5378617483880647e-05, |
|
"loss": 0.0001, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 10.043859649122806, |
|
"grad_norm": 0.0007220272673293948, |
|
"learning_rate": 2.53411306042885e-05, |
|
"loss": 0.0001, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 10.044534412955466, |
|
"grad_norm": 0.0010795597918331623, |
|
"learning_rate": 2.530364372469636e-05, |
|
"loss": 0.0001, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 10.045209176788124, |
|
"grad_norm": 0.0033428198657929897, |
|
"learning_rate": 2.5266156845104218e-05, |
|
"loss": 0.0, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 10.045883940620783, |
|
"grad_norm": 0.0007780479500070214, |
|
"learning_rate": 2.522866996551207e-05, |
|
"loss": 0.0003, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 10.04655870445344, |
|
"grad_norm": 0.002177152084186673, |
|
"learning_rate": 2.519118308591993e-05, |
|
"loss": 1.034, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 10.0472334682861, |
|
"grad_norm": 0.012076308950781822, |
|
"learning_rate": 2.515369620632779e-05, |
|
"loss": 0.0001, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 10.047908232118758, |
|
"grad_norm": 0.00882900319993496, |
|
"learning_rate": 2.5116209326735644e-05, |
|
"loss": 0.0001, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 10.048582995951417, |
|
"grad_norm": 0.0017163383308798075, |
|
"learning_rate": 2.50787224471435e-05, |
|
"loss": 0.0002, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 10.049257759784076, |
|
"grad_norm": 0.07908181846141815, |
|
"learning_rate": 2.504123556755136e-05, |
|
"loss": 0.0002, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 10.049932523616734, |
|
"grad_norm": 0.0007900993805378675, |
|
"learning_rate": 2.5003748687959215e-05, |
|
"loss": 0.0, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"eval_accuracy": 0.8928571428571429, |
|
"eval_f1": 0.8927764491849939, |
|
"eval_loss": 0.6104062795639038, |
|
"eval_runtime": 74.468, |
|
"eval_samples_per_second": 1.504, |
|
"eval_steps_per_second": 1.504, |
|
"step": 8151 |
|
}, |
|
{ |
|
"epoch": 11.000607287449393, |
|
"grad_norm": 0.0007240193081088364, |
|
"learning_rate": 2.496626180836707e-05, |
|
"loss": 0.0, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 11.001282051282052, |
|
"grad_norm": 0.0002468556631356478, |
|
"learning_rate": 2.492877492877493e-05, |
|
"loss": 0.11, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 11.00195681511471, |
|
"grad_norm": 0.0006738382508046925, |
|
"learning_rate": 2.4891288049182786e-05, |
|
"loss": 0.0009, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 11.00263157894737, |
|
"grad_norm": 0.0002363823732594028, |
|
"learning_rate": 2.485380116959064e-05, |
|
"loss": 0.0001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 11.003306342780027, |
|
"grad_norm": 0.01611531712114811, |
|
"learning_rate": 2.48163142899985e-05, |
|
"loss": 0.0001, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 11.003981106612686, |
|
"grad_norm": 0.00017891006427817047, |
|
"learning_rate": 2.4778827410406356e-05, |
|
"loss": 0.0001, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 11.004655870445344, |
|
"grad_norm": 0.0012173757422715425, |
|
"learning_rate": 2.4741340530814216e-05, |
|
"loss": 0.0, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 11.005330634278003, |
|
"grad_norm": 0.00027030581259168684, |
|
"learning_rate": 2.470385365122207e-05, |
|
"loss": 0.0001, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 11.006005398110661, |
|
"grad_norm": 0.0007059440249577165, |
|
"learning_rate": 2.466636677162993e-05, |
|
"loss": 0.0038, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 11.00668016194332, |
|
"grad_norm": 0.0038354801945388317, |
|
"learning_rate": 2.4628879892037786e-05, |
|
"loss": 0.0, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 11.007354925775978, |
|
"grad_norm": 0.002050234004855156, |
|
"learning_rate": 2.4591393012445645e-05, |
|
"loss": 0.0001, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 11.008029689608637, |
|
"grad_norm": 0.0007953056483529508, |
|
"learning_rate": 2.45539061328535e-05, |
|
"loss": 0.0001, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 11.008704453441295, |
|
"grad_norm": 0.0005133861559443176, |
|
"learning_rate": 2.451641925326136e-05, |
|
"loss": 0.0001, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 11.009379217273954, |
|
"grad_norm": 0.00046163739170879126, |
|
"learning_rate": 2.4478932373669216e-05, |
|
"loss": 0.0, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 11.010053981106612, |
|
"grad_norm": 0.0001449552073609084, |
|
"learning_rate": 2.4441445494077075e-05, |
|
"loss": 0.3172, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 11.010728744939271, |
|
"grad_norm": 164.93666076660156, |
|
"learning_rate": 2.440395861448493e-05, |
|
"loss": 0.6368, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 11.011403508771929, |
|
"grad_norm": 0.000476795103168115, |
|
"learning_rate": 2.4366471734892787e-05, |
|
"loss": 0.0, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 11.012078272604588, |
|
"grad_norm": 0.0037983739748597145, |
|
"learning_rate": 2.4328984855300646e-05, |
|
"loss": 0.0002, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 11.012753036437246, |
|
"grad_norm": 0.000796021893620491, |
|
"learning_rate": 2.4291497975708502e-05, |
|
"loss": 0.0, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 11.013427800269906, |
|
"grad_norm": 0.0005037175142206252, |
|
"learning_rate": 2.425401109611636e-05, |
|
"loss": 0.0002, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 11.014102564102565, |
|
"grad_norm": 0.0043189083226025105, |
|
"learning_rate": 2.4216524216524217e-05, |
|
"loss": 0.0001, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 11.014777327935223, |
|
"grad_norm": 0.0015088323270902038, |
|
"learning_rate": 2.4179037336932076e-05, |
|
"loss": 0.0, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 11.015452091767882, |
|
"grad_norm": 0.009932787157595158, |
|
"learning_rate": 2.414155045733993e-05, |
|
"loss": 0.0, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 11.01612685560054, |
|
"grad_norm": 0.0006705676787532866, |
|
"learning_rate": 2.410406357774779e-05, |
|
"loss": 0.0, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 11.016801619433199, |
|
"grad_norm": 0.0004983929102309048, |
|
"learning_rate": 2.4066576698155647e-05, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 11.017476383265857, |
|
"grad_norm": 0.0002321622014278546, |
|
"learning_rate": 2.4029089818563506e-05, |
|
"loss": 0.0, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 11.018151147098516, |
|
"grad_norm": 0.00045225844951346517, |
|
"learning_rate": 2.399160293897136e-05, |
|
"loss": 0.0001, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 11.018825910931174, |
|
"grad_norm": 0.0006059862207621336, |
|
"learning_rate": 2.395411605937922e-05, |
|
"loss": 0.0, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 11.019500674763833, |
|
"grad_norm": 0.00025944746448658407, |
|
"learning_rate": 2.3916629179787076e-05, |
|
"loss": 0.0, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 11.02017543859649, |
|
"grad_norm": 0.005270655732601881, |
|
"learning_rate": 2.3879142300194932e-05, |
|
"loss": 0.0, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 11.02085020242915, |
|
"grad_norm": 0.0001714004756649956, |
|
"learning_rate": 2.384165542060279e-05, |
|
"loss": 0.0, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 11.021524966261808, |
|
"grad_norm": 0.0004896153695881367, |
|
"learning_rate": 2.3804168541010647e-05, |
|
"loss": 0.0001, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 11.022199730094467, |
|
"grad_norm": 0.0004871699493378401, |
|
"learning_rate": 2.3766681661418506e-05, |
|
"loss": 0.0, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 11.022874493927125, |
|
"grad_norm": 0.00332398503087461, |
|
"learning_rate": 2.3729194781826362e-05, |
|
"loss": 0.0, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 11.023549257759784, |
|
"grad_norm": 0.0004967203130945563, |
|
"learning_rate": 2.369170790223422e-05, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.024224021592442, |
|
"grad_norm": 0.0006828585756011307, |
|
"learning_rate": 2.3654221022642077e-05, |
|
"loss": 0.0, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 11.024898785425101, |
|
"grad_norm": 0.00026437186170369387, |
|
"learning_rate": 2.3616734143049933e-05, |
|
"loss": 0.0923, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 11.025573549257759, |
|
"grad_norm": 0.001157809398137033, |
|
"learning_rate": 2.3579247263457792e-05, |
|
"loss": 0.3747, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 11.026248313090418, |
|
"grad_norm": 0.0006130054825916886, |
|
"learning_rate": 2.3541760383865648e-05, |
|
"loss": 0.1333, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 11.026923076923078, |
|
"grad_norm": 0.004360508639365435, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.0001, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 11.027597840755735, |
|
"grad_norm": 0.0038445070385932922, |
|
"learning_rate": 2.3466786624681363e-05, |
|
"loss": 0.0005, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 11.028272604588395, |
|
"grad_norm": 0.0003999462933279574, |
|
"learning_rate": 2.342929974508922e-05, |
|
"loss": 0.0, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 11.028947368421052, |
|
"grad_norm": 0.0013614681083709002, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.0, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 11.029622132253712, |
|
"grad_norm": 8.867596625350416e-05, |
|
"learning_rate": 2.3354325985904933e-05, |
|
"loss": 0.0, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 11.03029689608637, |
|
"grad_norm": 0.0005633147084154189, |
|
"learning_rate": 2.331683910631279e-05, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 11.030971659919029, |
|
"grad_norm": 0.0005220117163844407, |
|
"learning_rate": 2.327935222672065e-05, |
|
"loss": 0.0001, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 11.031646423751686, |
|
"grad_norm": 0.0004213712236378342, |
|
"learning_rate": 2.3241865347128504e-05, |
|
"loss": 0.0, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 11.032321187584346, |
|
"grad_norm": 0.00038689616485498846, |
|
"learning_rate": 2.3204378467536363e-05, |
|
"loss": 0.0, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 11.032995951417004, |
|
"grad_norm": 0.00039902018033899367, |
|
"learning_rate": 2.316689158794422e-05, |
|
"loss": 0.0, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 11.033670715249663, |
|
"grad_norm": 0.0026982324197888374, |
|
"learning_rate": 2.3129404708352078e-05, |
|
"loss": 0.0, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 11.03434547908232, |
|
"grad_norm": 0.0001991643221117556, |
|
"learning_rate": 2.3091917828759934e-05, |
|
"loss": 0.0, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 11.03502024291498, |
|
"grad_norm": 0.0019273010548204184, |
|
"learning_rate": 2.3054430949167793e-05, |
|
"loss": 0.0, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 11.035695006747638, |
|
"grad_norm": 0.000698404386639595, |
|
"learning_rate": 2.301694406957565e-05, |
|
"loss": 0.0001, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 11.036369770580297, |
|
"grad_norm": 0.00025344561436213553, |
|
"learning_rate": 2.2979457189983508e-05, |
|
"loss": 0.0, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 11.037044534412955, |
|
"grad_norm": 0.0027119882870465517, |
|
"learning_rate": 2.2941970310391364e-05, |
|
"loss": 0.4804, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 11.037719298245614, |
|
"grad_norm": 0.00020401214715093374, |
|
"learning_rate": 2.290448343079922e-05, |
|
"loss": 0.0, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 11.038394062078273, |
|
"grad_norm": 0.0004772163520101458, |
|
"learning_rate": 2.286699655120708e-05, |
|
"loss": 0.0, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 11.039068825910931, |
|
"grad_norm": 0.0004061859508510679, |
|
"learning_rate": 2.2829509671614935e-05, |
|
"loss": 0.0001, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 11.03974358974359, |
|
"grad_norm": 0.0010080209467560053, |
|
"learning_rate": 2.2792022792022794e-05, |
|
"loss": 0.0, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 11.040418353576248, |
|
"grad_norm": 0.00021367882436607033, |
|
"learning_rate": 2.275453591243065e-05, |
|
"loss": 0.0, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 11.041093117408908, |
|
"grad_norm": 0.002230451675131917, |
|
"learning_rate": 2.271704903283851e-05, |
|
"loss": 0.0, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 11.041767881241565, |
|
"grad_norm": 0.0003300936659798026, |
|
"learning_rate": 2.2679562153246365e-05, |
|
"loss": 0.0, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 11.042442645074225, |
|
"grad_norm": 0.0023498530499637127, |
|
"learning_rate": 2.2642075273654224e-05, |
|
"loss": 0.0001, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 11.043117408906882, |
|
"grad_norm": 0.0011958705727010965, |
|
"learning_rate": 2.260458839406208e-05, |
|
"loss": 0.0001, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 11.043792172739542, |
|
"grad_norm": 0.0022039199247956276, |
|
"learning_rate": 2.256710151446994e-05, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 11.0444669365722, |
|
"grad_norm": 0.0003688375581987202, |
|
"learning_rate": 2.2529614634877794e-05, |
|
"loss": 0.0059, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 11.045141700404859, |
|
"grad_norm": 0.0007805086788721383, |
|
"learning_rate": 2.2492127755285654e-05, |
|
"loss": 0.0, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 11.045816464237516, |
|
"grad_norm": 0.0009934029076248407, |
|
"learning_rate": 2.245464087569351e-05, |
|
"loss": 0.0, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 11.046491228070176, |
|
"grad_norm": 0.001246001455001533, |
|
"learning_rate": 2.2417153996101365e-05, |
|
"loss": 0.0, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 11.047165991902833, |
|
"grad_norm": 9.812816279008985e-05, |
|
"learning_rate": 2.2379667116509224e-05, |
|
"loss": 0.0, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 11.047840755735493, |
|
"grad_norm": 0.0004926809924654663, |
|
"learning_rate": 2.234218023691708e-05, |
|
"loss": 0.0, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 11.04851551956815, |
|
"grad_norm": 0.0003611448628362268, |
|
"learning_rate": 2.230469335732494e-05, |
|
"loss": 0.0, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 11.04919028340081, |
|
"grad_norm": 0.000520729401614517, |
|
"learning_rate": 2.2267206477732795e-05, |
|
"loss": 0.0, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 11.049865047233467, |
|
"grad_norm": 0.00023293115373235196, |
|
"learning_rate": 2.2229719598140654e-05, |
|
"loss": 0.0, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"eval_accuracy": 0.9196428571428571, |
|
"eval_f1": 0.9207212368977075, |
|
"eval_loss": 0.6125034689903259, |
|
"eval_runtime": 71.2839, |
|
"eval_samples_per_second": 1.571, |
|
"eval_steps_per_second": 1.571, |
|
"step": 8892 |
|
}, |
|
{ |
|
"epoch": 12.000539811066126, |
|
"grad_norm": 0.0007233197102323174, |
|
"learning_rate": 2.219223271854851e-05, |
|
"loss": 0.4448, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 12.001214574898786, |
|
"grad_norm": 0.0002516189415473491, |
|
"learning_rate": 2.2154745838956366e-05, |
|
"loss": 0.0354, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 12.001889338731443, |
|
"grad_norm": 0.0003420355205889791, |
|
"learning_rate": 2.2117258959364225e-05, |
|
"loss": 0.0, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 12.002564102564103, |
|
"grad_norm": 0.0004494291788432747, |
|
"learning_rate": 2.207977207977208e-05, |
|
"loss": 0.0, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 12.003238866396762, |
|
"grad_norm": 0.00031234361813403666, |
|
"learning_rate": 2.2042285200179936e-05, |
|
"loss": 0.0, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 12.00391363022942, |
|
"grad_norm": 0.00012721461826004088, |
|
"learning_rate": 2.2004798320587796e-05, |
|
"loss": 0.0838, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 12.004588394062079, |
|
"grad_norm": 0.000489223632030189, |
|
"learning_rate": 2.196731144099565e-05, |
|
"loss": 0.0, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 12.005263157894737, |
|
"grad_norm": 0.0033521486911922693, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.1973, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 12.005937921727396, |
|
"grad_norm": 0.009397713467478752, |
|
"learning_rate": 2.1892337681811366e-05, |
|
"loss": 0.0, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 12.006612685560054, |
|
"grad_norm": 0.006849181838333607, |
|
"learning_rate": 2.1854850802219222e-05, |
|
"loss": 0.0, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 12.007287449392713, |
|
"grad_norm": 0.0006626849644817412, |
|
"learning_rate": 2.181736392262708e-05, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.00796221322537, |
|
"grad_norm": 0.000323317275615409, |
|
"learning_rate": 2.1779877043034937e-05, |
|
"loss": 0.4233, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 12.00863697705803, |
|
"grad_norm": 0.00013916198804508895, |
|
"learning_rate": 2.1742390163442796e-05, |
|
"loss": 0.0006, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 12.009311740890688, |
|
"grad_norm": 0.0004281499423086643, |
|
"learning_rate": 2.1704903283850652e-05, |
|
"loss": 0.0, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 12.009986504723347, |
|
"grad_norm": 0.0038120527751743793, |
|
"learning_rate": 2.166741640425851e-05, |
|
"loss": 0.2496, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 12.010661268556005, |
|
"grad_norm": 0.007827537134289742, |
|
"learning_rate": 2.1629929524666367e-05, |
|
"loss": 0.0, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 12.011336032388664, |
|
"grad_norm": 0.0004882031353190541, |
|
"learning_rate": 2.1592442645074226e-05, |
|
"loss": 0.0236, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 12.012010796221322, |
|
"grad_norm": 0.0006974562420509756, |
|
"learning_rate": 2.1554955765482082e-05, |
|
"loss": 0.0, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 12.012685560053981, |
|
"grad_norm": 0.0007927274564281106, |
|
"learning_rate": 2.151746888588994e-05, |
|
"loss": 0.0, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 12.013360323886639, |
|
"grad_norm": 0.0005972622311674058, |
|
"learning_rate": 2.1479982006297797e-05, |
|
"loss": 0.0, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 12.014035087719298, |
|
"grad_norm": 0.0020678879227489233, |
|
"learning_rate": 2.1442495126705653e-05, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 12.014709851551958, |
|
"grad_norm": 0.0017037901561707258, |
|
"learning_rate": 2.1405008247113512e-05, |
|
"loss": 0.0, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 12.015384615384615, |
|
"grad_norm": 0.002625885419547558, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.0, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 12.016059379217275, |
|
"grad_norm": 0.00016007163503672928, |
|
"learning_rate": 2.1330034487929227e-05, |
|
"loss": 0.0, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 12.016734143049932, |
|
"grad_norm": 8.975803211797029e-05, |
|
"learning_rate": 2.1292547608337082e-05, |
|
"loss": 0.0, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 12.017408906882592, |
|
"grad_norm": 0.00010270516213495284, |
|
"learning_rate": 2.125506072874494e-05, |
|
"loss": 0.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 12.01808367071525, |
|
"grad_norm": 0.0003781057021114975, |
|
"learning_rate": 2.1217573849152797e-05, |
|
"loss": 0.0, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 12.018758434547909, |
|
"grad_norm": 0.00045806102571077645, |
|
"learning_rate": 2.1180086969560657e-05, |
|
"loss": 0.0, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 12.019433198380566, |
|
"grad_norm": 0.00040667993016541004, |
|
"learning_rate": 2.1142600089968512e-05, |
|
"loss": 0.0, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 12.020107962213226, |
|
"grad_norm": 7.579607336083427e-05, |
|
"learning_rate": 2.110511321037637e-05, |
|
"loss": 0.0, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 12.020782726045883, |
|
"grad_norm": 0.0002768370322883129, |
|
"learning_rate": 2.1067626330784227e-05, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 12.021457489878543, |
|
"grad_norm": 0.0010953324381262064, |
|
"learning_rate": 2.1030139451192083e-05, |
|
"loss": 0.0, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 12.0221322537112, |
|
"grad_norm": 0.00658809207379818, |
|
"learning_rate": 2.0992652571599942e-05, |
|
"loss": 0.0919, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 12.02280701754386, |
|
"grad_norm": 0.0006163925281725824, |
|
"learning_rate": 2.0955165692007798e-05, |
|
"loss": 0.0, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 12.023481781376518, |
|
"grad_norm": 0.000813082791864872, |
|
"learning_rate": 2.0917678812415657e-05, |
|
"loss": 0.0001, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 12.024156545209177, |
|
"grad_norm": 0.00046772375935688615, |
|
"learning_rate": 2.0880191932823513e-05, |
|
"loss": 0.0, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 12.024831309041835, |
|
"grad_norm": 0.0005937941023148596, |
|
"learning_rate": 2.0842705053231372e-05, |
|
"loss": 0.0002, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 12.025506072874494, |
|
"grad_norm": 0.000659748911857605, |
|
"learning_rate": 2.0805218173639228e-05, |
|
"loss": 0.0, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 12.026180836707152, |
|
"grad_norm": 0.0006786544108763337, |
|
"learning_rate": 2.0767731294047084e-05, |
|
"loss": 0.0, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 12.026855600539811, |
|
"grad_norm": 0.000225842886720784, |
|
"learning_rate": 2.0730244414454943e-05, |
|
"loss": 0.0, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 12.02753036437247, |
|
"grad_norm": 0.0006020697182975709, |
|
"learning_rate": 2.06927575348628e-05, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 12.028205128205128, |
|
"grad_norm": 0.0005702193011529744, |
|
"learning_rate": 2.0655270655270654e-05, |
|
"loss": 0.0, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 12.028879892037788, |
|
"grad_norm": 0.000844390713609755, |
|
"learning_rate": 2.0617783775678514e-05, |
|
"loss": 0.0, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 12.029554655870445, |
|
"grad_norm": 9.666190453572199e-05, |
|
"learning_rate": 2.058029689608637e-05, |
|
"loss": 0.0, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 12.030229419703105, |
|
"grad_norm": 0.0001864578080130741, |
|
"learning_rate": 2.0542810016494225e-05, |
|
"loss": 0.0, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 12.030904183535762, |
|
"grad_norm": 0.00014394025492947549, |
|
"learning_rate": 2.0505323136902084e-05, |
|
"loss": 0.0, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 12.031578947368422, |
|
"grad_norm": 0.00027057836996391416, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.0, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 12.03225371120108, |
|
"grad_norm": 0.0004066646215505898, |
|
"learning_rate": 2.04303493777178e-05, |
|
"loss": 0.0, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 12.032928475033739, |
|
"grad_norm": 0.00043117342283949256, |
|
"learning_rate": 2.0392862498125655e-05, |
|
"loss": 0.0, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 12.033603238866396, |
|
"grad_norm": 0.00019329691713210195, |
|
"learning_rate": 2.0355375618533514e-05, |
|
"loss": 0.0001, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 12.034278002699056, |
|
"grad_norm": 0.00036019805702380836, |
|
"learning_rate": 2.031788873894137e-05, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 12.034952766531713, |
|
"grad_norm": 0.0006936113350093365, |
|
"learning_rate": 2.028040185934923e-05, |
|
"loss": 0.0, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 12.035627530364373, |
|
"grad_norm": 0.00041965124546550214, |
|
"learning_rate": 2.0242914979757085e-05, |
|
"loss": 0.0, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 12.03630229419703, |
|
"grad_norm": 0.00011109585466329008, |
|
"learning_rate": 2.0205428100164944e-05, |
|
"loss": 0.0, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 12.03697705802969, |
|
"grad_norm": 0.000144297766382806, |
|
"learning_rate": 2.01679412205728e-05, |
|
"loss": 0.0281, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 12.037651821862347, |
|
"grad_norm": 0.0002551145735196769, |
|
"learning_rate": 2.013045434098066e-05, |
|
"loss": 0.0, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 12.038326585695007, |
|
"grad_norm": 0.006847582757472992, |
|
"learning_rate": 2.0092967461388515e-05, |
|
"loss": 0.0, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 12.039001349527666, |
|
"grad_norm": 0.00011437670036684722, |
|
"learning_rate": 2.005548058179637e-05, |
|
"loss": 0.0, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 12.039676113360324, |
|
"grad_norm": 0.00040303889545612037, |
|
"learning_rate": 2.001799370220423e-05, |
|
"loss": 0.0, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 12.040350877192983, |
|
"grad_norm": 0.00046083523193374276, |
|
"learning_rate": 1.9980506822612085e-05, |
|
"loss": 0.0, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 12.04102564102564, |
|
"grad_norm": 0.0006515540299005806, |
|
"learning_rate": 1.9943019943019945e-05, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 12.0417004048583, |
|
"grad_norm": 0.00014752485731150955, |
|
"learning_rate": 1.99055330634278e-05, |
|
"loss": 0.0, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 12.042375168690958, |
|
"grad_norm": 0.0005620931042358279, |
|
"learning_rate": 1.986804618383566e-05, |
|
"loss": 0.0, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 12.043049932523617, |
|
"grad_norm": 0.00011923335841856897, |
|
"learning_rate": 1.9830559304243515e-05, |
|
"loss": 0.0, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 12.043724696356275, |
|
"grad_norm": 0.0002657576696947217, |
|
"learning_rate": 1.9793072424651374e-05, |
|
"loss": 0.0, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 12.044399460188934, |
|
"grad_norm": 0.0001235770614584908, |
|
"learning_rate": 1.975558554505923e-05, |
|
"loss": 0.0, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 12.045074224021592, |
|
"grad_norm": 0.0001751129748299718, |
|
"learning_rate": 1.971809866546709e-05, |
|
"loss": 0.4854, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 12.045748987854251, |
|
"grad_norm": 0.000554791884496808, |
|
"learning_rate": 1.9680611785874945e-05, |
|
"loss": 0.0, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 12.046423751686909, |
|
"grad_norm": 0.0003107208467554301, |
|
"learning_rate": 1.9643124906282804e-05, |
|
"loss": 0.0, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 12.047098515519568, |
|
"grad_norm": 0.0002857028157450259, |
|
"learning_rate": 1.960563802669066e-05, |
|
"loss": 0.0, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 12.047773279352226, |
|
"grad_norm": 0.0001487692934460938, |
|
"learning_rate": 1.9568151147098516e-05, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 12.048448043184885, |
|
"grad_norm": 0.0004835377912968397, |
|
"learning_rate": 1.9530664267506375e-05, |
|
"loss": 0.0, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 12.049122807017543, |
|
"grad_norm": 0.004288305062800646, |
|
"learning_rate": 1.949317738791423e-05, |
|
"loss": 0.0, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 12.049797570850203, |
|
"grad_norm": 0.0002630397502798587, |
|
"learning_rate": 1.945569050832209e-05, |
|
"loss": 0.0, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_f1": 0.9281167328042328, |
|
"eval_loss": 0.5643919110298157, |
|
"eval_runtime": 75.5753, |
|
"eval_samples_per_second": 1.482, |
|
"eval_steps_per_second": 1.482, |
|
"step": 9633 |
|
}, |
|
{ |
|
"epoch": 13.000472334682861, |
|
"grad_norm": 0.00026892725145444274, |
|
"learning_rate": 1.9418203628729946e-05, |
|
"loss": 0.0, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 13.001147098515519, |
|
"grad_norm": 0.00012843680451624095, |
|
"learning_rate": 1.9380716749137805e-05, |
|
"loss": 0.0, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 13.001821862348178, |
|
"grad_norm": 0.00029701701714657247, |
|
"learning_rate": 1.934322986954566e-05, |
|
"loss": 0.0, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 13.002496626180836, |
|
"grad_norm": 0.00036974012618884444, |
|
"learning_rate": 1.9305742989953516e-05, |
|
"loss": 0.0, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 13.003171390013495, |
|
"grad_norm": 0.0001296445552725345, |
|
"learning_rate": 1.9268256110361376e-05, |
|
"loss": 0.0078, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 13.003846153846155, |
|
"grad_norm": 0.0002359377540415153, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.0, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 13.004520917678812, |
|
"grad_norm": 0.0003535948053468019, |
|
"learning_rate": 1.9193282351177087e-05, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 13.005195681511472, |
|
"grad_norm": 0.00025236004148609936, |
|
"learning_rate": 1.9155795471584946e-05, |
|
"loss": 0.0, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 13.00587044534413, |
|
"grad_norm": 0.0002863478730432689, |
|
"learning_rate": 1.9118308591992802e-05, |
|
"loss": 0.0, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 13.006545209176789, |
|
"grad_norm": 0.00016143821994774044, |
|
"learning_rate": 1.9080821712400658e-05, |
|
"loss": 0.3645, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 13.007219973009446, |
|
"grad_norm": 0.0004113702161703259, |
|
"learning_rate": 1.9043334832808517e-05, |
|
"loss": 0.0, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 13.007894736842106, |
|
"grad_norm": 0.0008134804083965719, |
|
"learning_rate": 1.9005847953216373e-05, |
|
"loss": 0.0, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 13.008569500674763, |
|
"grad_norm": 0.00027760997181758285, |
|
"learning_rate": 1.8968361073624232e-05, |
|
"loss": 0.0, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 13.009244264507423, |
|
"grad_norm": 0.0016426608199253678, |
|
"learning_rate": 1.8930874194032088e-05, |
|
"loss": 0.0, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 13.00991902834008, |
|
"grad_norm": 0.0008006367716006935, |
|
"learning_rate": 1.8893387314439947e-05, |
|
"loss": 0.0, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 13.01059379217274, |
|
"grad_norm": 0.00025531640858389437, |
|
"learning_rate": 1.8855900434847803e-05, |
|
"loss": 0.0, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 13.011268556005398, |
|
"grad_norm": 0.0003084157651755959, |
|
"learning_rate": 1.8818413555255662e-05, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 13.011943319838057, |
|
"grad_norm": 0.0007207695161923766, |
|
"learning_rate": 1.8780926675663518e-05, |
|
"loss": 0.0001, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 13.012618083670715, |
|
"grad_norm": 0.00012202781363157555, |
|
"learning_rate": 1.8743439796071377e-05, |
|
"loss": 0.0, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 13.013292847503374, |
|
"grad_norm": 0.0012473361566662788, |
|
"learning_rate": 1.8705952916479233e-05, |
|
"loss": 0.0, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 13.013967611336032, |
|
"grad_norm": 0.0007895145681686699, |
|
"learning_rate": 1.8668466036887092e-05, |
|
"loss": 0.0, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 13.014642375168691, |
|
"grad_norm": 0.0002717502065934241, |
|
"learning_rate": 1.8630979157294948e-05, |
|
"loss": 0.0, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 13.015317139001349, |
|
"grad_norm": 0.0002320138446521014, |
|
"learning_rate": 1.8593492277702803e-05, |
|
"loss": 0.0, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 13.015991902834008, |
|
"grad_norm": 0.0002716576855164021, |
|
"learning_rate": 1.8556005398110663e-05, |
|
"loss": 0.0, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 13.016666666666667, |
|
"grad_norm": 7.131123129511252e-05, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 13.017341430499325, |
|
"grad_norm": 0.00045431696344166994, |
|
"learning_rate": 1.8481031638926377e-05, |
|
"loss": 0.0, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 13.018016194331985, |
|
"grad_norm": 0.00013243043213151395, |
|
"learning_rate": 1.8443544759334233e-05, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 13.018690958164642, |
|
"grad_norm": 0.00031196267809718847, |
|
"learning_rate": 1.8406057879742092e-05, |
|
"loss": 0.0, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 13.019365721997302, |
|
"grad_norm": 0.000940505473408848, |
|
"learning_rate": 1.8368571000149948e-05, |
|
"loss": 0.0, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 13.02004048582996, |
|
"grad_norm": 0.0002774264430627227, |
|
"learning_rate": 1.8331084120557807e-05, |
|
"loss": 0.0, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 13.020715249662619, |
|
"grad_norm": 0.0002633021795190871, |
|
"learning_rate": 1.8293597240965663e-05, |
|
"loss": 0.0, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 13.021390013495276, |
|
"grad_norm": 7.044156518531963e-05, |
|
"learning_rate": 1.8256110361373522e-05, |
|
"loss": 0.0, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 13.022064777327936, |
|
"grad_norm": 0.00017661662423051894, |
|
"learning_rate": 1.8218623481781378e-05, |
|
"loss": 0.0, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 13.022739541160593, |
|
"grad_norm": 0.00028747491887770593, |
|
"learning_rate": 1.8181136602189237e-05, |
|
"loss": 0.0, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 13.023414304993253, |
|
"grad_norm": 0.00039829890010878444, |
|
"learning_rate": 1.8143649722597093e-05, |
|
"loss": 0.0, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 13.02408906882591, |
|
"grad_norm": 0.00022789667127653956, |
|
"learning_rate": 1.810616284300495e-05, |
|
"loss": 0.0, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 13.02476383265857, |
|
"grad_norm": 0.00028411843231879175, |
|
"learning_rate": 1.8068675963412808e-05, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 13.025438596491227, |
|
"grad_norm": 0.0002080064732581377, |
|
"learning_rate": 1.8031189083820664e-05, |
|
"loss": 0.0, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 13.026113360323887, |
|
"grad_norm": 0.00023453705944120884, |
|
"learning_rate": 1.7993702204228523e-05, |
|
"loss": 0.0096, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 13.026788124156544, |
|
"grad_norm": 0.00010610045865178108, |
|
"learning_rate": 1.795621532463638e-05, |
|
"loss": 0.0, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 13.027462887989204, |
|
"grad_norm": 0.0001514716714154929, |
|
"learning_rate": 1.7918728445044234e-05, |
|
"loss": 0.0, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 13.028137651821863, |
|
"grad_norm": 0.00033169661764986813, |
|
"learning_rate": 1.7881241565452094e-05, |
|
"loss": 0.0, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 13.02881241565452, |
|
"grad_norm": 0.00013784744078293443, |
|
"learning_rate": 1.784375468585995e-05, |
|
"loss": 0.0, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 13.02948717948718, |
|
"grad_norm": 8.872824400896206e-05, |
|
"learning_rate": 1.7806267806267805e-05, |
|
"loss": 0.0, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 13.030161943319838, |
|
"grad_norm": 0.00037344591692090034, |
|
"learning_rate": 1.7768780926675664e-05, |
|
"loss": 0.0, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 13.030836707152497, |
|
"grad_norm": 0.0003687291464302689, |
|
"learning_rate": 1.773129404708352e-05, |
|
"loss": 0.0, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 13.031511470985155, |
|
"grad_norm": 0.00017588827176950872, |
|
"learning_rate": 1.769380716749138e-05, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 13.032186234817814, |
|
"grad_norm": 0.00026350162806920707, |
|
"learning_rate": 1.7656320287899235e-05, |
|
"loss": 0.0, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 13.032860998650472, |
|
"grad_norm": 9.849424532148987e-05, |
|
"learning_rate": 1.761883340830709e-05, |
|
"loss": 0.0, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 13.033535762483131, |
|
"grad_norm": 0.00028973835287615657, |
|
"learning_rate": 1.758134652871495e-05, |
|
"loss": 0.0, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 13.034210526315789, |
|
"grad_norm": 0.00022602990793529898, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.0, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 13.034885290148448, |
|
"grad_norm": 0.000543447386007756, |
|
"learning_rate": 1.7506372769530665e-05, |
|
"loss": 0.0, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 13.035560053981106, |
|
"grad_norm": 0.0006508603109978139, |
|
"learning_rate": 1.746888588993852e-05, |
|
"loss": 0.0, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 13.036234817813765, |
|
"grad_norm": 6.645211396971717e-05, |
|
"learning_rate": 1.743139901034638e-05, |
|
"loss": 0.4286, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 13.036909581646423, |
|
"grad_norm": 0.00017078538076020777, |
|
"learning_rate": 1.7393912130754236e-05, |
|
"loss": 0.0, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 13.037584345479083, |
|
"grad_norm": 0.0010123905958607793, |
|
"learning_rate": 1.7356425251162095e-05, |
|
"loss": 0.0, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 13.03825910931174, |
|
"grad_norm": 0.00027252710424363613, |
|
"learning_rate": 1.731893837156995e-05, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 13.0389338731444, |
|
"grad_norm": 0.00013458417379297316, |
|
"learning_rate": 1.728145149197781e-05, |
|
"loss": 0.0, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 13.039608636977057, |
|
"grad_norm": 0.00022678014647681266, |
|
"learning_rate": 1.7243964612385665e-05, |
|
"loss": 0.0, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 13.040283400809717, |
|
"grad_norm": 0.00022790237562730908, |
|
"learning_rate": 1.720647773279352e-05, |
|
"loss": 0.0, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 13.040958164642376, |
|
"grad_norm": 0.0002460694231558591, |
|
"learning_rate": 1.716899085320138e-05, |
|
"loss": 0.0, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 13.041632928475034, |
|
"grad_norm": 0.00018956181884277612, |
|
"learning_rate": 1.7131503973609236e-05, |
|
"loss": 0.0, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 13.042307692307693, |
|
"grad_norm": 0.00017144810408353806, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.0, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 13.04298245614035, |
|
"grad_norm": 0.0002925437001977116, |
|
"learning_rate": 1.705653021442495e-05, |
|
"loss": 0.0, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 13.04365721997301, |
|
"grad_norm": 0.0002330515708308667, |
|
"learning_rate": 1.701904333483281e-05, |
|
"loss": 0.013, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 13.044331983805668, |
|
"grad_norm": 0.00011631449160631746, |
|
"learning_rate": 1.6981556455240666e-05, |
|
"loss": 0.0, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 13.045006747638327, |
|
"grad_norm": 0.0003174786688759923, |
|
"learning_rate": 1.6944069575648525e-05, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 13.045681511470985, |
|
"grad_norm": 0.0001684718154137954, |
|
"learning_rate": 1.690658269605638e-05, |
|
"loss": 0.0, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 13.046356275303644, |
|
"grad_norm": 0.001750526949763298, |
|
"learning_rate": 1.686909581646424e-05, |
|
"loss": 0.0, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 13.047031039136302, |
|
"grad_norm": 0.00024045804457273334, |
|
"learning_rate": 1.6831608936872096e-05, |
|
"loss": 0.0, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 13.047705802968961, |
|
"grad_norm": 0.0006596571765840054, |
|
"learning_rate": 1.6794122057279955e-05, |
|
"loss": 0.0, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 13.048380566801619, |
|
"grad_norm": 0.001252808142453432, |
|
"learning_rate": 1.675663517768781e-05, |
|
"loss": 0.3996, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 13.049055330634278, |
|
"grad_norm": 0.0002453498891554773, |
|
"learning_rate": 1.6719148298095667e-05, |
|
"loss": 0.0, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 13.049730094466936, |
|
"grad_norm": 0.0005040777614340186, |
|
"learning_rate": 1.6681661418503526e-05, |
|
"loss": 0.0, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_f1": 0.9285714285714286, |
|
"eval_loss": 0.5062018632888794, |
|
"eval_runtime": 72.8565, |
|
"eval_samples_per_second": 1.537, |
|
"eval_steps_per_second": 1.537, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 14.000404858299595, |
|
"grad_norm": 6.942117033759132e-05, |
|
"learning_rate": 1.664417453891138e-05, |
|
"loss": 0.0, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 14.001079622132254, |
|
"grad_norm": 0.0004584739508572966, |
|
"learning_rate": 1.660668765931924e-05, |
|
"loss": 0.0, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 14.001754385964912, |
|
"grad_norm": 0.0002316083264304325, |
|
"learning_rate": 1.6569200779727097e-05, |
|
"loss": 0.2714, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 14.002429149797571, |
|
"grad_norm": 0.00024051779473666102, |
|
"learning_rate": 1.6531713900134956e-05, |
|
"loss": 0.0, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 14.003103913630229, |
|
"grad_norm": 0.0008334843441843987, |
|
"learning_rate": 1.649422702054281e-05, |
|
"loss": 0.0, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 14.003778677462888, |
|
"grad_norm": 0.00020968765602447093, |
|
"learning_rate": 1.6456740140950667e-05, |
|
"loss": 0.0178, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 14.004453441295546, |
|
"grad_norm": 0.00022330092906486243, |
|
"learning_rate": 1.6419253261358526e-05, |
|
"loss": 0.0, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 14.005128205128205, |
|
"grad_norm": 0.00021671153081115335, |
|
"learning_rate": 1.6381766381766382e-05, |
|
"loss": 0.009, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 14.005802968960865, |
|
"grad_norm": 0.00033940834691748023, |
|
"learning_rate": 1.6344279502174238e-05, |
|
"loss": 0.0, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 14.006477732793522, |
|
"grad_norm": 0.00048104580491781235, |
|
"learning_rate": 1.6306792622582097e-05, |
|
"loss": 0.0, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 14.007152496626182, |
|
"grad_norm": 0.00029779202304780483, |
|
"learning_rate": 1.6269305742989953e-05, |
|
"loss": 0.0, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 14.00782726045884, |
|
"grad_norm": 0.0004120915837120265, |
|
"learning_rate": 1.623181886339781e-05, |
|
"loss": 0.0, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 14.008502024291499, |
|
"grad_norm": 0.0003056660061702132, |
|
"learning_rate": 1.6194331983805668e-05, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 14.009176788124156, |
|
"grad_norm": 0.000378406752133742, |
|
"learning_rate": 1.6156845104213524e-05, |
|
"loss": 0.0039, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 14.009851551956816, |
|
"grad_norm": 0.0005049049505032599, |
|
"learning_rate": 1.6119358224621383e-05, |
|
"loss": 0.0, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 14.010526315789473, |
|
"grad_norm": 0.00025037440354935825, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.0, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 14.011201079622133, |
|
"grad_norm": 0.00037562023499049246, |
|
"learning_rate": 1.6044384465437098e-05, |
|
"loss": 0.0, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 14.01187584345479, |
|
"grad_norm": 0.0003121852350886911, |
|
"learning_rate": 1.6006897585844954e-05, |
|
"loss": 0.0, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 14.01255060728745, |
|
"grad_norm": 0.0003679589426610619, |
|
"learning_rate": 1.5969410706252813e-05, |
|
"loss": 0.0, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 14.013225371120107, |
|
"grad_norm": 0.00028154728352092206, |
|
"learning_rate": 1.593192382666067e-05, |
|
"loss": 0.0, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 14.013900134952767, |
|
"grad_norm": 0.00020654525724239647, |
|
"learning_rate": 1.5894436947068528e-05, |
|
"loss": 0.0, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 14.014574898785424, |
|
"grad_norm": 0.00034096045419573784, |
|
"learning_rate": 1.5856950067476383e-05, |
|
"loss": 0.0, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 14.015249662618084, |
|
"grad_norm": 0.00026030451408587396, |
|
"learning_rate": 1.5819463187884243e-05, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 14.015924426450741, |
|
"grad_norm": 8.031875040614977e-05, |
|
"learning_rate": 1.57819763082921e-05, |
|
"loss": 0.0, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 14.0165991902834, |
|
"grad_norm": 0.000621096114628017, |
|
"learning_rate": 1.5744489428699954e-05, |
|
"loss": 0.0, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 14.01727395411606, |
|
"grad_norm": 0.000524580420460552, |
|
"learning_rate": 1.5707002549107813e-05, |
|
"loss": 0.0, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 14.017948717948718, |
|
"grad_norm": 0.00011200064182048663, |
|
"learning_rate": 1.566951566951567e-05, |
|
"loss": 0.0, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 14.018623481781377, |
|
"grad_norm": 0.00032178129185922444, |
|
"learning_rate": 1.5632028789923528e-05, |
|
"loss": 0.0, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 14.019298245614035, |
|
"grad_norm": 0.00024140749883372337, |
|
"learning_rate": 1.5594541910331384e-05, |
|
"loss": 0.0, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 14.019973009446694, |
|
"grad_norm": 0.00022133818129077554, |
|
"learning_rate": 1.5557055030739243e-05, |
|
"loss": 0.0, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 14.020647773279352, |
|
"grad_norm": 0.0002797930792439729, |
|
"learning_rate": 1.55195681511471e-05, |
|
"loss": 0.0, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 14.021322537112011, |
|
"grad_norm": 0.0002334755117772147, |
|
"learning_rate": 1.5482081271554958e-05, |
|
"loss": 0.0, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 14.021997300944669, |
|
"grad_norm": 0.0002469551400281489, |
|
"learning_rate": 1.5444594391962814e-05, |
|
"loss": 0.0, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 14.022672064777328, |
|
"grad_norm": 8.5323081293609e-05, |
|
"learning_rate": 1.5407107512370673e-05, |
|
"loss": 0.0, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 14.023346828609986, |
|
"grad_norm": 0.00019482328207232058, |
|
"learning_rate": 1.536962063277853e-05, |
|
"loss": 0.0, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 14.024021592442645, |
|
"grad_norm": 0.00021449346968438476, |
|
"learning_rate": 1.5332133753186388e-05, |
|
"loss": 0.4463, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 14.024696356275303, |
|
"grad_norm": 0.00064310641027987, |
|
"learning_rate": 1.5294646873594244e-05, |
|
"loss": 0.0, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 14.025371120107962, |
|
"grad_norm": 0.00020890735322609544, |
|
"learning_rate": 1.52571599940021e-05, |
|
"loss": 0.0, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 14.02604588394062, |
|
"grad_norm": 0.0005201689782552421, |
|
"learning_rate": 1.5219673114409957e-05, |
|
"loss": 0.0, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 14.02672064777328, |
|
"grad_norm": 0.0005751597345806658, |
|
"learning_rate": 1.5182186234817813e-05, |
|
"loss": 0.0, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 14.027395411605937, |
|
"grad_norm": 0.0009388537146151066, |
|
"learning_rate": 1.5144699355225672e-05, |
|
"loss": 0.0, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 14.028070175438597, |
|
"grad_norm": 0.0005402613314799964, |
|
"learning_rate": 1.5107212475633528e-05, |
|
"loss": 0.0, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 14.028744939271254, |
|
"grad_norm": 0.00010339209256926551, |
|
"learning_rate": 1.5069725596041387e-05, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 14.029419703103914, |
|
"grad_norm": 0.0005152708035893738, |
|
"learning_rate": 1.5032238716449243e-05, |
|
"loss": 0.0, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 14.030094466936573, |
|
"grad_norm": 0.0007186134462244809, |
|
"learning_rate": 1.4994751836857102e-05, |
|
"loss": 0.0, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 14.03076923076923, |
|
"grad_norm": 0.0005925975274294615, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.0, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 14.03144399460189, |
|
"grad_norm": 0.00019110101857222617, |
|
"learning_rate": 1.4919778077672817e-05, |
|
"loss": 0.0, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 14.032118758434548, |
|
"grad_norm": 0.00018360813555773348, |
|
"learning_rate": 1.4882291198080673e-05, |
|
"loss": 0.0, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 14.032793522267207, |
|
"grad_norm": 0.00020973542996216565, |
|
"learning_rate": 1.4844804318488532e-05, |
|
"loss": 0.0, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 14.033468286099865, |
|
"grad_norm": 0.0007199271931312978, |
|
"learning_rate": 1.4807317438896387e-05, |
|
"loss": 0.0, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 14.034143049932524, |
|
"grad_norm": 9.265208791475743e-05, |
|
"learning_rate": 1.4769830559304243e-05, |
|
"loss": 0.0, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 14.034817813765182, |
|
"grad_norm": 8.818476635497063e-05, |
|
"learning_rate": 1.4732343679712102e-05, |
|
"loss": 0.0, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 14.035492577597841, |
|
"grad_norm": 0.00018744076078291982, |
|
"learning_rate": 1.4694856800119958e-05, |
|
"loss": 0.0, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 14.036167341430499, |
|
"grad_norm": 0.0003558373427949846, |
|
"learning_rate": 1.4657369920527816e-05, |
|
"loss": 0.0, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 14.036842105263158, |
|
"grad_norm": 0.00015756840002723038, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.0, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 14.037516869095816, |
|
"grad_norm": 0.00011693660053424537, |
|
"learning_rate": 1.458239616134353e-05, |
|
"loss": 0.0, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 14.038191632928475, |
|
"grad_norm": 0.00013403450429905206, |
|
"learning_rate": 1.4544909281751386e-05, |
|
"loss": 0.0, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 14.038866396761133, |
|
"grad_norm": 0.00014881876995787024, |
|
"learning_rate": 1.4507422402159246e-05, |
|
"loss": 0.0, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 14.039541160593792, |
|
"grad_norm": 0.00014527350140269846, |
|
"learning_rate": 1.4469935522567101e-05, |
|
"loss": 0.0, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 14.04021592442645, |
|
"grad_norm": 0.00016278887051157653, |
|
"learning_rate": 1.443244864297496e-05, |
|
"loss": 0.0, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 14.04089068825911, |
|
"grad_norm": 8.402692037634552e-05, |
|
"learning_rate": 1.4394961763382816e-05, |
|
"loss": 0.0, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 14.041565452091769, |
|
"grad_norm": 0.00017224009206984192, |
|
"learning_rate": 1.4357474883790675e-05, |
|
"loss": 0.0, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 14.042240215924426, |
|
"grad_norm": 0.0005430065211839974, |
|
"learning_rate": 1.4319988004198531e-05, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 14.042914979757086, |
|
"grad_norm": 0.0009919034782797098, |
|
"learning_rate": 1.4282501124606387e-05, |
|
"loss": 0.0, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 14.043589743589743, |
|
"grad_norm": 0.0003526155778672546, |
|
"learning_rate": 1.4245014245014246e-05, |
|
"loss": 0.0, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 14.044264507422403, |
|
"grad_norm": 9.54778806772083e-05, |
|
"learning_rate": 1.4207527365422102e-05, |
|
"loss": 0.0, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 14.04493927125506, |
|
"grad_norm": 0.0001671431091381237, |
|
"learning_rate": 1.4170040485829961e-05, |
|
"loss": 0.0, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 14.04561403508772, |
|
"grad_norm": 0.00022146198898553848, |
|
"learning_rate": 1.4132553606237817e-05, |
|
"loss": 0.3607, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 14.046288798920378, |
|
"grad_norm": 0.0001517270429758355, |
|
"learning_rate": 1.4095066726645676e-05, |
|
"loss": 0.0, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 14.046963562753037, |
|
"grad_norm": 0.0006123693310655653, |
|
"learning_rate": 1.4057579847053532e-05, |
|
"loss": 0.0, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 14.047638326585695, |
|
"grad_norm": 0.001610752660781145, |
|
"learning_rate": 1.4020092967461391e-05, |
|
"loss": 0.0, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 14.048313090418354, |
|
"grad_norm": 0.0001440331107005477, |
|
"learning_rate": 1.3982606087869247e-05, |
|
"loss": 0.0, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 14.048987854251012, |
|
"grad_norm": 0.0007454275619238615, |
|
"learning_rate": 1.3945119208277104e-05, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 14.049662618083671, |
|
"grad_norm": 0.0003447613853495568, |
|
"learning_rate": 1.390763232868496e-05, |
|
"loss": 0.0, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"eval_accuracy": 0.9375, |
|
"eval_f1": 0.9373365167161658, |
|
"eval_loss": 0.5185861587524414, |
|
"eval_runtime": 73.7028, |
|
"eval_samples_per_second": 1.52, |
|
"eval_steps_per_second": 1.52, |
|
"step": 11115 |
|
}, |
|
{ |
|
"epoch": 15.001349527665317, |
|
"eval_accuracy": 0.9023255813953488, |
|
"eval_f1": 0.9016146713373171, |
|
"eval_loss": 0.7568970918655396, |
|
"eval_runtime": 137.2112, |
|
"eval_samples_per_second": 1.567, |
|
"eval_steps_per_second": 1.567, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 15.001349527665317, |
|
"step": 11116, |
|
"total_flos": 2.8480212872085897e+19, |
|
"train_loss": 5.147429101647338e-09, |
|
"train_runtime": 143.5742, |
|
"train_samples_per_second": 5.161, |
|
"train_steps_per_second": 5.161 |
|
}, |
|
{ |
|
"epoch": 15.001349527665317, |
|
"eval_accuracy": 0.9375, |
|
"eval_f1": 0.9373365167161658, |
|
"eval_loss": 0.5185860991477966, |
|
"eval_runtime": 72.3734, |
|
"eval_samples_per_second": 1.548, |
|
"eval_steps_per_second": 1.548, |
|
"step": 11116 |
|
}, |
|
{ |
|
"epoch": 15.001349527665317, |
|
"eval_accuracy": 0.9023255813953488, |
|
"eval_f1": 0.9016146713373171, |
|
"eval_loss": 0.756963849067688, |
|
"eval_runtime": 137.6677, |
|
"eval_samples_per_second": 1.562, |
|
"eval_steps_per_second": 1.562, |
|
"step": 11116 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 741, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8480212872085897e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|