vivit-surf-analytics-runpod / trainer_state.json
deschamps-g's picture
Model save
a29a26b verified
raw
history blame
201 kB
{
"best_metric": 0.9373365167161658,
"best_model_checkpoint": "vivit-surf-analytics-runpod/checkpoint-11115",
"epoch": 15.001349527665317,
"eval_steps": 500,
"global_step": 11116,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006747638326585695,
"grad_norm": 32.80915451049805,
"learning_rate": 3.373819163292848e-07,
"loss": 1.5069,
"step": 10
},
{
"epoch": 0.001349527665317139,
"grad_norm": 22.916248321533203,
"learning_rate": 6.747638326585696e-07,
"loss": 1.354,
"step": 20
},
{
"epoch": 0.0020242914979757085,
"grad_norm": 34.89827346801758,
"learning_rate": 1.0121457489878542e-06,
"loss": 1.5187,
"step": 30
},
{
"epoch": 0.002699055330634278,
"grad_norm": 22.8042049407959,
"learning_rate": 1.3495276653171391e-06,
"loss": 1.478,
"step": 40
},
{
"epoch": 0.0033738191632928477,
"grad_norm": 27.662748336791992,
"learning_rate": 1.6869095816464238e-06,
"loss": 1.2862,
"step": 50
},
{
"epoch": 0.004048582995951417,
"grad_norm": 24.901159286499023,
"learning_rate": 2.0242914979757085e-06,
"loss": 1.2586,
"step": 60
},
{
"epoch": 0.004723346828609987,
"grad_norm": 25.327184677124023,
"learning_rate": 2.3616734143049934e-06,
"loss": 1.2728,
"step": 70
},
{
"epoch": 0.005398110661268556,
"grad_norm": 18.19566535949707,
"learning_rate": 2.6990553306342783e-06,
"loss": 1.0159,
"step": 80
},
{
"epoch": 0.006072874493927126,
"grad_norm": 20.370386123657227,
"learning_rate": 3.0364372469635627e-06,
"loss": 1.2504,
"step": 90
},
{
"epoch": 0.006747638326585695,
"grad_norm": 12.196557998657227,
"learning_rate": 3.3738191632928476e-06,
"loss": 2.0246,
"step": 100
},
{
"epoch": 0.007422402159244264,
"grad_norm": 12.822103500366211,
"learning_rate": 3.711201079622133e-06,
"loss": 0.8519,
"step": 110
},
{
"epoch": 0.008097165991902834,
"grad_norm": 6.872288227081299,
"learning_rate": 4.048582995951417e-06,
"loss": 0.7749,
"step": 120
},
{
"epoch": 0.008771929824561403,
"grad_norm": 40.45072937011719,
"learning_rate": 4.3859649122807014e-06,
"loss": 1.3494,
"step": 130
},
{
"epoch": 0.009446693657219974,
"grad_norm": 3.996568441390991,
"learning_rate": 4.723346828609987e-06,
"loss": 0.9678,
"step": 140
},
{
"epoch": 0.010121457489878543,
"grad_norm": 2.117781400680542,
"learning_rate": 5.060728744939271e-06,
"loss": 1.8095,
"step": 150
},
{
"epoch": 0.010796221322537112,
"grad_norm": 1.1970853805541992,
"learning_rate": 5.3981106612685565e-06,
"loss": 1.3044,
"step": 160
},
{
"epoch": 0.011470985155195682,
"grad_norm": 56.31877136230469,
"learning_rate": 5.735492577597841e-06,
"loss": 3.0015,
"step": 170
},
{
"epoch": 0.012145748987854251,
"grad_norm": 1.2758257389068604,
"learning_rate": 6.0728744939271254e-06,
"loss": 1.7654,
"step": 180
},
{
"epoch": 0.01282051282051282,
"grad_norm": 49.485626220703125,
"learning_rate": 6.41025641025641e-06,
"loss": 1.9578,
"step": 190
},
{
"epoch": 0.01349527665317139,
"grad_norm": 1.0414538383483887,
"learning_rate": 6.747638326585695e-06,
"loss": 2.0202,
"step": 200
},
{
"epoch": 0.01417004048582996,
"grad_norm": 46.221031188964844,
"learning_rate": 7.0850202429149805e-06,
"loss": 2.0222,
"step": 210
},
{
"epoch": 0.014844804318488529,
"grad_norm": 5.171656131744385,
"learning_rate": 7.422402159244266e-06,
"loss": 2.3988,
"step": 220
},
{
"epoch": 0.0155195681511471,
"grad_norm": 40.51677703857422,
"learning_rate": 7.75978407557355e-06,
"loss": 1.1011,
"step": 230
},
{
"epoch": 0.016194331983805668,
"grad_norm": 0.3821451961994171,
"learning_rate": 8.097165991902834e-06,
"loss": 1.7582,
"step": 240
},
{
"epoch": 0.016869095816464237,
"grad_norm": 56.244895935058594,
"learning_rate": 8.43454790823212e-06,
"loss": 1.628,
"step": 250
},
{
"epoch": 0.017543859649122806,
"grad_norm": 2.9704697132110596,
"learning_rate": 8.771929824561403e-06,
"loss": 2.4795,
"step": 260
},
{
"epoch": 0.018218623481781375,
"grad_norm": 2.420311689376831,
"learning_rate": 9.109311740890689e-06,
"loss": 1.2225,
"step": 270
},
{
"epoch": 0.018893387314439947,
"grad_norm": 3.02461314201355,
"learning_rate": 9.446693657219973e-06,
"loss": 1.813,
"step": 280
},
{
"epoch": 0.019568151147098516,
"grad_norm": 1.8302630186080933,
"learning_rate": 9.784075573549258e-06,
"loss": 1.6011,
"step": 290
},
{
"epoch": 0.020242914979757085,
"grad_norm": 62.22663497924805,
"learning_rate": 1.0121457489878542e-05,
"loss": 2.1712,
"step": 300
},
{
"epoch": 0.020917678812415654,
"grad_norm": 4.137598037719727,
"learning_rate": 1.0458839406207829e-05,
"loss": 1.691,
"step": 310
},
{
"epoch": 0.021592442645074223,
"grad_norm": 1.1848357915878296,
"learning_rate": 1.0796221322537113e-05,
"loss": 0.8611,
"step": 320
},
{
"epoch": 0.022267206477732792,
"grad_norm": 48.48101043701172,
"learning_rate": 1.1133603238866398e-05,
"loss": 2.4268,
"step": 330
},
{
"epoch": 0.022941970310391364,
"grad_norm": 1.995662808418274,
"learning_rate": 1.1470985155195682e-05,
"loss": 1.6822,
"step": 340
},
{
"epoch": 0.023616734143049933,
"grad_norm": 4.30789041519165,
"learning_rate": 1.1808367071524966e-05,
"loss": 1.5158,
"step": 350
},
{
"epoch": 0.024291497975708502,
"grad_norm": 2.7685494422912598,
"learning_rate": 1.2145748987854251e-05,
"loss": 0.9964,
"step": 360
},
{
"epoch": 0.02496626180836707,
"grad_norm": 47.719268798828125,
"learning_rate": 1.2483130904183535e-05,
"loss": 2.2256,
"step": 370
},
{
"epoch": 0.02564102564102564,
"grad_norm": 48.7852783203125,
"learning_rate": 1.282051282051282e-05,
"loss": 1.7263,
"step": 380
},
{
"epoch": 0.02631578947368421,
"grad_norm": 57.43927001953125,
"learning_rate": 1.3157894736842106e-05,
"loss": 1.9673,
"step": 390
},
{
"epoch": 0.02699055330634278,
"grad_norm": 44.42695617675781,
"learning_rate": 1.349527665317139e-05,
"loss": 1.6821,
"step": 400
},
{
"epoch": 0.02766531713900135,
"grad_norm": 26.7230167388916,
"learning_rate": 1.3832658569500675e-05,
"loss": 1.4562,
"step": 410
},
{
"epoch": 0.02834008097165992,
"grad_norm": 39.75962448120117,
"learning_rate": 1.4170040485829961e-05,
"loss": 0.7446,
"step": 420
},
{
"epoch": 0.029014844804318488,
"grad_norm": 45.954254150390625,
"learning_rate": 1.4507422402159246e-05,
"loss": 1.1802,
"step": 430
},
{
"epoch": 0.029689608636977057,
"grad_norm": 25.454557418823242,
"learning_rate": 1.4844804318488532e-05,
"loss": 1.1458,
"step": 440
},
{
"epoch": 0.030364372469635626,
"grad_norm": 39.98874282836914,
"learning_rate": 1.5182186234817813e-05,
"loss": 0.512,
"step": 450
},
{
"epoch": 0.0310391363022942,
"grad_norm": 22.448896408081055,
"learning_rate": 1.55195681511471e-05,
"loss": 1.5049,
"step": 460
},
{
"epoch": 0.03171390013495277,
"grad_norm": 26.93549346923828,
"learning_rate": 1.5856950067476383e-05,
"loss": 0.7132,
"step": 470
},
{
"epoch": 0.032388663967611336,
"grad_norm": 21.29535675048828,
"learning_rate": 1.6194331983805668e-05,
"loss": 1.3895,
"step": 480
},
{
"epoch": 0.033063427800269905,
"grad_norm": 0.4730716645717621,
"learning_rate": 1.6531713900134956e-05,
"loss": 0.1323,
"step": 490
},
{
"epoch": 0.033738191632928474,
"grad_norm": 6.3616862297058105,
"learning_rate": 1.686909581646424e-05,
"loss": 0.9272,
"step": 500
},
{
"epoch": 0.03441295546558704,
"grad_norm": 0.5018609762191772,
"learning_rate": 1.720647773279352e-05,
"loss": 1.3049,
"step": 510
},
{
"epoch": 0.03508771929824561,
"grad_norm": 62.65403747558594,
"learning_rate": 1.7543859649122806e-05,
"loss": 2.8324,
"step": 520
},
{
"epoch": 0.03576248313090418,
"grad_norm": 12.991227149963379,
"learning_rate": 1.7881241565452094e-05,
"loss": 0.5474,
"step": 530
},
{
"epoch": 0.03643724696356275,
"grad_norm": 0.5492640733718872,
"learning_rate": 1.8218623481781378e-05,
"loss": 1.2093,
"step": 540
},
{
"epoch": 0.037112010796221326,
"grad_norm": 0.07551870495080948,
"learning_rate": 1.8556005398110663e-05,
"loss": 1.2592,
"step": 550
},
{
"epoch": 0.037786774628879895,
"grad_norm": 1.5378609895706177,
"learning_rate": 1.8893387314439947e-05,
"loss": 0.3359,
"step": 560
},
{
"epoch": 0.038461538461538464,
"grad_norm": 0.23666121065616608,
"learning_rate": 1.923076923076923e-05,
"loss": 1.3151,
"step": 570
},
{
"epoch": 0.03913630229419703,
"grad_norm": 7.869609832763672,
"learning_rate": 1.9568151147098516e-05,
"loss": 0.6125,
"step": 580
},
{
"epoch": 0.0398110661268556,
"grad_norm": 13.923602104187012,
"learning_rate": 1.99055330634278e-05,
"loss": 1.1772,
"step": 590
},
{
"epoch": 0.04048582995951417,
"grad_norm": 29.88282585144043,
"learning_rate": 2.0242914979757085e-05,
"loss": 0.1969,
"step": 600
},
{
"epoch": 0.04116059379217274,
"grad_norm": 24.888872146606445,
"learning_rate": 2.058029689608637e-05,
"loss": 0.7039,
"step": 610
},
{
"epoch": 0.04183535762483131,
"grad_norm": 0.40080440044403076,
"learning_rate": 2.0917678812415657e-05,
"loss": 1.008,
"step": 620
},
{
"epoch": 0.04251012145748988,
"grad_norm": 4.866868495941162,
"learning_rate": 2.125506072874494e-05,
"loss": 1.0502,
"step": 630
},
{
"epoch": 0.043184885290148446,
"grad_norm": 1.2502915859222412,
"learning_rate": 2.1592442645074226e-05,
"loss": 0.7422,
"step": 640
},
{
"epoch": 0.043859649122807015,
"grad_norm": 0.6650214791297913,
"learning_rate": 2.1929824561403507e-05,
"loss": 0.6072,
"step": 650
},
{
"epoch": 0.044534412955465584,
"grad_norm": 16.356952667236328,
"learning_rate": 2.2267206477732795e-05,
"loss": 0.8198,
"step": 660
},
{
"epoch": 0.04520917678812416,
"grad_norm": 169.0858154296875,
"learning_rate": 2.260458839406208e-05,
"loss": 1.2371,
"step": 670
},
{
"epoch": 0.04588394062078273,
"grad_norm": 98.78671264648438,
"learning_rate": 2.2941970310391364e-05,
"loss": 1.1303,
"step": 680
},
{
"epoch": 0.0465587044534413,
"grad_norm": 99.31029510498047,
"learning_rate": 2.327935222672065e-05,
"loss": 0.7183,
"step": 690
},
{
"epoch": 0.04723346828609987,
"grad_norm": 30.58230209350586,
"learning_rate": 2.3616734143049933e-05,
"loss": 1.1701,
"step": 700
},
{
"epoch": 0.047908232118758436,
"grad_norm": 0.05228818207979202,
"learning_rate": 2.395411605937922e-05,
"loss": 0.1544,
"step": 710
},
{
"epoch": 0.048582995951417005,
"grad_norm": 1.974684715270996,
"learning_rate": 2.4291497975708502e-05,
"loss": 1.4774,
"step": 720
},
{
"epoch": 0.049257759784075573,
"grad_norm": 0.12068396061658859,
"learning_rate": 2.4628879892037786e-05,
"loss": 0.6994,
"step": 730
},
{
"epoch": 0.04993252361673414,
"grad_norm": 76.24126434326172,
"learning_rate": 2.496626180836707e-05,
"loss": 1.3533,
"step": 740
},
{
"epoch": 0.05,
"eval_accuracy": 0.8392857142857143,
"eval_f1": 0.8382276099228692,
"eval_loss": 0.7030884623527527,
"eval_runtime": 74.2993,
"eval_samples_per_second": 1.507,
"eval_steps_per_second": 1.507,
"step": 741
},
{
"epoch": 1.0006072874493928,
"grad_norm": 0.14856617152690887,
"learning_rate": 2.530364372469636e-05,
"loss": 0.4158,
"step": 750
},
{
"epoch": 1.0012820512820513,
"grad_norm": 0.07458806782960892,
"learning_rate": 2.564102564102564e-05,
"loss": 0.0042,
"step": 760
},
{
"epoch": 1.0019568151147098,
"grad_norm": 0.08816417306661606,
"learning_rate": 2.5978407557354928e-05,
"loss": 0.0175,
"step": 770
},
{
"epoch": 1.0026315789473683,
"grad_norm": 0.07340700924396515,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.0039,
"step": 780
},
{
"epoch": 1.003306342780027,
"grad_norm": 0.08517087250947952,
"learning_rate": 2.66531713900135e-05,
"loss": 0.0075,
"step": 790
},
{
"epoch": 1.0039811066126856,
"grad_norm": 0.07905049622058868,
"learning_rate": 2.699055330634278e-05,
"loss": 0.0021,
"step": 800
},
{
"epoch": 1.004655870445344,
"grad_norm": 0.13749797642230988,
"learning_rate": 2.732793522267207e-05,
"loss": 0.7603,
"step": 810
},
{
"epoch": 1.0053306342780026,
"grad_norm": 0.04107066988945007,
"learning_rate": 2.766531713900135e-05,
"loss": 0.0033,
"step": 820
},
{
"epoch": 1.0060053981106614,
"grad_norm": 0.05302370712161064,
"learning_rate": 2.8002699055330634e-05,
"loss": 0.0487,
"step": 830
},
{
"epoch": 1.0066801619433199,
"grad_norm": 0.050035424530506134,
"learning_rate": 2.8340080971659922e-05,
"loss": 0.0166,
"step": 840
},
{
"epoch": 1.0073549257759784,
"grad_norm": 0.17594772577285767,
"learning_rate": 2.8677462887989203e-05,
"loss": 0.9013,
"step": 850
},
{
"epoch": 1.008029689608637,
"grad_norm": 5.9323811531066895,
"learning_rate": 2.901484480431849e-05,
"loss": 0.8083,
"step": 860
},
{
"epoch": 1.0087044534412954,
"grad_norm": 0.2871362566947937,
"learning_rate": 2.9352226720647776e-05,
"loss": 0.6024,
"step": 870
},
{
"epoch": 1.0093792172739542,
"grad_norm": 0.3136674463748932,
"learning_rate": 2.9689608636977063e-05,
"loss": 0.5028,
"step": 880
},
{
"epoch": 1.0100539811066127,
"grad_norm": 0.05438687652349472,
"learning_rate": 3.0026990553306344e-05,
"loss": 0.1368,
"step": 890
},
{
"epoch": 1.0107287449392712,
"grad_norm": 0.05301366746425629,
"learning_rate": 3.0364372469635626e-05,
"loss": 0.1373,
"step": 900
},
{
"epoch": 1.0114035087719297,
"grad_norm": 0.015999358147382736,
"learning_rate": 3.0701754385964913e-05,
"loss": 0.0301,
"step": 910
},
{
"epoch": 1.0120782726045885,
"grad_norm": 0.014771537855267525,
"learning_rate": 3.10391363022942e-05,
"loss": 0.0195,
"step": 920
},
{
"epoch": 1.012753036437247,
"grad_norm": 31.934608459472656,
"learning_rate": 3.137651821862348e-05,
"loss": 1.6569,
"step": 930
},
{
"epoch": 1.0134278002699055,
"grad_norm": 0.031412914395332336,
"learning_rate": 3.171390013495277e-05,
"loss": 0.0009,
"step": 940
},
{
"epoch": 1.014102564102564,
"grad_norm": 0.028489330783486366,
"learning_rate": 3.205128205128206e-05,
"loss": 0.1698,
"step": 950
},
{
"epoch": 1.0147773279352226,
"grad_norm": 105.16389465332031,
"learning_rate": 3.2388663967611336e-05,
"loss": 0.1704,
"step": 960
},
{
"epoch": 1.0154520917678813,
"grad_norm": 0.024373585358262062,
"learning_rate": 3.272604588394062e-05,
"loss": 0.6264,
"step": 970
},
{
"epoch": 1.0161268556005398,
"grad_norm": 0.024133900180459023,
"learning_rate": 3.306342780026991e-05,
"loss": 0.8147,
"step": 980
},
{
"epoch": 1.0168016194331984,
"grad_norm": 92.91847229003906,
"learning_rate": 3.340080971659919e-05,
"loss": 1.1899,
"step": 990
},
{
"epoch": 1.0174763832658569,
"grad_norm": 0.3631739616394043,
"learning_rate": 3.373819163292848e-05,
"loss": 1.2713,
"step": 1000
},
{
"epoch": 1.0181511470985156,
"grad_norm": 7.820636749267578,
"learning_rate": 3.407557354925776e-05,
"loss": 0.5495,
"step": 1010
},
{
"epoch": 1.0188259109311741,
"grad_norm": 55.31717300415039,
"learning_rate": 3.441295546558704e-05,
"loss": 0.4036,
"step": 1020
},
{
"epoch": 1.0195006747638327,
"grad_norm": 0.013262225314974785,
"learning_rate": 3.4750337381916334e-05,
"loss": 0.0089,
"step": 1030
},
{
"epoch": 1.0201754385964912,
"grad_norm": 0.8780525922775269,
"learning_rate": 3.508771929824561e-05,
"loss": 0.3444,
"step": 1040
},
{
"epoch": 1.0208502024291497,
"grad_norm": 2.9044814109802246,
"learning_rate": 3.54251012145749e-05,
"loss": 0.0081,
"step": 1050
},
{
"epoch": 1.0215249662618084,
"grad_norm": 0.020421041175723076,
"learning_rate": 3.576248313090419e-05,
"loss": 1.0599,
"step": 1060
},
{
"epoch": 1.022199730094467,
"grad_norm": 0.012594003230333328,
"learning_rate": 3.609986504723347e-05,
"loss": 0.0616,
"step": 1070
},
{
"epoch": 1.0228744939271255,
"grad_norm": 0.018383637070655823,
"learning_rate": 3.6437246963562756e-05,
"loss": 0.9516,
"step": 1080
},
{
"epoch": 1.023549257759784,
"grad_norm": 0.04205102473497391,
"learning_rate": 3.6774628879892034e-05,
"loss": 0.4867,
"step": 1090
},
{
"epoch": 1.0242240215924427,
"grad_norm": 0.022214779630303383,
"learning_rate": 3.7112010796221325e-05,
"loss": 0.0101,
"step": 1100
},
{
"epoch": 1.0248987854251013,
"grad_norm": 0.026110410690307617,
"learning_rate": 3.744939271255061e-05,
"loss": 0.3327,
"step": 1110
},
{
"epoch": 1.0255735492577598,
"grad_norm": 0.16947214305400848,
"learning_rate": 3.7786774628879894e-05,
"loss": 0.6535,
"step": 1120
},
{
"epoch": 1.0262483130904183,
"grad_norm": 0.019961325451731682,
"learning_rate": 3.812415654520918e-05,
"loss": 0.0014,
"step": 1130
},
{
"epoch": 1.0269230769230768,
"grad_norm": 213.16741943359375,
"learning_rate": 3.846153846153846e-05,
"loss": 0.348,
"step": 1140
},
{
"epoch": 1.0275978407557356,
"grad_norm": 0.26998648047447205,
"learning_rate": 3.879892037786775e-05,
"loss": 1.8138,
"step": 1150
},
{
"epoch": 1.028272604588394,
"grad_norm": 16.201974868774414,
"learning_rate": 3.913630229419703e-05,
"loss": 1.1767,
"step": 1160
},
{
"epoch": 1.0289473684210526,
"grad_norm": 0.46378159523010254,
"learning_rate": 3.9473684210526316e-05,
"loss": 0.3981,
"step": 1170
},
{
"epoch": 1.0296221322537111,
"grad_norm": 0.16117815673351288,
"learning_rate": 3.98110661268556e-05,
"loss": 0.6503,
"step": 1180
},
{
"epoch": 1.0302968960863699,
"grad_norm": 0.09139110147953033,
"learning_rate": 4.014844804318489e-05,
"loss": 0.0009,
"step": 1190
},
{
"epoch": 1.0309716599190284,
"grad_norm": 59.96378707885742,
"learning_rate": 4.048582995951417e-05,
"loss": 2.7966,
"step": 1200
},
{
"epoch": 1.031646423751687,
"grad_norm": 0.1793028563261032,
"learning_rate": 4.082321187584346e-05,
"loss": 0.7813,
"step": 1210
},
{
"epoch": 1.0323211875843454,
"grad_norm": 0.04233807325363159,
"learning_rate": 4.116059379217274e-05,
"loss": 0.0117,
"step": 1220
},
{
"epoch": 1.032995951417004,
"grad_norm": 0.10781926661729813,
"learning_rate": 4.149797570850202e-05,
"loss": 0.0029,
"step": 1230
},
{
"epoch": 1.0336707152496627,
"grad_norm": 0.04887605831027031,
"learning_rate": 4.1835357624831314e-05,
"loss": 0.0023,
"step": 1240
},
{
"epoch": 1.0343454790823212,
"grad_norm": 0.0070233517326414585,
"learning_rate": 4.217273954116059e-05,
"loss": 0.5274,
"step": 1250
},
{
"epoch": 1.0350202429149797,
"grad_norm": 0.009842370636761189,
"learning_rate": 4.251012145748988e-05,
"loss": 1.1471,
"step": 1260
},
{
"epoch": 1.0356950067476383,
"grad_norm": 117.02069091796875,
"learning_rate": 4.284750337381917e-05,
"loss": 0.0518,
"step": 1270
},
{
"epoch": 1.036369770580297,
"grad_norm": 0.011584924533963203,
"learning_rate": 4.318488529014845e-05,
"loss": 0.0088,
"step": 1280
},
{
"epoch": 1.0370445344129555,
"grad_norm": 0.04845478758215904,
"learning_rate": 4.3522267206477737e-05,
"loss": 0.937,
"step": 1290
},
{
"epoch": 1.037719298245614,
"grad_norm": 12.870345115661621,
"learning_rate": 4.3859649122807014e-05,
"loss": 0.1796,
"step": 1300
},
{
"epoch": 1.0383940620782726,
"grad_norm": 0.18226304650306702,
"learning_rate": 4.4197031039136306e-05,
"loss": 0.7725,
"step": 1310
},
{
"epoch": 1.039068825910931,
"grad_norm": 0.038409680128097534,
"learning_rate": 4.453441295546559e-05,
"loss": 0.5331,
"step": 1320
},
{
"epoch": 1.0397435897435898,
"grad_norm": 1.686890721321106,
"learning_rate": 4.4871794871794874e-05,
"loss": 0.6265,
"step": 1330
},
{
"epoch": 1.0404183535762483,
"grad_norm": 0.009872148744761944,
"learning_rate": 4.520917678812416e-05,
"loss": 1.2371,
"step": 1340
},
{
"epoch": 1.0410931174089069,
"grad_norm": 0.016034213826060295,
"learning_rate": 4.5546558704453443e-05,
"loss": 0.8008,
"step": 1350
},
{
"epoch": 1.0417678812415654,
"grad_norm": 161.0729217529297,
"learning_rate": 4.588394062078273e-05,
"loss": 1.6563,
"step": 1360
},
{
"epoch": 1.0424426450742241,
"grad_norm": 0.039535123854875565,
"learning_rate": 4.622132253711201e-05,
"loss": 0.698,
"step": 1370
},
{
"epoch": 1.0431174089068826,
"grad_norm": 0.02719847857952118,
"learning_rate": 4.65587044534413e-05,
"loss": 0.1234,
"step": 1380
},
{
"epoch": 1.0437921727395412,
"grad_norm": 79.83929443359375,
"learning_rate": 4.689608636977058e-05,
"loss": 1.275,
"step": 1390
},
{
"epoch": 1.0444669365721997,
"grad_norm": 0.2730661928653717,
"learning_rate": 4.7233468286099866e-05,
"loss": 0.4828,
"step": 1400
},
{
"epoch": 1.0451417004048582,
"grad_norm": 0.025355026125907898,
"learning_rate": 4.757085020242915e-05,
"loss": 0.1393,
"step": 1410
},
{
"epoch": 1.045816464237517,
"grad_norm": 8.70992374420166,
"learning_rate": 4.790823211875844e-05,
"loss": 0.0709,
"step": 1420
},
{
"epoch": 1.0464912280701755,
"grad_norm": 37.11697006225586,
"learning_rate": 4.824561403508772e-05,
"loss": 2.5881,
"step": 1430
},
{
"epoch": 1.047165991902834,
"grad_norm": 52.71913528442383,
"learning_rate": 4.8582995951417004e-05,
"loss": 2.0305,
"step": 1440
},
{
"epoch": 1.0478407557354925,
"grad_norm": 1.0200884342193604,
"learning_rate": 4.8920377867746295e-05,
"loss": 0.0119,
"step": 1450
},
{
"epoch": 1.0485155195681513,
"grad_norm": 0.16433711349964142,
"learning_rate": 4.925775978407557e-05,
"loss": 1.4951,
"step": 1460
},
{
"epoch": 1.0491902834008098,
"grad_norm": 0.04836498573422432,
"learning_rate": 4.9595141700404864e-05,
"loss": 0.5514,
"step": 1470
},
{
"epoch": 1.0498650472334683,
"grad_norm": 0.021334873512387276,
"learning_rate": 4.993252361673414e-05,
"loss": 0.0028,
"step": 1480
},
{
"epoch": 1.05,
"eval_accuracy": 0.8482142857142857,
"eval_f1": 0.8460469703429654,
"eval_loss": 0.7499637603759766,
"eval_runtime": 75.3886,
"eval_samples_per_second": 1.486,
"eval_steps_per_second": 1.486,
"step": 1482
},
{
"epoch": 2.000539811066127,
"grad_norm": 0.006392825860530138,
"learning_rate": 4.9970010496326286e-05,
"loss": 0.0052,
"step": 1490
},
{
"epoch": 2.0012145748987855,
"grad_norm": 0.1666487753391266,
"learning_rate": 4.993252361673414e-05,
"loss": 0.0088,
"step": 1500
},
{
"epoch": 2.001889338731444,
"grad_norm": 0.27092501521110535,
"learning_rate": 4.9895036737142004e-05,
"loss": 0.0005,
"step": 1510
},
{
"epoch": 2.0025641025641026,
"grad_norm": 0.01244429126381874,
"learning_rate": 4.985754985754986e-05,
"loss": 1.2941,
"step": 1520
},
{
"epoch": 2.003238866396761,
"grad_norm": 0.07986637949943542,
"learning_rate": 4.9820062977957716e-05,
"loss": 1.3347,
"step": 1530
},
{
"epoch": 2.0039136302294196,
"grad_norm": 0.010807895101606846,
"learning_rate": 4.978257609836557e-05,
"loss": 0.4777,
"step": 1540
},
{
"epoch": 2.004588394062078,
"grad_norm": 0.010884225368499756,
"learning_rate": 4.9745089218773434e-05,
"loss": 1.7184,
"step": 1550
},
{
"epoch": 2.0052631578947366,
"grad_norm": 0.17375628650188446,
"learning_rate": 4.970760233918128e-05,
"loss": 0.0067,
"step": 1560
},
{
"epoch": 2.0059379217273956,
"grad_norm": 0.006022674031555653,
"learning_rate": 4.9670115459589145e-05,
"loss": 0.0014,
"step": 1570
},
{
"epoch": 2.006612685560054,
"grad_norm": 0.07748937606811523,
"learning_rate": 4.9632628579997e-05,
"loss": 0.2612,
"step": 1580
},
{
"epoch": 2.0072874493927126,
"grad_norm": 0.2620987296104431,
"learning_rate": 4.9595141700404864e-05,
"loss": 0.6517,
"step": 1590
},
{
"epoch": 2.007962213225371,
"grad_norm": 148.11007690429688,
"learning_rate": 4.955765482081271e-05,
"loss": 0.5783,
"step": 1600
},
{
"epoch": 2.0086369770580297,
"grad_norm": 0.0034163114614784718,
"learning_rate": 4.9520167941220575e-05,
"loss": 0.0304,
"step": 1610
},
{
"epoch": 2.009311740890688,
"grad_norm": 0.02201319858431816,
"learning_rate": 4.948268106162843e-05,
"loss": 0.3777,
"step": 1620
},
{
"epoch": 2.0099865047233467,
"grad_norm": 0.01761261560022831,
"learning_rate": 4.9445194182036294e-05,
"loss": 0.6914,
"step": 1630
},
{
"epoch": 2.0106612685560052,
"grad_norm": 0.02757342904806137,
"learning_rate": 4.940770730244414e-05,
"loss": 0.001,
"step": 1640
},
{
"epoch": 2.0113360323886638,
"grad_norm": 0.016815010458230972,
"learning_rate": 4.9370220422852005e-05,
"loss": 0.0006,
"step": 1650
},
{
"epoch": 2.0120107962213227,
"grad_norm": 0.7724957466125488,
"learning_rate": 4.933273354325986e-05,
"loss": 0.4395,
"step": 1660
},
{
"epoch": 2.0126855600539812,
"grad_norm": 0.003277893178164959,
"learning_rate": 4.9295246663667724e-05,
"loss": 0.0023,
"step": 1670
},
{
"epoch": 2.0133603238866398,
"grad_norm": 0.010450620204210281,
"learning_rate": 4.925775978407557e-05,
"loss": 0.0003,
"step": 1680
},
{
"epoch": 2.0140350877192983,
"grad_norm": 0.008632444776594639,
"learning_rate": 4.9220272904483435e-05,
"loss": 0.0588,
"step": 1690
},
{
"epoch": 2.014709851551957,
"grad_norm": 0.2135269045829773,
"learning_rate": 4.918278602489129e-05,
"loss": 0.8012,
"step": 1700
},
{
"epoch": 2.0153846153846153,
"grad_norm": 0.006235187407582998,
"learning_rate": 4.9145299145299147e-05,
"loss": 0.0007,
"step": 1710
},
{
"epoch": 2.016059379217274,
"grad_norm": 0.013167057186365128,
"learning_rate": 4.9107812265707e-05,
"loss": 0.0004,
"step": 1720
},
{
"epoch": 2.0167341430499324,
"grad_norm": 0.008585361763834953,
"learning_rate": 4.9070325386114865e-05,
"loss": 0.0006,
"step": 1730
},
{
"epoch": 2.017408906882591,
"grad_norm": 55.19523620605469,
"learning_rate": 4.903283850652272e-05,
"loss": 0.8423,
"step": 1740
},
{
"epoch": 2.01808367071525,
"grad_norm": 0.005840742029249668,
"learning_rate": 4.8995351626930576e-05,
"loss": 0.432,
"step": 1750
},
{
"epoch": 2.0187584345479084,
"grad_norm": 0.007270222995430231,
"learning_rate": 4.895786474733843e-05,
"loss": 0.5094,
"step": 1760
},
{
"epoch": 2.019433198380567,
"grad_norm": 0.013795904815196991,
"learning_rate": 4.8920377867746295e-05,
"loss": 0.6975,
"step": 1770
},
{
"epoch": 2.0201079622132254,
"grad_norm": 0.44005972146987915,
"learning_rate": 4.888289098815415e-05,
"loss": 0.0006,
"step": 1780
},
{
"epoch": 2.020782726045884,
"grad_norm": 0.020803041756153107,
"learning_rate": 4.8845404108562006e-05,
"loss": 0.0003,
"step": 1790
},
{
"epoch": 2.0214574898785425,
"grad_norm": 0.004395525902509689,
"learning_rate": 4.880791722896986e-05,
"loss": 0.0007,
"step": 1800
},
{
"epoch": 2.022132253711201,
"grad_norm": 0.07428783923387527,
"learning_rate": 4.8770430349377725e-05,
"loss": 0.0006,
"step": 1810
},
{
"epoch": 2.0228070175438595,
"grad_norm": 0.007445579394698143,
"learning_rate": 4.8732943469785574e-05,
"loss": 0.0002,
"step": 1820
},
{
"epoch": 2.023481781376518,
"grad_norm": 0.02664661407470703,
"learning_rate": 4.8695456590193436e-05,
"loss": 0.0002,
"step": 1830
},
{
"epoch": 2.024156545209177,
"grad_norm": 0.08112671971321106,
"learning_rate": 4.865796971060129e-05,
"loss": 0.0003,
"step": 1840
},
{
"epoch": 2.0248313090418355,
"grad_norm": 0.002486151410266757,
"learning_rate": 4.862048283100915e-05,
"loss": 0.0018,
"step": 1850
},
{
"epoch": 2.025506072874494,
"grad_norm": 0.00320970406755805,
"learning_rate": 4.8582995951417004e-05,
"loss": 0.2271,
"step": 1860
},
{
"epoch": 2.0261808367071525,
"grad_norm": 0.1994234174489975,
"learning_rate": 4.8545509071824866e-05,
"loss": 0.5385,
"step": 1870
},
{
"epoch": 2.026855600539811,
"grad_norm": 0.0024550287052989006,
"learning_rate": 4.850802219223272e-05,
"loss": 0.0759,
"step": 1880
},
{
"epoch": 2.0275303643724696,
"grad_norm": 0.004535624757409096,
"learning_rate": 4.847053531264058e-05,
"loss": 0.0006,
"step": 1890
},
{
"epoch": 2.028205128205128,
"grad_norm": 0.07630165666341782,
"learning_rate": 4.8433048433048433e-05,
"loss": 0.0002,
"step": 1900
},
{
"epoch": 2.0288798920377866,
"grad_norm": 0.005508648231625557,
"learning_rate": 4.839556155345629e-05,
"loss": 0.0048,
"step": 1910
},
{
"epoch": 2.029554655870445,
"grad_norm": 0.00268650334328413,
"learning_rate": 4.835807467386415e-05,
"loss": 0.021,
"step": 1920
},
{
"epoch": 2.030229419703104,
"grad_norm": 0.6032857894897461,
"learning_rate": 4.832058779427201e-05,
"loss": 0.9845,
"step": 1930
},
{
"epoch": 2.0309041835357626,
"grad_norm": 0.0025021624751389027,
"learning_rate": 4.828310091467986e-05,
"loss": 0.0005,
"step": 1940
},
{
"epoch": 2.031578947368421,
"grad_norm": 0.0031213329639285803,
"learning_rate": 4.824561403508772e-05,
"loss": 0.1197,
"step": 1950
},
{
"epoch": 2.0322537112010797,
"grad_norm": 0.011701357550919056,
"learning_rate": 4.820812715549558e-05,
"loss": 0.0004,
"step": 1960
},
{
"epoch": 2.032928475033738,
"grad_norm": 0.002749168314039707,
"learning_rate": 4.817064027590343e-05,
"loss": 0.0001,
"step": 1970
},
{
"epoch": 2.0336032388663967,
"grad_norm": 0.003767299233004451,
"learning_rate": 4.813315339631129e-05,
"loss": 0.0002,
"step": 1980
},
{
"epoch": 2.0342780026990552,
"grad_norm": 0.005788211710751057,
"learning_rate": 4.809566651671915e-05,
"loss": 0.0012,
"step": 1990
},
{
"epoch": 2.0349527665317138,
"grad_norm": 329.865966796875,
"learning_rate": 4.805817963712701e-05,
"loss": 1.4817,
"step": 2000
},
{
"epoch": 2.0356275303643723,
"grad_norm": 0.011975220404565334,
"learning_rate": 4.802069275753486e-05,
"loss": 1.3289,
"step": 2010
},
{
"epoch": 2.0363022941970312,
"grad_norm": 0.0021649515256285667,
"learning_rate": 4.798320587794272e-05,
"loss": 0.0055,
"step": 2020
},
{
"epoch": 2.0369770580296898,
"grad_norm": 0.0019632915500551462,
"learning_rate": 4.794571899835058e-05,
"loss": 0.0002,
"step": 2030
},
{
"epoch": 2.0376518218623483,
"grad_norm": 0.005742478650063276,
"learning_rate": 4.790823211875844e-05,
"loss": 0.0011,
"step": 2040
},
{
"epoch": 2.038326585695007,
"grad_norm": 0.009554996155202389,
"learning_rate": 4.787074523916629e-05,
"loss": 0.8594,
"step": 2050
},
{
"epoch": 2.0390013495276653,
"grad_norm": 0.0015004322631284595,
"learning_rate": 4.783325835957415e-05,
"loss": 0.0138,
"step": 2060
},
{
"epoch": 2.039676113360324,
"grad_norm": 0.005102177150547504,
"learning_rate": 4.779577147998201e-05,
"loss": 0.7251,
"step": 2070
},
{
"epoch": 2.0403508771929824,
"grad_norm": 0.0036967694759368896,
"learning_rate": 4.7758284600389865e-05,
"loss": 0.004,
"step": 2080
},
{
"epoch": 2.041025641025641,
"grad_norm": 0.0025739429984241724,
"learning_rate": 4.772079772079772e-05,
"loss": 0.9565,
"step": 2090
},
{
"epoch": 2.0417004048582994,
"grad_norm": 0.006292873062193394,
"learning_rate": 4.768331084120558e-05,
"loss": 0.0182,
"step": 2100
},
{
"epoch": 2.0423751686909584,
"grad_norm": 0.007768746931105852,
"learning_rate": 4.764582396161344e-05,
"loss": 0.4385,
"step": 2110
},
{
"epoch": 2.043049932523617,
"grad_norm": 0.005842685699462891,
"learning_rate": 4.7608337082021294e-05,
"loss": 0.6865,
"step": 2120
},
{
"epoch": 2.0437246963562754,
"grad_norm": 0.003818152705207467,
"learning_rate": 4.757085020242915e-05,
"loss": 0.1049,
"step": 2130
},
{
"epoch": 2.044399460188934,
"grad_norm": 0.0034294927027076483,
"learning_rate": 4.753336332283701e-05,
"loss": 0.0004,
"step": 2140
},
{
"epoch": 2.0450742240215924,
"grad_norm": 0.005487513262778521,
"learning_rate": 4.749587644324487e-05,
"loss": 0.2808,
"step": 2150
},
{
"epoch": 2.045748987854251,
"grad_norm": 0.004234930034726858,
"learning_rate": 4.7458389563652724e-05,
"loss": 0.0096,
"step": 2160
},
{
"epoch": 2.0464237516869095,
"grad_norm": 0.004304991569370031,
"learning_rate": 4.742090268406058e-05,
"loss": 1.0463,
"step": 2170
},
{
"epoch": 2.047098515519568,
"grad_norm": 0.06642390042543411,
"learning_rate": 4.738341580446844e-05,
"loss": 0.0003,
"step": 2180
},
{
"epoch": 2.0477732793522265,
"grad_norm": 21.008607864379883,
"learning_rate": 4.73459289248763e-05,
"loss": 0.0073,
"step": 2190
},
{
"epoch": 2.0484480431848855,
"grad_norm": 0.003075533313676715,
"learning_rate": 4.7308442045284154e-05,
"loss": 0.6295,
"step": 2200
},
{
"epoch": 2.049122807017544,
"grad_norm": 0.002309370320290327,
"learning_rate": 4.727095516569201e-05,
"loss": 0.1005,
"step": 2210
},
{
"epoch": 2.0497975708502025,
"grad_norm": 0.0027971486561000347,
"learning_rate": 4.7233468286099866e-05,
"loss": 0.0021,
"step": 2220
},
{
"epoch": 2.05,
"eval_accuracy": 0.8839285714285714,
"eval_f1": 0.882116388637625,
"eval_loss": 0.5603616833686829,
"eval_runtime": 74.4272,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 1.505,
"step": 2223
},
{
"epoch": 3.000472334682861,
"grad_norm": 0.01166750118136406,
"learning_rate": 4.719598140650772e-05,
"loss": 0.0163,
"step": 2230
},
{
"epoch": 3.0011470985155198,
"grad_norm": 96.65850067138672,
"learning_rate": 4.7158494526915584e-05,
"loss": 1.6819,
"step": 2240
},
{
"epoch": 3.0018218623481783,
"grad_norm": 0.05787191540002823,
"learning_rate": 4.712100764732344e-05,
"loss": 0.6805,
"step": 2250
},
{
"epoch": 3.002496626180837,
"grad_norm": 0.0013539530336856842,
"learning_rate": 4.7083520767731296e-05,
"loss": 0.0011,
"step": 2260
},
{
"epoch": 3.0031713900134953,
"grad_norm": 0.010776277631521225,
"learning_rate": 4.704603388813915e-05,
"loss": 0.0148,
"step": 2270
},
{
"epoch": 3.003846153846154,
"grad_norm": 0.024213161319494247,
"learning_rate": 4.700854700854701e-05,
"loss": 0.0008,
"step": 2280
},
{
"epoch": 3.0045209176788124,
"grad_norm": 0.0691986232995987,
"learning_rate": 4.697106012895487e-05,
"loss": 0.006,
"step": 2290
},
{
"epoch": 3.005195681511471,
"grad_norm": 0.009089670144021511,
"learning_rate": 4.6933573249362725e-05,
"loss": 0.6432,
"step": 2300
},
{
"epoch": 3.0058704453441294,
"grad_norm": 0.005548300687223673,
"learning_rate": 4.689608636977058e-05,
"loss": 0.0018,
"step": 2310
},
{
"epoch": 3.006545209176788,
"grad_norm": 0.006319984793663025,
"learning_rate": 4.685859949017844e-05,
"loss": 0.0001,
"step": 2320
},
{
"epoch": 3.007219973009447,
"grad_norm": 0.007898062467575073,
"learning_rate": 4.68211126105863e-05,
"loss": 0.0008,
"step": 2330
},
{
"epoch": 3.0078947368421054,
"grad_norm": 0.003347884165123105,
"learning_rate": 4.678362573099415e-05,
"loss": 0.0002,
"step": 2340
},
{
"epoch": 3.008569500674764,
"grad_norm": 0.009431365877389908,
"learning_rate": 4.674613885140201e-05,
"loss": 0.0001,
"step": 2350
},
{
"epoch": 3.0092442645074224,
"grad_norm": 0.006901255808770657,
"learning_rate": 4.670865197180987e-05,
"loss": 0.0001,
"step": 2360
},
{
"epoch": 3.009919028340081,
"grad_norm": 0.00315679213963449,
"learning_rate": 4.667116509221773e-05,
"loss": 0.0002,
"step": 2370
},
{
"epoch": 3.0105937921727395,
"grad_norm": 0.21266283094882965,
"learning_rate": 4.663367821262558e-05,
"loss": 0.0006,
"step": 2380
},
{
"epoch": 3.011268556005398,
"grad_norm": 0.004384478088468313,
"learning_rate": 4.659619133303344e-05,
"loss": 0.0006,
"step": 2390
},
{
"epoch": 3.0119433198380565,
"grad_norm": 0.013708599843084812,
"learning_rate": 4.65587044534413e-05,
"loss": 0.2589,
"step": 2400
},
{
"epoch": 3.012618083670715,
"grad_norm": 308.8554992675781,
"learning_rate": 4.652121757384916e-05,
"loss": 1.1215,
"step": 2410
},
{
"epoch": 3.013292847503374,
"grad_norm": 0.0031652101315557957,
"learning_rate": 4.648373069425701e-05,
"loss": 0.5235,
"step": 2420
},
{
"epoch": 3.0139676113360325,
"grad_norm": 0.00223003257997334,
"learning_rate": 4.644624381466487e-05,
"loss": 0.0001,
"step": 2430
},
{
"epoch": 3.014642375168691,
"grad_norm": 0.0067682513035833836,
"learning_rate": 4.640875693507273e-05,
"loss": 0.0001,
"step": 2440
},
{
"epoch": 3.0153171390013496,
"grad_norm": 0.0025887356605380774,
"learning_rate": 4.637127005548059e-05,
"loss": 0.0003,
"step": 2450
},
{
"epoch": 3.015991902834008,
"grad_norm": 0.0077194697223603725,
"learning_rate": 4.633378317588844e-05,
"loss": 0.6217,
"step": 2460
},
{
"epoch": 3.0166666666666666,
"grad_norm": 0.03473236411809921,
"learning_rate": 4.62962962962963e-05,
"loss": 0.8179,
"step": 2470
},
{
"epoch": 3.017341430499325,
"grad_norm": 0.014423678629100323,
"learning_rate": 4.6258809416704157e-05,
"loss": 0.2263,
"step": 2480
},
{
"epoch": 3.0180161943319836,
"grad_norm": 0.006188780535012484,
"learning_rate": 4.622132253711201e-05,
"loss": 0.6701,
"step": 2490
},
{
"epoch": 3.018690958164642,
"grad_norm": 0.35851019620895386,
"learning_rate": 4.618383565751987e-05,
"loss": 0.0008,
"step": 2500
},
{
"epoch": 3.019365721997301,
"grad_norm": 0.0032032101880759,
"learning_rate": 4.614634877792773e-05,
"loss": 0.0137,
"step": 2510
},
{
"epoch": 3.0200404858299597,
"grad_norm": 0.006460473407059908,
"learning_rate": 4.6108861898335586e-05,
"loss": 0.0016,
"step": 2520
},
{
"epoch": 3.020715249662618,
"grad_norm": 0.0026447370182722807,
"learning_rate": 4.607137501874344e-05,
"loss": 0.0014,
"step": 2530
},
{
"epoch": 3.0213900134952767,
"grad_norm": 0.0030527382623404264,
"learning_rate": 4.60338881391513e-05,
"loss": 0.0001,
"step": 2540
},
{
"epoch": 3.022064777327935,
"grad_norm": 0.007262419909238815,
"learning_rate": 4.599640125955916e-05,
"loss": 0.0001,
"step": 2550
},
{
"epoch": 3.0227395411605937,
"grad_norm": 0.0038091035094112158,
"learning_rate": 4.5958914379967016e-05,
"loss": 0.2222,
"step": 2560
},
{
"epoch": 3.0234143049932523,
"grad_norm": 0.0035387033130973577,
"learning_rate": 4.592142750037487e-05,
"loss": 0.8073,
"step": 2570
},
{
"epoch": 3.0240890688259108,
"grad_norm": 0.0033677646424621344,
"learning_rate": 4.588394062078273e-05,
"loss": 0.0006,
"step": 2580
},
{
"epoch": 3.0247638326585693,
"grad_norm": 0.006484442390501499,
"learning_rate": 4.5846453741190584e-05,
"loss": 0.0004,
"step": 2590
},
{
"epoch": 3.0254385964912283,
"grad_norm": 0.010489704087376595,
"learning_rate": 4.580896686159844e-05,
"loss": 0.0009,
"step": 2600
},
{
"epoch": 3.026113360323887,
"grad_norm": 0.0032699282746762037,
"learning_rate": 4.57714799820063e-05,
"loss": 0.0039,
"step": 2610
},
{
"epoch": 3.0267881241565453,
"grad_norm": 97.59777069091797,
"learning_rate": 4.573399310241416e-05,
"loss": 1.8702,
"step": 2620
},
{
"epoch": 3.027462887989204,
"grad_norm": 0.05291153863072395,
"learning_rate": 4.5696506222822014e-05,
"loss": 0.9784,
"step": 2630
},
{
"epoch": 3.0281376518218623,
"grad_norm": 0.0015122004551813006,
"learning_rate": 4.565901934322987e-05,
"loss": 0.0008,
"step": 2640
},
{
"epoch": 3.028812415654521,
"grad_norm": 0.10103687644004822,
"learning_rate": 4.5621532463637725e-05,
"loss": 0.0013,
"step": 2650
},
{
"epoch": 3.0294871794871794,
"grad_norm": 0.002090906724333763,
"learning_rate": 4.558404558404559e-05,
"loss": 0.0005,
"step": 2660
},
{
"epoch": 3.030161943319838,
"grad_norm": 0.0011990441707894206,
"learning_rate": 4.5546558704453443e-05,
"loss": 0.772,
"step": 2670
},
{
"epoch": 3.0308367071524964,
"grad_norm": 0.0113350385800004,
"learning_rate": 4.55090718248613e-05,
"loss": 0.0039,
"step": 2680
},
{
"epoch": 3.0315114709851554,
"grad_norm": 379.9673156738281,
"learning_rate": 4.5471584945269155e-05,
"loss": 1.1636,
"step": 2690
},
{
"epoch": 3.032186234817814,
"grad_norm": 0.03890157490968704,
"learning_rate": 4.543409806567702e-05,
"loss": 1.1822,
"step": 2700
},
{
"epoch": 3.0328609986504724,
"grad_norm": 0.0033322779927402735,
"learning_rate": 4.5396611186084866e-05,
"loss": 0.0014,
"step": 2710
},
{
"epoch": 3.033535762483131,
"grad_norm": 0.6530995965003967,
"learning_rate": 4.535912430649273e-05,
"loss": 0.6605,
"step": 2720
},
{
"epoch": 3.0342105263157895,
"grad_norm": 0.03727166727185249,
"learning_rate": 4.5321637426900585e-05,
"loss": 0.9511,
"step": 2730
},
{
"epoch": 3.034885290148448,
"grad_norm": 0.0015920967562124133,
"learning_rate": 4.528415054730845e-05,
"loss": 0.0008,
"step": 2740
},
{
"epoch": 3.0355600539811065,
"grad_norm": 0.08293965458869934,
"learning_rate": 4.5246663667716296e-05,
"loss": 0.0007,
"step": 2750
},
{
"epoch": 3.036234817813765,
"grad_norm": 0.04548066109418869,
"learning_rate": 4.520917678812416e-05,
"loss": 0.0015,
"step": 2760
},
{
"epoch": 3.0369095816464236,
"grad_norm": 0.011057593859732151,
"learning_rate": 4.5171689908532015e-05,
"loss": 0.6973,
"step": 2770
},
{
"epoch": 3.0375843454790825,
"grad_norm": 113.07095336914062,
"learning_rate": 4.513420302893988e-05,
"loss": 0.3667,
"step": 2780
},
{
"epoch": 3.038259109311741,
"grad_norm": 0.0016718521947041154,
"learning_rate": 4.5096716149347726e-05,
"loss": 0.6746,
"step": 2790
},
{
"epoch": 3.0389338731443996,
"grad_norm": 0.006011700723320246,
"learning_rate": 4.505922926975559e-05,
"loss": 1.4009,
"step": 2800
},
{
"epoch": 3.039608636977058,
"grad_norm": 0.0032539258245378733,
"learning_rate": 4.5021742390163445e-05,
"loss": 0.005,
"step": 2810
},
{
"epoch": 3.0402834008097166,
"grad_norm": 0.024762948974967003,
"learning_rate": 4.498425551057131e-05,
"loss": 0.0007,
"step": 2820
},
{
"epoch": 3.040958164642375,
"grad_norm": 0.008271398954093456,
"learning_rate": 4.4946768630979156e-05,
"loss": 0.0004,
"step": 2830
},
{
"epoch": 3.0416329284750336,
"grad_norm": 0.0073724472895264626,
"learning_rate": 4.490928175138702e-05,
"loss": 0.7153,
"step": 2840
},
{
"epoch": 3.042307692307692,
"grad_norm": 0.01329676155000925,
"learning_rate": 4.4871794871794874e-05,
"loss": 0.1339,
"step": 2850
},
{
"epoch": 3.0429824561403507,
"grad_norm": 0.00492237601429224,
"learning_rate": 4.483430799220273e-05,
"loss": 0.7432,
"step": 2860
},
{
"epoch": 3.0436572199730096,
"grad_norm": 0.006463408935815096,
"learning_rate": 4.4796821112610586e-05,
"loss": 0.0007,
"step": 2870
},
{
"epoch": 3.044331983805668,
"grad_norm": 0.0007826614892110229,
"learning_rate": 4.475933423301845e-05,
"loss": 0.5263,
"step": 2880
},
{
"epoch": 3.0450067476383267,
"grad_norm": 0.0012907817726954818,
"learning_rate": 4.4721847353426304e-05,
"loss": 0.0017,
"step": 2890
},
{
"epoch": 3.045681511470985,
"grad_norm": 0.0011142657604068518,
"learning_rate": 4.468436047383416e-05,
"loss": 0.0004,
"step": 2900
},
{
"epoch": 3.0463562753036437,
"grad_norm": 0.0039123659953475,
"learning_rate": 4.4646873594242016e-05,
"loss": 0.025,
"step": 2910
},
{
"epoch": 3.0470310391363022,
"grad_norm": 0.006876886822283268,
"learning_rate": 4.460938671464988e-05,
"loss": 0.5972,
"step": 2920
},
{
"epoch": 3.0477058029689608,
"grad_norm": 0.0013078979682177305,
"learning_rate": 4.4571899835057734e-05,
"loss": 0.0216,
"step": 2930
},
{
"epoch": 3.0483805668016193,
"grad_norm": 0.01804491877555847,
"learning_rate": 4.453441295546559e-05,
"loss": 0.0025,
"step": 2940
},
{
"epoch": 3.049055330634278,
"grad_norm": 0.0017017913050949574,
"learning_rate": 4.4496926075873446e-05,
"loss": 0.1553,
"step": 2950
},
{
"epoch": 3.0497300944669368,
"grad_norm": 0.004222176969051361,
"learning_rate": 4.445943919628131e-05,
"loss": 0.0002,
"step": 2960
},
{
"epoch": 3.05,
"eval_accuracy": 0.9017857142857143,
"eval_f1": 0.900079642364192,
"eval_loss": 0.3880017399787903,
"eval_runtime": 72.9967,
"eval_samples_per_second": 1.534,
"eval_steps_per_second": 1.534,
"step": 2964
},
{
"epoch": 4.0004048582995955,
"grad_norm": 0.0011517743114382029,
"learning_rate": 4.442195231668916e-05,
"loss": 0.4772,
"step": 2970
},
{
"epoch": 4.001079622132254,
"grad_norm": 0.0008661440806463361,
"learning_rate": 4.438446543709702e-05,
"loss": 0.0001,
"step": 2980
},
{
"epoch": 4.0017543859649125,
"grad_norm": 0.005399093497544527,
"learning_rate": 4.4346978557504876e-05,
"loss": 0.0033,
"step": 2990
},
{
"epoch": 4.002429149797571,
"grad_norm": 0.0038267234340310097,
"learning_rate": 4.430949167791273e-05,
"loss": 0.0005,
"step": 3000
},
{
"epoch": 4.0031039136302295,
"grad_norm": 0.0029461942613124847,
"learning_rate": 4.427200479832059e-05,
"loss": 0.0002,
"step": 3010
},
{
"epoch": 4.003778677462888,
"grad_norm": 0.0006391266360878944,
"learning_rate": 4.423451791872845e-05,
"loss": 0.0001,
"step": 3020
},
{
"epoch": 4.004453441295547,
"grad_norm": 0.004189279396086931,
"learning_rate": 4.4197031039136306e-05,
"loss": 0.0001,
"step": 3030
},
{
"epoch": 4.005128205128205,
"grad_norm": 0.0011289932299405336,
"learning_rate": 4.415954415954416e-05,
"loss": 0.0001,
"step": 3040
},
{
"epoch": 4.005802968960864,
"grad_norm": 0.0023520805407315493,
"learning_rate": 4.412205727995202e-05,
"loss": 0.0001,
"step": 3050
},
{
"epoch": 4.006477732793522,
"grad_norm": 0.0018153834389522672,
"learning_rate": 4.408457040035987e-05,
"loss": 0.8745,
"step": 3060
},
{
"epoch": 4.007152496626181,
"grad_norm": 0.001743017346598208,
"learning_rate": 4.4047083520767735e-05,
"loss": 0.0003,
"step": 3070
},
{
"epoch": 4.007827260458839,
"grad_norm": 0.002831714926287532,
"learning_rate": 4.400959664117559e-05,
"loss": 0.0066,
"step": 3080
},
{
"epoch": 4.008502024291498,
"grad_norm": 0.005015307106077671,
"learning_rate": 4.397210976158345e-05,
"loss": 0.1127,
"step": 3090
},
{
"epoch": 4.009176788124156,
"grad_norm": 0.0019009409006685019,
"learning_rate": 4.39346228819913e-05,
"loss": 0.001,
"step": 3100
},
{
"epoch": 4.009851551956815,
"grad_norm": 0.0011994624510407448,
"learning_rate": 4.3897136002399165e-05,
"loss": 0.8256,
"step": 3110
},
{
"epoch": 4.010526315789473,
"grad_norm": 0.002758684800937772,
"learning_rate": 4.3859649122807014e-05,
"loss": 0.0002,
"step": 3120
},
{
"epoch": 4.011201079622133,
"grad_norm": 0.014079189859330654,
"learning_rate": 4.382216224321488e-05,
"loss": 0.0001,
"step": 3130
},
{
"epoch": 4.011875843454791,
"grad_norm": 0.001694743288680911,
"learning_rate": 4.378467536362273e-05,
"loss": 0.0001,
"step": 3140
},
{
"epoch": 4.01255060728745,
"grad_norm": 0.005108845420181751,
"learning_rate": 4.3747188484030595e-05,
"loss": 0.0001,
"step": 3150
},
{
"epoch": 4.013225371120108,
"grad_norm": 0.0009567590313963592,
"learning_rate": 4.3709701604438444e-05,
"loss": 0.0003,
"step": 3160
},
{
"epoch": 4.013900134952767,
"grad_norm": 0.005206429865211248,
"learning_rate": 4.367221472484631e-05,
"loss": 0.0139,
"step": 3170
},
{
"epoch": 4.014574898785425,
"grad_norm": 0.0010895140003412962,
"learning_rate": 4.363472784525416e-05,
"loss": 0.0001,
"step": 3180
},
{
"epoch": 4.015249662618084,
"grad_norm": 0.0026008691638708115,
"learning_rate": 4.3597240965662025e-05,
"loss": 0.0002,
"step": 3190
},
{
"epoch": 4.015924426450742,
"grad_norm": 0.00945541262626648,
"learning_rate": 4.3559754086069874e-05,
"loss": 0.0001,
"step": 3200
},
{
"epoch": 4.016599190283401,
"grad_norm": 0.002652823692187667,
"learning_rate": 4.3522267206477737e-05,
"loss": 0.0003,
"step": 3210
},
{
"epoch": 4.017273954116059,
"grad_norm": 0.011731209233403206,
"learning_rate": 4.348478032688559e-05,
"loss": 0.0001,
"step": 3220
},
{
"epoch": 4.017948717948718,
"grad_norm": 0.002854161197319627,
"learning_rate": 4.344729344729345e-05,
"loss": 0.0001,
"step": 3230
},
{
"epoch": 4.018623481781376,
"grad_norm": 0.0006263653049245477,
"learning_rate": 4.3409806567701304e-05,
"loss": 0.0002,
"step": 3240
},
{
"epoch": 4.019298245614035,
"grad_norm": 0.008615193888545036,
"learning_rate": 4.3372319688109166e-05,
"loss": 0.7675,
"step": 3250
},
{
"epoch": 4.0199730094466934,
"grad_norm": 0.0012555683497339487,
"learning_rate": 4.333483280851702e-05,
"loss": 0.0001,
"step": 3260
},
{
"epoch": 4.020647773279352,
"grad_norm": 0.0026209617499262094,
"learning_rate": 4.329734592892488e-05,
"loss": 0.0001,
"step": 3270
},
{
"epoch": 4.0213225371120105,
"grad_norm": 0.0008131062495522201,
"learning_rate": 4.3259859049332734e-05,
"loss": 0.495,
"step": 3280
},
{
"epoch": 4.021997300944669,
"grad_norm": 0.004160483367741108,
"learning_rate": 4.3222372169740596e-05,
"loss": 0.0001,
"step": 3290
},
{
"epoch": 4.0226720647773275,
"grad_norm": 0.00135552987921983,
"learning_rate": 4.318488529014845e-05,
"loss": 0.0001,
"step": 3300
},
{
"epoch": 4.023346828609987,
"grad_norm": 0.0020715997088700533,
"learning_rate": 4.314739841055631e-05,
"loss": 0.0001,
"step": 3310
},
{
"epoch": 4.0240215924426455,
"grad_norm": 0.0006134248687885702,
"learning_rate": 4.3109911530964164e-05,
"loss": 0.0003,
"step": 3320
},
{
"epoch": 4.024696356275304,
"grad_norm": 0.005337740760296583,
"learning_rate": 4.3072424651372026e-05,
"loss": 0.0002,
"step": 3330
},
{
"epoch": 4.0253711201079625,
"grad_norm": 0.002447796519845724,
"learning_rate": 4.303493777177988e-05,
"loss": 0.0013,
"step": 3340
},
{
"epoch": 4.026045883940621,
"grad_norm": 0.0020753496792167425,
"learning_rate": 4.299745089218774e-05,
"loss": 0.0001,
"step": 3350
},
{
"epoch": 4.0267206477732795,
"grad_norm": 0.001169373164884746,
"learning_rate": 4.2959964012595594e-05,
"loss": 0.4363,
"step": 3360
},
{
"epoch": 4.027395411605938,
"grad_norm": 0.0031577907502651215,
"learning_rate": 4.292247713300345e-05,
"loss": 0.4359,
"step": 3370
},
{
"epoch": 4.028070175438597,
"grad_norm": 0.0011828079586848617,
"learning_rate": 4.2884990253411305e-05,
"loss": 0.0001,
"step": 3380
},
{
"epoch": 4.028744939271255,
"grad_norm": 0.0016030353726819158,
"learning_rate": 4.284750337381917e-05,
"loss": 0.0001,
"step": 3390
},
{
"epoch": 4.029419703103914,
"grad_norm": 0.014403590932488441,
"learning_rate": 4.2810016494227023e-05,
"loss": 0.7807,
"step": 3400
},
{
"epoch": 4.030094466936572,
"grad_norm": 0.005019639153033495,
"learning_rate": 4.277252961463488e-05,
"loss": 0.4727,
"step": 3410
},
{
"epoch": 4.030769230769231,
"grad_norm": 0.002246898366138339,
"learning_rate": 4.2735042735042735e-05,
"loss": 0.0499,
"step": 3420
},
{
"epoch": 4.031443994601889,
"grad_norm": 0.013324781320989132,
"learning_rate": 4.269755585545059e-05,
"loss": 0.5992,
"step": 3430
},
{
"epoch": 4.032118758434548,
"grad_norm": 0.0579649917781353,
"learning_rate": 4.266006897585845e-05,
"loss": 0.0039,
"step": 3440
},
{
"epoch": 4.032793522267206,
"grad_norm": 1.7032642364501953,
"learning_rate": 4.262258209626631e-05,
"loss": 0.6145,
"step": 3450
},
{
"epoch": 4.033468286099865,
"grad_norm": 0.013759407214820385,
"learning_rate": 4.2585095216674165e-05,
"loss": 0.0002,
"step": 3460
},
{
"epoch": 4.034143049932523,
"grad_norm": 0.00753359729424119,
"learning_rate": 4.254760833708202e-05,
"loss": 0.0071,
"step": 3470
},
{
"epoch": 4.034817813765182,
"grad_norm": 0.0020441561937332153,
"learning_rate": 4.251012145748988e-05,
"loss": 0.001,
"step": 3480
},
{
"epoch": 4.035492577597841,
"grad_norm": 0.001379093388095498,
"learning_rate": 4.247263457789773e-05,
"loss": 0.0013,
"step": 3490
},
{
"epoch": 4.0361673414305,
"grad_norm": 0.002510966034606099,
"learning_rate": 4.2435147698305595e-05,
"loss": 0.0003,
"step": 3500
},
{
"epoch": 4.036842105263158,
"grad_norm": 0.0011007965076714754,
"learning_rate": 4.239766081871345e-05,
"loss": 1.0836,
"step": 3510
},
{
"epoch": 4.037516869095817,
"grad_norm": 0.022373057901859283,
"learning_rate": 4.236017393912131e-05,
"loss": 0.0838,
"step": 3520
},
{
"epoch": 4.038191632928475,
"grad_norm": 0.0008921432308852673,
"learning_rate": 4.232268705952916e-05,
"loss": 0.0001,
"step": 3530
},
{
"epoch": 4.038866396761134,
"grad_norm": 0.0007166191353462636,
"learning_rate": 4.2285200179937025e-05,
"loss": 0.0076,
"step": 3540
},
{
"epoch": 4.039541160593792,
"grad_norm": 8.101381301879883,
"learning_rate": 4.224771330034488e-05,
"loss": 0.0233,
"step": 3550
},
{
"epoch": 4.040215924426451,
"grad_norm": 0.0007625047001056373,
"learning_rate": 4.221022642075274e-05,
"loss": 0.0003,
"step": 3560
},
{
"epoch": 4.040890688259109,
"grad_norm": 0.00398569880053401,
"learning_rate": 4.217273954116059e-05,
"loss": 0.0001,
"step": 3570
},
{
"epoch": 4.041565452091768,
"grad_norm": 0.0010361782042309642,
"learning_rate": 4.2135252661568455e-05,
"loss": 0.0001,
"step": 3580
},
{
"epoch": 4.042240215924426,
"grad_norm": 0.001946108415722847,
"learning_rate": 4.209776578197631e-05,
"loss": 0.0004,
"step": 3590
},
{
"epoch": 4.042914979757085,
"grad_norm": 0.003806932596489787,
"learning_rate": 4.2060278902384166e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 4.043589743589743,
"grad_norm": 0.0009996455628424883,
"learning_rate": 4.202279202279202e-05,
"loss": 0.0002,
"step": 3610
},
{
"epoch": 4.044264507422402,
"grad_norm": 0.0016769858775660396,
"learning_rate": 4.1985305143199884e-05,
"loss": 0.0001,
"step": 3620
},
{
"epoch": 4.0449392712550605,
"grad_norm": 0.00047590630128979683,
"learning_rate": 4.194781826360774e-05,
"loss": 0.0001,
"step": 3630
},
{
"epoch": 4.045614035087719,
"grad_norm": 0.0010459835175424814,
"learning_rate": 4.1910331384015596e-05,
"loss": 0.3976,
"step": 3640
},
{
"epoch": 4.0462887989203775,
"grad_norm": 0.003536689095199108,
"learning_rate": 4.187284450442345e-05,
"loss": 0.5592,
"step": 3650
},
{
"epoch": 4.046963562753036,
"grad_norm": 0.004078584257513285,
"learning_rate": 4.1835357624831314e-05,
"loss": 0.2639,
"step": 3660
},
{
"epoch": 4.0476383265856954,
"grad_norm": 0.01091256644576788,
"learning_rate": 4.179787074523917e-05,
"loss": 0.0001,
"step": 3670
},
{
"epoch": 4.048313090418354,
"grad_norm": 0.0032140237744897604,
"learning_rate": 4.1760383865647026e-05,
"loss": 0.2047,
"step": 3680
},
{
"epoch": 4.0489878542510125,
"grad_norm": 0.003986234311014414,
"learning_rate": 4.172289698605488e-05,
"loss": 0.0019,
"step": 3690
},
{
"epoch": 4.049662618083671,
"grad_norm": 0.0013649433385580778,
"learning_rate": 4.1685410106462744e-05,
"loss": 0.0001,
"step": 3700
},
{
"epoch": 4.05,
"eval_accuracy": 0.9285714285714286,
"eval_f1": 0.9284473859473861,
"eval_loss": 0.43087735772132874,
"eval_runtime": 74.3247,
"eval_samples_per_second": 1.507,
"eval_steps_per_second": 1.507,
"step": 3705
},
{
"epoch": 5.00033738191633,
"grad_norm": 0.0009709022124297917,
"learning_rate": 4.16479232268706e-05,
"loss": 0.0001,
"step": 3710
},
{
"epoch": 5.001012145748988,
"grad_norm": 0.00450406176969409,
"learning_rate": 4.1610436347278456e-05,
"loss": 0.0001,
"step": 3720
},
{
"epoch": 5.001686909581647,
"grad_norm": 490.396240234375,
"learning_rate": 4.157294946768631e-05,
"loss": 0.3041,
"step": 3730
},
{
"epoch": 5.002361673414305,
"grad_norm": 0.00026446336414664984,
"learning_rate": 4.153546258809417e-05,
"loss": 0.0001,
"step": 3740
},
{
"epoch": 5.003036437246964,
"grad_norm": 0.0011977544054389,
"learning_rate": 4.149797570850202e-05,
"loss": 0.0001,
"step": 3750
},
{
"epoch": 5.003711201079622,
"grad_norm": 0.0008563337032683194,
"learning_rate": 4.1460488828909886e-05,
"loss": 0.6888,
"step": 3760
},
{
"epoch": 5.004385964912281,
"grad_norm": 0.0008433638722635806,
"learning_rate": 4.142300194931774e-05,
"loss": 0.0003,
"step": 3770
},
{
"epoch": 5.005060728744939,
"grad_norm": 0.0007336140261031687,
"learning_rate": 4.13855150697256e-05,
"loss": 0.5238,
"step": 3780
},
{
"epoch": 5.005735492577598,
"grad_norm": 0.0012576148146763444,
"learning_rate": 4.134802819013345e-05,
"loss": 0.0023,
"step": 3790
},
{
"epoch": 5.006410256410256,
"grad_norm": 0.0009189122938551009,
"learning_rate": 4.131054131054131e-05,
"loss": 0.0131,
"step": 3800
},
{
"epoch": 5.007085020242915,
"grad_norm": 0.008739179000258446,
"learning_rate": 4.127305443094917e-05,
"loss": 0.0003,
"step": 3810
},
{
"epoch": 5.007759784075573,
"grad_norm": 0.0012460118159651756,
"learning_rate": 4.123556755135703e-05,
"loss": 0.0001,
"step": 3820
},
{
"epoch": 5.008434547908232,
"grad_norm": 0.002039340790361166,
"learning_rate": 4.119808067176488e-05,
"loss": 0.0003,
"step": 3830
},
{
"epoch": 5.0091093117408905,
"grad_norm": 0.0009501971653662622,
"learning_rate": 4.116059379217274e-05,
"loss": 0.0052,
"step": 3840
},
{
"epoch": 5.009784075573549,
"grad_norm": 0.07869889587163925,
"learning_rate": 4.11231069125806e-05,
"loss": 0.0002,
"step": 3850
},
{
"epoch": 5.0104588394062075,
"grad_norm": 0.0006638221675530076,
"learning_rate": 4.108562003298845e-05,
"loss": 0.0005,
"step": 3860
},
{
"epoch": 5.011133603238866,
"grad_norm": 0.0008539034170098603,
"learning_rate": 4.104813315339631e-05,
"loss": 0.0001,
"step": 3870
},
{
"epoch": 5.0118083670715246,
"grad_norm": 0.0006605815142393112,
"learning_rate": 4.101064627380417e-05,
"loss": 0.0004,
"step": 3880
},
{
"epoch": 5.012483130904184,
"grad_norm": 0.0008256967412307858,
"learning_rate": 4.097315939421203e-05,
"loss": 0.0001,
"step": 3890
},
{
"epoch": 5.0131578947368425,
"grad_norm": 0.008075601421296597,
"learning_rate": 4.093567251461988e-05,
"loss": 0.0018,
"step": 3900
},
{
"epoch": 5.013832658569501,
"grad_norm": 0.0012110425159335136,
"learning_rate": 4.089818563502774e-05,
"loss": 0.0011,
"step": 3910
},
{
"epoch": 5.0145074224021595,
"grad_norm": 0.0048310281708836555,
"learning_rate": 4.08606987554356e-05,
"loss": 0.0001,
"step": 3920
},
{
"epoch": 5.015182186234818,
"grad_norm": 0.0012771515175700188,
"learning_rate": 4.082321187584346e-05,
"loss": 0.0003,
"step": 3930
},
{
"epoch": 5.015856950067477,
"grad_norm": 0.0013642838457599282,
"learning_rate": 4.078572499625131e-05,
"loss": 0.0001,
"step": 3940
},
{
"epoch": 5.016531713900135,
"grad_norm": 311.0769348144531,
"learning_rate": 4.074823811665917e-05,
"loss": 0.7081,
"step": 3950
},
{
"epoch": 5.017206477732794,
"grad_norm": 0.002835233462974429,
"learning_rate": 4.071075123706703e-05,
"loss": 0.0003,
"step": 3960
},
{
"epoch": 5.017881241565452,
"grad_norm": 0.0006811009370721877,
"learning_rate": 4.067326435747489e-05,
"loss": 0.4166,
"step": 3970
},
{
"epoch": 5.018556005398111,
"grad_norm": 0.0010262362193316221,
"learning_rate": 4.063577747788274e-05,
"loss": 0.0001,
"step": 3980
},
{
"epoch": 5.019230769230769,
"grad_norm": 0.11619503796100616,
"learning_rate": 4.05982905982906e-05,
"loss": 0.0002,
"step": 3990
},
{
"epoch": 5.019905533063428,
"grad_norm": 0.011183816939592361,
"learning_rate": 4.056080371869846e-05,
"loss": 0.0006,
"step": 4000
},
{
"epoch": 5.020580296896086,
"grad_norm": 0.0007078946800902486,
"learning_rate": 4.0523316839106314e-05,
"loss": 0.0004,
"step": 4010
},
{
"epoch": 5.021255060728745,
"grad_norm": 0.008296789601445198,
"learning_rate": 4.048582995951417e-05,
"loss": 0.0134,
"step": 4020
},
{
"epoch": 5.021929824561403,
"grad_norm": 0.013501118868589401,
"learning_rate": 4.044834307992203e-05,
"loss": 0.0003,
"step": 4030
},
{
"epoch": 5.022604588394062,
"grad_norm": 0.15977753698825836,
"learning_rate": 4.041085620032989e-05,
"loss": 0.0001,
"step": 4040
},
{
"epoch": 5.02327935222672,
"grad_norm": 0.004472650587558746,
"learning_rate": 4.0373369320737744e-05,
"loss": 0.0032,
"step": 4050
},
{
"epoch": 5.023954116059379,
"grad_norm": 0.0012224495876580477,
"learning_rate": 4.03358824411456e-05,
"loss": 0.0,
"step": 4060
},
{
"epoch": 5.024628879892038,
"grad_norm": 0.0016181441023945808,
"learning_rate": 4.029839556155346e-05,
"loss": 0.7806,
"step": 4070
},
{
"epoch": 5.025303643724697,
"grad_norm": 0.004258355125784874,
"learning_rate": 4.026090868196132e-05,
"loss": 0.0,
"step": 4080
},
{
"epoch": 5.025978407557355,
"grad_norm": 0.0011408330174162984,
"learning_rate": 4.0223421802369174e-05,
"loss": 0.0001,
"step": 4090
},
{
"epoch": 5.026653171390014,
"grad_norm": 0.010054398328065872,
"learning_rate": 4.018593492277703e-05,
"loss": 0.0001,
"step": 4100
},
{
"epoch": 5.027327935222672,
"grad_norm": 0.0009806094458326697,
"learning_rate": 4.014844804318489e-05,
"loss": 0.0001,
"step": 4110
},
{
"epoch": 5.028002699055331,
"grad_norm": 0.0007722462760284543,
"learning_rate": 4.011096116359274e-05,
"loss": 0.0003,
"step": 4120
},
{
"epoch": 5.028677462887989,
"grad_norm": 0.01538068987429142,
"learning_rate": 4.0073474284000604e-05,
"loss": 0.6961,
"step": 4130
},
{
"epoch": 5.029352226720648,
"grad_norm": 0.00021896508405916393,
"learning_rate": 4.003598740440846e-05,
"loss": 0.0001,
"step": 4140
},
{
"epoch": 5.030026990553306,
"grad_norm": 0.0006867019692435861,
"learning_rate": 3.9998500524816315e-05,
"loss": 0.0,
"step": 4150
},
{
"epoch": 5.030701754385965,
"grad_norm": 0.0021174189168959856,
"learning_rate": 3.996101364522417e-05,
"loss": 0.0,
"step": 4160
},
{
"epoch": 5.031376518218623,
"grad_norm": 0.0005668731173500419,
"learning_rate": 3.992352676563203e-05,
"loss": 0.0,
"step": 4170
},
{
"epoch": 5.032051282051282,
"grad_norm": 0.0007015119190327823,
"learning_rate": 3.988603988603989e-05,
"loss": 0.4088,
"step": 4180
},
{
"epoch": 5.0327260458839405,
"grad_norm": 0.007248507812619209,
"learning_rate": 3.9848553006447745e-05,
"loss": 0.0212,
"step": 4190
},
{
"epoch": 5.033400809716599,
"grad_norm": 0.0023328044917434454,
"learning_rate": 3.98110661268556e-05,
"loss": 0.0001,
"step": 4200
},
{
"epoch": 5.0340755735492575,
"grad_norm": 0.0011781149078160524,
"learning_rate": 3.9773579247263456e-05,
"loss": 0.0001,
"step": 4210
},
{
"epoch": 5.034750337381916,
"grad_norm": 0.000842131907120347,
"learning_rate": 3.973609236767132e-05,
"loss": 0.0001,
"step": 4220
},
{
"epoch": 5.0354251012145745,
"grad_norm": 0.0013578764628618956,
"learning_rate": 3.9698605488079175e-05,
"loss": 0.0001,
"step": 4230
},
{
"epoch": 5.036099865047233,
"grad_norm": 0.0005201473250053823,
"learning_rate": 3.966111860848703e-05,
"loss": 0.0001,
"step": 4240
},
{
"epoch": 5.0367746288798925,
"grad_norm": 0.0011828228598460555,
"learning_rate": 3.9623631728894886e-05,
"loss": 0.0065,
"step": 4250
},
{
"epoch": 5.037449392712551,
"grad_norm": 0.000755178218241781,
"learning_rate": 3.958614484930275e-05,
"loss": 0.207,
"step": 4260
},
{
"epoch": 5.0381241565452095,
"grad_norm": 0.0009751113248057663,
"learning_rate": 3.95486579697106e-05,
"loss": 0.0001,
"step": 4270
},
{
"epoch": 5.038798920377868,
"grad_norm": 0.00031620432855561376,
"learning_rate": 3.951117109011846e-05,
"loss": 0.337,
"step": 4280
},
{
"epoch": 5.0394736842105265,
"grad_norm": 0.0007090018480084836,
"learning_rate": 3.9473684210526316e-05,
"loss": 0.0006,
"step": 4290
},
{
"epoch": 5.040148448043185,
"grad_norm": 0.0010267384350299835,
"learning_rate": 3.943619733093418e-05,
"loss": 0.0,
"step": 4300
},
{
"epoch": 5.040823211875844,
"grad_norm": 0.014587147161364555,
"learning_rate": 3.939871045134203e-05,
"loss": 0.0001,
"step": 4310
},
{
"epoch": 5.041497975708502,
"grad_norm": 0.000788258679676801,
"learning_rate": 3.936122357174989e-05,
"loss": 0.0,
"step": 4320
},
{
"epoch": 5.042172739541161,
"grad_norm": 0.0006495325942523777,
"learning_rate": 3.9323736692157746e-05,
"loss": 0.0,
"step": 4330
},
{
"epoch": 5.042847503373819,
"grad_norm": 0.0006167737883515656,
"learning_rate": 3.928624981256561e-05,
"loss": 0.1018,
"step": 4340
},
{
"epoch": 5.043522267206478,
"grad_norm": 0.0014920184621587396,
"learning_rate": 3.924876293297346e-05,
"loss": 0.0,
"step": 4350
},
{
"epoch": 5.044197031039136,
"grad_norm": 0.0015535310376435518,
"learning_rate": 3.921127605338132e-05,
"loss": 0.0007,
"step": 4360
},
{
"epoch": 5.044871794871795,
"grad_norm": 0.0006431335350498557,
"learning_rate": 3.9173789173789176e-05,
"loss": 0.0001,
"step": 4370
},
{
"epoch": 5.045546558704453,
"grad_norm": 0.005366568453609943,
"learning_rate": 3.913630229419703e-05,
"loss": 0.0,
"step": 4380
},
{
"epoch": 5.046221322537112,
"grad_norm": 0.0013297253753989935,
"learning_rate": 3.909881541460489e-05,
"loss": 0.0,
"step": 4390
},
{
"epoch": 5.04689608636977,
"grad_norm": 0.0004990586312487721,
"learning_rate": 3.906132853501275e-05,
"loss": 0.0,
"step": 4400
},
{
"epoch": 5.047570850202429,
"grad_norm": 0.0013985860859975219,
"learning_rate": 3.9023841655420606e-05,
"loss": 0.0,
"step": 4410
},
{
"epoch": 5.048245614035087,
"grad_norm": 0.0006711781024932861,
"learning_rate": 3.898635477582846e-05,
"loss": 0.0,
"step": 4420
},
{
"epoch": 5.048920377867747,
"grad_norm": 0.0006565306102856994,
"learning_rate": 3.894886789623632e-05,
"loss": 0.0,
"step": 4430
},
{
"epoch": 5.049595141700405,
"grad_norm": 0.0009195157326757908,
"learning_rate": 3.891138101664418e-05,
"loss": 0.0001,
"step": 4440
},
{
"epoch": 5.05,
"eval_accuracy": 0.9107142857142857,
"eval_f1": 0.9105137981578073,
"eval_loss": 0.7364658117294312,
"eval_runtime": 73.1769,
"eval_samples_per_second": 1.531,
"eval_steps_per_second": 1.531,
"step": 4446
},
{
"epoch": 6.000269905533063,
"grad_norm": 0.0008725410443730652,
"learning_rate": 3.8873894137052036e-05,
"loss": 0.0,
"step": 4450
},
{
"epoch": 6.000944669365722,
"grad_norm": 0.0006686112028546631,
"learning_rate": 3.883640725745989e-05,
"loss": 0.0,
"step": 4460
},
{
"epoch": 6.001619433198381,
"grad_norm": 0.000973099609836936,
"learning_rate": 3.879892037786775e-05,
"loss": 0.0,
"step": 4470
},
{
"epoch": 6.0022941970310395,
"grad_norm": 0.0036273570731282234,
"learning_rate": 3.876143349827561e-05,
"loss": 0.0,
"step": 4480
},
{
"epoch": 6.002968960863698,
"grad_norm": 0.0030524057801812887,
"learning_rate": 3.8723946618683466e-05,
"loss": 0.9891,
"step": 4490
},
{
"epoch": 6.0036437246963565,
"grad_norm": 0.0005925680161453784,
"learning_rate": 3.868645973909132e-05,
"loss": 0.0001,
"step": 4500
},
{
"epoch": 6.004318488529015,
"grad_norm": 0.0012102797627449036,
"learning_rate": 3.864897285949918e-05,
"loss": 0.0004,
"step": 4510
},
{
"epoch": 6.004993252361674,
"grad_norm": 0.001870299456641078,
"learning_rate": 3.861148597990703e-05,
"loss": 0.0001,
"step": 4520
},
{
"epoch": 6.005668016194332,
"grad_norm": 0.0008334846352227032,
"learning_rate": 3.857399910031489e-05,
"loss": 0.0,
"step": 4530
},
{
"epoch": 6.006342780026991,
"grad_norm": 0.0909259095788002,
"learning_rate": 3.853651222072275e-05,
"loss": 0.0033,
"step": 4540
},
{
"epoch": 6.007017543859649,
"grad_norm": 0.08534003794193268,
"learning_rate": 3.849902534113061e-05,
"loss": 0.0001,
"step": 4550
},
{
"epoch": 6.007692307692308,
"grad_norm": 0.009015407413244247,
"learning_rate": 3.846153846153846e-05,
"loss": 0.0001,
"step": 4560
},
{
"epoch": 6.008367071524966,
"grad_norm": 0.0005771831492893398,
"learning_rate": 3.842405158194632e-05,
"loss": 0.0,
"step": 4570
},
{
"epoch": 6.009041835357625,
"grad_norm": 0.00015217051259241998,
"learning_rate": 3.8386564702354174e-05,
"loss": 0.0,
"step": 4580
},
{
"epoch": 6.009716599190283,
"grad_norm": 0.001618007430806756,
"learning_rate": 3.834907782276204e-05,
"loss": 0.0001,
"step": 4590
},
{
"epoch": 6.010391363022942,
"grad_norm": 0.0008747613755986094,
"learning_rate": 3.831159094316989e-05,
"loss": 0.0,
"step": 4600
},
{
"epoch": 6.0110661268556,
"grad_norm": 0.0011886496795341372,
"learning_rate": 3.827410406357775e-05,
"loss": 0.0001,
"step": 4610
},
{
"epoch": 6.011740890688259,
"grad_norm": 0.0006136572919785976,
"learning_rate": 3.8236617183985604e-05,
"loss": 0.0,
"step": 4620
},
{
"epoch": 6.012415654520917,
"grad_norm": 0.0002797636261675507,
"learning_rate": 3.819913030439347e-05,
"loss": 0.0,
"step": 4630
},
{
"epoch": 6.013090418353576,
"grad_norm": 0.0005924575380049646,
"learning_rate": 3.8161643424801316e-05,
"loss": 0.0,
"step": 4640
},
{
"epoch": 6.013765182186235,
"grad_norm": 381.5912170410156,
"learning_rate": 3.812415654520918e-05,
"loss": 0.6612,
"step": 4650
},
{
"epoch": 6.014439946018894,
"grad_norm": 0.0007501631625927985,
"learning_rate": 3.8086669665617034e-05,
"loss": 0.057,
"step": 4660
},
{
"epoch": 6.015114709851552,
"grad_norm": 0.00048053194768726826,
"learning_rate": 3.80491827860249e-05,
"loss": 0.7472,
"step": 4670
},
{
"epoch": 6.015789473684211,
"grad_norm": 0.0008806756814010441,
"learning_rate": 3.8011695906432746e-05,
"loss": 0.0,
"step": 4680
},
{
"epoch": 6.016464237516869,
"grad_norm": 0.0007039654301479459,
"learning_rate": 3.797420902684061e-05,
"loss": 0.0002,
"step": 4690
},
{
"epoch": 6.017139001349528,
"grad_norm": 0.0005677440203726292,
"learning_rate": 3.7936722147248464e-05,
"loss": 0.0,
"step": 4700
},
{
"epoch": 6.017813765182186,
"grad_norm": 0.0006246105185709894,
"learning_rate": 3.7899235267656327e-05,
"loss": 0.0002,
"step": 4710
},
{
"epoch": 6.018488529014845,
"grad_norm": 0.0003905866760760546,
"learning_rate": 3.7861748388064176e-05,
"loss": 0.0,
"step": 4720
},
{
"epoch": 6.019163292847503,
"grad_norm": 0.0004027994582429528,
"learning_rate": 3.782426150847204e-05,
"loss": 0.0002,
"step": 4730
},
{
"epoch": 6.019838056680162,
"grad_norm": 0.0017455661436542869,
"learning_rate": 3.7786774628879894e-05,
"loss": 0.0001,
"step": 4740
},
{
"epoch": 6.02051282051282,
"grad_norm": 0.0022832180839031935,
"learning_rate": 3.774928774928775e-05,
"loss": 0.0001,
"step": 4750
},
{
"epoch": 6.021187584345479,
"grad_norm": 295.60693359375,
"learning_rate": 3.7711800869695605e-05,
"loss": 0.7359,
"step": 4760
},
{
"epoch": 6.0218623481781375,
"grad_norm": 0.0004823520721402019,
"learning_rate": 3.767431399010347e-05,
"loss": 0.0,
"step": 4770
},
{
"epoch": 6.022537112010796,
"grad_norm": 0.003145309165120125,
"learning_rate": 3.7636827110511324e-05,
"loss": 0.0,
"step": 4780
},
{
"epoch": 6.0232118758434545,
"grad_norm": 0.00026828868431039155,
"learning_rate": 3.759934023091918e-05,
"loss": 0.0,
"step": 4790
},
{
"epoch": 6.023886639676113,
"grad_norm": 0.000310034112771973,
"learning_rate": 3.7561853351327035e-05,
"loss": 0.0,
"step": 4800
},
{
"epoch": 6.024561403508772,
"grad_norm": 0.00041966387652792037,
"learning_rate": 3.75243664717349e-05,
"loss": 0.0,
"step": 4810
},
{
"epoch": 6.02523616734143,
"grad_norm": 0.0011529176263138652,
"learning_rate": 3.7486879592142754e-05,
"loss": 0.5445,
"step": 4820
},
{
"epoch": 6.0259109311740895,
"grad_norm": 0.02147838845849037,
"learning_rate": 3.744939271255061e-05,
"loss": 1.0205,
"step": 4830
},
{
"epoch": 6.026585695006748,
"grad_norm": 0.000508416909724474,
"learning_rate": 3.7411905832958465e-05,
"loss": 0.001,
"step": 4840
},
{
"epoch": 6.0272604588394065,
"grad_norm": 0.008615111000835896,
"learning_rate": 3.737441895336633e-05,
"loss": 0.0001,
"step": 4850
},
{
"epoch": 6.027935222672065,
"grad_norm": 0.444153755903244,
"learning_rate": 3.7336932073774184e-05,
"loss": 0.9325,
"step": 4860
},
{
"epoch": 6.028609986504724,
"grad_norm": 0.0013290736824274063,
"learning_rate": 3.729944519418204e-05,
"loss": 0.0001,
"step": 4870
},
{
"epoch": 6.029284750337382,
"grad_norm": 0.000803654664196074,
"learning_rate": 3.7261958314589895e-05,
"loss": 0.0044,
"step": 4880
},
{
"epoch": 6.029959514170041,
"grad_norm": 0.0021947200875729322,
"learning_rate": 3.722447143499775e-05,
"loss": 0.9785,
"step": 4890
},
{
"epoch": 6.030634278002699,
"grad_norm": 0.0023971525952219963,
"learning_rate": 3.718698455540561e-05,
"loss": 0.0001,
"step": 4900
},
{
"epoch": 6.031309041835358,
"grad_norm": 0.00609954446554184,
"learning_rate": 3.714949767581347e-05,
"loss": 0.0002,
"step": 4910
},
{
"epoch": 6.031983805668016,
"grad_norm": 0.0020932150073349476,
"learning_rate": 3.7112010796221325e-05,
"loss": 0.0002,
"step": 4920
},
{
"epoch": 6.032658569500675,
"grad_norm": 0.0034460346214473248,
"learning_rate": 3.707452391662918e-05,
"loss": 0.0004,
"step": 4930
},
{
"epoch": 6.033333333333333,
"grad_norm": 0.0021088484209030867,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.0001,
"step": 4940
},
{
"epoch": 6.034008097165992,
"grad_norm": 0.002742623910307884,
"learning_rate": 3.699955015744489e-05,
"loss": 0.0006,
"step": 4950
},
{
"epoch": 6.03468286099865,
"grad_norm": 0.002541649155318737,
"learning_rate": 3.6962063277852755e-05,
"loss": 0.0001,
"step": 4960
},
{
"epoch": 6.035357624831309,
"grad_norm": 0.000678271462675184,
"learning_rate": 3.692457639826061e-05,
"loss": 0.0,
"step": 4970
},
{
"epoch": 6.036032388663967,
"grad_norm": 0.0022359860595315695,
"learning_rate": 3.6887089518668466e-05,
"loss": 0.0002,
"step": 4980
},
{
"epoch": 6.036707152496626,
"grad_norm": 0.003631311934441328,
"learning_rate": 3.684960263907632e-05,
"loss": 0.0139,
"step": 4990
},
{
"epoch": 6.037381916329284,
"grad_norm": 408.66119384765625,
"learning_rate": 3.6812115759484185e-05,
"loss": 0.3617,
"step": 5000
},
{
"epoch": 6.038056680161944,
"grad_norm": 0.001363090705126524,
"learning_rate": 3.6774628879892034e-05,
"loss": 0.7014,
"step": 5010
},
{
"epoch": 6.038731443994602,
"grad_norm": 0.0028585607651621103,
"learning_rate": 3.6737142000299896e-05,
"loss": 0.0209,
"step": 5020
},
{
"epoch": 6.039406207827261,
"grad_norm": 0.0029073706828057766,
"learning_rate": 3.669965512070775e-05,
"loss": 0.0007,
"step": 5030
},
{
"epoch": 6.040080971659919,
"grad_norm": 0.021762054413557053,
"learning_rate": 3.6662168241115615e-05,
"loss": 3.0967,
"step": 5040
},
{
"epoch": 6.040755735492578,
"grad_norm": 1.7035624980926514,
"learning_rate": 3.6624681361523464e-05,
"loss": 1.3983,
"step": 5050
},
{
"epoch": 6.041430499325236,
"grad_norm": 0.07881853729486465,
"learning_rate": 3.6587194481931326e-05,
"loss": 0.8778,
"step": 5060
},
{
"epoch": 6.042105263157895,
"grad_norm": 49.91697311401367,
"learning_rate": 3.654970760233918e-05,
"loss": 0.0293,
"step": 5070
},
{
"epoch": 6.042780026990553,
"grad_norm": 0.01630672998726368,
"learning_rate": 3.6512220722747045e-05,
"loss": 1.1156,
"step": 5080
},
{
"epoch": 6.043454790823212,
"grad_norm": 0.007935232482850552,
"learning_rate": 3.6474733843154894e-05,
"loss": 0.0031,
"step": 5090
},
{
"epoch": 6.04412955465587,
"grad_norm": 299.0083923339844,
"learning_rate": 3.6437246963562756e-05,
"loss": 0.6953,
"step": 5100
},
{
"epoch": 6.044804318488529,
"grad_norm": 0.014369282871484756,
"learning_rate": 3.639976008397061e-05,
"loss": 0.0002,
"step": 5110
},
{
"epoch": 6.0454790823211875,
"grad_norm": 0.0033456783276051283,
"learning_rate": 3.6362273204378474e-05,
"loss": 0.0009,
"step": 5120
},
{
"epoch": 6.046153846153846,
"grad_norm": 0.0012127397349104285,
"learning_rate": 3.6324786324786323e-05,
"loss": 0.0001,
"step": 5130
},
{
"epoch": 6.0468286099865045,
"grad_norm": 0.003025912446901202,
"learning_rate": 3.6287299445194186e-05,
"loss": 0.0001,
"step": 5140
},
{
"epoch": 6.047503373819163,
"grad_norm": 0.006771762855350971,
"learning_rate": 3.624981256560204e-05,
"loss": 0.0003,
"step": 5150
},
{
"epoch": 6.0481781376518216,
"grad_norm": 0.006291988305747509,
"learning_rate": 3.62123256860099e-05,
"loss": 0.6232,
"step": 5160
},
{
"epoch": 6.04885290148448,
"grad_norm": 0.010942903347313404,
"learning_rate": 3.617483880641775e-05,
"loss": 0.9909,
"step": 5170
},
{
"epoch": 6.049527665317139,
"grad_norm": 0.0050459960475564,
"learning_rate": 3.6137351926825616e-05,
"loss": 0.8987,
"step": 5180
},
{
"epoch": 6.05,
"eval_accuracy": 0.8392857142857143,
"eval_f1": 0.8294011707968183,
"eval_loss": 0.930968701839447,
"eval_runtime": 74.4165,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 1.505,
"step": 5187
},
{
"epoch": 7.000202429149797,
"grad_norm": 0.012029584497213364,
"learning_rate": 3.609986504723347e-05,
"loss": 0.0003,
"step": 5190
},
{
"epoch": 7.000877192982456,
"grad_norm": 0.002462017349898815,
"learning_rate": 3.606237816764133e-05,
"loss": 0.0005,
"step": 5200
},
{
"epoch": 7.001551956815114,
"grad_norm": 0.0375690832734108,
"learning_rate": 3.602489128804918e-05,
"loss": 0.1058,
"step": 5210
},
{
"epoch": 7.002226720647773,
"grad_norm": 0.026218879967927933,
"learning_rate": 3.5987404408457046e-05,
"loss": 0.0083,
"step": 5220
},
{
"epoch": 7.002901484480432,
"grad_norm": 0.0031192379537969828,
"learning_rate": 3.59499175288649e-05,
"loss": 0.0342,
"step": 5230
},
{
"epoch": 7.003576248313091,
"grad_norm": 0.002261426765471697,
"learning_rate": 3.591243064927276e-05,
"loss": 0.8758,
"step": 5240
},
{
"epoch": 7.004251012145749,
"grad_norm": 0.7252321839332581,
"learning_rate": 3.587494376968061e-05,
"loss": 0.0008,
"step": 5250
},
{
"epoch": 7.004925775978408,
"grad_norm": 0.002154165878891945,
"learning_rate": 3.583745689008847e-05,
"loss": 0.0002,
"step": 5260
},
{
"epoch": 7.005600539811066,
"grad_norm": 0.0012370734475553036,
"learning_rate": 3.5799970010496325e-05,
"loss": 1.0515,
"step": 5270
},
{
"epoch": 7.006275303643725,
"grad_norm": 0.0021348996087908745,
"learning_rate": 3.576248313090419e-05,
"loss": 0.0001,
"step": 5280
},
{
"epoch": 7.006950067476383,
"grad_norm": 0.006049524061381817,
"learning_rate": 3.572499625131204e-05,
"loss": 0.0003,
"step": 5290
},
{
"epoch": 7.007624831309042,
"grad_norm": 0.01275632157921791,
"learning_rate": 3.56875093717199e-05,
"loss": 0.0021,
"step": 5300
},
{
"epoch": 7.0082995951417,
"grad_norm": 0.0016850440297275782,
"learning_rate": 3.5650022492127754e-05,
"loss": 0.0001,
"step": 5310
},
{
"epoch": 7.008974358974359,
"grad_norm": 0.0009741022950038314,
"learning_rate": 3.561253561253561e-05,
"loss": 0.0005,
"step": 5320
},
{
"epoch": 7.0096491228070175,
"grad_norm": 0.000799846719019115,
"learning_rate": 3.557504873294347e-05,
"loss": 0.0002,
"step": 5330
},
{
"epoch": 7.010323886639676,
"grad_norm": 0.0008095399825833738,
"learning_rate": 3.553756185335133e-05,
"loss": 0.0024,
"step": 5340
},
{
"epoch": 7.0109986504723345,
"grad_norm": 0.0016390876844525337,
"learning_rate": 3.5500074973759184e-05,
"loss": 0.0,
"step": 5350
},
{
"epoch": 7.011673414304993,
"grad_norm": 0.0013130076695233583,
"learning_rate": 3.546258809416704e-05,
"loss": 0.8843,
"step": 5360
},
{
"epoch": 7.0123481781376515,
"grad_norm": 0.015013671480119228,
"learning_rate": 3.54251012145749e-05,
"loss": 0.7296,
"step": 5370
},
{
"epoch": 7.01302294197031,
"grad_norm": 0.003729419782757759,
"learning_rate": 3.538761433498276e-05,
"loss": 0.004,
"step": 5380
},
{
"epoch": 7.013697705802969,
"grad_norm": 0.007766401395201683,
"learning_rate": 3.5350127455390614e-05,
"loss": 0.0001,
"step": 5390
},
{
"epoch": 7.014372469635627,
"grad_norm": 0.03760051354765892,
"learning_rate": 3.531264057579847e-05,
"loss": 0.0002,
"step": 5400
},
{
"epoch": 7.0150472334682865,
"grad_norm": 0.003396588610485196,
"learning_rate": 3.527515369620633e-05,
"loss": 0.0001,
"step": 5410
},
{
"epoch": 7.015721997300945,
"grad_norm": 0.005965414922684431,
"learning_rate": 3.523766681661418e-05,
"loss": 0.0001,
"step": 5420
},
{
"epoch": 7.0163967611336036,
"grad_norm": 0.002591415075585246,
"learning_rate": 3.5200179937022044e-05,
"loss": 0.0001,
"step": 5430
},
{
"epoch": 7.017071524966262,
"grad_norm": 0.0007187007577158511,
"learning_rate": 3.51626930574299e-05,
"loss": 0.6273,
"step": 5440
},
{
"epoch": 7.017746288798921,
"grad_norm": 0.0018147805240005255,
"learning_rate": 3.512520617783776e-05,
"loss": 0.0001,
"step": 5450
},
{
"epoch": 7.018421052631579,
"grad_norm": 0.0007241186103783548,
"learning_rate": 3.508771929824561e-05,
"loss": 0.0002,
"step": 5460
},
{
"epoch": 7.019095816464238,
"grad_norm": 0.002352670766413212,
"learning_rate": 3.5050232418653474e-05,
"loss": 0.0001,
"step": 5470
},
{
"epoch": 7.019770580296896,
"grad_norm": 0.0018704934045672417,
"learning_rate": 3.501274553906133e-05,
"loss": 0.0004,
"step": 5480
},
{
"epoch": 7.020445344129555,
"grad_norm": 0.002092360518872738,
"learning_rate": 3.497525865946919e-05,
"loss": 0.0001,
"step": 5490
},
{
"epoch": 7.021120107962213,
"grad_norm": 0.001126096467487514,
"learning_rate": 3.493777177987704e-05,
"loss": 0.6823,
"step": 5500
},
{
"epoch": 7.021794871794872,
"grad_norm": 0.0008661505416966975,
"learning_rate": 3.4900284900284904e-05,
"loss": 0.0001,
"step": 5510
},
{
"epoch": 7.02246963562753,
"grad_norm": 0.02058524824678898,
"learning_rate": 3.486279802069276e-05,
"loss": 0.0001,
"step": 5520
},
{
"epoch": 7.023144399460189,
"grad_norm": 0.002387122018262744,
"learning_rate": 3.4825311141100615e-05,
"loss": 0.0002,
"step": 5530
},
{
"epoch": 7.023819163292847,
"grad_norm": 0.0011330017587170005,
"learning_rate": 3.478782426150847e-05,
"loss": 0.0001,
"step": 5540
},
{
"epoch": 7.024493927125506,
"grad_norm": 0.0005625615012831986,
"learning_rate": 3.4750337381916334e-05,
"loss": 0.0002,
"step": 5550
},
{
"epoch": 7.025168690958164,
"grad_norm": 0.0008695796132087708,
"learning_rate": 3.471285050232419e-05,
"loss": 0.0,
"step": 5560
},
{
"epoch": 7.025843454790823,
"grad_norm": 0.0016092468285933137,
"learning_rate": 3.4675363622732045e-05,
"loss": 0.0001,
"step": 5570
},
{
"epoch": 7.026518218623481,
"grad_norm": 0.0011349094565957785,
"learning_rate": 3.46378767431399e-05,
"loss": 0.0003,
"step": 5580
},
{
"epoch": 7.027192982456141,
"grad_norm": 0.0005459162639454007,
"learning_rate": 3.4600389863547764e-05,
"loss": 0.0001,
"step": 5590
},
{
"epoch": 7.027867746288799,
"grad_norm": 0.0009417292312718928,
"learning_rate": 3.456290298395562e-05,
"loss": 0.0,
"step": 5600
},
{
"epoch": 7.028542510121458,
"grad_norm": 0.0005761535139754415,
"learning_rate": 3.4525416104363475e-05,
"loss": 0.0,
"step": 5610
},
{
"epoch": 7.029217273954116,
"grad_norm": 0.0007409591344185174,
"learning_rate": 3.448792922477133e-05,
"loss": 0.0001,
"step": 5620
},
{
"epoch": 7.029892037786775,
"grad_norm": 0.004374117590487003,
"learning_rate": 3.4450442345179194e-05,
"loss": 0.0,
"step": 5630
},
{
"epoch": 7.030566801619433,
"grad_norm": 0.017210789024829865,
"learning_rate": 3.441295546558704e-05,
"loss": 0.0001,
"step": 5640
},
{
"epoch": 7.031241565452092,
"grad_norm": 0.0008836057968437672,
"learning_rate": 3.4375468585994905e-05,
"loss": 0.0012,
"step": 5650
},
{
"epoch": 7.03191632928475,
"grad_norm": 0.0015315774362534285,
"learning_rate": 3.433798170640276e-05,
"loss": 0.0,
"step": 5660
},
{
"epoch": 7.032591093117409,
"grad_norm": 0.0006376684177666903,
"learning_rate": 3.4300494826810617e-05,
"loss": 0.0,
"step": 5670
},
{
"epoch": 7.0332658569500675,
"grad_norm": 0.0005232661496847868,
"learning_rate": 3.426300794721847e-05,
"loss": 0.0,
"step": 5680
},
{
"epoch": 7.033940620782726,
"grad_norm": 0.0008468987653031945,
"learning_rate": 3.4225521067626335e-05,
"loss": 0.0003,
"step": 5690
},
{
"epoch": 7.0346153846153845,
"grad_norm": 0.000993360416032374,
"learning_rate": 3.418803418803419e-05,
"loss": 0.0,
"step": 5700
},
{
"epoch": 7.035290148448043,
"grad_norm": 0.0020066085271537304,
"learning_rate": 3.4150547308442046e-05,
"loss": 0.0001,
"step": 5710
},
{
"epoch": 7.0359649122807015,
"grad_norm": 0.00036297430051490664,
"learning_rate": 3.41130604288499e-05,
"loss": 0.0,
"step": 5720
},
{
"epoch": 7.03663967611336,
"grad_norm": 0.0009432988590560853,
"learning_rate": 3.407557354925776e-05,
"loss": 0.0001,
"step": 5730
},
{
"epoch": 7.037314439946019,
"grad_norm": 0.0018047185149043798,
"learning_rate": 3.403808666966562e-05,
"loss": 0.8627,
"step": 5740
},
{
"epoch": 7.037989203778677,
"grad_norm": 0.0037690841127187014,
"learning_rate": 3.4000599790073476e-05,
"loss": 0.0781,
"step": 5750
},
{
"epoch": 7.038663967611336,
"grad_norm": 0.023057781159877777,
"learning_rate": 3.396311291048133e-05,
"loss": 0.0001,
"step": 5760
},
{
"epoch": 7.039338731443995,
"grad_norm": 0.004012484569102526,
"learning_rate": 3.392562603088919e-05,
"loss": 0.0001,
"step": 5770
},
{
"epoch": 7.0400134952766535,
"grad_norm": 0.0012608218239620328,
"learning_rate": 3.388813915129705e-05,
"loss": 0.0001,
"step": 5780
},
{
"epoch": 7.040688259109312,
"grad_norm": 0.002351221162825823,
"learning_rate": 3.38506522717049e-05,
"loss": 0.0001,
"step": 5790
},
{
"epoch": 7.041363022941971,
"grad_norm": 0.000716827402357012,
"learning_rate": 3.381316539211276e-05,
"loss": 0.0005,
"step": 5800
},
{
"epoch": 7.042037786774629,
"grad_norm": 0.0029892646707594395,
"learning_rate": 3.377567851252062e-05,
"loss": 0.0,
"step": 5810
},
{
"epoch": 7.042712550607288,
"grad_norm": 372.2917175292969,
"learning_rate": 3.373819163292848e-05,
"loss": 0.5735,
"step": 5820
},
{
"epoch": 7.043387314439946,
"grad_norm": 0.0010425182990729809,
"learning_rate": 3.370070475333633e-05,
"loss": 0.0,
"step": 5830
},
{
"epoch": 7.044062078272605,
"grad_norm": 43.60670852661133,
"learning_rate": 3.366321787374419e-05,
"loss": 1.7134,
"step": 5840
},
{
"epoch": 7.044736842105263,
"grad_norm": 44.16180419921875,
"learning_rate": 3.362573099415205e-05,
"loss": 0.4081,
"step": 5850
},
{
"epoch": 7.045411605937922,
"grad_norm": 0.002386684063822031,
"learning_rate": 3.358824411455991e-05,
"loss": 0.0014,
"step": 5860
},
{
"epoch": 7.04608636977058,
"grad_norm": 0.000626052962616086,
"learning_rate": 3.355075723496776e-05,
"loss": 0.6338,
"step": 5870
},
{
"epoch": 7.046761133603239,
"grad_norm": 0.0048158965073525906,
"learning_rate": 3.351327035537562e-05,
"loss": 0.0002,
"step": 5880
},
{
"epoch": 7.047435897435897,
"grad_norm": 102.3766860961914,
"learning_rate": 3.347578347578348e-05,
"loss": 0.7,
"step": 5890
},
{
"epoch": 7.048110661268556,
"grad_norm": 0.0005689793615601957,
"learning_rate": 3.343829659619133e-05,
"loss": 0.7073,
"step": 5900
},
{
"epoch": 7.048785425101214,
"grad_norm": 0.013288695365190506,
"learning_rate": 3.340080971659919e-05,
"loss": 0.0572,
"step": 5910
},
{
"epoch": 7.049460188933873,
"grad_norm": 0.0011189569486305118,
"learning_rate": 3.336332283700705e-05,
"loss": 0.4888,
"step": 5920
},
{
"epoch": 7.05,
"eval_accuracy": 0.875,
"eval_f1": 0.8702947845804988,
"eval_loss": 0.856253445148468,
"eval_runtime": 74.1698,
"eval_samples_per_second": 1.51,
"eval_steps_per_second": 1.51,
"step": 5928
},
{
"epoch": 8.000134952766532,
"grad_norm": 0.0010889604454860091,
"learning_rate": 3.332583595741491e-05,
"loss": 0.0492,
"step": 5930
},
{
"epoch": 8.000809716599191,
"grad_norm": 0.0005811200244352221,
"learning_rate": 3.328834907782276e-05,
"loss": 0.0003,
"step": 5940
},
{
"epoch": 8.001484480431849,
"grad_norm": 0.0028562431689351797,
"learning_rate": 3.325086219823062e-05,
"loss": 0.0003,
"step": 5950
},
{
"epoch": 8.002159244264508,
"grad_norm": 0.0011086298618465662,
"learning_rate": 3.321337531863848e-05,
"loss": 0.0001,
"step": 5960
},
{
"epoch": 8.002834008097166,
"grad_norm": 0.0018863864243030548,
"learning_rate": 3.317588843904634e-05,
"loss": 0.0001,
"step": 5970
},
{
"epoch": 8.003508771929825,
"grad_norm": 0.0009740761015564203,
"learning_rate": 3.313840155945419e-05,
"loss": 0.0002,
"step": 5980
},
{
"epoch": 8.004183535762483,
"grad_norm": 0.0005378098576329648,
"learning_rate": 3.310091467986205e-05,
"loss": 0.0001,
"step": 5990
},
{
"epoch": 8.004858299595142,
"grad_norm": 0.001058222958818078,
"learning_rate": 3.306342780026991e-05,
"loss": 0.0001,
"step": 6000
},
{
"epoch": 8.0055330634278,
"grad_norm": 0.0010611525503918529,
"learning_rate": 3.302594092067777e-05,
"loss": 0.0001,
"step": 6010
},
{
"epoch": 8.006207827260459,
"grad_norm": 0.002727857790887356,
"learning_rate": 3.298845404108562e-05,
"loss": 0.0001,
"step": 6020
},
{
"epoch": 8.006882591093117,
"grad_norm": 0.0007821051403880119,
"learning_rate": 3.295096716149348e-05,
"loss": 0.0017,
"step": 6030
},
{
"epoch": 8.007557354925776,
"grad_norm": 0.001169922179542482,
"learning_rate": 3.2913480281901335e-05,
"loss": 0.0001,
"step": 6040
},
{
"epoch": 8.008232118758434,
"grad_norm": 0.0011363876983523369,
"learning_rate": 3.287599340230919e-05,
"loss": 0.0001,
"step": 6050
},
{
"epoch": 8.008906882591093,
"grad_norm": 0.0005207853973843157,
"learning_rate": 3.283850652271705e-05,
"loss": 0.6813,
"step": 6060
},
{
"epoch": 8.00958164642375,
"grad_norm": 0.0005264964420348406,
"learning_rate": 3.280101964312491e-05,
"loss": 0.0001,
"step": 6070
},
{
"epoch": 8.01025641025641,
"grad_norm": 0.0005870209424756467,
"learning_rate": 3.2763532763532764e-05,
"loss": 0.0001,
"step": 6080
},
{
"epoch": 8.01093117408907,
"grad_norm": 0.0016355343395844102,
"learning_rate": 3.272604588394062e-05,
"loss": 0.0004,
"step": 6090
},
{
"epoch": 8.011605937921727,
"grad_norm": 0.004568756558001041,
"learning_rate": 3.2688559004348476e-05,
"loss": 0.0004,
"step": 6100
},
{
"epoch": 8.012280701754387,
"grad_norm": 0.0005888245650567114,
"learning_rate": 3.265107212475634e-05,
"loss": 0.0001,
"step": 6110
},
{
"epoch": 8.012955465587044,
"grad_norm": 0.0023943374399095774,
"learning_rate": 3.2613585245164194e-05,
"loss": 0.0,
"step": 6120
},
{
"epoch": 8.013630229419704,
"grad_norm": 0.0004357252037152648,
"learning_rate": 3.257609836557205e-05,
"loss": 0.0002,
"step": 6130
},
{
"epoch": 8.014304993252361,
"grad_norm": 0.0006332327611744404,
"learning_rate": 3.2538611485979906e-05,
"loss": 0.0001,
"step": 6140
},
{
"epoch": 8.01497975708502,
"grad_norm": 0.0006531701656058431,
"learning_rate": 3.250112460638777e-05,
"loss": 0.0001,
"step": 6150
},
{
"epoch": 8.015654520917678,
"grad_norm": 0.0005107235629111528,
"learning_rate": 3.246363772679562e-05,
"loss": 0.0,
"step": 6160
},
{
"epoch": 8.016329284750338,
"grad_norm": 0.012723034247756004,
"learning_rate": 3.242615084720348e-05,
"loss": 0.0001,
"step": 6170
},
{
"epoch": 8.017004048582995,
"grad_norm": 0.0427851527929306,
"learning_rate": 3.2388663967611336e-05,
"loss": 0.0002,
"step": 6180
},
{
"epoch": 8.017678812415655,
"grad_norm": 0.001141960732638836,
"learning_rate": 3.23511770880192e-05,
"loss": 0.0,
"step": 6190
},
{
"epoch": 8.018353576248312,
"grad_norm": 0.0015029623173177242,
"learning_rate": 3.231369020842705e-05,
"loss": 0.0001,
"step": 6200
},
{
"epoch": 8.019028340080972,
"grad_norm": 0.0005648156511597335,
"learning_rate": 3.227620332883491e-05,
"loss": 0.0001,
"step": 6210
},
{
"epoch": 8.01970310391363,
"grad_norm": 0.0006971880211494863,
"learning_rate": 3.2238716449242766e-05,
"loss": 0.0,
"step": 6220
},
{
"epoch": 8.020377867746289,
"grad_norm": 0.0005205124034546316,
"learning_rate": 3.220122956965063e-05,
"loss": 0.0001,
"step": 6230
},
{
"epoch": 8.021052631578947,
"grad_norm": 0.0007245125016197562,
"learning_rate": 3.216374269005848e-05,
"loss": 0.0001,
"step": 6240
},
{
"epoch": 8.021727395411606,
"grad_norm": 0.0005247213994152844,
"learning_rate": 3.212625581046634e-05,
"loss": 0.0001,
"step": 6250
},
{
"epoch": 8.022402159244265,
"grad_norm": 0.0005060233525000513,
"learning_rate": 3.2088768930874195e-05,
"loss": 0.0014,
"step": 6260
},
{
"epoch": 8.023076923076923,
"grad_norm": 0.01399776991456747,
"learning_rate": 3.205128205128206e-05,
"loss": 0.0001,
"step": 6270
},
{
"epoch": 8.023751686909582,
"grad_norm": 0.0013257871614769101,
"learning_rate": 3.201379517168991e-05,
"loss": 0.0,
"step": 6280
},
{
"epoch": 8.02442645074224,
"grad_norm": 0.00038729843799956143,
"learning_rate": 3.197630829209777e-05,
"loss": 0.0,
"step": 6290
},
{
"epoch": 8.0251012145749,
"grad_norm": 0.0013562028761953115,
"learning_rate": 3.1938821412505625e-05,
"loss": 0.0002,
"step": 6300
},
{
"epoch": 8.025775978407557,
"grad_norm": 0.0023358569014817476,
"learning_rate": 3.190133453291348e-05,
"loss": 0.0,
"step": 6310
},
{
"epoch": 8.026450742240216,
"grad_norm": 0.0007051244028843939,
"learning_rate": 3.186384765332134e-05,
"loss": 0.0,
"step": 6320
},
{
"epoch": 8.027125506072874,
"grad_norm": 0.00045763421803712845,
"learning_rate": 3.18263607737292e-05,
"loss": 0.0,
"step": 6330
},
{
"epoch": 8.027800269905534,
"grad_norm": 0.0003405519819352776,
"learning_rate": 3.1788873894137055e-05,
"loss": 0.0,
"step": 6340
},
{
"epoch": 8.028475033738191,
"grad_norm": 0.0009031207882799208,
"learning_rate": 3.175138701454491e-05,
"loss": 0.3007,
"step": 6350
},
{
"epoch": 8.02914979757085,
"grad_norm": 0.00048344547394663095,
"learning_rate": 3.171390013495277e-05,
"loss": 0.0,
"step": 6360
},
{
"epoch": 8.029824561403508,
"grad_norm": 0.005110772326588631,
"learning_rate": 3.167641325536063e-05,
"loss": 0.0,
"step": 6370
},
{
"epoch": 8.030499325236168,
"grad_norm": 0.0005193505785427988,
"learning_rate": 3.1638926375768485e-05,
"loss": 1.2393,
"step": 6380
},
{
"epoch": 8.031174089068825,
"grad_norm": 0.001544152619317174,
"learning_rate": 3.160143949617634e-05,
"loss": 0.0,
"step": 6390
},
{
"epoch": 8.031848852901485,
"grad_norm": 0.004015884827822447,
"learning_rate": 3.15639526165842e-05,
"loss": 0.0001,
"step": 6400
},
{
"epoch": 8.032523616734142,
"grad_norm": 0.005030054599046707,
"learning_rate": 3.152646573699205e-05,
"loss": 0.0002,
"step": 6410
},
{
"epoch": 8.033198380566802,
"grad_norm": 0.08386117219924927,
"learning_rate": 3.148897885739991e-05,
"loss": 0.0002,
"step": 6420
},
{
"epoch": 8.03387314439946,
"grad_norm": 0.004819917026907206,
"learning_rate": 3.145149197780777e-05,
"loss": 0.1815,
"step": 6430
},
{
"epoch": 8.034547908232119,
"grad_norm": 0.0022033504210412502,
"learning_rate": 3.1414005098215627e-05,
"loss": 0.0001,
"step": 6440
},
{
"epoch": 8.035222672064778,
"grad_norm": 0.0040964060463011265,
"learning_rate": 3.137651821862348e-05,
"loss": 0.0001,
"step": 6450
},
{
"epoch": 8.035897435897436,
"grad_norm": 0.004042464308440685,
"learning_rate": 3.133903133903134e-05,
"loss": 0.0001,
"step": 6460
},
{
"epoch": 8.036572199730095,
"grad_norm": 0.0027346210554242134,
"learning_rate": 3.1301544459439194e-05,
"loss": 0.0001,
"step": 6470
},
{
"epoch": 8.037246963562753,
"grad_norm": 0.0005888897576369345,
"learning_rate": 3.1264057579847056e-05,
"loss": 0.0001,
"step": 6480
},
{
"epoch": 8.037921727395412,
"grad_norm": 0.004620389547199011,
"learning_rate": 3.122657070025491e-05,
"loss": 0.0001,
"step": 6490
},
{
"epoch": 8.03859649122807,
"grad_norm": 0.0017953782808035612,
"learning_rate": 3.118908382066277e-05,
"loss": 0.0001,
"step": 6500
},
{
"epoch": 8.03927125506073,
"grad_norm": 0.0019287167815491557,
"learning_rate": 3.1151596941070624e-05,
"loss": 0.0,
"step": 6510
},
{
"epoch": 8.039946018893387,
"grad_norm": 0.017189156264066696,
"learning_rate": 3.1114110061478486e-05,
"loss": 0.0001,
"step": 6520
},
{
"epoch": 8.040620782726046,
"grad_norm": 0.0002868880983442068,
"learning_rate": 3.107662318188634e-05,
"loss": 0.0001,
"step": 6530
},
{
"epoch": 8.041295546558704,
"grad_norm": 0.003237192053347826,
"learning_rate": 3.10391363022942e-05,
"loss": 0.0,
"step": 6540
},
{
"epoch": 8.041970310391363,
"grad_norm": 0.010104048997163773,
"learning_rate": 3.1001649422702054e-05,
"loss": 0.0001,
"step": 6550
},
{
"epoch": 8.042645074224021,
"grad_norm": 0.0012962371110916138,
"learning_rate": 3.0964162543109916e-05,
"loss": 0.0001,
"step": 6560
},
{
"epoch": 8.04331983805668,
"grad_norm": 0.0021973999682813883,
"learning_rate": 3.0926675663517765e-05,
"loss": 0.0001,
"step": 6570
},
{
"epoch": 8.043994601889338,
"grad_norm": 0.004213243722915649,
"learning_rate": 3.088918878392563e-05,
"loss": 0.0001,
"step": 6580
},
{
"epoch": 8.044669365721997,
"grad_norm": 0.0007371046231128275,
"learning_rate": 3.0851701904333484e-05,
"loss": 0.0,
"step": 6590
},
{
"epoch": 8.045344129554655,
"grad_norm": 0.0029181931167840958,
"learning_rate": 3.0814215024741346e-05,
"loss": 0.0001,
"step": 6600
},
{
"epoch": 8.046018893387314,
"grad_norm": 0.001932345563545823,
"learning_rate": 3.0776728145149195e-05,
"loss": 0.0001,
"step": 6610
},
{
"epoch": 8.046693657219974,
"grad_norm": 0.0020557758398354053,
"learning_rate": 3.073924126555706e-05,
"loss": 1.2559,
"step": 6620
},
{
"epoch": 8.047368421052632,
"grad_norm": 0.005378492642194033,
"learning_rate": 3.0701754385964913e-05,
"loss": 0.084,
"step": 6630
},
{
"epoch": 8.048043184885291,
"grad_norm": 0.0023935220669955015,
"learning_rate": 3.0664267506372776e-05,
"loss": 0.2057,
"step": 6640
},
{
"epoch": 8.048717948717949,
"grad_norm": 0.0029694943223148584,
"learning_rate": 3.0626780626780625e-05,
"loss": 0.0001,
"step": 6650
},
{
"epoch": 8.049392712550608,
"grad_norm": 0.0006969795795157552,
"learning_rate": 3.058929374718849e-05,
"loss": 0.0001,
"step": 6660
},
{
"epoch": 8.05,
"eval_accuracy": 0.8928571428571429,
"eval_f1": 0.8894495468057416,
"eval_loss": 0.6908820867538452,
"eval_runtime": 70.8817,
"eval_samples_per_second": 1.58,
"eval_steps_per_second": 1.58,
"step": 6669
},
{
"epoch": 9.000067476383267,
"grad_norm": 0.007031037472188473,
"learning_rate": 3.055180686759634e-05,
"loss": 0.0001,
"step": 6670
},
{
"epoch": 9.000742240215924,
"grad_norm": 0.0013843988999724388,
"learning_rate": 3.05143199880042e-05,
"loss": 0.0001,
"step": 6680
},
{
"epoch": 9.001417004048584,
"grad_norm": 0.07400429248809814,
"learning_rate": 3.0476833108412055e-05,
"loss": 0.0002,
"step": 6690
},
{
"epoch": 9.002091767881241,
"grad_norm": 0.001845911960117519,
"learning_rate": 3.0439346228819914e-05,
"loss": 0.0,
"step": 6700
},
{
"epoch": 9.0027665317139,
"grad_norm": 0.00020295576541684568,
"learning_rate": 3.0401859349227773e-05,
"loss": 0.0014,
"step": 6710
},
{
"epoch": 9.003441295546558,
"grad_norm": 0.001036637695506215,
"learning_rate": 3.0364372469635626e-05,
"loss": 0.0003,
"step": 6720
},
{
"epoch": 9.004116059379218,
"grad_norm": 0.001262377598322928,
"learning_rate": 3.0326885590043485e-05,
"loss": 0.0001,
"step": 6730
},
{
"epoch": 9.004790823211875,
"grad_norm": 0.0012167665408924222,
"learning_rate": 3.0289398710451344e-05,
"loss": 0.6511,
"step": 6740
},
{
"epoch": 9.005465587044535,
"grad_norm": 0.0019521707436069846,
"learning_rate": 3.0251911830859203e-05,
"loss": 0.0,
"step": 6750
},
{
"epoch": 9.006140350877192,
"grad_norm": 0.0013618938392028213,
"learning_rate": 3.0214424951267055e-05,
"loss": 0.0001,
"step": 6760
},
{
"epoch": 9.006815114709852,
"grad_norm": 0.0009306151187047362,
"learning_rate": 3.0176938071674915e-05,
"loss": 0.0001,
"step": 6770
},
{
"epoch": 9.00748987854251,
"grad_norm": 0.0007624576683156192,
"learning_rate": 3.0139451192082774e-05,
"loss": 0.0001,
"step": 6780
},
{
"epoch": 9.008164642375169,
"grad_norm": 0.0007957870257087052,
"learning_rate": 3.0101964312490626e-05,
"loss": 0.4765,
"step": 6790
},
{
"epoch": 9.008839406207827,
"grad_norm": 0.47597193717956543,
"learning_rate": 3.0064477432898485e-05,
"loss": 0.0019,
"step": 6800
},
{
"epoch": 9.009514170040486,
"grad_norm": 0.0003396008105482906,
"learning_rate": 3.0026990553306344e-05,
"loss": 0.0001,
"step": 6810
},
{
"epoch": 9.010188933873144,
"grad_norm": 0.0011485237628221512,
"learning_rate": 2.9989503673714204e-05,
"loss": 0.0,
"step": 6820
},
{
"epoch": 9.010863697705803,
"grad_norm": 0.0008013169863261282,
"learning_rate": 2.9952016794122056e-05,
"loss": 0.0001,
"step": 6830
},
{
"epoch": 9.011538461538462,
"grad_norm": 0.00038786802906543016,
"learning_rate": 2.9914529914529915e-05,
"loss": 0.0,
"step": 6840
},
{
"epoch": 9.01221322537112,
"grad_norm": 0.003582603298127651,
"learning_rate": 2.9877043034937774e-05,
"loss": 0.9191,
"step": 6850
},
{
"epoch": 9.01288798920378,
"grad_norm": 0.0014808046398684382,
"learning_rate": 2.9839556155345634e-05,
"loss": 0.0001,
"step": 6860
},
{
"epoch": 9.013562753036437,
"grad_norm": 0.01157829724252224,
"learning_rate": 2.9802069275753486e-05,
"loss": 0.0001,
"step": 6870
},
{
"epoch": 9.014237516869096,
"grad_norm": 0.007076776586472988,
"learning_rate": 2.9764582396161345e-05,
"loss": 0.0012,
"step": 6880
},
{
"epoch": 9.014912280701754,
"grad_norm": 0.003984262701123953,
"learning_rate": 2.9727095516569204e-05,
"loss": 0.0001,
"step": 6890
},
{
"epoch": 9.015587044534414,
"grad_norm": 0.00039073076914064586,
"learning_rate": 2.9689608636977063e-05,
"loss": 0.0001,
"step": 6900
},
{
"epoch": 9.016261808367071,
"grad_norm": 0.005625125020742416,
"learning_rate": 2.9652121757384916e-05,
"loss": 0.0001,
"step": 6910
},
{
"epoch": 9.01693657219973,
"grad_norm": 0.0015515730483457446,
"learning_rate": 2.9614634877792775e-05,
"loss": 0.0,
"step": 6920
},
{
"epoch": 9.017611336032388,
"grad_norm": 0.0017237714491784573,
"learning_rate": 2.9577147998200634e-05,
"loss": 0.0,
"step": 6930
},
{
"epoch": 9.018286099865048,
"grad_norm": 0.008184783160686493,
"learning_rate": 2.9539661118608486e-05,
"loss": 0.0001,
"step": 6940
},
{
"epoch": 9.018960863697705,
"grad_norm": 0.002028749557211995,
"learning_rate": 2.9502174239016346e-05,
"loss": 0.0001,
"step": 6950
},
{
"epoch": 9.019635627530365,
"grad_norm": 0.0036216990556567907,
"learning_rate": 2.9464687359424205e-05,
"loss": 0.0,
"step": 6960
},
{
"epoch": 9.020310391363022,
"grad_norm": 0.0013016269076615572,
"learning_rate": 2.942720047983206e-05,
"loss": 0.0001,
"step": 6970
},
{
"epoch": 9.020985155195682,
"grad_norm": 0.00772570027038455,
"learning_rate": 2.9389713600239916e-05,
"loss": 0.0001,
"step": 6980
},
{
"epoch": 9.02165991902834,
"grad_norm": 0.0003020280273631215,
"learning_rate": 2.9352226720647776e-05,
"loss": 0.0,
"step": 6990
},
{
"epoch": 9.022334682860999,
"grad_norm": 0.0012822924181818962,
"learning_rate": 2.931473984105563e-05,
"loss": 0.0,
"step": 7000
},
{
"epoch": 9.023009446693656,
"grad_norm": 0.0010099551873281598,
"learning_rate": 2.927725296146349e-05,
"loss": 0.0001,
"step": 7010
},
{
"epoch": 9.023684210526316,
"grad_norm": 0.0024363386910408735,
"learning_rate": 2.9239766081871346e-05,
"loss": 0.0001,
"step": 7020
},
{
"epoch": 9.024358974358975,
"grad_norm": 0.0023049945011734962,
"learning_rate": 2.9202279202279202e-05,
"loss": 0.0001,
"step": 7030
},
{
"epoch": 9.025033738191633,
"grad_norm": 0.0029273051768541336,
"learning_rate": 2.916479232268706e-05,
"loss": 0.0,
"step": 7040
},
{
"epoch": 9.025708502024292,
"grad_norm": 0.003555365838110447,
"learning_rate": 2.9127305443094917e-05,
"loss": 0.0001,
"step": 7050
},
{
"epoch": 9.02638326585695,
"grad_norm": 0.0033711865544319153,
"learning_rate": 2.9089818563502773e-05,
"loss": 0.0,
"step": 7060
},
{
"epoch": 9.02705802968961,
"grad_norm": 0.00046359331463463604,
"learning_rate": 2.9052331683910632e-05,
"loss": 0.0001,
"step": 7070
},
{
"epoch": 9.027732793522267,
"grad_norm": 0.0003137718595098704,
"learning_rate": 2.901484480431849e-05,
"loss": 0.0,
"step": 7080
},
{
"epoch": 9.028407557354926,
"grad_norm": 0.0016707087634131312,
"learning_rate": 2.8977357924726343e-05,
"loss": 0.0,
"step": 7090
},
{
"epoch": 9.029082321187584,
"grad_norm": 0.0012837687972933054,
"learning_rate": 2.8939871045134203e-05,
"loss": 0.0,
"step": 7100
},
{
"epoch": 9.029757085020243,
"grad_norm": 0.00030405522556975484,
"learning_rate": 2.8902384165542062e-05,
"loss": 0.0,
"step": 7110
},
{
"epoch": 9.030431848852901,
"grad_norm": 0.000334856566041708,
"learning_rate": 2.886489728594992e-05,
"loss": 0.0,
"step": 7120
},
{
"epoch": 9.03110661268556,
"grad_norm": 0.00024141219910234213,
"learning_rate": 2.8827410406357773e-05,
"loss": 0.0,
"step": 7130
},
{
"epoch": 9.031781376518218,
"grad_norm": 0.0014251351822167635,
"learning_rate": 2.8789923526765633e-05,
"loss": 0.0001,
"step": 7140
},
{
"epoch": 9.032456140350877,
"grad_norm": 0.0001798996381694451,
"learning_rate": 2.875243664717349e-05,
"loss": 0.0,
"step": 7150
},
{
"epoch": 9.033130904183535,
"grad_norm": 0.00026806764071807265,
"learning_rate": 2.871494976758135e-05,
"loss": 0.0,
"step": 7160
},
{
"epoch": 9.033805668016194,
"grad_norm": 0.001039984286762774,
"learning_rate": 2.8677462887989203e-05,
"loss": 0.0,
"step": 7170
},
{
"epoch": 9.034480431848852,
"grad_norm": 0.00029442558297887444,
"learning_rate": 2.8639976008397062e-05,
"loss": 0.0,
"step": 7180
},
{
"epoch": 9.035155195681511,
"grad_norm": 0.0010803727200254798,
"learning_rate": 2.860248912880492e-05,
"loss": 0.0,
"step": 7190
},
{
"epoch": 9.035829959514171,
"grad_norm": 0.0009579784818924963,
"learning_rate": 2.8565002249212774e-05,
"loss": 0.0,
"step": 7200
},
{
"epoch": 9.036504723346829,
"grad_norm": 0.00148207473102957,
"learning_rate": 2.8527515369620633e-05,
"loss": 0.5707,
"step": 7210
},
{
"epoch": 9.037179487179488,
"grad_norm": 0.0010521633084863424,
"learning_rate": 2.8490028490028492e-05,
"loss": 0.0627,
"step": 7220
},
{
"epoch": 9.037854251012146,
"grad_norm": 0.0016639038221910596,
"learning_rate": 2.845254161043635e-05,
"loss": 0.0205,
"step": 7230
},
{
"epoch": 9.038529014844805,
"grad_norm": 0.0019760627765208483,
"learning_rate": 2.8415054730844204e-05,
"loss": 0.0001,
"step": 7240
},
{
"epoch": 9.039203778677463,
"grad_norm": 0.0023020838852971792,
"learning_rate": 2.8377567851252063e-05,
"loss": 0.0001,
"step": 7250
},
{
"epoch": 9.039878542510122,
"grad_norm": 0.9819605946540833,
"learning_rate": 2.8340080971659922e-05,
"loss": 0.0009,
"step": 7260
},
{
"epoch": 9.04055330634278,
"grad_norm": 0.002409159205853939,
"learning_rate": 2.830259409206778e-05,
"loss": 0.6277,
"step": 7270
},
{
"epoch": 9.041228070175439,
"grad_norm": 298.6535339355469,
"learning_rate": 2.8265107212475634e-05,
"loss": 0.947,
"step": 7280
},
{
"epoch": 9.041902834008097,
"grad_norm": 0.034443099051713943,
"learning_rate": 2.8227620332883493e-05,
"loss": 0.0001,
"step": 7290
},
{
"epoch": 9.042577597840756,
"grad_norm": 0.040302518755197525,
"learning_rate": 2.8190133453291352e-05,
"loss": 0.003,
"step": 7300
},
{
"epoch": 9.043252361673414,
"grad_norm": 0.0009369853651151061,
"learning_rate": 2.8152646573699204e-05,
"loss": 0.0,
"step": 7310
},
{
"epoch": 9.043927125506073,
"grad_norm": 0.0013028283137828112,
"learning_rate": 2.8115159694107064e-05,
"loss": 0.0,
"step": 7320
},
{
"epoch": 9.04460188933873,
"grad_norm": 0.001541333505883813,
"learning_rate": 2.8077672814514923e-05,
"loss": 0.0,
"step": 7330
},
{
"epoch": 9.04527665317139,
"grad_norm": 0.000400466175051406,
"learning_rate": 2.8040185934922782e-05,
"loss": 0.0006,
"step": 7340
},
{
"epoch": 9.045951417004048,
"grad_norm": 0.001137162558734417,
"learning_rate": 2.8002699055330634e-05,
"loss": 0.0002,
"step": 7350
},
{
"epoch": 9.046626180836707,
"grad_norm": 0.0009733253973536193,
"learning_rate": 2.7965212175738493e-05,
"loss": 0.0,
"step": 7360
},
{
"epoch": 9.047300944669365,
"grad_norm": 0.0002777110203169286,
"learning_rate": 2.792772529614635e-05,
"loss": 0.0,
"step": 7370
},
{
"epoch": 9.047975708502024,
"grad_norm": 0.0009547212393954396,
"learning_rate": 2.789023841655421e-05,
"loss": 0.0,
"step": 7380
},
{
"epoch": 9.048650472334684,
"grad_norm": 0.0003457583661656827,
"learning_rate": 2.7852751536962064e-05,
"loss": 0.0008,
"step": 7390
},
{
"epoch": 9.049325236167341,
"grad_norm": 0.0019107568077743053,
"learning_rate": 2.781526465736992e-05,
"loss": 0.0009,
"step": 7400
},
{
"epoch": 9.05,
"grad_norm": 0.0008839426445774734,
"learning_rate": 2.777777777777778e-05,
"loss": 0.0018,
"step": 7410
},
{
"epoch": 9.05,
"eval_accuracy": 0.8928571428571429,
"eval_f1": 0.8916871416871418,
"eval_loss": 0.9169295430183411,
"eval_runtime": 70.5688,
"eval_samples_per_second": 1.587,
"eval_steps_per_second": 1.587,
"step": 7410
},
{
"epoch": 10.00067476383266,
"grad_norm": 0.0002696131123229861,
"learning_rate": 2.7740290898185638e-05,
"loss": 0.002,
"step": 7420
},
{
"epoch": 10.001349527665317,
"grad_norm": 0.00017847323033493012,
"learning_rate": 2.770280401859349e-05,
"loss": 0.0,
"step": 7430
},
{
"epoch": 10.002024291497976,
"grad_norm": 0.0010017943568527699,
"learning_rate": 2.766531713900135e-05,
"loss": 0.0,
"step": 7440
},
{
"epoch": 10.002699055330634,
"grad_norm": 0.0006036867271177471,
"learning_rate": 2.762783025940921e-05,
"loss": 0.0,
"step": 7450
},
{
"epoch": 10.003373819163293,
"grad_norm": 0.00019583333050832152,
"learning_rate": 2.759034337981706e-05,
"loss": 0.1066,
"step": 7460
},
{
"epoch": 10.004048582995951,
"grad_norm": 0.08632688224315643,
"learning_rate": 2.755285650022492e-05,
"loss": 0.0003,
"step": 7470
},
{
"epoch": 10.00472334682861,
"grad_norm": 0.00013941490033175796,
"learning_rate": 2.751536962063278e-05,
"loss": 0.0,
"step": 7480
},
{
"epoch": 10.005398110661268,
"grad_norm": 0.0003023550088983029,
"learning_rate": 2.747788274104064e-05,
"loss": 0.0001,
"step": 7490
},
{
"epoch": 10.006072874493928,
"grad_norm": 0.0005739156622439623,
"learning_rate": 2.744039586144849e-05,
"loss": 0.3069,
"step": 7500
},
{
"epoch": 10.006747638326585,
"grad_norm": 0.0005304102669470012,
"learning_rate": 2.740290898185635e-05,
"loss": 0.0,
"step": 7510
},
{
"epoch": 10.007422402159245,
"grad_norm": 0.0009174313163384795,
"learning_rate": 2.736542210226421e-05,
"loss": 0.0,
"step": 7520
},
{
"epoch": 10.008097165991902,
"grad_norm": 0.0004933126620016992,
"learning_rate": 2.732793522267207e-05,
"loss": 0.0,
"step": 7530
},
{
"epoch": 10.008771929824562,
"grad_norm": 0.002700564218685031,
"learning_rate": 2.729044834307992e-05,
"loss": 0.0,
"step": 7540
},
{
"epoch": 10.00944669365722,
"grad_norm": 0.0008284652722068131,
"learning_rate": 2.725296146348778e-05,
"loss": 0.7602,
"step": 7550
},
{
"epoch": 10.010121457489879,
"grad_norm": 0.0005742429639212787,
"learning_rate": 2.721547458389564e-05,
"loss": 0.0013,
"step": 7560
},
{
"epoch": 10.010796221322536,
"grad_norm": 0.0001865791855379939,
"learning_rate": 2.7177987704303492e-05,
"loss": 1.0409,
"step": 7570
},
{
"epoch": 10.011470985155196,
"grad_norm": 0.0005401599337346852,
"learning_rate": 2.714050082471135e-05,
"loss": 0.0,
"step": 7580
},
{
"epoch": 10.012145748987853,
"grad_norm": 10.862272262573242,
"learning_rate": 2.710301394511921e-05,
"loss": 0.6573,
"step": 7590
},
{
"epoch": 10.012820512820513,
"grad_norm": 37.7309455871582,
"learning_rate": 2.706552706552707e-05,
"loss": 0.7899,
"step": 7600
},
{
"epoch": 10.013495276653172,
"grad_norm": 0.0009414503001607955,
"learning_rate": 2.7028040185934922e-05,
"loss": 0.0011,
"step": 7610
},
{
"epoch": 10.01417004048583,
"grad_norm": 0.0004630287585314363,
"learning_rate": 2.699055330634278e-05,
"loss": 0.0001,
"step": 7620
},
{
"epoch": 10.01484480431849,
"grad_norm": 0.0013565809931606054,
"learning_rate": 2.695306642675064e-05,
"loss": 0.0001,
"step": 7630
},
{
"epoch": 10.015519568151147,
"grad_norm": 0.0022902884520590305,
"learning_rate": 2.69155795471585e-05,
"loss": 0.0,
"step": 7640
},
{
"epoch": 10.016194331983806,
"grad_norm": 0.0009432418155483902,
"learning_rate": 2.687809266756635e-05,
"loss": 0.0,
"step": 7650
},
{
"epoch": 10.016869095816464,
"grad_norm": 0.0009669333812780678,
"learning_rate": 2.684060578797421e-05,
"loss": 0.0001,
"step": 7660
},
{
"epoch": 10.017543859649123,
"grad_norm": 0.0011604432947933674,
"learning_rate": 2.680311890838207e-05,
"loss": 0.0011,
"step": 7670
},
{
"epoch": 10.018218623481781,
"grad_norm": 0.0037133977748453617,
"learning_rate": 2.6765632028789922e-05,
"loss": 0.0089,
"step": 7680
},
{
"epoch": 10.01889338731444,
"grad_norm": 0.0019840672612190247,
"learning_rate": 2.672814514919778e-05,
"loss": 0.0001,
"step": 7690
},
{
"epoch": 10.019568151147098,
"grad_norm": 0.0010515927569940686,
"learning_rate": 2.669065826960564e-05,
"loss": 0.0,
"step": 7700
},
{
"epoch": 10.020242914979757,
"grad_norm": 0.00031027224031277,
"learning_rate": 2.66531713900135e-05,
"loss": 0.0001,
"step": 7710
},
{
"epoch": 10.020917678812415,
"grad_norm": 0.0026109693571925163,
"learning_rate": 2.6615684510421352e-05,
"loss": 0.0001,
"step": 7720
},
{
"epoch": 10.021592442645074,
"grad_norm": 0.001366731128655374,
"learning_rate": 2.657819763082921e-05,
"loss": 0.0,
"step": 7730
},
{
"epoch": 10.022267206477732,
"grad_norm": 0.0010099642677232623,
"learning_rate": 2.654071075123707e-05,
"loss": 0.0,
"step": 7740
},
{
"epoch": 10.022941970310391,
"grad_norm": 0.0007431610720232129,
"learning_rate": 2.6503223871644926e-05,
"loss": 0.3974,
"step": 7750
},
{
"epoch": 10.023616734143049,
"grad_norm": 0.0005235990975052118,
"learning_rate": 2.6465736992052782e-05,
"loss": 0.0001,
"step": 7760
},
{
"epoch": 10.024291497975709,
"grad_norm": 0.002703143283724785,
"learning_rate": 2.642825011246064e-05,
"loss": 0.9305,
"step": 7770
},
{
"epoch": 10.024966261808368,
"grad_norm": 0.0013169089797884226,
"learning_rate": 2.6390763232868497e-05,
"loss": 0.0061,
"step": 7780
},
{
"epoch": 10.025641025641026,
"grad_norm": 0.0006970075191929936,
"learning_rate": 2.6353276353276356e-05,
"loss": 0.0,
"step": 7790
},
{
"epoch": 10.026315789473685,
"grad_norm": 0.0022921450436115265,
"learning_rate": 2.6315789473684212e-05,
"loss": 2.1218,
"step": 7800
},
{
"epoch": 10.026990553306343,
"grad_norm": 0.015075190924108028,
"learning_rate": 2.6278302594092068e-05,
"loss": 0.0001,
"step": 7810
},
{
"epoch": 10.027665317139002,
"grad_norm": 0.0003634750028140843,
"learning_rate": 2.6240815714499927e-05,
"loss": 0.0003,
"step": 7820
},
{
"epoch": 10.02834008097166,
"grad_norm": 0.005189963150769472,
"learning_rate": 2.6203328834907783e-05,
"loss": 0.0002,
"step": 7830
},
{
"epoch": 10.029014844804319,
"grad_norm": 0.0013347219210118055,
"learning_rate": 2.616584195531564e-05,
"loss": 0.0004,
"step": 7840
},
{
"epoch": 10.029689608636977,
"grad_norm": 0.011999278329312801,
"learning_rate": 2.6128355075723498e-05,
"loss": 0.0001,
"step": 7850
},
{
"epoch": 10.030364372469636,
"grad_norm": 0.0007896720780991018,
"learning_rate": 2.6090868196131357e-05,
"loss": 0.0001,
"step": 7860
},
{
"epoch": 10.031039136302294,
"grad_norm": 0.004586980678141117,
"learning_rate": 2.605338131653921e-05,
"loss": 0.0001,
"step": 7870
},
{
"epoch": 10.031713900134953,
"grad_norm": 0.001417971565388143,
"learning_rate": 2.601589443694707e-05,
"loss": 0.0,
"step": 7880
},
{
"epoch": 10.03238866396761,
"grad_norm": 0.0019554668106138706,
"learning_rate": 2.5978407557354928e-05,
"loss": 0.0001,
"step": 7890
},
{
"epoch": 10.03306342780027,
"grad_norm": 0.028743397444486618,
"learning_rate": 2.5940920677762787e-05,
"loss": 0.0001,
"step": 7900
},
{
"epoch": 10.033738191632928,
"grad_norm": 0.0008731328416615725,
"learning_rate": 2.590343379817064e-05,
"loss": 0.0001,
"step": 7910
},
{
"epoch": 10.034412955465587,
"grad_norm": 0.0012366612209007144,
"learning_rate": 2.5865946918578498e-05,
"loss": 0.0001,
"step": 7920
},
{
"epoch": 10.035087719298245,
"grad_norm": 0.0026165838353335857,
"learning_rate": 2.5828460038986357e-05,
"loss": 0.0001,
"step": 7930
},
{
"epoch": 10.035762483130904,
"grad_norm": 0.014659812673926353,
"learning_rate": 2.579097315939421e-05,
"loss": 0.0002,
"step": 7940
},
{
"epoch": 10.036437246963562,
"grad_norm": 0.00143991329241544,
"learning_rate": 2.575348627980207e-05,
"loss": 0.0,
"step": 7950
},
{
"epoch": 10.037112010796221,
"grad_norm": 0.00752654206007719,
"learning_rate": 2.5715999400209928e-05,
"loss": 0.0001,
"step": 7960
},
{
"epoch": 10.03778677462888,
"grad_norm": 0.0011906948639079928,
"learning_rate": 2.5678512520617787e-05,
"loss": 0.0001,
"step": 7970
},
{
"epoch": 10.038461538461538,
"grad_norm": 0.004429694265127182,
"learning_rate": 2.564102564102564e-05,
"loss": 0.0001,
"step": 7980
},
{
"epoch": 10.039136302294198,
"grad_norm": 0.00023650593357160687,
"learning_rate": 2.56035387614335e-05,
"loss": 0.0001,
"step": 7990
},
{
"epoch": 10.039811066126855,
"grad_norm": 0.0007866804371587932,
"learning_rate": 2.5566051881841358e-05,
"loss": 0.0,
"step": 8000
},
{
"epoch": 10.040485829959515,
"grad_norm": 0.0013989802682772279,
"learning_rate": 2.5528565002249217e-05,
"loss": 0.0001,
"step": 8010
},
{
"epoch": 10.041160593792172,
"grad_norm": 0.0008867586147971451,
"learning_rate": 2.549107812265707e-05,
"loss": 0.2682,
"step": 8020
},
{
"epoch": 10.041835357624832,
"grad_norm": 0.001083207200281322,
"learning_rate": 2.545359124306493e-05,
"loss": 0.0,
"step": 8030
},
{
"epoch": 10.04251012145749,
"grad_norm": 0.0010164374252781272,
"learning_rate": 2.5416104363472788e-05,
"loss": 0.0014,
"step": 8040
},
{
"epoch": 10.043184885290149,
"grad_norm": 0.0032585004810243845,
"learning_rate": 2.5378617483880647e-05,
"loss": 0.0001,
"step": 8050
},
{
"epoch": 10.043859649122806,
"grad_norm": 0.0007220272673293948,
"learning_rate": 2.53411306042885e-05,
"loss": 0.0001,
"step": 8060
},
{
"epoch": 10.044534412955466,
"grad_norm": 0.0010795597918331623,
"learning_rate": 2.530364372469636e-05,
"loss": 0.0001,
"step": 8070
},
{
"epoch": 10.045209176788124,
"grad_norm": 0.0033428198657929897,
"learning_rate": 2.5266156845104218e-05,
"loss": 0.0,
"step": 8080
},
{
"epoch": 10.045883940620783,
"grad_norm": 0.0007780479500070214,
"learning_rate": 2.522866996551207e-05,
"loss": 0.0003,
"step": 8090
},
{
"epoch": 10.04655870445344,
"grad_norm": 0.002177152084186673,
"learning_rate": 2.519118308591993e-05,
"loss": 1.034,
"step": 8100
},
{
"epoch": 10.0472334682861,
"grad_norm": 0.012076308950781822,
"learning_rate": 2.515369620632779e-05,
"loss": 0.0001,
"step": 8110
},
{
"epoch": 10.047908232118758,
"grad_norm": 0.00882900319993496,
"learning_rate": 2.5116209326735644e-05,
"loss": 0.0001,
"step": 8120
},
{
"epoch": 10.048582995951417,
"grad_norm": 0.0017163383308798075,
"learning_rate": 2.50787224471435e-05,
"loss": 0.0002,
"step": 8130
},
{
"epoch": 10.049257759784076,
"grad_norm": 0.07908181846141815,
"learning_rate": 2.504123556755136e-05,
"loss": 0.0002,
"step": 8140
},
{
"epoch": 10.049932523616734,
"grad_norm": 0.0007900993805378675,
"learning_rate": 2.5003748687959215e-05,
"loss": 0.0,
"step": 8150
},
{
"epoch": 10.05,
"eval_accuracy": 0.8928571428571429,
"eval_f1": 0.8927764491849939,
"eval_loss": 0.6104062795639038,
"eval_runtime": 74.468,
"eval_samples_per_second": 1.504,
"eval_steps_per_second": 1.504,
"step": 8151
},
{
"epoch": 11.000607287449393,
"grad_norm": 0.0007240193081088364,
"learning_rate": 2.496626180836707e-05,
"loss": 0.0,
"step": 8160
},
{
"epoch": 11.001282051282052,
"grad_norm": 0.0002468556631356478,
"learning_rate": 2.492877492877493e-05,
"loss": 0.11,
"step": 8170
},
{
"epoch": 11.00195681511471,
"grad_norm": 0.0006738382508046925,
"learning_rate": 2.4891288049182786e-05,
"loss": 0.0009,
"step": 8180
},
{
"epoch": 11.00263157894737,
"grad_norm": 0.0002363823732594028,
"learning_rate": 2.485380116959064e-05,
"loss": 0.0001,
"step": 8190
},
{
"epoch": 11.003306342780027,
"grad_norm": 0.01611531712114811,
"learning_rate": 2.48163142899985e-05,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 11.003981106612686,
"grad_norm": 0.00017891006427817047,
"learning_rate": 2.4778827410406356e-05,
"loss": 0.0001,
"step": 8210
},
{
"epoch": 11.004655870445344,
"grad_norm": 0.0012173757422715425,
"learning_rate": 2.4741340530814216e-05,
"loss": 0.0,
"step": 8220
},
{
"epoch": 11.005330634278003,
"grad_norm": 0.00027030581259168684,
"learning_rate": 2.470385365122207e-05,
"loss": 0.0001,
"step": 8230
},
{
"epoch": 11.006005398110661,
"grad_norm": 0.0007059440249577165,
"learning_rate": 2.466636677162993e-05,
"loss": 0.0038,
"step": 8240
},
{
"epoch": 11.00668016194332,
"grad_norm": 0.0038354801945388317,
"learning_rate": 2.4628879892037786e-05,
"loss": 0.0,
"step": 8250
},
{
"epoch": 11.007354925775978,
"grad_norm": 0.002050234004855156,
"learning_rate": 2.4591393012445645e-05,
"loss": 0.0001,
"step": 8260
},
{
"epoch": 11.008029689608637,
"grad_norm": 0.0007953056483529508,
"learning_rate": 2.45539061328535e-05,
"loss": 0.0001,
"step": 8270
},
{
"epoch": 11.008704453441295,
"grad_norm": 0.0005133861559443176,
"learning_rate": 2.451641925326136e-05,
"loss": 0.0001,
"step": 8280
},
{
"epoch": 11.009379217273954,
"grad_norm": 0.00046163739170879126,
"learning_rate": 2.4478932373669216e-05,
"loss": 0.0,
"step": 8290
},
{
"epoch": 11.010053981106612,
"grad_norm": 0.0001449552073609084,
"learning_rate": 2.4441445494077075e-05,
"loss": 0.3172,
"step": 8300
},
{
"epoch": 11.010728744939271,
"grad_norm": 164.93666076660156,
"learning_rate": 2.440395861448493e-05,
"loss": 0.6368,
"step": 8310
},
{
"epoch": 11.011403508771929,
"grad_norm": 0.000476795103168115,
"learning_rate": 2.4366471734892787e-05,
"loss": 0.0,
"step": 8320
},
{
"epoch": 11.012078272604588,
"grad_norm": 0.0037983739748597145,
"learning_rate": 2.4328984855300646e-05,
"loss": 0.0002,
"step": 8330
},
{
"epoch": 11.012753036437246,
"grad_norm": 0.000796021893620491,
"learning_rate": 2.4291497975708502e-05,
"loss": 0.0,
"step": 8340
},
{
"epoch": 11.013427800269906,
"grad_norm": 0.0005037175142206252,
"learning_rate": 2.425401109611636e-05,
"loss": 0.0002,
"step": 8350
},
{
"epoch": 11.014102564102565,
"grad_norm": 0.0043189083226025105,
"learning_rate": 2.4216524216524217e-05,
"loss": 0.0001,
"step": 8360
},
{
"epoch": 11.014777327935223,
"grad_norm": 0.0015088323270902038,
"learning_rate": 2.4179037336932076e-05,
"loss": 0.0,
"step": 8370
},
{
"epoch": 11.015452091767882,
"grad_norm": 0.009932787157595158,
"learning_rate": 2.414155045733993e-05,
"loss": 0.0,
"step": 8380
},
{
"epoch": 11.01612685560054,
"grad_norm": 0.0006705676787532866,
"learning_rate": 2.410406357774779e-05,
"loss": 0.0,
"step": 8390
},
{
"epoch": 11.016801619433199,
"grad_norm": 0.0004983929102309048,
"learning_rate": 2.4066576698155647e-05,
"loss": 0.0,
"step": 8400
},
{
"epoch": 11.017476383265857,
"grad_norm": 0.0002321622014278546,
"learning_rate": 2.4029089818563506e-05,
"loss": 0.0,
"step": 8410
},
{
"epoch": 11.018151147098516,
"grad_norm": 0.00045225844951346517,
"learning_rate": 2.399160293897136e-05,
"loss": 0.0001,
"step": 8420
},
{
"epoch": 11.018825910931174,
"grad_norm": 0.0006059862207621336,
"learning_rate": 2.395411605937922e-05,
"loss": 0.0,
"step": 8430
},
{
"epoch": 11.019500674763833,
"grad_norm": 0.00025944746448658407,
"learning_rate": 2.3916629179787076e-05,
"loss": 0.0,
"step": 8440
},
{
"epoch": 11.02017543859649,
"grad_norm": 0.005270655732601881,
"learning_rate": 2.3879142300194932e-05,
"loss": 0.0,
"step": 8450
},
{
"epoch": 11.02085020242915,
"grad_norm": 0.0001714004756649956,
"learning_rate": 2.384165542060279e-05,
"loss": 0.0,
"step": 8460
},
{
"epoch": 11.021524966261808,
"grad_norm": 0.0004896153695881367,
"learning_rate": 2.3804168541010647e-05,
"loss": 0.0001,
"step": 8470
},
{
"epoch": 11.022199730094467,
"grad_norm": 0.0004871699493378401,
"learning_rate": 2.3766681661418506e-05,
"loss": 0.0,
"step": 8480
},
{
"epoch": 11.022874493927125,
"grad_norm": 0.00332398503087461,
"learning_rate": 2.3729194781826362e-05,
"loss": 0.0,
"step": 8490
},
{
"epoch": 11.023549257759784,
"grad_norm": 0.0004967203130945563,
"learning_rate": 2.369170790223422e-05,
"loss": 0.0,
"step": 8500
},
{
"epoch": 11.024224021592442,
"grad_norm": 0.0006828585756011307,
"learning_rate": 2.3654221022642077e-05,
"loss": 0.0,
"step": 8510
},
{
"epoch": 11.024898785425101,
"grad_norm": 0.00026437186170369387,
"learning_rate": 2.3616734143049933e-05,
"loss": 0.0923,
"step": 8520
},
{
"epoch": 11.025573549257759,
"grad_norm": 0.001157809398137033,
"learning_rate": 2.3579247263457792e-05,
"loss": 0.3747,
"step": 8530
},
{
"epoch": 11.026248313090418,
"grad_norm": 0.0006130054825916886,
"learning_rate": 2.3541760383865648e-05,
"loss": 0.1333,
"step": 8540
},
{
"epoch": 11.026923076923078,
"grad_norm": 0.004360508639365435,
"learning_rate": 2.3504273504273504e-05,
"loss": 0.0001,
"step": 8550
},
{
"epoch": 11.027597840755735,
"grad_norm": 0.0038445070385932922,
"learning_rate": 2.3466786624681363e-05,
"loss": 0.0005,
"step": 8560
},
{
"epoch": 11.028272604588395,
"grad_norm": 0.0003999462933279574,
"learning_rate": 2.342929974508922e-05,
"loss": 0.0,
"step": 8570
},
{
"epoch": 11.028947368421052,
"grad_norm": 0.0013614681083709002,
"learning_rate": 2.3391812865497074e-05,
"loss": 0.0,
"step": 8580
},
{
"epoch": 11.029622132253712,
"grad_norm": 8.867596625350416e-05,
"learning_rate": 2.3354325985904933e-05,
"loss": 0.0,
"step": 8590
},
{
"epoch": 11.03029689608637,
"grad_norm": 0.0005633147084154189,
"learning_rate": 2.331683910631279e-05,
"loss": 0.0,
"step": 8600
},
{
"epoch": 11.030971659919029,
"grad_norm": 0.0005220117163844407,
"learning_rate": 2.327935222672065e-05,
"loss": 0.0001,
"step": 8610
},
{
"epoch": 11.031646423751686,
"grad_norm": 0.0004213712236378342,
"learning_rate": 2.3241865347128504e-05,
"loss": 0.0,
"step": 8620
},
{
"epoch": 11.032321187584346,
"grad_norm": 0.00038689616485498846,
"learning_rate": 2.3204378467536363e-05,
"loss": 0.0,
"step": 8630
},
{
"epoch": 11.032995951417004,
"grad_norm": 0.00039902018033899367,
"learning_rate": 2.316689158794422e-05,
"loss": 0.0,
"step": 8640
},
{
"epoch": 11.033670715249663,
"grad_norm": 0.0026982324197888374,
"learning_rate": 2.3129404708352078e-05,
"loss": 0.0,
"step": 8650
},
{
"epoch": 11.03434547908232,
"grad_norm": 0.0001991643221117556,
"learning_rate": 2.3091917828759934e-05,
"loss": 0.0,
"step": 8660
},
{
"epoch": 11.03502024291498,
"grad_norm": 0.0019273010548204184,
"learning_rate": 2.3054430949167793e-05,
"loss": 0.0,
"step": 8670
},
{
"epoch": 11.035695006747638,
"grad_norm": 0.000698404386639595,
"learning_rate": 2.301694406957565e-05,
"loss": 0.0001,
"step": 8680
},
{
"epoch": 11.036369770580297,
"grad_norm": 0.00025344561436213553,
"learning_rate": 2.2979457189983508e-05,
"loss": 0.0,
"step": 8690
},
{
"epoch": 11.037044534412955,
"grad_norm": 0.0027119882870465517,
"learning_rate": 2.2941970310391364e-05,
"loss": 0.4804,
"step": 8700
},
{
"epoch": 11.037719298245614,
"grad_norm": 0.00020401214715093374,
"learning_rate": 2.290448343079922e-05,
"loss": 0.0,
"step": 8710
},
{
"epoch": 11.038394062078273,
"grad_norm": 0.0004772163520101458,
"learning_rate": 2.286699655120708e-05,
"loss": 0.0,
"step": 8720
},
{
"epoch": 11.039068825910931,
"grad_norm": 0.0004061859508510679,
"learning_rate": 2.2829509671614935e-05,
"loss": 0.0001,
"step": 8730
},
{
"epoch": 11.03974358974359,
"grad_norm": 0.0010080209467560053,
"learning_rate": 2.2792022792022794e-05,
"loss": 0.0,
"step": 8740
},
{
"epoch": 11.040418353576248,
"grad_norm": 0.00021367882436607033,
"learning_rate": 2.275453591243065e-05,
"loss": 0.0,
"step": 8750
},
{
"epoch": 11.041093117408908,
"grad_norm": 0.002230451675131917,
"learning_rate": 2.271704903283851e-05,
"loss": 0.0,
"step": 8760
},
{
"epoch": 11.041767881241565,
"grad_norm": 0.0003300936659798026,
"learning_rate": 2.2679562153246365e-05,
"loss": 0.0,
"step": 8770
},
{
"epoch": 11.042442645074225,
"grad_norm": 0.0023498530499637127,
"learning_rate": 2.2642075273654224e-05,
"loss": 0.0001,
"step": 8780
},
{
"epoch": 11.043117408906882,
"grad_norm": 0.0011958705727010965,
"learning_rate": 2.260458839406208e-05,
"loss": 0.0001,
"step": 8790
},
{
"epoch": 11.043792172739542,
"grad_norm": 0.0022039199247956276,
"learning_rate": 2.256710151446994e-05,
"loss": 0.0,
"step": 8800
},
{
"epoch": 11.0444669365722,
"grad_norm": 0.0003688375581987202,
"learning_rate": 2.2529614634877794e-05,
"loss": 0.0059,
"step": 8810
},
{
"epoch": 11.045141700404859,
"grad_norm": 0.0007805086788721383,
"learning_rate": 2.2492127755285654e-05,
"loss": 0.0,
"step": 8820
},
{
"epoch": 11.045816464237516,
"grad_norm": 0.0009934029076248407,
"learning_rate": 2.245464087569351e-05,
"loss": 0.0,
"step": 8830
},
{
"epoch": 11.046491228070176,
"grad_norm": 0.001246001455001533,
"learning_rate": 2.2417153996101365e-05,
"loss": 0.0,
"step": 8840
},
{
"epoch": 11.047165991902833,
"grad_norm": 9.812816279008985e-05,
"learning_rate": 2.2379667116509224e-05,
"loss": 0.0,
"step": 8850
},
{
"epoch": 11.047840755735493,
"grad_norm": 0.0004926809924654663,
"learning_rate": 2.234218023691708e-05,
"loss": 0.0,
"step": 8860
},
{
"epoch": 11.04851551956815,
"grad_norm": 0.0003611448628362268,
"learning_rate": 2.230469335732494e-05,
"loss": 0.0,
"step": 8870
},
{
"epoch": 11.04919028340081,
"grad_norm": 0.000520729401614517,
"learning_rate": 2.2267206477732795e-05,
"loss": 0.0,
"step": 8880
},
{
"epoch": 11.049865047233467,
"grad_norm": 0.00023293115373235196,
"learning_rate": 2.2229719598140654e-05,
"loss": 0.0,
"step": 8890
},
{
"epoch": 11.05,
"eval_accuracy": 0.9196428571428571,
"eval_f1": 0.9207212368977075,
"eval_loss": 0.6125034689903259,
"eval_runtime": 71.2839,
"eval_samples_per_second": 1.571,
"eval_steps_per_second": 1.571,
"step": 8892
},
{
"epoch": 12.000539811066126,
"grad_norm": 0.0007233197102323174,
"learning_rate": 2.219223271854851e-05,
"loss": 0.4448,
"step": 8900
},
{
"epoch": 12.001214574898786,
"grad_norm": 0.0002516189415473491,
"learning_rate": 2.2154745838956366e-05,
"loss": 0.0354,
"step": 8910
},
{
"epoch": 12.001889338731443,
"grad_norm": 0.0003420355205889791,
"learning_rate": 2.2117258959364225e-05,
"loss": 0.0,
"step": 8920
},
{
"epoch": 12.002564102564103,
"grad_norm": 0.0004494291788432747,
"learning_rate": 2.207977207977208e-05,
"loss": 0.0,
"step": 8930
},
{
"epoch": 12.003238866396762,
"grad_norm": 0.00031234361813403666,
"learning_rate": 2.2042285200179936e-05,
"loss": 0.0,
"step": 8940
},
{
"epoch": 12.00391363022942,
"grad_norm": 0.00012721461826004088,
"learning_rate": 2.2004798320587796e-05,
"loss": 0.0838,
"step": 8950
},
{
"epoch": 12.004588394062079,
"grad_norm": 0.000489223632030189,
"learning_rate": 2.196731144099565e-05,
"loss": 0.0,
"step": 8960
},
{
"epoch": 12.005263157894737,
"grad_norm": 0.0033521486911922693,
"learning_rate": 2.1929824561403507e-05,
"loss": 0.1973,
"step": 8970
},
{
"epoch": 12.005937921727396,
"grad_norm": 0.009397713467478752,
"learning_rate": 2.1892337681811366e-05,
"loss": 0.0,
"step": 8980
},
{
"epoch": 12.006612685560054,
"grad_norm": 0.006849181838333607,
"learning_rate": 2.1854850802219222e-05,
"loss": 0.0,
"step": 8990
},
{
"epoch": 12.007287449392713,
"grad_norm": 0.0006626849644817412,
"learning_rate": 2.181736392262708e-05,
"loss": 0.0,
"step": 9000
},
{
"epoch": 12.00796221322537,
"grad_norm": 0.000323317275615409,
"learning_rate": 2.1779877043034937e-05,
"loss": 0.4233,
"step": 9010
},
{
"epoch": 12.00863697705803,
"grad_norm": 0.00013916198804508895,
"learning_rate": 2.1742390163442796e-05,
"loss": 0.0006,
"step": 9020
},
{
"epoch": 12.009311740890688,
"grad_norm": 0.0004281499423086643,
"learning_rate": 2.1704903283850652e-05,
"loss": 0.0,
"step": 9030
},
{
"epoch": 12.009986504723347,
"grad_norm": 0.0038120527751743793,
"learning_rate": 2.166741640425851e-05,
"loss": 0.2496,
"step": 9040
},
{
"epoch": 12.010661268556005,
"grad_norm": 0.007827537134289742,
"learning_rate": 2.1629929524666367e-05,
"loss": 0.0,
"step": 9050
},
{
"epoch": 12.011336032388664,
"grad_norm": 0.0004882031353190541,
"learning_rate": 2.1592442645074226e-05,
"loss": 0.0236,
"step": 9060
},
{
"epoch": 12.012010796221322,
"grad_norm": 0.0006974562420509756,
"learning_rate": 2.1554955765482082e-05,
"loss": 0.0,
"step": 9070
},
{
"epoch": 12.012685560053981,
"grad_norm": 0.0007927274564281106,
"learning_rate": 2.151746888588994e-05,
"loss": 0.0,
"step": 9080
},
{
"epoch": 12.013360323886639,
"grad_norm": 0.0005972622311674058,
"learning_rate": 2.1479982006297797e-05,
"loss": 0.0,
"step": 9090
},
{
"epoch": 12.014035087719298,
"grad_norm": 0.0020678879227489233,
"learning_rate": 2.1442495126705653e-05,
"loss": 0.0,
"step": 9100
},
{
"epoch": 12.014709851551958,
"grad_norm": 0.0017037901561707258,
"learning_rate": 2.1405008247113512e-05,
"loss": 0.0,
"step": 9110
},
{
"epoch": 12.015384615384615,
"grad_norm": 0.002625885419547558,
"learning_rate": 2.1367521367521368e-05,
"loss": 0.0,
"step": 9120
},
{
"epoch": 12.016059379217275,
"grad_norm": 0.00016007163503672928,
"learning_rate": 2.1330034487929227e-05,
"loss": 0.0,
"step": 9130
},
{
"epoch": 12.016734143049932,
"grad_norm": 8.975803211797029e-05,
"learning_rate": 2.1292547608337082e-05,
"loss": 0.0,
"step": 9140
},
{
"epoch": 12.017408906882592,
"grad_norm": 0.00010270516213495284,
"learning_rate": 2.125506072874494e-05,
"loss": 0.0,
"step": 9150
},
{
"epoch": 12.01808367071525,
"grad_norm": 0.0003781057021114975,
"learning_rate": 2.1217573849152797e-05,
"loss": 0.0,
"step": 9160
},
{
"epoch": 12.018758434547909,
"grad_norm": 0.00045806102571077645,
"learning_rate": 2.1180086969560657e-05,
"loss": 0.0,
"step": 9170
},
{
"epoch": 12.019433198380566,
"grad_norm": 0.00040667993016541004,
"learning_rate": 2.1142600089968512e-05,
"loss": 0.0,
"step": 9180
},
{
"epoch": 12.020107962213226,
"grad_norm": 7.579607336083427e-05,
"learning_rate": 2.110511321037637e-05,
"loss": 0.0,
"step": 9190
},
{
"epoch": 12.020782726045883,
"grad_norm": 0.0002768370322883129,
"learning_rate": 2.1067626330784227e-05,
"loss": 0.0,
"step": 9200
},
{
"epoch": 12.021457489878543,
"grad_norm": 0.0010953324381262064,
"learning_rate": 2.1030139451192083e-05,
"loss": 0.0,
"step": 9210
},
{
"epoch": 12.0221322537112,
"grad_norm": 0.00658809207379818,
"learning_rate": 2.0992652571599942e-05,
"loss": 0.0919,
"step": 9220
},
{
"epoch": 12.02280701754386,
"grad_norm": 0.0006163925281725824,
"learning_rate": 2.0955165692007798e-05,
"loss": 0.0,
"step": 9230
},
{
"epoch": 12.023481781376518,
"grad_norm": 0.000813082791864872,
"learning_rate": 2.0917678812415657e-05,
"loss": 0.0001,
"step": 9240
},
{
"epoch": 12.024156545209177,
"grad_norm": 0.00046772375935688615,
"learning_rate": 2.0880191932823513e-05,
"loss": 0.0,
"step": 9250
},
{
"epoch": 12.024831309041835,
"grad_norm": 0.0005937941023148596,
"learning_rate": 2.0842705053231372e-05,
"loss": 0.0002,
"step": 9260
},
{
"epoch": 12.025506072874494,
"grad_norm": 0.000659748911857605,
"learning_rate": 2.0805218173639228e-05,
"loss": 0.0,
"step": 9270
},
{
"epoch": 12.026180836707152,
"grad_norm": 0.0006786544108763337,
"learning_rate": 2.0767731294047084e-05,
"loss": 0.0,
"step": 9280
},
{
"epoch": 12.026855600539811,
"grad_norm": 0.000225842886720784,
"learning_rate": 2.0730244414454943e-05,
"loss": 0.0,
"step": 9290
},
{
"epoch": 12.02753036437247,
"grad_norm": 0.0006020697182975709,
"learning_rate": 2.06927575348628e-05,
"loss": 0.0,
"step": 9300
},
{
"epoch": 12.028205128205128,
"grad_norm": 0.0005702193011529744,
"learning_rate": 2.0655270655270654e-05,
"loss": 0.0,
"step": 9310
},
{
"epoch": 12.028879892037788,
"grad_norm": 0.000844390713609755,
"learning_rate": 2.0617783775678514e-05,
"loss": 0.0,
"step": 9320
},
{
"epoch": 12.029554655870445,
"grad_norm": 9.666190453572199e-05,
"learning_rate": 2.058029689608637e-05,
"loss": 0.0,
"step": 9330
},
{
"epoch": 12.030229419703105,
"grad_norm": 0.0001864578080130741,
"learning_rate": 2.0542810016494225e-05,
"loss": 0.0,
"step": 9340
},
{
"epoch": 12.030904183535762,
"grad_norm": 0.00014394025492947549,
"learning_rate": 2.0505323136902084e-05,
"loss": 0.0,
"step": 9350
},
{
"epoch": 12.031578947368422,
"grad_norm": 0.00027057836996391416,
"learning_rate": 2.046783625730994e-05,
"loss": 0.0,
"step": 9360
},
{
"epoch": 12.03225371120108,
"grad_norm": 0.0004066646215505898,
"learning_rate": 2.04303493777178e-05,
"loss": 0.0,
"step": 9370
},
{
"epoch": 12.032928475033739,
"grad_norm": 0.00043117342283949256,
"learning_rate": 2.0392862498125655e-05,
"loss": 0.0,
"step": 9380
},
{
"epoch": 12.033603238866396,
"grad_norm": 0.00019329691713210195,
"learning_rate": 2.0355375618533514e-05,
"loss": 0.0001,
"step": 9390
},
{
"epoch": 12.034278002699056,
"grad_norm": 0.00036019805702380836,
"learning_rate": 2.031788873894137e-05,
"loss": 0.0,
"step": 9400
},
{
"epoch": 12.034952766531713,
"grad_norm": 0.0006936113350093365,
"learning_rate": 2.028040185934923e-05,
"loss": 0.0,
"step": 9410
},
{
"epoch": 12.035627530364373,
"grad_norm": 0.00041965124546550214,
"learning_rate": 2.0242914979757085e-05,
"loss": 0.0,
"step": 9420
},
{
"epoch": 12.03630229419703,
"grad_norm": 0.00011109585466329008,
"learning_rate": 2.0205428100164944e-05,
"loss": 0.0,
"step": 9430
},
{
"epoch": 12.03697705802969,
"grad_norm": 0.000144297766382806,
"learning_rate": 2.01679412205728e-05,
"loss": 0.0281,
"step": 9440
},
{
"epoch": 12.037651821862347,
"grad_norm": 0.0002551145735196769,
"learning_rate": 2.013045434098066e-05,
"loss": 0.0,
"step": 9450
},
{
"epoch": 12.038326585695007,
"grad_norm": 0.006847582757472992,
"learning_rate": 2.0092967461388515e-05,
"loss": 0.0,
"step": 9460
},
{
"epoch": 12.039001349527666,
"grad_norm": 0.00011437670036684722,
"learning_rate": 2.005548058179637e-05,
"loss": 0.0,
"step": 9470
},
{
"epoch": 12.039676113360324,
"grad_norm": 0.00040303889545612037,
"learning_rate": 2.001799370220423e-05,
"loss": 0.0,
"step": 9480
},
{
"epoch": 12.040350877192983,
"grad_norm": 0.00046083523193374276,
"learning_rate": 1.9980506822612085e-05,
"loss": 0.0,
"step": 9490
},
{
"epoch": 12.04102564102564,
"grad_norm": 0.0006515540299005806,
"learning_rate": 1.9943019943019945e-05,
"loss": 0.0,
"step": 9500
},
{
"epoch": 12.0417004048583,
"grad_norm": 0.00014752485731150955,
"learning_rate": 1.99055330634278e-05,
"loss": 0.0,
"step": 9510
},
{
"epoch": 12.042375168690958,
"grad_norm": 0.0005620931042358279,
"learning_rate": 1.986804618383566e-05,
"loss": 0.0,
"step": 9520
},
{
"epoch": 12.043049932523617,
"grad_norm": 0.00011923335841856897,
"learning_rate": 1.9830559304243515e-05,
"loss": 0.0,
"step": 9530
},
{
"epoch": 12.043724696356275,
"grad_norm": 0.0002657576696947217,
"learning_rate": 1.9793072424651374e-05,
"loss": 0.0,
"step": 9540
},
{
"epoch": 12.044399460188934,
"grad_norm": 0.0001235770614584908,
"learning_rate": 1.975558554505923e-05,
"loss": 0.0,
"step": 9550
},
{
"epoch": 12.045074224021592,
"grad_norm": 0.0001751129748299718,
"learning_rate": 1.971809866546709e-05,
"loss": 0.4854,
"step": 9560
},
{
"epoch": 12.045748987854251,
"grad_norm": 0.000554791884496808,
"learning_rate": 1.9680611785874945e-05,
"loss": 0.0,
"step": 9570
},
{
"epoch": 12.046423751686909,
"grad_norm": 0.0003107208467554301,
"learning_rate": 1.9643124906282804e-05,
"loss": 0.0,
"step": 9580
},
{
"epoch": 12.047098515519568,
"grad_norm": 0.0002857028157450259,
"learning_rate": 1.960563802669066e-05,
"loss": 0.0,
"step": 9590
},
{
"epoch": 12.047773279352226,
"grad_norm": 0.0001487692934460938,
"learning_rate": 1.9568151147098516e-05,
"loss": 0.0,
"step": 9600
},
{
"epoch": 12.048448043184885,
"grad_norm": 0.0004835377912968397,
"learning_rate": 1.9530664267506375e-05,
"loss": 0.0,
"step": 9610
},
{
"epoch": 12.049122807017543,
"grad_norm": 0.004288305062800646,
"learning_rate": 1.949317738791423e-05,
"loss": 0.0,
"step": 9620
},
{
"epoch": 12.049797570850203,
"grad_norm": 0.0002630397502798587,
"learning_rate": 1.945569050832209e-05,
"loss": 0.0,
"step": 9630
},
{
"epoch": 12.05,
"eval_accuracy": 0.9285714285714286,
"eval_f1": 0.9281167328042328,
"eval_loss": 0.5643919110298157,
"eval_runtime": 75.5753,
"eval_samples_per_second": 1.482,
"eval_steps_per_second": 1.482,
"step": 9633
},
{
"epoch": 13.000472334682861,
"grad_norm": 0.00026892725145444274,
"learning_rate": 1.9418203628729946e-05,
"loss": 0.0,
"step": 9640
},
{
"epoch": 13.001147098515519,
"grad_norm": 0.00012843680451624095,
"learning_rate": 1.9380716749137805e-05,
"loss": 0.0,
"step": 9650
},
{
"epoch": 13.001821862348178,
"grad_norm": 0.00029701701714657247,
"learning_rate": 1.934322986954566e-05,
"loss": 0.0,
"step": 9660
},
{
"epoch": 13.002496626180836,
"grad_norm": 0.00036974012618884444,
"learning_rate": 1.9305742989953516e-05,
"loss": 0.0,
"step": 9670
},
{
"epoch": 13.003171390013495,
"grad_norm": 0.0001296445552725345,
"learning_rate": 1.9268256110361376e-05,
"loss": 0.0078,
"step": 9680
},
{
"epoch": 13.003846153846155,
"grad_norm": 0.0002359377540415153,
"learning_rate": 1.923076923076923e-05,
"loss": 0.0,
"step": 9690
},
{
"epoch": 13.004520917678812,
"grad_norm": 0.0003535948053468019,
"learning_rate": 1.9193282351177087e-05,
"loss": 0.0,
"step": 9700
},
{
"epoch": 13.005195681511472,
"grad_norm": 0.00025236004148609936,
"learning_rate": 1.9155795471584946e-05,
"loss": 0.0,
"step": 9710
},
{
"epoch": 13.00587044534413,
"grad_norm": 0.0002863478730432689,
"learning_rate": 1.9118308591992802e-05,
"loss": 0.0,
"step": 9720
},
{
"epoch": 13.006545209176789,
"grad_norm": 0.00016143821994774044,
"learning_rate": 1.9080821712400658e-05,
"loss": 0.3645,
"step": 9730
},
{
"epoch": 13.007219973009446,
"grad_norm": 0.0004113702161703259,
"learning_rate": 1.9043334832808517e-05,
"loss": 0.0,
"step": 9740
},
{
"epoch": 13.007894736842106,
"grad_norm": 0.0008134804083965719,
"learning_rate": 1.9005847953216373e-05,
"loss": 0.0,
"step": 9750
},
{
"epoch": 13.008569500674763,
"grad_norm": 0.00027760997181758285,
"learning_rate": 1.8968361073624232e-05,
"loss": 0.0,
"step": 9760
},
{
"epoch": 13.009244264507423,
"grad_norm": 0.0016426608199253678,
"learning_rate": 1.8930874194032088e-05,
"loss": 0.0,
"step": 9770
},
{
"epoch": 13.00991902834008,
"grad_norm": 0.0008006367716006935,
"learning_rate": 1.8893387314439947e-05,
"loss": 0.0,
"step": 9780
},
{
"epoch": 13.01059379217274,
"grad_norm": 0.00025531640858389437,
"learning_rate": 1.8855900434847803e-05,
"loss": 0.0,
"step": 9790
},
{
"epoch": 13.011268556005398,
"grad_norm": 0.0003084157651755959,
"learning_rate": 1.8818413555255662e-05,
"loss": 0.0,
"step": 9800
},
{
"epoch": 13.011943319838057,
"grad_norm": 0.0007207695161923766,
"learning_rate": 1.8780926675663518e-05,
"loss": 0.0001,
"step": 9810
},
{
"epoch": 13.012618083670715,
"grad_norm": 0.00012202781363157555,
"learning_rate": 1.8743439796071377e-05,
"loss": 0.0,
"step": 9820
},
{
"epoch": 13.013292847503374,
"grad_norm": 0.0012473361566662788,
"learning_rate": 1.8705952916479233e-05,
"loss": 0.0,
"step": 9830
},
{
"epoch": 13.013967611336032,
"grad_norm": 0.0007895145681686699,
"learning_rate": 1.8668466036887092e-05,
"loss": 0.0,
"step": 9840
},
{
"epoch": 13.014642375168691,
"grad_norm": 0.0002717502065934241,
"learning_rate": 1.8630979157294948e-05,
"loss": 0.0,
"step": 9850
},
{
"epoch": 13.015317139001349,
"grad_norm": 0.0002320138446521014,
"learning_rate": 1.8593492277702803e-05,
"loss": 0.0,
"step": 9860
},
{
"epoch": 13.015991902834008,
"grad_norm": 0.0002716576855164021,
"learning_rate": 1.8556005398110663e-05,
"loss": 0.0,
"step": 9870
},
{
"epoch": 13.016666666666667,
"grad_norm": 7.131123129511252e-05,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.0,
"step": 9880
},
{
"epoch": 13.017341430499325,
"grad_norm": 0.00045431696344166994,
"learning_rate": 1.8481031638926377e-05,
"loss": 0.0,
"step": 9890
},
{
"epoch": 13.018016194331985,
"grad_norm": 0.00013243043213151395,
"learning_rate": 1.8443544759334233e-05,
"loss": 0.0,
"step": 9900
},
{
"epoch": 13.018690958164642,
"grad_norm": 0.00031196267809718847,
"learning_rate": 1.8406057879742092e-05,
"loss": 0.0,
"step": 9910
},
{
"epoch": 13.019365721997302,
"grad_norm": 0.000940505473408848,
"learning_rate": 1.8368571000149948e-05,
"loss": 0.0,
"step": 9920
},
{
"epoch": 13.02004048582996,
"grad_norm": 0.0002774264430627227,
"learning_rate": 1.8331084120557807e-05,
"loss": 0.0,
"step": 9930
},
{
"epoch": 13.020715249662619,
"grad_norm": 0.0002633021795190871,
"learning_rate": 1.8293597240965663e-05,
"loss": 0.0,
"step": 9940
},
{
"epoch": 13.021390013495276,
"grad_norm": 7.044156518531963e-05,
"learning_rate": 1.8256110361373522e-05,
"loss": 0.0,
"step": 9950
},
{
"epoch": 13.022064777327936,
"grad_norm": 0.00017661662423051894,
"learning_rate": 1.8218623481781378e-05,
"loss": 0.0,
"step": 9960
},
{
"epoch": 13.022739541160593,
"grad_norm": 0.00028747491887770593,
"learning_rate": 1.8181136602189237e-05,
"loss": 0.0,
"step": 9970
},
{
"epoch": 13.023414304993253,
"grad_norm": 0.00039829890010878444,
"learning_rate": 1.8143649722597093e-05,
"loss": 0.0,
"step": 9980
},
{
"epoch": 13.02408906882591,
"grad_norm": 0.00022789667127653956,
"learning_rate": 1.810616284300495e-05,
"loss": 0.0,
"step": 9990
},
{
"epoch": 13.02476383265857,
"grad_norm": 0.00028411843231879175,
"learning_rate": 1.8068675963412808e-05,
"loss": 0.0,
"step": 10000
},
{
"epoch": 13.025438596491227,
"grad_norm": 0.0002080064732581377,
"learning_rate": 1.8031189083820664e-05,
"loss": 0.0,
"step": 10010
},
{
"epoch": 13.026113360323887,
"grad_norm": 0.00023453705944120884,
"learning_rate": 1.7993702204228523e-05,
"loss": 0.0096,
"step": 10020
},
{
"epoch": 13.026788124156544,
"grad_norm": 0.00010610045865178108,
"learning_rate": 1.795621532463638e-05,
"loss": 0.0,
"step": 10030
},
{
"epoch": 13.027462887989204,
"grad_norm": 0.0001514716714154929,
"learning_rate": 1.7918728445044234e-05,
"loss": 0.0,
"step": 10040
},
{
"epoch": 13.028137651821863,
"grad_norm": 0.00033169661764986813,
"learning_rate": 1.7881241565452094e-05,
"loss": 0.0,
"step": 10050
},
{
"epoch": 13.02881241565452,
"grad_norm": 0.00013784744078293443,
"learning_rate": 1.784375468585995e-05,
"loss": 0.0,
"step": 10060
},
{
"epoch": 13.02948717948718,
"grad_norm": 8.872824400896206e-05,
"learning_rate": 1.7806267806267805e-05,
"loss": 0.0,
"step": 10070
},
{
"epoch": 13.030161943319838,
"grad_norm": 0.00037344591692090034,
"learning_rate": 1.7768780926675664e-05,
"loss": 0.0,
"step": 10080
},
{
"epoch": 13.030836707152497,
"grad_norm": 0.0003687291464302689,
"learning_rate": 1.773129404708352e-05,
"loss": 0.0,
"step": 10090
},
{
"epoch": 13.031511470985155,
"grad_norm": 0.00017588827176950872,
"learning_rate": 1.769380716749138e-05,
"loss": 0.0,
"step": 10100
},
{
"epoch": 13.032186234817814,
"grad_norm": 0.00026350162806920707,
"learning_rate": 1.7656320287899235e-05,
"loss": 0.0,
"step": 10110
},
{
"epoch": 13.032860998650472,
"grad_norm": 9.849424532148987e-05,
"learning_rate": 1.761883340830709e-05,
"loss": 0.0,
"step": 10120
},
{
"epoch": 13.033535762483131,
"grad_norm": 0.00028973835287615657,
"learning_rate": 1.758134652871495e-05,
"loss": 0.0,
"step": 10130
},
{
"epoch": 13.034210526315789,
"grad_norm": 0.00022602990793529898,
"learning_rate": 1.7543859649122806e-05,
"loss": 0.0,
"step": 10140
},
{
"epoch": 13.034885290148448,
"grad_norm": 0.000543447386007756,
"learning_rate": 1.7506372769530665e-05,
"loss": 0.0,
"step": 10150
},
{
"epoch": 13.035560053981106,
"grad_norm": 0.0006508603109978139,
"learning_rate": 1.746888588993852e-05,
"loss": 0.0,
"step": 10160
},
{
"epoch": 13.036234817813765,
"grad_norm": 6.645211396971717e-05,
"learning_rate": 1.743139901034638e-05,
"loss": 0.4286,
"step": 10170
},
{
"epoch": 13.036909581646423,
"grad_norm": 0.00017078538076020777,
"learning_rate": 1.7393912130754236e-05,
"loss": 0.0,
"step": 10180
},
{
"epoch": 13.037584345479083,
"grad_norm": 0.0010123905958607793,
"learning_rate": 1.7356425251162095e-05,
"loss": 0.0,
"step": 10190
},
{
"epoch": 13.03825910931174,
"grad_norm": 0.00027252710424363613,
"learning_rate": 1.731893837156995e-05,
"loss": 0.0,
"step": 10200
},
{
"epoch": 13.0389338731444,
"grad_norm": 0.00013458417379297316,
"learning_rate": 1.728145149197781e-05,
"loss": 0.0,
"step": 10210
},
{
"epoch": 13.039608636977057,
"grad_norm": 0.00022678014647681266,
"learning_rate": 1.7243964612385665e-05,
"loss": 0.0,
"step": 10220
},
{
"epoch": 13.040283400809717,
"grad_norm": 0.00022790237562730908,
"learning_rate": 1.720647773279352e-05,
"loss": 0.0,
"step": 10230
},
{
"epoch": 13.040958164642376,
"grad_norm": 0.0002460694231558591,
"learning_rate": 1.716899085320138e-05,
"loss": 0.0,
"step": 10240
},
{
"epoch": 13.041632928475034,
"grad_norm": 0.00018956181884277612,
"learning_rate": 1.7131503973609236e-05,
"loss": 0.0,
"step": 10250
},
{
"epoch": 13.042307692307693,
"grad_norm": 0.00017144810408353806,
"learning_rate": 1.7094017094017095e-05,
"loss": 0.0,
"step": 10260
},
{
"epoch": 13.04298245614035,
"grad_norm": 0.0002925437001977116,
"learning_rate": 1.705653021442495e-05,
"loss": 0.0,
"step": 10270
},
{
"epoch": 13.04365721997301,
"grad_norm": 0.0002330515708308667,
"learning_rate": 1.701904333483281e-05,
"loss": 0.013,
"step": 10280
},
{
"epoch": 13.044331983805668,
"grad_norm": 0.00011631449160631746,
"learning_rate": 1.6981556455240666e-05,
"loss": 0.0,
"step": 10290
},
{
"epoch": 13.045006747638327,
"grad_norm": 0.0003174786688759923,
"learning_rate": 1.6944069575648525e-05,
"loss": 0.0,
"step": 10300
},
{
"epoch": 13.045681511470985,
"grad_norm": 0.0001684718154137954,
"learning_rate": 1.690658269605638e-05,
"loss": 0.0,
"step": 10310
},
{
"epoch": 13.046356275303644,
"grad_norm": 0.001750526949763298,
"learning_rate": 1.686909581646424e-05,
"loss": 0.0,
"step": 10320
},
{
"epoch": 13.047031039136302,
"grad_norm": 0.00024045804457273334,
"learning_rate": 1.6831608936872096e-05,
"loss": 0.0,
"step": 10330
},
{
"epoch": 13.047705802968961,
"grad_norm": 0.0006596571765840054,
"learning_rate": 1.6794122057279955e-05,
"loss": 0.0,
"step": 10340
},
{
"epoch": 13.048380566801619,
"grad_norm": 0.001252808142453432,
"learning_rate": 1.675663517768781e-05,
"loss": 0.3996,
"step": 10350
},
{
"epoch": 13.049055330634278,
"grad_norm": 0.0002453498891554773,
"learning_rate": 1.6719148298095667e-05,
"loss": 0.0,
"step": 10360
},
{
"epoch": 13.049730094466936,
"grad_norm": 0.0005040777614340186,
"learning_rate": 1.6681661418503526e-05,
"loss": 0.0,
"step": 10370
},
{
"epoch": 13.05,
"eval_accuracy": 0.9285714285714286,
"eval_f1": 0.9285714285714286,
"eval_loss": 0.5062018632888794,
"eval_runtime": 72.8565,
"eval_samples_per_second": 1.537,
"eval_steps_per_second": 1.537,
"step": 10374
},
{
"epoch": 14.000404858299595,
"grad_norm": 6.942117033759132e-05,
"learning_rate": 1.664417453891138e-05,
"loss": 0.0,
"step": 10380
},
{
"epoch": 14.001079622132254,
"grad_norm": 0.0004584739508572966,
"learning_rate": 1.660668765931924e-05,
"loss": 0.0,
"step": 10390
},
{
"epoch": 14.001754385964912,
"grad_norm": 0.0002316083264304325,
"learning_rate": 1.6569200779727097e-05,
"loss": 0.2714,
"step": 10400
},
{
"epoch": 14.002429149797571,
"grad_norm": 0.00024051779473666102,
"learning_rate": 1.6531713900134956e-05,
"loss": 0.0,
"step": 10410
},
{
"epoch": 14.003103913630229,
"grad_norm": 0.0008334843441843987,
"learning_rate": 1.649422702054281e-05,
"loss": 0.0,
"step": 10420
},
{
"epoch": 14.003778677462888,
"grad_norm": 0.00020968765602447093,
"learning_rate": 1.6456740140950667e-05,
"loss": 0.0178,
"step": 10430
},
{
"epoch": 14.004453441295546,
"grad_norm": 0.00022330092906486243,
"learning_rate": 1.6419253261358526e-05,
"loss": 0.0,
"step": 10440
},
{
"epoch": 14.005128205128205,
"grad_norm": 0.00021671153081115335,
"learning_rate": 1.6381766381766382e-05,
"loss": 0.009,
"step": 10450
},
{
"epoch": 14.005802968960865,
"grad_norm": 0.00033940834691748023,
"learning_rate": 1.6344279502174238e-05,
"loss": 0.0,
"step": 10460
},
{
"epoch": 14.006477732793522,
"grad_norm": 0.00048104580491781235,
"learning_rate": 1.6306792622582097e-05,
"loss": 0.0,
"step": 10470
},
{
"epoch": 14.007152496626182,
"grad_norm": 0.00029779202304780483,
"learning_rate": 1.6269305742989953e-05,
"loss": 0.0,
"step": 10480
},
{
"epoch": 14.00782726045884,
"grad_norm": 0.0004120915837120265,
"learning_rate": 1.623181886339781e-05,
"loss": 0.0,
"step": 10490
},
{
"epoch": 14.008502024291499,
"grad_norm": 0.0003056660061702132,
"learning_rate": 1.6194331983805668e-05,
"loss": 0.0,
"step": 10500
},
{
"epoch": 14.009176788124156,
"grad_norm": 0.000378406752133742,
"learning_rate": 1.6156845104213524e-05,
"loss": 0.0039,
"step": 10510
},
{
"epoch": 14.009851551956816,
"grad_norm": 0.0005049049505032599,
"learning_rate": 1.6119358224621383e-05,
"loss": 0.0,
"step": 10520
},
{
"epoch": 14.010526315789473,
"grad_norm": 0.00025037440354935825,
"learning_rate": 1.608187134502924e-05,
"loss": 0.0,
"step": 10530
},
{
"epoch": 14.011201079622133,
"grad_norm": 0.00037562023499049246,
"learning_rate": 1.6044384465437098e-05,
"loss": 0.0,
"step": 10540
},
{
"epoch": 14.01187584345479,
"grad_norm": 0.0003121852350886911,
"learning_rate": 1.6006897585844954e-05,
"loss": 0.0,
"step": 10550
},
{
"epoch": 14.01255060728745,
"grad_norm": 0.0003679589426610619,
"learning_rate": 1.5969410706252813e-05,
"loss": 0.0,
"step": 10560
},
{
"epoch": 14.013225371120107,
"grad_norm": 0.00028154728352092206,
"learning_rate": 1.593192382666067e-05,
"loss": 0.0,
"step": 10570
},
{
"epoch": 14.013900134952767,
"grad_norm": 0.00020654525724239647,
"learning_rate": 1.5894436947068528e-05,
"loss": 0.0,
"step": 10580
},
{
"epoch": 14.014574898785424,
"grad_norm": 0.00034096045419573784,
"learning_rate": 1.5856950067476383e-05,
"loss": 0.0,
"step": 10590
},
{
"epoch": 14.015249662618084,
"grad_norm": 0.00026030451408587396,
"learning_rate": 1.5819463187884243e-05,
"loss": 0.0,
"step": 10600
},
{
"epoch": 14.015924426450741,
"grad_norm": 8.031875040614977e-05,
"learning_rate": 1.57819763082921e-05,
"loss": 0.0,
"step": 10610
},
{
"epoch": 14.0165991902834,
"grad_norm": 0.000621096114628017,
"learning_rate": 1.5744489428699954e-05,
"loss": 0.0,
"step": 10620
},
{
"epoch": 14.01727395411606,
"grad_norm": 0.000524580420460552,
"learning_rate": 1.5707002549107813e-05,
"loss": 0.0,
"step": 10630
},
{
"epoch": 14.017948717948718,
"grad_norm": 0.00011200064182048663,
"learning_rate": 1.566951566951567e-05,
"loss": 0.0,
"step": 10640
},
{
"epoch": 14.018623481781377,
"grad_norm": 0.00032178129185922444,
"learning_rate": 1.5632028789923528e-05,
"loss": 0.0,
"step": 10650
},
{
"epoch": 14.019298245614035,
"grad_norm": 0.00024140749883372337,
"learning_rate": 1.5594541910331384e-05,
"loss": 0.0,
"step": 10660
},
{
"epoch": 14.019973009446694,
"grad_norm": 0.00022133818129077554,
"learning_rate": 1.5557055030739243e-05,
"loss": 0.0,
"step": 10670
},
{
"epoch": 14.020647773279352,
"grad_norm": 0.0002797930792439729,
"learning_rate": 1.55195681511471e-05,
"loss": 0.0,
"step": 10680
},
{
"epoch": 14.021322537112011,
"grad_norm": 0.0002334755117772147,
"learning_rate": 1.5482081271554958e-05,
"loss": 0.0,
"step": 10690
},
{
"epoch": 14.021997300944669,
"grad_norm": 0.0002469551400281489,
"learning_rate": 1.5444594391962814e-05,
"loss": 0.0,
"step": 10700
},
{
"epoch": 14.022672064777328,
"grad_norm": 8.5323081293609e-05,
"learning_rate": 1.5407107512370673e-05,
"loss": 0.0,
"step": 10710
},
{
"epoch": 14.023346828609986,
"grad_norm": 0.00019482328207232058,
"learning_rate": 1.536962063277853e-05,
"loss": 0.0,
"step": 10720
},
{
"epoch": 14.024021592442645,
"grad_norm": 0.00021449346968438476,
"learning_rate": 1.5332133753186388e-05,
"loss": 0.4463,
"step": 10730
},
{
"epoch": 14.024696356275303,
"grad_norm": 0.00064310641027987,
"learning_rate": 1.5294646873594244e-05,
"loss": 0.0,
"step": 10740
},
{
"epoch": 14.025371120107962,
"grad_norm": 0.00020890735322609544,
"learning_rate": 1.52571599940021e-05,
"loss": 0.0,
"step": 10750
},
{
"epoch": 14.02604588394062,
"grad_norm": 0.0005201689782552421,
"learning_rate": 1.5219673114409957e-05,
"loss": 0.0,
"step": 10760
},
{
"epoch": 14.02672064777328,
"grad_norm": 0.0005751597345806658,
"learning_rate": 1.5182186234817813e-05,
"loss": 0.0,
"step": 10770
},
{
"epoch": 14.027395411605937,
"grad_norm": 0.0009388537146151066,
"learning_rate": 1.5144699355225672e-05,
"loss": 0.0,
"step": 10780
},
{
"epoch": 14.028070175438597,
"grad_norm": 0.0005402613314799964,
"learning_rate": 1.5107212475633528e-05,
"loss": 0.0,
"step": 10790
},
{
"epoch": 14.028744939271254,
"grad_norm": 0.00010339209256926551,
"learning_rate": 1.5069725596041387e-05,
"loss": 0.0,
"step": 10800
},
{
"epoch": 14.029419703103914,
"grad_norm": 0.0005152708035893738,
"learning_rate": 1.5032238716449243e-05,
"loss": 0.0,
"step": 10810
},
{
"epoch": 14.030094466936573,
"grad_norm": 0.0007186134462244809,
"learning_rate": 1.4994751836857102e-05,
"loss": 0.0,
"step": 10820
},
{
"epoch": 14.03076923076923,
"grad_norm": 0.0005925975274294615,
"learning_rate": 1.4957264957264958e-05,
"loss": 0.0,
"step": 10830
},
{
"epoch": 14.03144399460189,
"grad_norm": 0.00019110101857222617,
"learning_rate": 1.4919778077672817e-05,
"loss": 0.0,
"step": 10840
},
{
"epoch": 14.032118758434548,
"grad_norm": 0.00018360813555773348,
"learning_rate": 1.4882291198080673e-05,
"loss": 0.0,
"step": 10850
},
{
"epoch": 14.032793522267207,
"grad_norm": 0.00020973542996216565,
"learning_rate": 1.4844804318488532e-05,
"loss": 0.0,
"step": 10860
},
{
"epoch": 14.033468286099865,
"grad_norm": 0.0007199271931312978,
"learning_rate": 1.4807317438896387e-05,
"loss": 0.0,
"step": 10870
},
{
"epoch": 14.034143049932524,
"grad_norm": 9.265208791475743e-05,
"learning_rate": 1.4769830559304243e-05,
"loss": 0.0,
"step": 10880
},
{
"epoch": 14.034817813765182,
"grad_norm": 8.818476635497063e-05,
"learning_rate": 1.4732343679712102e-05,
"loss": 0.0,
"step": 10890
},
{
"epoch": 14.035492577597841,
"grad_norm": 0.00018744076078291982,
"learning_rate": 1.4694856800119958e-05,
"loss": 0.0,
"step": 10900
},
{
"epoch": 14.036167341430499,
"grad_norm": 0.0003558373427949846,
"learning_rate": 1.4657369920527816e-05,
"loss": 0.0,
"step": 10910
},
{
"epoch": 14.036842105263158,
"grad_norm": 0.00015756840002723038,
"learning_rate": 1.4619883040935673e-05,
"loss": 0.0,
"step": 10920
},
{
"epoch": 14.037516869095816,
"grad_norm": 0.00011693660053424537,
"learning_rate": 1.458239616134353e-05,
"loss": 0.0,
"step": 10930
},
{
"epoch": 14.038191632928475,
"grad_norm": 0.00013403450429905206,
"learning_rate": 1.4544909281751386e-05,
"loss": 0.0,
"step": 10940
},
{
"epoch": 14.038866396761133,
"grad_norm": 0.00014881876995787024,
"learning_rate": 1.4507422402159246e-05,
"loss": 0.0,
"step": 10950
},
{
"epoch": 14.039541160593792,
"grad_norm": 0.00014527350140269846,
"learning_rate": 1.4469935522567101e-05,
"loss": 0.0,
"step": 10960
},
{
"epoch": 14.04021592442645,
"grad_norm": 0.00016278887051157653,
"learning_rate": 1.443244864297496e-05,
"loss": 0.0,
"step": 10970
},
{
"epoch": 14.04089068825911,
"grad_norm": 8.402692037634552e-05,
"learning_rate": 1.4394961763382816e-05,
"loss": 0.0,
"step": 10980
},
{
"epoch": 14.041565452091769,
"grad_norm": 0.00017224009206984192,
"learning_rate": 1.4357474883790675e-05,
"loss": 0.0,
"step": 10990
},
{
"epoch": 14.042240215924426,
"grad_norm": 0.0005430065211839974,
"learning_rate": 1.4319988004198531e-05,
"loss": 0.0,
"step": 11000
},
{
"epoch": 14.042914979757086,
"grad_norm": 0.0009919034782797098,
"learning_rate": 1.4282501124606387e-05,
"loss": 0.0,
"step": 11010
},
{
"epoch": 14.043589743589743,
"grad_norm": 0.0003526155778672546,
"learning_rate": 1.4245014245014246e-05,
"loss": 0.0,
"step": 11020
},
{
"epoch": 14.044264507422403,
"grad_norm": 9.54778806772083e-05,
"learning_rate": 1.4207527365422102e-05,
"loss": 0.0,
"step": 11030
},
{
"epoch": 14.04493927125506,
"grad_norm": 0.0001671431091381237,
"learning_rate": 1.4170040485829961e-05,
"loss": 0.0,
"step": 11040
},
{
"epoch": 14.04561403508772,
"grad_norm": 0.00022146198898553848,
"learning_rate": 1.4132553606237817e-05,
"loss": 0.3607,
"step": 11050
},
{
"epoch": 14.046288798920378,
"grad_norm": 0.0001517270429758355,
"learning_rate": 1.4095066726645676e-05,
"loss": 0.0,
"step": 11060
},
{
"epoch": 14.046963562753037,
"grad_norm": 0.0006123693310655653,
"learning_rate": 1.4057579847053532e-05,
"loss": 0.0,
"step": 11070
},
{
"epoch": 14.047638326585695,
"grad_norm": 0.001610752660781145,
"learning_rate": 1.4020092967461391e-05,
"loss": 0.0,
"step": 11080
},
{
"epoch": 14.048313090418354,
"grad_norm": 0.0001440331107005477,
"learning_rate": 1.3982606087869247e-05,
"loss": 0.0,
"step": 11090
},
{
"epoch": 14.048987854251012,
"grad_norm": 0.0007454275619238615,
"learning_rate": 1.3945119208277104e-05,
"loss": 0.0,
"step": 11100
},
{
"epoch": 14.049662618083671,
"grad_norm": 0.0003447613853495568,
"learning_rate": 1.390763232868496e-05,
"loss": 0.0,
"step": 11110
},
{
"epoch": 14.05,
"eval_accuracy": 0.9375,
"eval_f1": 0.9373365167161658,
"eval_loss": 0.5185861587524414,
"eval_runtime": 73.7028,
"eval_samples_per_second": 1.52,
"eval_steps_per_second": 1.52,
"step": 11115
},
{
"epoch": 15.001349527665317,
"eval_accuracy": 0.9023255813953488,
"eval_f1": 0.9016146713373171,
"eval_loss": 0.7568970918655396,
"eval_runtime": 137.2112,
"eval_samples_per_second": 1.567,
"eval_steps_per_second": 1.567,
"step": 11116
},
{
"epoch": 15.001349527665317,
"step": 11116,
"total_flos": 2.8480212872085897e+19,
"train_loss": 5.147429101647338e-09,
"train_runtime": 143.5742,
"train_samples_per_second": 5.161,
"train_steps_per_second": 5.161
},
{
"epoch": 15.001349527665317,
"eval_accuracy": 0.9375,
"eval_f1": 0.9373365167161658,
"eval_loss": 0.5185860991477966,
"eval_runtime": 72.3734,
"eval_samples_per_second": 1.548,
"eval_steps_per_second": 1.548,
"step": 11116
},
{
"epoch": 15.001349527665317,
"eval_accuracy": 0.9023255813953488,
"eval_f1": 0.9016146713373171,
"eval_loss": 0.756963849067688,
"eval_runtime": 137.6677,
"eval_samples_per_second": 1.562,
"eval_steps_per_second": 1.562,
"step": 11116
}
],
"logging_steps": 10,
"max_steps": 741,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8480212872085897e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}