blood-beit-base-finetuned / trainer_state.json
{
"best_metric": 0.9707943925233645,
"best_model_checkpoint": "beit-base-patch16-224-pt22k-ft22k-finetuned-lora-medmnistv2/checkpoint-1870",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 1870,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.053475935828877004,
"grad_norm": 4.650041580200195,
"learning_rate": 0.004973262032085562,
"loss": 1.5063,
"step": 10
},
{
"epoch": 0.10695187165775401,
"grad_norm": 3.0658373832702637,
"learning_rate": 0.004946524064171123,
"loss": 0.8711,
"step": 20
},
{
"epoch": 0.16042780748663102,
"grad_norm": 2.9676272869110107,
"learning_rate": 0.004919786096256685,
"loss": 0.8,
"step": 30
},
{
"epoch": 0.21390374331550802,
"grad_norm": 2.5159189701080322,
"learning_rate": 0.004893048128342246,
"loss": 0.7794,
"step": 40
},
{
"epoch": 0.26737967914438504,
"grad_norm": 2.4576735496520996,
"learning_rate": 0.004868983957219251,
"loss": 0.8748,
"step": 50
},
{
"epoch": 0.32085561497326204,
"grad_norm": 1.9533675909042358,
"learning_rate": 0.004842245989304813,
"loss": 0.6213,
"step": 60
},
{
"epoch": 0.37433155080213903,
"grad_norm": 3.91825795173645,
"learning_rate": 0.004815508021390374,
"loss": 0.6883,
"step": 70
},
{
"epoch": 0.42780748663101603,
"grad_norm": 3.228422164916992,
"learning_rate": 0.004788770053475936,
"loss": 0.7019,
"step": 80
},
{
"epoch": 0.48128342245989303,
"grad_norm": 4.45206356048584,
"learning_rate": 0.004762032085561497,
"loss": 0.5394,
"step": 90
},
{
"epoch": 0.5347593582887701,
"grad_norm": 2.184957504272461,
"learning_rate": 0.004735294117647059,
"loss": 0.5543,
"step": 100
},
{
"epoch": 0.5882352941176471,
"grad_norm": 2.246079206466675,
"learning_rate": 0.00470855614973262,
"loss": 0.5738,
"step": 110
},
{
"epoch": 0.6417112299465241,
"grad_norm": 2.6914820671081543,
"learning_rate": 0.004681818181818182,
"loss": 0.6209,
"step": 120
},
{
"epoch": 0.6951871657754011,
"grad_norm": 2.5458545684814453,
"learning_rate": 0.0046550802139037435,
"loss": 0.5597,
"step": 130
},
{
"epoch": 0.7486631016042781,
"grad_norm": 2.676391363143921,
"learning_rate": 0.004628342245989305,
"loss": 0.5273,
"step": 140
},
{
"epoch": 0.8021390374331551,
"grad_norm": 2.5059385299682617,
"learning_rate": 0.0046016042780748665,
"loss": 0.5199,
"step": 150
},
{
"epoch": 0.8556149732620321,
"grad_norm": 1.451249122619629,
"learning_rate": 0.004574866310160428,
"loss": 0.5509,
"step": 160
},
{
"epoch": 0.9090909090909091,
"grad_norm": 2.5957276821136475,
"learning_rate": 0.00454812834224599,
"loss": 0.5336,
"step": 170
},
{
"epoch": 0.9625668449197861,
"grad_norm": 2.4229955673217773,
"learning_rate": 0.004521390374331551,
"loss": 0.4657,
"step": 180
},
{
"epoch": 1.0,
"eval_accuracy": 0.9094626168224299,
"eval_f1": 0.8972949130385568,
"eval_loss": 0.2451503425836563,
"eval_precision": 0.8964084875867973,
"eval_recall": 0.9082806506629539,
"eval_runtime": 10.2386,
"eval_samples_per_second": 167.21,
"eval_steps_per_second": 10.451,
"step": 187
},
{
"epoch": 1.0160427807486632,
"grad_norm": 2.3994851112365723,
"learning_rate": 0.004494652406417113,
"loss": 0.4772,
"step": 190
},
{
"epoch": 1.0695187165775402,
"grad_norm": 1.985571265220642,
"learning_rate": 0.004467914438502674,
"loss": 0.5995,
"step": 200
},
{
"epoch": 1.1229946524064172,
"grad_norm": 2.3798632621765137,
"learning_rate": 0.004441176470588235,
"loss": 0.5686,
"step": 210
},
{
"epoch": 1.1764705882352942,
"grad_norm": 3.1128406524658203,
"learning_rate": 0.004414438502673797,
"loss": 0.4984,
"step": 220
},
{
"epoch": 1.2299465240641712,
"grad_norm": 2.8572049140930176,
"learning_rate": 0.004387700534759359,
"loss": 0.5027,
"step": 230
},
{
"epoch": 1.2834224598930482,
"grad_norm": 5.178213119506836,
"learning_rate": 0.00436096256684492,
"loss": 0.4864,
"step": 240
},
{
"epoch": 1.3368983957219251,
"grad_norm": 1.9515773057937622,
"learning_rate": 0.004334224598930481,
"loss": 0.4528,
"step": 250
},
{
"epoch": 1.3903743315508021,
"grad_norm": 3.023959159851074,
"learning_rate": 0.0043074866310160425,
"loss": 0.5513,
"step": 260
},
{
"epoch": 1.4438502673796791,
"grad_norm": 2.371218204498291,
"learning_rate": 0.004280748663101605,
"loss": 0.442,
"step": 270
},
{
"epoch": 1.4973262032085561,
"grad_norm": 2.111191987991333,
"learning_rate": 0.004254010695187166,
"loss": 0.6163,
"step": 280
},
{
"epoch": 1.5508021390374331,
"grad_norm": 2.123419761657715,
"learning_rate": 0.004227272727272727,
"loss": 0.5522,
"step": 290
},
{
"epoch": 1.6042780748663101,
"grad_norm": 1.6425999402999878,
"learning_rate": 0.004200534759358289,
"loss": 0.4601,
"step": 300
},
{
"epoch": 1.6577540106951871,
"grad_norm": 3.847395420074463,
"learning_rate": 0.00417379679144385,
"loss": 0.5434,
"step": 310
},
{
"epoch": 1.7112299465240641,
"grad_norm": 1.8732799291610718,
"learning_rate": 0.004147058823529412,
"loss": 0.4952,
"step": 320
},
{
"epoch": 1.7647058823529411,
"grad_norm": 1.4881893396377563,
"learning_rate": 0.004120320855614973,
"loss": 0.4926,
"step": 330
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.9936500787734985,
"learning_rate": 0.004093582887700535,
"loss": 0.4582,
"step": 340
},
{
"epoch": 1.8716577540106951,
"grad_norm": 4.784737586975098,
"learning_rate": 0.004066844919786096,
"loss": 0.4839,
"step": 350
},
{
"epoch": 1.9251336898395723,
"grad_norm": 2.403982162475586,
"learning_rate": 0.004040106951871658,
"loss": 0.5868,
"step": 360
},
{
"epoch": 1.9786096256684491,
"grad_norm": 1.7464922666549683,
"learning_rate": 0.004013368983957219,
"loss": 0.4327,
"step": 370
},
{
"epoch": 2.0,
"eval_accuracy": 0.9182242990654206,
"eval_f1": 0.9007413709436916,
"eval_loss": 0.21109923720359802,
"eval_precision": 0.9299210483133126,
"eval_recall": 0.8921235393972065,
"eval_runtime": 10.4332,
"eval_samples_per_second": 164.091,
"eval_steps_per_second": 10.256,
"step": 374
},
{
"epoch": 2.0320855614973263,
"grad_norm": 1.444707989692688,
"learning_rate": 0.003986631016042781,
"loss": 0.478,
"step": 380
},
{
"epoch": 2.085561497326203,
"grad_norm": 1.4123905897140503,
"learning_rate": 0.003959893048128342,
"loss": 0.5,
"step": 390
},
{
"epoch": 2.1390374331550803,
"grad_norm": 2.96335768699646,
"learning_rate": 0.003933155080213904,
"loss": 0.5348,
"step": 400
},
{
"epoch": 2.192513368983957,
"grad_norm": 1.4397529363632202,
"learning_rate": 0.0039064171122994654,
"loss": 0.4571,
"step": 410
},
{
"epoch": 2.2459893048128343,
"grad_norm": 1.821366548538208,
"learning_rate": 0.0038796791443850265,
"loss": 0.4982,
"step": 420
},
{
"epoch": 2.299465240641711,
"grad_norm": 2.112130641937256,
"learning_rate": 0.0038529411764705885,
"loss": 0.4343,
"step": 430
},
{
"epoch": 2.3529411764705883,
"grad_norm": 1.942734956741333,
"learning_rate": 0.00382620320855615,
"loss": 0.5078,
"step": 440
},
{
"epoch": 2.406417112299465,
"grad_norm": 2.774502754211426,
"learning_rate": 0.003799465240641711,
"loss": 0.4016,
"step": 450
},
{
"epoch": 2.4598930481283423,
"grad_norm": 2.139463424682617,
"learning_rate": 0.0037727272727272726,
"loss": 0.5415,
"step": 460
},
{
"epoch": 2.5133689839572195,
"grad_norm": 1.9148341417312622,
"learning_rate": 0.003745989304812834,
"loss": 0.4417,
"step": 470
},
{
"epoch": 2.5668449197860963,
"grad_norm": 1.9109567403793335,
"learning_rate": 0.003719251336898396,
"loss": 0.4273,
"step": 480
},
{
"epoch": 2.620320855614973,
"grad_norm": 2.2219059467315674,
"learning_rate": 0.0036925133689839572,
"loss": 0.5218,
"step": 490
},
{
"epoch": 2.6737967914438503,
"grad_norm": 3.378606081008911,
"learning_rate": 0.0036657754010695188,
"loss": 0.4318,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.668760061264038,
"learning_rate": 0.0036390374331550803,
"loss": 0.4447,
"step": 510
},
{
"epoch": 2.7807486631016043,
"grad_norm": 1.830342411994934,
"learning_rate": 0.0036122994652406414,
"loss": 0.4507,
"step": 520
},
{
"epoch": 2.834224598930481,
"grad_norm": 2.2146425247192383,
"learning_rate": 0.0035855614973262034,
"loss": 0.4127,
"step": 530
},
{
"epoch": 2.8877005347593583,
"grad_norm": 1.3959295749664307,
"learning_rate": 0.003558823529411765,
"loss": 0.4353,
"step": 540
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.844604253768921,
"learning_rate": 0.0035320855614973264,
"loss": 0.3488,
"step": 550
},
{
"epoch": 2.9946524064171123,
"grad_norm": 1.421885371208191,
"learning_rate": 0.0035053475935828875,
"loss": 0.3977,
"step": 560
},
{
"epoch": 3.0,
"eval_accuracy": 0.9339953271028038,
"eval_f1": 0.924420495312186,
"eval_loss": 0.17427141964435577,
"eval_precision": 0.9228598461246502,
"eval_recall": 0.928247943129569,
"eval_runtime": 9.981,
"eval_samples_per_second": 171.527,
"eval_steps_per_second": 10.72,
"step": 561
},
{
"epoch": 3.0481283422459895,
"grad_norm": 2.2883894443511963,
"learning_rate": 0.003478609625668449,
"loss": 0.3909,
"step": 570
},
{
"epoch": 3.1016042780748663,
"grad_norm": 2.4753079414367676,
"learning_rate": 0.003451871657754011,
"loss": 0.4352,
"step": 580
},
{
"epoch": 3.1550802139037435,
"grad_norm": 2.298736572265625,
"learning_rate": 0.0034251336898395725,
"loss": 0.4641,
"step": 590
},
{
"epoch": 3.2085561497326203,
"grad_norm": 1.4368634223937988,
"learning_rate": 0.0033983957219251336,
"loss": 0.4225,
"step": 600
},
{
"epoch": 3.2620320855614975,
"grad_norm": 1.462842583656311,
"learning_rate": 0.003371657754010695,
"loss": 0.3958,
"step": 610
},
{
"epoch": 3.3155080213903743,
"grad_norm": 2.449066638946533,
"learning_rate": 0.0033449197860962567,
"loss": 0.3784,
"step": 620
},
{
"epoch": 3.3689839572192515,
"grad_norm": 1.5616710186004639,
"learning_rate": 0.0033181818181818186,
"loss": 0.4476,
"step": 630
},
{
"epoch": 3.4224598930481283,
"grad_norm": 2.284454345703125,
"learning_rate": 0.0032914438502673797,
"loss": 0.3725,
"step": 640
},
{
"epoch": 3.4759358288770055,
"grad_norm": 1.5143663883209229,
"learning_rate": 0.0032647058823529413,
"loss": 0.4597,
"step": 650
},
{
"epoch": 3.5294117647058822,
"grad_norm": 1.6112128496170044,
"learning_rate": 0.003237967914438503,
"loss": 0.4198,
"step": 660
},
{
"epoch": 3.5828877005347595,
"grad_norm": 1.2612804174423218,
"learning_rate": 0.003211229946524064,
"loss": 0.4785,
"step": 670
},
{
"epoch": 3.6363636363636362,
"grad_norm": 2.0233500003814697,
"learning_rate": 0.0031844919786096254,
"loss": 0.4276,
"step": 680
},
{
"epoch": 3.6898395721925135,
"grad_norm": 1.2161093950271606,
"learning_rate": 0.0031577540106951874,
"loss": 0.3865,
"step": 690
},
{
"epoch": 3.7433155080213902,
"grad_norm": 1.835656762123108,
"learning_rate": 0.003131016042780749,
"loss": 0.3202,
"step": 700
},
{
"epoch": 3.7967914438502675,
"grad_norm": 2.9908785820007324,
"learning_rate": 0.00310427807486631,
"loss": 0.3879,
"step": 710
},
{
"epoch": 3.8502673796791442,
"grad_norm": 1.587223768234253,
"learning_rate": 0.0030775401069518715,
"loss": 0.3682,
"step": 720
},
{
"epoch": 3.9037433155080214,
"grad_norm": 2.0039021968841553,
"learning_rate": 0.003050802139037433,
"loss": 0.4148,
"step": 730
},
{
"epoch": 3.9572192513368982,
"grad_norm": 1.8037409782409668,
"learning_rate": 0.003024064171122995,
"loss": 0.3318,
"step": 740
},
{
"epoch": 4.0,
"eval_accuracy": 0.9351635514018691,
"eval_f1": 0.928485806906975,
"eval_loss": 0.17756415903568268,
"eval_precision": 0.9248343621199285,
"eval_recall": 0.9352570988138212,
"eval_runtime": 10.1719,
"eval_samples_per_second": 168.307,
"eval_steps_per_second": 10.519,
"step": 748
},
{
"epoch": 4.010695187165775,
"grad_norm": 2.230004072189331,
"learning_rate": 0.002997326203208556,
"loss": 0.4071,
"step": 750
},
{
"epoch": 4.064171122994653,
"grad_norm": 2.1018853187561035,
"learning_rate": 0.0029705882352941177,
"loss": 0.3498,
"step": 760
},
{
"epoch": 4.117647058823529,
"grad_norm": 1.6814857721328735,
"learning_rate": 0.002943850267379679,
"loss": 0.4085,
"step": 770
},
{
"epoch": 4.171122994652406,
"grad_norm": 2.0869903564453125,
"learning_rate": 0.0029171122994652403,
"loss": 0.4481,
"step": 780
},
{
"epoch": 4.224598930481283,
"grad_norm": 1.4043067693710327,
"learning_rate": 0.0028903743315508022,
"loss": 0.3234,
"step": 790
},
{
"epoch": 4.278074866310161,
"grad_norm": 2.0766959190368652,
"learning_rate": 0.0028636363636363638,
"loss": 0.3719,
"step": 800
},
{
"epoch": 4.331550802139038,
"grad_norm": 1.85934317111969,
"learning_rate": 0.0028368983957219253,
"loss": 0.4784,
"step": 810
},
{
"epoch": 4.385026737967914,
"grad_norm": 2.3728232383728027,
"learning_rate": 0.0028101604278074864,
"loss": 0.3704,
"step": 820
},
{
"epoch": 4.438502673796791,
"grad_norm": 1.2759883403778076,
"learning_rate": 0.002783422459893048,
"loss": 0.3283,
"step": 830
},
{
"epoch": 4.491978609625669,
"grad_norm": 1.2006633281707764,
"learning_rate": 0.00275668449197861,
"loss": 0.3792,
"step": 840
},
{
"epoch": 4.545454545454545,
"grad_norm": 2.0884652137756348,
"learning_rate": 0.0027299465240641714,
"loss": 0.4041,
"step": 850
},
{
"epoch": 4.598930481283422,
"grad_norm": 1.281827688217163,
"learning_rate": 0.0027032085561497325,
"loss": 0.352,
"step": 860
},
{
"epoch": 4.652406417112299,
"grad_norm": 1.7143138647079468,
"learning_rate": 0.002676470588235294,
"loss": 0.3896,
"step": 870
},
{
"epoch": 4.705882352941177,
"grad_norm": 2.069678544998169,
"learning_rate": 0.0026497326203208556,
"loss": 0.335,
"step": 880
},
{
"epoch": 4.759358288770054,
"grad_norm": 1.6988319158554077,
"learning_rate": 0.0026229946524064175,
"loss": 0.3693,
"step": 890
},
{
"epoch": 4.81283422459893,
"grad_norm": 1.6188457012176514,
"learning_rate": 0.0025962566844919786,
"loss": 0.337,
"step": 900
},
{
"epoch": 4.866310160427807,
"grad_norm": 2.0478222370147705,
"learning_rate": 0.00256951871657754,
"loss": 0.3156,
"step": 910
},
{
"epoch": 4.919786096256685,
"grad_norm": 1.7088401317596436,
"learning_rate": 0.0025427807486631017,
"loss": 0.3414,
"step": 920
},
{
"epoch": 4.973262032085562,
"grad_norm": 1.161230444908142,
"learning_rate": 0.002516042780748663,
"loss": 0.3461,
"step": 930
},
{
"epoch": 5.0,
"eval_accuracy": 0.9380841121495327,
"eval_f1": 0.9304948103477649,
"eval_loss": 0.17028363049030304,
"eval_precision": 0.9311071354745837,
"eval_recall": 0.9344001562456381,
"eval_runtime": 10.2604,
"eval_samples_per_second": 166.855,
"eval_steps_per_second": 10.428,
"step": 935
},
{
"epoch": 5.026737967914438,
"grad_norm": 1.723848819732666,
"learning_rate": 0.0024893048128342248,
"loss": 0.3622,
"step": 940
},
{
"epoch": 5.080213903743315,
"grad_norm": 2.0140602588653564,
"learning_rate": 0.002462566844919786,
"loss": 0.3973,
"step": 950
},
{
"epoch": 5.133689839572193,
"grad_norm": 1.5653032064437866,
"learning_rate": 0.002435828877005348,
"loss": 0.3106,
"step": 960
},
{
"epoch": 5.18716577540107,
"grad_norm": 1.7829616069793701,
"learning_rate": 0.002409090909090909,
"loss": 0.3723,
"step": 970
},
{
"epoch": 5.240641711229946,
"grad_norm": 0.9940521717071533,
"learning_rate": 0.0023823529411764704,
"loss": 0.3453,
"step": 980
},
{
"epoch": 5.294117647058823,
"grad_norm": 1.1114059686660767,
"learning_rate": 0.002355614973262032,
"loss": 0.3769,
"step": 990
},
{
"epoch": 5.347593582887701,
"grad_norm": 0.9444433450698853,
"learning_rate": 0.0023288770053475935,
"loss": 0.3489,
"step": 1000
},
{
"epoch": 5.401069518716578,
"grad_norm": 2.0856947898864746,
"learning_rate": 0.002302139037433155,
"loss": 0.374,
"step": 1010
},
{
"epoch": 5.454545454545454,
"grad_norm": 1.679477572441101,
"learning_rate": 0.0022754010695187166,
"loss": 0.3738,
"step": 1020
},
{
"epoch": 5.508021390374331,
"grad_norm": 1.3019518852233887,
"learning_rate": 0.002248663101604278,
"loss": 0.3634,
"step": 1030
},
{
"epoch": 5.561497326203209,
"grad_norm": 1.467846155166626,
"learning_rate": 0.0022219251336898396,
"loss": 0.3457,
"step": 1040
},
{
"epoch": 5.614973262032086,
"grad_norm": 1.6348631381988525,
"learning_rate": 0.002195187165775401,
"loss": 0.3216,
"step": 1050
},
{
"epoch": 5.668449197860962,
"grad_norm": 1.158215880393982,
"learning_rate": 0.0021684491978609627,
"loss": 0.3033,
"step": 1060
},
{
"epoch": 5.721925133689839,
"grad_norm": 0.8872423768043518,
"learning_rate": 0.002141711229946524,
"loss": 0.2919,
"step": 1070
},
{
"epoch": 5.775401069518717,
"grad_norm": 1.9146243333816528,
"learning_rate": 0.0021149732620320857,
"loss": 0.3228,
"step": 1080
},
{
"epoch": 5.828877005347594,
"grad_norm": 1.7084169387817383,
"learning_rate": 0.0020882352941176473,
"loss": 0.2754,
"step": 1090
},
{
"epoch": 5.882352941176471,
"grad_norm": 1.0626111030578613,
"learning_rate": 0.0020614973262032084,
"loss": 0.3165,
"step": 1100
},
{
"epoch": 5.935828877005347,
"grad_norm": 1.8155293464660645,
"learning_rate": 0.00203475935828877,
"loss": 0.2815,
"step": 1110
},
{
"epoch": 5.989304812834225,
"grad_norm": 1.8623782396316528,
"learning_rate": 0.0020080213903743314,
"loss": 0.3309,
"step": 1120
},
{
"epoch": 6.0,
"eval_accuracy": 0.9369158878504673,
"eval_f1": 0.9334719219156348,
"eval_loss": 0.19556888937950134,
"eval_precision": 0.9335706750233659,
"eval_recall": 0.9396740716392903,
"eval_runtime": 10.2767,
"eval_samples_per_second": 166.591,
"eval_steps_per_second": 10.412,
"step": 1122
},
{
"epoch": 6.042780748663102,
"grad_norm": 1.1055293083190918,
"learning_rate": 0.001981283422459893,
"loss": 0.3202,
"step": 1130
},
{
"epoch": 6.096256684491979,
"grad_norm": 1.7265422344207764,
"learning_rate": 0.0019545454545454545,
"loss": 0.2973,
"step": 1140
},
{
"epoch": 6.149732620320855,
"grad_norm": 2.0242912769317627,
"learning_rate": 0.001927807486631016,
"loss": 0.302,
"step": 1150
},
{
"epoch": 6.2032085561497325,
"grad_norm": 1.0210644006729126,
"learning_rate": 0.0019010695187165775,
"loss": 0.2785,
"step": 1160
},
{
"epoch": 6.25668449197861,
"grad_norm": 1.5111178159713745,
"learning_rate": 0.001874331550802139,
"loss": 0.2873,
"step": 1170
},
{
"epoch": 6.310160427807487,
"grad_norm": 1.060488224029541,
"learning_rate": 0.0018475935828877006,
"loss": 0.321,
"step": 1180
},
{
"epoch": 6.363636363636363,
"grad_norm": 1.0627189874649048,
"learning_rate": 0.0018208556149732621,
"loss": 0.2682,
"step": 1190
},
{
"epoch": 6.4171122994652405,
"grad_norm": 1.1237576007843018,
"learning_rate": 0.0017941176470588236,
"loss": 0.2383,
"step": 1200
},
{
"epoch": 6.470588235294118,
"grad_norm": 1.6101592779159546,
"learning_rate": 0.001767379679144385,
"loss": 0.3197,
"step": 1210
},
{
"epoch": 6.524064171122995,
"grad_norm": 0.6864691972732544,
"learning_rate": 0.0017406417112299467,
"loss": 0.2307,
"step": 1220
},
{
"epoch": 6.577540106951871,
"grad_norm": 1.339308500289917,
"learning_rate": 0.001713903743315508,
"loss": 0.2534,
"step": 1230
},
{
"epoch": 6.6310160427807485,
"grad_norm": 1.3319642543792725,
"learning_rate": 0.0016871657754010698,
"loss": 0.32,
"step": 1240
},
{
"epoch": 6.684491978609626,
"grad_norm": 1.4089816808700562,
"learning_rate": 0.001660427807486631,
"loss": 0.285,
"step": 1250
},
{
"epoch": 6.737967914438503,
"grad_norm": 1.212084174156189,
"learning_rate": 0.0016336898395721924,
"loss": 0.2217,
"step": 1260
},
{
"epoch": 6.791443850267379,
"grad_norm": 1.6609482765197754,
"learning_rate": 0.0016069518716577541,
"loss": 0.2952,
"step": 1270
},
{
"epoch": 6.8449197860962565,
"grad_norm": 1.060892105102539,
"learning_rate": 0.0015802139037433154,
"loss": 0.2524,
"step": 1280
},
{
"epoch": 6.898395721925134,
"grad_norm": 1.3365124464035034,
"learning_rate": 0.001553475935828877,
"loss": 0.2694,
"step": 1290
},
{
"epoch": 6.951871657754011,
"grad_norm": 1.1521918773651123,
"learning_rate": 0.0015267379679144385,
"loss": 0.3088,
"step": 1300
},
{
"epoch": 7.0,
"eval_accuracy": 0.9532710280373832,
"eval_f1": 0.9461125894090557,
"eval_loss": 0.11792106181383133,
"eval_precision": 0.9426583892398479,
"eval_recall": 0.952515495389921,
"eval_runtime": 10.3853,
"eval_samples_per_second": 164.849,
"eval_steps_per_second": 10.303,
"step": 1309
},
{
"epoch": 7.005347593582887,
"grad_norm": 0.8682220578193665,
"learning_rate": 0.0015,
"loss": 0.2627,
"step": 1310
},
{
"epoch": 7.0588235294117645,
"grad_norm": 2.279827356338501,
"learning_rate": 0.0014732620320855616,
"loss": 0.2796,
"step": 1320
},
{
"epoch": 7.112299465240642,
"grad_norm": 1.3697049617767334,
"learning_rate": 0.001446524064171123,
"loss": 0.2369,
"step": 1330
},
{
"epoch": 7.165775401069519,
"grad_norm": 0.8857790231704712,
"learning_rate": 0.0014197860962566844,
"loss": 0.2648,
"step": 1340
},
{
"epoch": 7.219251336898395,
"grad_norm": 2.053224802017212,
"learning_rate": 0.0013930481283422461,
"loss": 0.212,
"step": 1350
},
{
"epoch": 7.2727272727272725,
"grad_norm": 1.619578242301941,
"learning_rate": 0.0013663101604278075,
"loss": 0.2229,
"step": 1360
},
{
"epoch": 7.32620320855615,
"grad_norm": 1.3765966892242432,
"learning_rate": 0.0013395721925133692,
"loss": 0.2311,
"step": 1370
},
{
"epoch": 7.379679144385027,
"grad_norm": 1.2967066764831543,
"learning_rate": 0.0013128342245989305,
"loss": 0.2402,
"step": 1380
},
{
"epoch": 7.433155080213904,
"grad_norm": 1.2961163520812988,
"learning_rate": 0.0012860962566844918,
"loss": 0.2318,
"step": 1390
},
{
"epoch": 7.4866310160427805,
"grad_norm": 1.6240290403366089,
"learning_rate": 0.0012593582887700536,
"loss": 0.2669,
"step": 1400
},
{
"epoch": 7.540106951871658,
"grad_norm": 1.1457808017730713,
"learning_rate": 0.0012326203208556149,
"loss": 0.2887,
"step": 1410
},
{
"epoch": 7.593582887700535,
"grad_norm": 1.303931474685669,
"learning_rate": 0.0012058823529411764,
"loss": 0.2862,
"step": 1420
},
{
"epoch": 7.647058823529412,
"grad_norm": 0.9429693222045898,
"learning_rate": 0.001179144385026738,
"loss": 0.2282,
"step": 1430
},
{
"epoch": 7.7005347593582885,
"grad_norm": 1.349269986152649,
"learning_rate": 0.0011524064171122995,
"loss": 0.2414,
"step": 1440
},
{
"epoch": 7.754010695187166,
"grad_norm": 1.185160517692566,
"learning_rate": 0.001125668449197861,
"loss": 0.219,
"step": 1450
},
{
"epoch": 7.807486631016043,
"grad_norm": 1.5935460329055786,
"learning_rate": 0.0010989304812834225,
"loss": 0.2109,
"step": 1460
},
{
"epoch": 7.86096256684492,
"grad_norm": 1.4563795328140259,
"learning_rate": 0.001072192513368984,
"loss": 0.2943,
"step": 1470
},
{
"epoch": 7.9144385026737964,
"grad_norm": 1.2570650577545166,
"learning_rate": 0.0010454545454545454,
"loss": 0.2275,
"step": 1480
},
{
"epoch": 7.967914438502674,
"grad_norm": 0.6930679082870483,
"learning_rate": 0.001018716577540107,
"loss": 0.2129,
"step": 1490
},
{
"epoch": 8.0,
"eval_accuracy": 0.9637850467289719,
"eval_f1": 0.9610548371575116,
"eval_loss": 0.09920904040336609,
"eval_precision": 0.9569323583080014,
"eval_recall": 0.9673920345290172,
"eval_runtime": 10.543,
"eval_samples_per_second": 162.382,
"eval_steps_per_second": 10.149,
"step": 1496
},
{
"epoch": 8.02139037433155,
"grad_norm": 1.4018137454986572,
"learning_rate": 0.0009919786096256684,
"loss": 0.2638,
"step": 1500
},
{
"epoch": 8.074866310160427,
"grad_norm": 1.2713522911071777,
"learning_rate": 0.00096524064171123,
"loss": 0.2099,
"step": 1510
},
{
"epoch": 8.128342245989305,
"grad_norm": 1.004296064376831,
"learning_rate": 0.0009385026737967915,
"loss": 0.1801,
"step": 1520
},
{
"epoch": 8.181818181818182,
"grad_norm": 0.7041844129562378,
"learning_rate": 0.0009117647058823529,
"loss": 0.1829,
"step": 1530
},
{
"epoch": 8.235294117647058,
"grad_norm": 1.3204301595687866,
"learning_rate": 0.0008850267379679144,
"loss": 0.2444,
"step": 1540
},
{
"epoch": 8.288770053475936,
"grad_norm": 1.261974573135376,
"learning_rate": 0.000858288770053476,
"loss": 0.2431,
"step": 1550
},
{
"epoch": 8.342245989304812,
"grad_norm": 0.9899649024009705,
"learning_rate": 0.0008315508021390375,
"loss": 0.1808,
"step": 1560
},
{
"epoch": 8.39572192513369,
"grad_norm": 1.150225281715393,
"learning_rate": 0.0008048128342245989,
"loss": 0.2048,
"step": 1570
},
{
"epoch": 8.449197860962567,
"grad_norm": 0.9454184770584106,
"learning_rate": 0.0007780748663101605,
"loss": 0.1919,
"step": 1580
},
{
"epoch": 8.502673796791443,
"grad_norm": 1.26669442653656,
"learning_rate": 0.000751336898395722,
"loss": 0.1837,
"step": 1590
},
{
"epoch": 8.556149732620321,
"grad_norm": 0.8547130823135376,
"learning_rate": 0.0007245989304812835,
"loss": 0.1774,
"step": 1600
},
{
"epoch": 8.609625668449198,
"grad_norm": 1.8781049251556396,
"learning_rate": 0.000697860962566845,
"loss": 0.2202,
"step": 1610
},
{
"epoch": 8.663101604278076,
"grad_norm": 0.7876987457275391,
"learning_rate": 0.0006711229946524064,
"loss": 0.1781,
"step": 1620
},
{
"epoch": 8.716577540106952,
"grad_norm": 1.2137806415557861,
"learning_rate": 0.0006443850267379679,
"loss": 0.1722,
"step": 1630
},
{
"epoch": 8.770053475935828,
"grad_norm": 1.6328903436660767,
"learning_rate": 0.0006176470588235294,
"loss": 0.2085,
"step": 1640
},
{
"epoch": 8.823529411764707,
"grad_norm": 0.9435901641845703,
"learning_rate": 0.0005909090909090909,
"loss": 0.2335,
"step": 1650
},
{
"epoch": 8.877005347593583,
"grad_norm": 1.1905876398086548,
"learning_rate": 0.0005641711229946525,
"loss": 0.2387,
"step": 1660
},
{
"epoch": 8.93048128342246,
"grad_norm": 0.8758776783943176,
"learning_rate": 0.0005374331550802139,
"loss": 0.2265,
"step": 1670
},
{
"epoch": 8.983957219251337,
"grad_norm": 1.3745719194412231,
"learning_rate": 0.0005106951871657754,
"loss": 0.2049,
"step": 1680
},
{
"epoch": 9.0,
"eval_accuracy": 0.967873831775701,
"eval_f1": 0.9651132770824573,
"eval_loss": 0.08469934016466141,
"eval_precision": 0.9626628225985181,
"eval_recall": 0.9683070024371949,
"eval_runtime": 10.3829,
"eval_samples_per_second": 164.887,
"eval_steps_per_second": 10.305,
"step": 1683
},
{
"epoch": 9.037433155080214,
"grad_norm": 0.9230683445930481,
"learning_rate": 0.0004839572192513369,
"loss": 0.1654,
"step": 1690
},
{
"epoch": 9.090909090909092,
"grad_norm": 0.8362302184104919,
"learning_rate": 0.0004572192513368984,
"loss": 0.1918,
"step": 1700
},
{
"epoch": 9.144385026737968,
"grad_norm": 1.3025470972061157,
"learning_rate": 0.0004304812834224599,
"loss": 0.1497,
"step": 1710
},
{
"epoch": 9.197860962566844,
"grad_norm": 0.8339858055114746,
"learning_rate": 0.00040374331550802143,
"loss": 0.196,
"step": 1720
},
{
"epoch": 9.251336898395722,
"grad_norm": 1.3273382186889648,
"learning_rate": 0.00037700534759358285,
"loss": 0.1912,
"step": 1730
},
{
"epoch": 9.304812834224599,
"grad_norm": 0.5822441577911377,
"learning_rate": 0.0003502673796791444,
"loss": 0.1452,
"step": 1740
},
{
"epoch": 9.358288770053475,
"grad_norm": 0.8451639413833618,
"learning_rate": 0.0003235294117647059,
"loss": 0.1877,
"step": 1750
},
{
"epoch": 9.411764705882353,
"grad_norm": 1.0270066261291504,
"learning_rate": 0.0002967914438502674,
"loss": 0.1964,
"step": 1760
},
{
"epoch": 9.46524064171123,
"grad_norm": 1.0621460676193237,
"learning_rate": 0.00027005347593582886,
"loss": 0.2015,
"step": 1770
},
{
"epoch": 9.518716577540108,
"grad_norm": 0.9587564468383789,
"learning_rate": 0.00024331550802139036,
"loss": 0.1962,
"step": 1780
},
{
"epoch": 9.572192513368984,
"grad_norm": 0.719536304473877,
"learning_rate": 0.00021657754010695186,
"loss": 0.1389,
"step": 1790
},
{
"epoch": 9.62566844919786,
"grad_norm": 0.89113450050354,
"learning_rate": 0.0001898395721925134,
"loss": 0.1783,
"step": 1800
},
{
"epoch": 9.679144385026738,
"grad_norm": 0.8831282258033752,
"learning_rate": 0.0001631016042780749,
"loss": 0.1871,
"step": 1810
},
{
"epoch": 9.732620320855615,
"grad_norm": 0.6015557646751404,
"learning_rate": 0.00013636363636363637,
"loss": 0.1414,
"step": 1820
},
{
"epoch": 9.786096256684491,
"grad_norm": 1.1582796573638916,
"learning_rate": 0.00010962566844919787,
"loss": 0.2408,
"step": 1830
},
{
"epoch": 9.83957219251337,
"grad_norm": 0.7856789231300354,
"learning_rate": 8.288770053475936e-05,
"loss": 0.145,
"step": 1840
},
{
"epoch": 9.893048128342246,
"grad_norm": 1.1010181903839111,
"learning_rate": 5.614973262032086e-05,
"loss": 0.1758,
"step": 1850
},
{
"epoch": 9.946524064171124,
"grad_norm": 0.7676904797554016,
"learning_rate": 2.9411764705882354e-05,
"loss": 0.1683,
"step": 1860
},
{
"epoch": 10.0,
"grad_norm": 1.4464507102966309,
"learning_rate": 2.6737967914438504e-06,
"loss": 0.2007,
"step": 1870
},
{
"epoch": 10.0,
"eval_accuracy": 0.9707943925233645,
"eval_f1": 0.9697517307733657,
"eval_loss": 0.07853860408067703,
"eval_precision": 0.9668363312878312,
"eval_recall": 0.9737482240908748,
"eval_runtime": 10.3924,
"eval_samples_per_second": 164.735,
"eval_steps_per_second": 10.296,
"step": 1870
},
{
"epoch": 10.0,
"step": 1870,
"total_flos": 9.328175742872125e+18,
"train_loss": 0.3662890907277398,
"train_runtime": 1600.7009,
"train_samples_per_second": 74.711,
"train_steps_per_second": 1.168
}
],
"logging_steps": 10,
"max_steps": 1870,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 9.328175742872125e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
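
The JSON above is the complete Trainer state: `log_history` interleaves per-step training records (with `loss`, `grad_norm`, `learning_rate`) and per-epoch evaluation records (with `eval_accuracy`, `eval_f1`, etc.), and `best_model_checkpoint` points at the epoch-10 checkpoint that achieved the `best_metric` accuracy of ~0.9708. A minimal sketch for pulling those pieces apart, assuming the file is saved locally as `trainer_state.json` (filename and printing format are illustrative, not part of the original file):

```python
import json

# Load the Trainer state exported by Hugging Face Transformers at the end of training.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step training records carry "loss"; per-epoch evaluation records carry "eval_loss".
train_logs = [entry for entry in state["log_history"] if "loss" in entry]
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]

print(f"best checkpoint: {state['best_model_checkpoint']}")
print(f"best eval accuracy: {state['best_metric']:.4f}")

# Summarize the per-epoch validation metrics recorded in this file.
for entry in eval_logs:
    print(
        f"epoch {entry['epoch']:>4}: "
        f"acc={entry['eval_accuracy']:.4f}  "
        f"f1={entry['eval_f1']:.4f}  "
        f"loss={entry['eval_loss']:.4f}"
    )
```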