vishalkatheriya18's picture
End of training
69f744d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 98.96907216494846,
"eval_steps": 500,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.41237113402061853,
"grad_norm": 5.916716575622559,
"learning_rate": 2.0833333333333334e-06,
"loss": 1.6297,
"step": 10
},
{
"epoch": 0.8247422680412371,
"grad_norm": 5.051618576049805,
"learning_rate": 4.166666666666667e-06,
"loss": 1.613,
"step": 20
},
{
"epoch": 0.9896907216494846,
"eval_accuracy": 0.2927536231884058,
"eval_loss": 1.5833344459533691,
"eval_precision": 0.3247879943590829,
"eval_recall": 0.2927536231884058,
"eval_runtime": 2.9495,
"eval_samples_per_second": 116.97,
"eval_steps_per_second": 3.729,
"step": 24
},
{
"epoch": 1.2371134020618557,
"grad_norm": 4.8794169425964355,
"learning_rate": 6.25e-06,
"loss": 1.5792,
"step": 30
},
{
"epoch": 1.6494845360824741,
"grad_norm": 6.336801052093506,
"learning_rate": 8.333333333333334e-06,
"loss": 1.5494,
"step": 40
},
{
"epoch": 1.9793814432989691,
"eval_accuracy": 0.3681159420289855,
"eval_loss": 1.4944071769714355,
"eval_precision": 0.440954469667821,
"eval_recall": 0.3681159420289855,
"eval_runtime": 1.7863,
"eval_samples_per_second": 193.135,
"eval_steps_per_second": 6.158,
"step": 48
},
{
"epoch": 2.0618556701030926,
"grad_norm": 8.574434280395508,
"learning_rate": 1.0416666666666668e-05,
"loss": 1.5014,
"step": 50
},
{
"epoch": 2.4742268041237114,
"grad_norm": 6.564225673675537,
"learning_rate": 1.25e-05,
"loss": 1.4422,
"step": 60
},
{
"epoch": 2.88659793814433,
"grad_norm": 5.804593086242676,
"learning_rate": 1.4583333333333335e-05,
"loss": 1.3989,
"step": 70
},
{
"epoch": 2.9690721649484537,
"eval_accuracy": 0.5159420289855072,
"eval_loss": 1.3423842191696167,
"eval_precision": 0.52619860815513,
"eval_recall": 0.5159420289855072,
"eval_runtime": 1.8303,
"eval_samples_per_second": 188.493,
"eval_steps_per_second": 6.01,
"step": 72
},
{
"epoch": 3.2989690721649483,
"grad_norm": 6.893215656280518,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.2968,
"step": 80
},
{
"epoch": 3.711340206185567,
"grad_norm": 12.37126350402832,
"learning_rate": 1.8750000000000002e-05,
"loss": 1.2238,
"step": 90
},
{
"epoch": 4.0,
"eval_accuracy": 0.6260869565217392,
"eval_loss": 1.1162269115447998,
"eval_precision": 0.6665610702002287,
"eval_recall": 0.6260869565217392,
"eval_runtime": 1.8634,
"eval_samples_per_second": 185.144,
"eval_steps_per_second": 5.903,
"step": 97
},
{
"epoch": 4.123711340206185,
"grad_norm": 6.501392841339111,
"learning_rate": 2.0833333333333336e-05,
"loss": 1.1194,
"step": 100
},
{
"epoch": 4.536082474226804,
"grad_norm": 14.653229713439941,
"learning_rate": 2.2916666666666667e-05,
"loss": 1.0499,
"step": 110
},
{
"epoch": 4.948453608247423,
"grad_norm": 15.2618408203125,
"learning_rate": 2.5e-05,
"loss": 0.9585,
"step": 120
},
{
"epoch": 4.989690721649485,
"eval_accuracy": 0.6985507246376812,
"eval_loss": 0.8966168761253357,
"eval_precision": 0.7013922738306568,
"eval_recall": 0.6985507246376812,
"eval_runtime": 1.8339,
"eval_samples_per_second": 188.12,
"eval_steps_per_second": 5.998,
"step": 121
},
{
"epoch": 5.360824742268041,
"grad_norm": 12.275806427001953,
"learning_rate": 2.7083333333333332e-05,
"loss": 0.8986,
"step": 130
},
{
"epoch": 5.77319587628866,
"grad_norm": 15.373220443725586,
"learning_rate": 2.916666666666667e-05,
"loss": 0.8934,
"step": 140
},
{
"epoch": 5.979381443298969,
"eval_accuracy": 0.7507246376811594,
"eval_loss": 0.763816773891449,
"eval_precision": 0.7489666881245252,
"eval_recall": 0.7507246376811594,
"eval_runtime": 1.9332,
"eval_samples_per_second": 178.459,
"eval_steps_per_second": 5.69,
"step": 145
},
{
"epoch": 6.185567010309279,
"grad_norm": 15.394486427307129,
"learning_rate": 3.125e-05,
"loss": 0.8326,
"step": 150
},
{
"epoch": 6.597938144329897,
"grad_norm": 14.27376937866211,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.7589,
"step": 160
},
{
"epoch": 6.969072164948454,
"eval_accuracy": 0.7652173913043478,
"eval_loss": 0.6776081919670105,
"eval_precision": 0.771906259033061,
"eval_recall": 0.7652173913043478,
"eval_runtime": 1.836,
"eval_samples_per_second": 187.91,
"eval_steps_per_second": 5.991,
"step": 169
},
{
"epoch": 7.010309278350515,
"grad_norm": 21.43760871887207,
"learning_rate": 3.541666666666667e-05,
"loss": 0.7404,
"step": 170
},
{
"epoch": 7.422680412371134,
"grad_norm": 15.207581520080566,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.653,
"step": 180
},
{
"epoch": 7.835051546391752,
"grad_norm": 25.153663635253906,
"learning_rate": 3.958333333333333e-05,
"loss": 0.6746,
"step": 190
},
{
"epoch": 8.0,
"eval_accuracy": 0.7623188405797101,
"eval_loss": 0.6126735210418701,
"eval_precision": 0.7628428431334807,
"eval_recall": 0.7623188405797101,
"eval_runtime": 1.8501,
"eval_samples_per_second": 186.474,
"eval_steps_per_second": 5.946,
"step": 194
},
{
"epoch": 8.24742268041237,
"grad_norm": 23.2750301361084,
"learning_rate": 4.166666666666667e-05,
"loss": 0.6516,
"step": 200
},
{
"epoch": 8.65979381443299,
"grad_norm": 21.777841567993164,
"learning_rate": 4.375e-05,
"loss": 0.6048,
"step": 210
},
{
"epoch": 8.989690721649485,
"eval_accuracy": 0.8202898550724638,
"eval_loss": 0.5220813751220703,
"eval_precision": 0.8216835971752063,
"eval_recall": 0.8202898550724638,
"eval_runtime": 1.8243,
"eval_samples_per_second": 189.114,
"eval_steps_per_second": 6.03,
"step": 218
},
{
"epoch": 9.072164948453608,
"grad_norm": 15.630614280700684,
"learning_rate": 4.5833333333333334e-05,
"loss": 0.5723,
"step": 220
},
{
"epoch": 9.484536082474227,
"grad_norm": 13.571239471435547,
"learning_rate": 4.791666666666667e-05,
"loss": 0.5436,
"step": 230
},
{
"epoch": 9.896907216494846,
"grad_norm": 24.206087112426758,
"learning_rate": 5e-05,
"loss": 0.531,
"step": 240
},
{
"epoch": 9.97938144329897,
"eval_accuracy": 0.8115942028985508,
"eval_loss": 0.4930874705314636,
"eval_precision": 0.8203605371226137,
"eval_recall": 0.8115942028985508,
"eval_runtime": 1.788,
"eval_samples_per_second": 192.958,
"eval_steps_per_second": 6.152,
"step": 242
},
{
"epoch": 10.309278350515465,
"grad_norm": 17.16573715209961,
"learning_rate": 4.976851851851852e-05,
"loss": 0.5034,
"step": 250
},
{
"epoch": 10.721649484536082,
"grad_norm": 19.933942794799805,
"learning_rate": 4.9537037037037035e-05,
"loss": 0.57,
"step": 260
},
{
"epoch": 10.969072164948454,
"eval_accuracy": 0.8318840579710145,
"eval_loss": 0.44795188307762146,
"eval_precision": 0.8344579895060443,
"eval_recall": 0.8318840579710145,
"eval_runtime": 1.8183,
"eval_samples_per_second": 189.733,
"eval_steps_per_second": 6.049,
"step": 266
},
{
"epoch": 11.1340206185567,
"grad_norm": 25.91600799560547,
"learning_rate": 4.930555555555556e-05,
"loss": 0.4791,
"step": 270
},
{
"epoch": 11.54639175257732,
"grad_norm": 23.493484497070312,
"learning_rate": 4.9074074074074075e-05,
"loss": 0.4372,
"step": 280
},
{
"epoch": 11.958762886597938,
"grad_norm": 14.273780822753906,
"learning_rate": 4.8842592592592595e-05,
"loss": 0.4624,
"step": 290
},
{
"epoch": 12.0,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.42139920592308044,
"eval_precision": 0.846014277166443,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7884,
"eval_samples_per_second": 192.914,
"eval_steps_per_second": 6.151,
"step": 291
},
{
"epoch": 12.371134020618557,
"grad_norm": 26.43771743774414,
"learning_rate": 4.8611111111111115e-05,
"loss": 0.4509,
"step": 300
},
{
"epoch": 12.783505154639176,
"grad_norm": 29.501718521118164,
"learning_rate": 4.837962962962963e-05,
"loss": 0.417,
"step": 310
},
{
"epoch": 12.989690721649485,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.44392213225364685,
"eval_precision": 0.8485676738054103,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.762,
"eval_samples_per_second": 195.797,
"eval_steps_per_second": 6.243,
"step": 315
},
{
"epoch": 13.195876288659793,
"grad_norm": 16.380001068115234,
"learning_rate": 4.814814814814815e-05,
"loss": 0.4042,
"step": 320
},
{
"epoch": 13.608247422680412,
"grad_norm": 26.098731994628906,
"learning_rate": 4.791666666666667e-05,
"loss": 0.3814,
"step": 330
},
{
"epoch": 13.97938144329897,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.41379421949386597,
"eval_precision": 0.8477774513274812,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7998,
"eval_samples_per_second": 191.689,
"eval_steps_per_second": 6.112,
"step": 339
},
{
"epoch": 14.02061855670103,
"grad_norm": 13.136883735656738,
"learning_rate": 4.768518518518519e-05,
"loss": 0.4209,
"step": 340
},
{
"epoch": 14.43298969072165,
"grad_norm": 18.104930877685547,
"learning_rate": 4.745370370370371e-05,
"loss": 0.3817,
"step": 350
},
{
"epoch": 14.845360824742269,
"grad_norm": 27.79136848449707,
"learning_rate": 4.722222222222222e-05,
"loss": 0.3737,
"step": 360
},
{
"epoch": 14.969072164948454,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.41388532519340515,
"eval_precision": 0.8466409143288909,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.8854,
"eval_samples_per_second": 182.983,
"eval_steps_per_second": 5.834,
"step": 363
},
{
"epoch": 15.257731958762886,
"grad_norm": 33.14027786254883,
"learning_rate": 4.699074074074074e-05,
"loss": 0.3782,
"step": 370
},
{
"epoch": 15.670103092783505,
"grad_norm": 10.574623107910156,
"learning_rate": 4.675925925925926e-05,
"loss": 0.3971,
"step": 380
},
{
"epoch": 16.0,
"eval_accuracy": 0.863768115942029,
"eval_loss": 0.4119352400302887,
"eval_precision": 0.8664915871553495,
"eval_recall": 0.863768115942029,
"eval_runtime": 1.8638,
"eval_samples_per_second": 185.11,
"eval_steps_per_second": 5.902,
"step": 388
},
{
"epoch": 16.082474226804123,
"grad_norm": 14.796497344970703,
"learning_rate": 4.652777777777778e-05,
"loss": 0.3227,
"step": 390
},
{
"epoch": 16.49484536082474,
"grad_norm": 13.750545501708984,
"learning_rate": 4.62962962962963e-05,
"loss": 0.306,
"step": 400
},
{
"epoch": 16.90721649484536,
"grad_norm": 15.056818962097168,
"learning_rate": 4.6064814814814814e-05,
"loss": 0.343,
"step": 410
},
{
"epoch": 16.989690721649485,
"eval_accuracy": 0.8608695652173913,
"eval_loss": 0.4421471655368805,
"eval_precision": 0.8659298079116737,
"eval_recall": 0.8608695652173913,
"eval_runtime": 1.7876,
"eval_samples_per_second": 192.996,
"eval_steps_per_second": 6.154,
"step": 412
},
{
"epoch": 17.31958762886598,
"grad_norm": 19.41351318359375,
"learning_rate": 4.5833333333333334e-05,
"loss": 0.3383,
"step": 420
},
{
"epoch": 17.7319587628866,
"grad_norm": 22.833810806274414,
"learning_rate": 4.5601851851851854e-05,
"loss": 0.3311,
"step": 430
},
{
"epoch": 17.97938144329897,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.45808833837509155,
"eval_precision": 0.8503668982654489,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8173,
"eval_samples_per_second": 189.846,
"eval_steps_per_second": 6.053,
"step": 436
},
{
"epoch": 18.144329896907216,
"grad_norm": 9.80312442779541,
"learning_rate": 4.5370370370370374e-05,
"loss": 0.301,
"step": 440
},
{
"epoch": 18.556701030927837,
"grad_norm": 17.442903518676758,
"learning_rate": 4.5138888888888894e-05,
"loss": 0.2594,
"step": 450
},
{
"epoch": 18.969072164948454,
"grad_norm": 25.01900863647461,
"learning_rate": 4.490740740740741e-05,
"loss": 0.2652,
"step": 460
},
{
"epoch": 18.969072164948454,
"eval_accuracy": 0.8405797101449275,
"eval_loss": 0.4563068747520447,
"eval_precision": 0.8441116322796441,
"eval_recall": 0.8405797101449275,
"eval_runtime": 1.8121,
"eval_samples_per_second": 190.387,
"eval_steps_per_second": 6.07,
"step": 460
},
{
"epoch": 19.38144329896907,
"grad_norm": 22.951929092407227,
"learning_rate": 4.467592592592593e-05,
"loss": 0.2726,
"step": 470
},
{
"epoch": 19.79381443298969,
"grad_norm": 17.189971923828125,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.3026,
"step": 480
},
{
"epoch": 20.0,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.4535578489303589,
"eval_precision": 0.8549145070160367,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8156,
"eval_samples_per_second": 190.019,
"eval_steps_per_second": 6.059,
"step": 485
},
{
"epoch": 20.20618556701031,
"grad_norm": 19.29929542541504,
"learning_rate": 4.4212962962962966e-05,
"loss": 0.2808,
"step": 490
},
{
"epoch": 20.61855670103093,
"grad_norm": 23.201435089111328,
"learning_rate": 4.3981481481481486e-05,
"loss": 0.2562,
"step": 500
},
{
"epoch": 20.989690721649485,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.44093257188796997,
"eval_precision": 0.8493084398986088,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.9468,
"eval_samples_per_second": 177.217,
"eval_steps_per_second": 5.65,
"step": 509
},
{
"epoch": 21.030927835051546,
"grad_norm": 12.947028160095215,
"learning_rate": 4.375e-05,
"loss": 0.2739,
"step": 510
},
{
"epoch": 21.443298969072163,
"grad_norm": 21.544536590576172,
"learning_rate": 4.351851851851852e-05,
"loss": 0.2383,
"step": 520
},
{
"epoch": 21.855670103092784,
"grad_norm": 12.224617958068848,
"learning_rate": 4.328703703703704e-05,
"loss": 0.2282,
"step": 530
},
{
"epoch": 21.97938144329897,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.4388555884361267,
"eval_precision": 0.8451190974708183,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.7718,
"eval_samples_per_second": 194.721,
"eval_steps_per_second": 6.208,
"step": 533
},
{
"epoch": 22.2680412371134,
"grad_norm": 17.55919647216797,
"learning_rate": 4.305555555555556e-05,
"loss": 0.2505,
"step": 540
},
{
"epoch": 22.68041237113402,
"grad_norm": 10.570196151733398,
"learning_rate": 4.282407407407408e-05,
"loss": 0.2374,
"step": 550
},
{
"epoch": 22.969072164948454,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.4452122747898102,
"eval_precision": 0.8589461524849866,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8751,
"eval_samples_per_second": 183.989,
"eval_steps_per_second": 5.866,
"step": 557
},
{
"epoch": 23.09278350515464,
"grad_norm": 25.781587600708008,
"learning_rate": 4.259259259259259e-05,
"loss": 0.2355,
"step": 560
},
{
"epoch": 23.50515463917526,
"grad_norm": 22.854766845703125,
"learning_rate": 4.236111111111111e-05,
"loss": 0.2553,
"step": 570
},
{
"epoch": 23.917525773195877,
"grad_norm": 15.405595779418945,
"learning_rate": 4.212962962962963e-05,
"loss": 0.216,
"step": 580
},
{
"epoch": 24.0,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.4375264048576355,
"eval_precision": 0.858123097800969,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8051,
"eval_samples_per_second": 191.128,
"eval_steps_per_second": 6.094,
"step": 582
},
{
"epoch": 24.329896907216494,
"grad_norm": 15.453635215759277,
"learning_rate": 4.1898148148148145e-05,
"loss": 0.2019,
"step": 590
},
{
"epoch": 24.742268041237114,
"grad_norm": 12.363275527954102,
"learning_rate": 4.166666666666667e-05,
"loss": 0.2127,
"step": 600
},
{
"epoch": 24.989690721649485,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.44218453764915466,
"eval_precision": 0.8587798835624924,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.9062,
"eval_samples_per_second": 180.991,
"eval_steps_per_second": 5.771,
"step": 606
},
{
"epoch": 25.15463917525773,
"grad_norm": 15.13847827911377,
"learning_rate": 4.1435185185185185e-05,
"loss": 0.2301,
"step": 610
},
{
"epoch": 25.567010309278352,
"grad_norm": 20.761062622070312,
"learning_rate": 4.1203703703703705e-05,
"loss": 0.1807,
"step": 620
},
{
"epoch": 25.97938144329897,
"grad_norm": 17.889150619506836,
"learning_rate": 4.0972222222222225e-05,
"loss": 0.2004,
"step": 630
},
{
"epoch": 25.97938144329897,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.46348363161087036,
"eval_precision": 0.8519325944084339,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.7728,
"eval_samples_per_second": 194.609,
"eval_steps_per_second": 6.205,
"step": 630
},
{
"epoch": 26.391752577319586,
"grad_norm": 23.56374168395996,
"learning_rate": 4.074074074074074e-05,
"loss": 0.2427,
"step": 640
},
{
"epoch": 26.804123711340207,
"grad_norm": 9.772664070129395,
"learning_rate": 4.0509259259259265e-05,
"loss": 0.2029,
"step": 650
},
{
"epoch": 26.969072164948454,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5214529037475586,
"eval_precision": 0.8545500895204992,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.9291,
"eval_samples_per_second": 178.841,
"eval_steps_per_second": 5.702,
"step": 654
},
{
"epoch": 27.216494845360824,
"grad_norm": 14.480449676513672,
"learning_rate": 4.027777777777778e-05,
"loss": 0.1903,
"step": 660
},
{
"epoch": 27.628865979381445,
"grad_norm": 16.415973663330078,
"learning_rate": 4.00462962962963e-05,
"loss": 0.1794,
"step": 670
},
{
"epoch": 28.0,
"eval_accuracy": 0.863768115942029,
"eval_loss": 0.47563326358795166,
"eval_precision": 0.8669166767891824,
"eval_recall": 0.863768115942029,
"eval_runtime": 1.7555,
"eval_samples_per_second": 196.529,
"eval_steps_per_second": 6.266,
"step": 679
},
{
"epoch": 28.04123711340206,
"grad_norm": 8.689855575561523,
"learning_rate": 3.981481481481482e-05,
"loss": 0.1822,
"step": 680
},
{
"epoch": 28.45360824742268,
"grad_norm": 12.505402565002441,
"learning_rate": 3.958333333333333e-05,
"loss": 0.1828,
"step": 690
},
{
"epoch": 28.8659793814433,
"grad_norm": 15.491950988769531,
"learning_rate": 3.935185185185186e-05,
"loss": 0.1835,
"step": 700
},
{
"epoch": 28.989690721649485,
"eval_accuracy": 0.8608695652173913,
"eval_loss": 0.4727528393268585,
"eval_precision": 0.8649801117780185,
"eval_recall": 0.8608695652173913,
"eval_runtime": 1.8858,
"eval_samples_per_second": 182.95,
"eval_steps_per_second": 5.833,
"step": 703
},
{
"epoch": 29.278350515463917,
"grad_norm": 16.289226531982422,
"learning_rate": 3.912037037037037e-05,
"loss": 0.1907,
"step": 710
},
{
"epoch": 29.690721649484537,
"grad_norm": 13.304434776306152,
"learning_rate": 3.888888888888889e-05,
"loss": 0.1781,
"step": 720
},
{
"epoch": 29.97938144329897,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.4636934697628021,
"eval_precision": 0.8568131435327558,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.8681,
"eval_samples_per_second": 184.683,
"eval_steps_per_second": 5.888,
"step": 727
},
{
"epoch": 30.103092783505154,
"grad_norm": 6.991786003112793,
"learning_rate": 3.865740740740741e-05,
"loss": 0.1829,
"step": 730
},
{
"epoch": 30.51546391752577,
"grad_norm": 10.514315605163574,
"learning_rate": 3.8425925925925924e-05,
"loss": 0.1627,
"step": 740
},
{
"epoch": 30.927835051546392,
"grad_norm": 9.121224403381348,
"learning_rate": 3.8194444444444444e-05,
"loss": 0.1671,
"step": 750
},
{
"epoch": 30.969072164948454,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.485573947429657,
"eval_precision": 0.8599276434444294,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.9437,
"eval_samples_per_second": 177.497,
"eval_steps_per_second": 5.659,
"step": 751
},
{
"epoch": 31.34020618556701,
"grad_norm": 13.762226104736328,
"learning_rate": 3.7962962962962964e-05,
"loss": 0.1721,
"step": 760
},
{
"epoch": 31.75257731958763,
"grad_norm": 10.415836334228516,
"learning_rate": 3.7731481481481484e-05,
"loss": 0.1762,
"step": 770
},
{
"epoch": 32.0,
"eval_accuracy": 0.8666666666666667,
"eval_loss": 0.5007998943328857,
"eval_precision": 0.8684023473901008,
"eval_recall": 0.8666666666666667,
"eval_runtime": 1.769,
"eval_samples_per_second": 195.026,
"eval_steps_per_second": 6.218,
"step": 776
},
{
"epoch": 32.16494845360825,
"grad_norm": 10.8311767578125,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.1707,
"step": 780
},
{
"epoch": 32.577319587628864,
"grad_norm": 12.070932388305664,
"learning_rate": 3.726851851851852e-05,
"loss": 0.1673,
"step": 790
},
{
"epoch": 32.98969072164948,
"grad_norm": 8.654770851135254,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.1867,
"step": 800
},
{
"epoch": 32.98969072164948,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.5058211088180542,
"eval_precision": 0.8584843785997619,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8394,
"eval_samples_per_second": 187.561,
"eval_steps_per_second": 5.98,
"step": 800
},
{
"epoch": 33.402061855670105,
"grad_norm": 8.323944091796875,
"learning_rate": 3.6805555555555556e-05,
"loss": 0.1553,
"step": 810
},
{
"epoch": 33.81443298969072,
"grad_norm": 14.134881973266602,
"learning_rate": 3.6574074074074076e-05,
"loss": 0.1409,
"step": 820
},
{
"epoch": 33.97938144329897,
"eval_accuracy": 0.8405797101449275,
"eval_loss": 0.5489646792411804,
"eval_precision": 0.8408524440704116,
"eval_recall": 0.8405797101449275,
"eval_runtime": 1.7738,
"eval_samples_per_second": 194.496,
"eval_steps_per_second": 6.201,
"step": 824
},
{
"epoch": 34.22680412371134,
"grad_norm": 17.74443244934082,
"learning_rate": 3.6342592592592596e-05,
"loss": 0.1498,
"step": 830
},
{
"epoch": 34.63917525773196,
"grad_norm": 14.35798454284668,
"learning_rate": 3.611111111111111e-05,
"loss": 0.1315,
"step": 840
},
{
"epoch": 34.96907216494845,
"eval_accuracy": 0.8347826086956521,
"eval_loss": 0.528394877910614,
"eval_precision": 0.8356368409524089,
"eval_recall": 0.8347826086956521,
"eval_runtime": 1.8034,
"eval_samples_per_second": 191.304,
"eval_steps_per_second": 6.1,
"step": 848
},
{
"epoch": 35.05154639175258,
"grad_norm": 15.67455005645752,
"learning_rate": 3.587962962962963e-05,
"loss": 0.163,
"step": 850
},
{
"epoch": 35.4639175257732,
"grad_norm": 6.1969828605651855,
"learning_rate": 3.564814814814815e-05,
"loss": 0.1406,
"step": 860
},
{
"epoch": 35.876288659793815,
"grad_norm": 14.651385307312012,
"learning_rate": 3.541666666666667e-05,
"loss": 0.1315,
"step": 870
},
{
"epoch": 36.0,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.5415348410606384,
"eval_precision": 0.8487979974677805,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7509,
"eval_samples_per_second": 197.042,
"eval_steps_per_second": 6.282,
"step": 873
},
{
"epoch": 36.28865979381443,
"grad_norm": 15.739358901977539,
"learning_rate": 3.518518518518519e-05,
"loss": 0.1944,
"step": 880
},
{
"epoch": 36.70103092783505,
"grad_norm": 16.889202117919922,
"learning_rate": 3.49537037037037e-05,
"loss": 0.1974,
"step": 890
},
{
"epoch": 36.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.519416332244873,
"eval_precision": 0.8536148561469765,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7833,
"eval_samples_per_second": 193.461,
"eval_steps_per_second": 6.168,
"step": 897
},
{
"epoch": 37.11340206185567,
"grad_norm": 10.011604309082031,
"learning_rate": 3.472222222222222e-05,
"loss": 0.1605,
"step": 900
},
{
"epoch": 37.52577319587629,
"grad_norm": 18.694128036499023,
"learning_rate": 3.449074074074074e-05,
"loss": 0.1515,
"step": 910
},
{
"epoch": 37.93814432989691,
"grad_norm": 9.140711784362793,
"learning_rate": 3.425925925925926e-05,
"loss": 0.1337,
"step": 920
},
{
"epoch": 37.97938144329897,
"eval_accuracy": 0.8608695652173913,
"eval_loss": 0.5088416337966919,
"eval_precision": 0.8602982452483552,
"eval_recall": 0.8608695652173913,
"eval_runtime": 1.7456,
"eval_samples_per_second": 197.634,
"eval_steps_per_second": 6.301,
"step": 921
},
{
"epoch": 38.350515463917525,
"grad_norm": 12.548330307006836,
"learning_rate": 3.402777777777778e-05,
"loss": 0.1439,
"step": 930
},
{
"epoch": 38.76288659793814,
"grad_norm": 12.762455940246582,
"learning_rate": 3.3796296296296295e-05,
"loss": 0.173,
"step": 940
},
{
"epoch": 38.96907216494845,
"eval_accuracy": 0.8666666666666667,
"eval_loss": 0.4912014305591583,
"eval_precision": 0.867978256170476,
"eval_recall": 0.8666666666666667,
"eval_runtime": 1.8067,
"eval_samples_per_second": 190.96,
"eval_steps_per_second": 6.089,
"step": 945
},
{
"epoch": 39.175257731958766,
"grad_norm": 12.083857536315918,
"learning_rate": 3.3564814814814815e-05,
"loss": 0.1477,
"step": 950
},
{
"epoch": 39.58762886597938,
"grad_norm": 17.14080238342285,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.1285,
"step": 960
},
{
"epoch": 40.0,
"grad_norm": 13.190485000610352,
"learning_rate": 3.3101851851851855e-05,
"loss": 0.1409,
"step": 970
},
{
"epoch": 40.0,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5222660899162292,
"eval_precision": 0.8501727809182621,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8482,
"eval_samples_per_second": 186.669,
"eval_steps_per_second": 5.952,
"step": 970
},
{
"epoch": 40.41237113402062,
"grad_norm": 8.88687801361084,
"learning_rate": 3.2870370370370375e-05,
"loss": 0.151,
"step": 980
},
{
"epoch": 40.824742268041234,
"grad_norm": 7.21800422668457,
"learning_rate": 3.263888888888889e-05,
"loss": 0.1379,
"step": 990
},
{
"epoch": 40.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5204349160194397,
"eval_precision": 0.8486749182344644,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8062,
"eval_samples_per_second": 191.006,
"eval_steps_per_second": 6.09,
"step": 994
},
{
"epoch": 41.23711340206186,
"grad_norm": 10.057676315307617,
"learning_rate": 3.240740740740741e-05,
"loss": 0.1079,
"step": 1000
},
{
"epoch": 41.649484536082475,
"grad_norm": 13.667500495910645,
"learning_rate": 3.217592592592593e-05,
"loss": 0.1437,
"step": 1010
},
{
"epoch": 41.97938144329897,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.5860036611557007,
"eval_precision": 0.8550665818648641,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.7468,
"eval_samples_per_second": 197.503,
"eval_steps_per_second": 6.297,
"step": 1018
},
{
"epoch": 42.06185567010309,
"grad_norm": 6.985457420349121,
"learning_rate": 3.194444444444444e-05,
"loss": 0.1521,
"step": 1020
},
{
"epoch": 42.47422680412371,
"grad_norm": 16.70668601989746,
"learning_rate": 3.171296296296297e-05,
"loss": 0.1393,
"step": 1030
},
{
"epoch": 42.88659793814433,
"grad_norm": 6.907033920288086,
"learning_rate": 3.148148148148148e-05,
"loss": 0.1022,
"step": 1040
},
{
"epoch": 42.96907216494845,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.5460776686668396,
"eval_precision": 0.8491763964495722,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7961,
"eval_samples_per_second": 192.078,
"eval_steps_per_second": 6.124,
"step": 1042
},
{
"epoch": 43.29896907216495,
"grad_norm": 9.046392440795898,
"learning_rate": 3.125e-05,
"loss": 0.1385,
"step": 1050
},
{
"epoch": 43.71134020618557,
"grad_norm": 10.188021659851074,
"learning_rate": 3.101851851851852e-05,
"loss": 0.1181,
"step": 1060
},
{
"epoch": 44.0,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.541079044342041,
"eval_precision": 0.856643419178803,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.7664,
"eval_samples_per_second": 195.31,
"eval_steps_per_second": 6.227,
"step": 1067
},
{
"epoch": 44.123711340206185,
"grad_norm": 8.506319046020508,
"learning_rate": 3.0787037037037034e-05,
"loss": 0.1411,
"step": 1070
},
{
"epoch": 44.5360824742268,
"grad_norm": 15.423176765441895,
"learning_rate": 3.055555555555556e-05,
"loss": 0.1346,
"step": 1080
},
{
"epoch": 44.94845360824742,
"grad_norm": 6.524370193481445,
"learning_rate": 3.0324074074074077e-05,
"loss": 0.1212,
"step": 1090
},
{
"epoch": 44.98969072164948,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.5293735861778259,
"eval_precision": 0.8580282602145957,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8173,
"eval_samples_per_second": 189.843,
"eval_steps_per_second": 6.053,
"step": 1091
},
{
"epoch": 45.36082474226804,
"grad_norm": 12.142955780029297,
"learning_rate": 3.0092592592592593e-05,
"loss": 0.105,
"step": 1100
},
{
"epoch": 45.77319587628866,
"grad_norm": 11.581314086914062,
"learning_rate": 2.9861111111111113e-05,
"loss": 0.1049,
"step": 1110
},
{
"epoch": 45.97938144329897,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.566691517829895,
"eval_precision": 0.8491712997027965,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.799,
"eval_samples_per_second": 191.772,
"eval_steps_per_second": 6.114,
"step": 1115
},
{
"epoch": 46.18556701030928,
"grad_norm": 15.353252410888672,
"learning_rate": 2.962962962962963e-05,
"loss": 0.1335,
"step": 1120
},
{
"epoch": 46.597938144329895,
"grad_norm": 11.990909576416016,
"learning_rate": 2.9398148148148146e-05,
"loss": 0.1132,
"step": 1130
},
{
"epoch": 46.96907216494845,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.5908281207084656,
"eval_precision": 0.8491182494977805,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.8291,
"eval_samples_per_second": 188.615,
"eval_steps_per_second": 6.014,
"step": 1139
},
{
"epoch": 47.01030927835052,
"grad_norm": 7.466699600219727,
"learning_rate": 2.916666666666667e-05,
"loss": 0.1229,
"step": 1140
},
{
"epoch": 47.422680412371136,
"grad_norm": 4.299150466918945,
"learning_rate": 2.8935185185185186e-05,
"loss": 0.1181,
"step": 1150
},
{
"epoch": 47.83505154639175,
"grad_norm": 8.699248313903809,
"learning_rate": 2.8703703703703706e-05,
"loss": 0.1313,
"step": 1160
},
{
"epoch": 48.0,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.5995594263076782,
"eval_precision": 0.8581686976058893,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.7851,
"eval_samples_per_second": 193.27,
"eval_steps_per_second": 6.162,
"step": 1164
},
{
"epoch": 48.24742268041237,
"grad_norm": 7.394286632537842,
"learning_rate": 2.8472222222222223e-05,
"loss": 0.1287,
"step": 1170
},
{
"epoch": 48.65979381443299,
"grad_norm": 10.575745582580566,
"learning_rate": 2.824074074074074e-05,
"loss": 0.1312,
"step": 1180
},
{
"epoch": 48.98969072164948,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.542959451675415,
"eval_precision": 0.8607254186783246,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.7426,
"eval_samples_per_second": 197.985,
"eval_steps_per_second": 6.313,
"step": 1188
},
{
"epoch": 49.07216494845361,
"grad_norm": 14.257989883422852,
"learning_rate": 2.8009259259259263e-05,
"loss": 0.1341,
"step": 1190
},
{
"epoch": 49.48453608247423,
"grad_norm": 9.95071029663086,
"learning_rate": 2.777777777777778e-05,
"loss": 0.138,
"step": 1200
},
{
"epoch": 49.896907216494846,
"grad_norm": 10.54672622680664,
"learning_rate": 2.75462962962963e-05,
"loss": 0.0996,
"step": 1210
},
{
"epoch": 49.97938144329897,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.5776570439338684,
"eval_precision": 0.8561151948364225,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8283,
"eval_samples_per_second": 188.7,
"eval_steps_per_second": 6.017,
"step": 1212
},
{
"epoch": 50.30927835051546,
"grad_norm": 9.269867897033691,
"learning_rate": 2.7314814814814816e-05,
"loss": 0.1183,
"step": 1220
},
{
"epoch": 50.72164948453608,
"grad_norm": 3.963714361190796,
"learning_rate": 2.7083333333333332e-05,
"loss": 0.1389,
"step": 1230
},
{
"epoch": 50.96907216494845,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.5757654905319214,
"eval_precision": 0.8486477905744771,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.8064,
"eval_samples_per_second": 190.984,
"eval_steps_per_second": 6.089,
"step": 1236
},
{
"epoch": 51.134020618556704,
"grad_norm": 24.62941551208496,
"learning_rate": 2.6851851851851855e-05,
"loss": 0.1188,
"step": 1240
},
{
"epoch": 51.54639175257732,
"grad_norm": 14.212287902832031,
"learning_rate": 2.6620370370370372e-05,
"loss": 0.1257,
"step": 1250
},
{
"epoch": 51.95876288659794,
"grad_norm": 10.230920791625977,
"learning_rate": 2.6388888888888892e-05,
"loss": 0.1079,
"step": 1260
},
{
"epoch": 52.0,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.5540273785591125,
"eval_precision": 0.8611434608590304,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.7965,
"eval_samples_per_second": 192.043,
"eval_steps_per_second": 6.123,
"step": 1261
},
{
"epoch": 52.371134020618555,
"grad_norm": 12.681902885437012,
"learning_rate": 2.615740740740741e-05,
"loss": 0.0964,
"step": 1270
},
{
"epoch": 52.78350515463917,
"grad_norm": 14.907917022705078,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.0972,
"step": 1280
},
{
"epoch": 52.98969072164948,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5599762797355652,
"eval_precision": 0.8559313253403165,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.8665,
"eval_samples_per_second": 184.836,
"eval_steps_per_second": 5.893,
"step": 1285
},
{
"epoch": 53.1958762886598,
"grad_norm": 13.571532249450684,
"learning_rate": 2.5694444444444445e-05,
"loss": 0.1164,
"step": 1290
},
{
"epoch": 53.608247422680414,
"grad_norm": 14.119112014770508,
"learning_rate": 2.5462962962962965e-05,
"loss": 0.0985,
"step": 1300
},
{
"epoch": 53.97938144329897,
"eval_accuracy": 0.863768115942029,
"eval_loss": 0.5391947627067566,
"eval_precision": 0.865555829019492,
"eval_recall": 0.863768115942029,
"eval_runtime": 1.8914,
"eval_samples_per_second": 182.408,
"eval_steps_per_second": 5.816,
"step": 1309
},
{
"epoch": 54.02061855670103,
"grad_norm": 11.18630599975586,
"learning_rate": 2.5231481481481485e-05,
"loss": 0.1139,
"step": 1310
},
{
"epoch": 54.43298969072165,
"grad_norm": 14.511212348937988,
"learning_rate": 2.5e-05,
"loss": 0.1117,
"step": 1320
},
{
"epoch": 54.845360824742265,
"grad_norm": 4.760071277618408,
"learning_rate": 2.4768518518518518e-05,
"loss": 0.1112,
"step": 1330
},
{
"epoch": 54.96907216494845,
"eval_accuracy": 0.863768115942029,
"eval_loss": 0.5410789847373962,
"eval_precision": 0.8655836794521399,
"eval_recall": 0.863768115942029,
"eval_runtime": 1.8766,
"eval_samples_per_second": 183.845,
"eval_steps_per_second": 5.862,
"step": 1333
},
{
"epoch": 55.25773195876289,
"grad_norm": 8.37569808959961,
"learning_rate": 2.4537037037037038e-05,
"loss": 0.1062,
"step": 1340
},
{
"epoch": 55.670103092783506,
"grad_norm": 10.700220108032227,
"learning_rate": 2.4305555555555558e-05,
"loss": 0.1308,
"step": 1350
},
{
"epoch": 56.0,
"eval_accuracy": 0.863768115942029,
"eval_loss": 0.5445396900177002,
"eval_precision": 0.8653666576853845,
"eval_recall": 0.863768115942029,
"eval_runtime": 1.8208,
"eval_samples_per_second": 189.479,
"eval_steps_per_second": 6.041,
"step": 1358
},
{
"epoch": 56.08247422680412,
"grad_norm": 19.0463924407959,
"learning_rate": 2.4074074074074074e-05,
"loss": 0.1081,
"step": 1360
},
{
"epoch": 56.49484536082474,
"grad_norm": 6.819794654846191,
"learning_rate": 2.3842592592592594e-05,
"loss": 0.1072,
"step": 1370
},
{
"epoch": 56.90721649484536,
"grad_norm": 6.308873176574707,
"learning_rate": 2.361111111111111e-05,
"loss": 0.1005,
"step": 1380
},
{
"epoch": 56.98969072164948,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5554308891296387,
"eval_precision": 0.8551462662985753,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.868,
"eval_samples_per_second": 184.69,
"eval_steps_per_second": 5.889,
"step": 1382
},
{
"epoch": 57.31958762886598,
"grad_norm": 5.025654315948486,
"learning_rate": 2.337962962962963e-05,
"loss": 0.088,
"step": 1390
},
{
"epoch": 57.7319587628866,
"grad_norm": 10.021939277648926,
"learning_rate": 2.314814814814815e-05,
"loss": 0.0871,
"step": 1400
},
{
"epoch": 57.97938144329897,
"eval_accuracy": 0.8405797101449275,
"eval_loss": 0.5966009497642517,
"eval_precision": 0.8440749450064067,
"eval_recall": 0.8405797101449275,
"eval_runtime": 1.7974,
"eval_samples_per_second": 191.939,
"eval_steps_per_second": 6.12,
"step": 1406
},
{
"epoch": 58.144329896907216,
"grad_norm": 16.077518463134766,
"learning_rate": 2.2916666666666667e-05,
"loss": 0.089,
"step": 1410
},
{
"epoch": 58.55670103092783,
"grad_norm": 14.556241035461426,
"learning_rate": 2.2685185185185187e-05,
"loss": 0.1072,
"step": 1420
},
{
"epoch": 58.96907216494845,
"grad_norm": 9.045204162597656,
"learning_rate": 2.2453703703703703e-05,
"loss": 0.1102,
"step": 1430
},
{
"epoch": 58.96907216494845,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.5807223916053772,
"eval_precision": 0.8543040805400182,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8412,
"eval_samples_per_second": 187.376,
"eval_steps_per_second": 5.974,
"step": 1430
},
{
"epoch": 59.381443298969074,
"grad_norm": 12.29312515258789,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.1021,
"step": 1440
},
{
"epoch": 59.79381443298969,
"grad_norm": 13.808602333068848,
"learning_rate": 2.1990740740740743e-05,
"loss": 0.1028,
"step": 1450
},
{
"epoch": 60.0,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.5653913021087646,
"eval_precision": 0.8490636359945823,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.8195,
"eval_samples_per_second": 189.615,
"eval_steps_per_second": 6.046,
"step": 1455
},
{
"epoch": 60.20618556701031,
"grad_norm": 8.929511070251465,
"learning_rate": 2.175925925925926e-05,
"loss": 0.1103,
"step": 1460
},
{
"epoch": 60.618556701030926,
"grad_norm": 14.425239562988281,
"learning_rate": 2.152777777777778e-05,
"loss": 0.107,
"step": 1470
},
{
"epoch": 60.98969072164948,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.577854573726654,
"eval_precision": 0.8460752319344831,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.8265,
"eval_samples_per_second": 188.883,
"eval_steps_per_second": 6.022,
"step": 1479
},
{
"epoch": 61.03092783505155,
"grad_norm": 10.870781898498535,
"learning_rate": 2.1296296296296296e-05,
"loss": 0.0954,
"step": 1480
},
{
"epoch": 61.44329896907217,
"grad_norm": 10.188617706298828,
"learning_rate": 2.1064814814814816e-05,
"loss": 0.0942,
"step": 1490
},
{
"epoch": 61.855670103092784,
"grad_norm": 6.4580302238464355,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.0848,
"step": 1500
},
{
"epoch": 61.97938144329897,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5842954516410828,
"eval_precision": 0.8569219850916401,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.8368,
"eval_samples_per_second": 187.828,
"eval_steps_per_second": 5.989,
"step": 1503
},
{
"epoch": 62.2680412371134,
"grad_norm": 13.236536979675293,
"learning_rate": 2.0601851851851853e-05,
"loss": 0.0993,
"step": 1510
},
{
"epoch": 62.68041237113402,
"grad_norm": 11.377030372619629,
"learning_rate": 2.037037037037037e-05,
"loss": 0.0976,
"step": 1520
},
{
"epoch": 62.96907216494845,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.6161760687828064,
"eval_precision": 0.8454310204706964,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.7609,
"eval_samples_per_second": 195.923,
"eval_steps_per_second": 6.247,
"step": 1527
},
{
"epoch": 63.09278350515464,
"grad_norm": 9.68355655670166,
"learning_rate": 2.013888888888889e-05,
"loss": 0.0788,
"step": 1530
},
{
"epoch": 63.50515463917526,
"grad_norm": 6.282276153564453,
"learning_rate": 1.990740740740741e-05,
"loss": 0.103,
"step": 1540
},
{
"epoch": 63.91752577319588,
"grad_norm": 4.893520832061768,
"learning_rate": 1.967592592592593e-05,
"loss": 0.0977,
"step": 1550
},
{
"epoch": 64.0,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.5822046995162964,
"eval_precision": 0.8468574730482583,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.8068,
"eval_samples_per_second": 190.942,
"eval_steps_per_second": 6.088,
"step": 1552
},
{
"epoch": 64.3298969072165,
"grad_norm": 10.216239929199219,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.1112,
"step": 1560
},
{
"epoch": 64.74226804123711,
"grad_norm": 22.551631927490234,
"learning_rate": 1.9212962962962962e-05,
"loss": 0.1256,
"step": 1570
},
{
"epoch": 64.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.575657308101654,
"eval_precision": 0.851359361697526,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8317,
"eval_samples_per_second": 188.346,
"eval_steps_per_second": 6.005,
"step": 1576
},
{
"epoch": 65.15463917525773,
"grad_norm": 6.853829383850098,
"learning_rate": 1.8981481481481482e-05,
"loss": 0.096,
"step": 1580
},
{
"epoch": 65.56701030927834,
"grad_norm": 14.361750602722168,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.0942,
"step": 1590
},
{
"epoch": 65.97938144329896,
"grad_norm": 9.966873168945312,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.0883,
"step": 1600
},
{
"epoch": 65.97938144329896,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.5716322660446167,
"eval_precision": 0.8466640969128532,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7836,
"eval_samples_per_second": 193.433,
"eval_steps_per_second": 6.167,
"step": 1600
},
{
"epoch": 66.3917525773196,
"grad_norm": 9.780498504638672,
"learning_rate": 1.8287037037037038e-05,
"loss": 0.0791,
"step": 1610
},
{
"epoch": 66.80412371134021,
"grad_norm": 10.076851844787598,
"learning_rate": 1.8055555555555555e-05,
"loss": 0.0808,
"step": 1620
},
{
"epoch": 66.96907216494846,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5726441144943237,
"eval_precision": 0.8562372477793413,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.782,
"eval_samples_per_second": 193.608,
"eval_steps_per_second": 6.173,
"step": 1624
},
{
"epoch": 67.21649484536083,
"grad_norm": 10.814988136291504,
"learning_rate": 1.7824074074074075e-05,
"loss": 0.0604,
"step": 1630
},
{
"epoch": 67.62886597938144,
"grad_norm": 14.779629707336426,
"learning_rate": 1.7592592592592595e-05,
"loss": 0.1034,
"step": 1640
},
{
"epoch": 68.0,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5412786602973938,
"eval_precision": 0.8548742107305042,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.8607,
"eval_samples_per_second": 185.418,
"eval_steps_per_second": 5.912,
"step": 1649
},
{
"epoch": 68.04123711340206,
"grad_norm": 7.925902843475342,
"learning_rate": 1.736111111111111e-05,
"loss": 0.098,
"step": 1650
},
{
"epoch": 68.45360824742268,
"grad_norm": 8.179915428161621,
"learning_rate": 1.712962962962963e-05,
"loss": 0.0871,
"step": 1660
},
{
"epoch": 68.8659793814433,
"grad_norm": 8.375000953674316,
"learning_rate": 1.6898148148148148e-05,
"loss": 0.0845,
"step": 1670
},
{
"epoch": 68.98969072164948,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.5826108455657959,
"eval_precision": 0.8476663926581475,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.8967,
"eval_samples_per_second": 181.896,
"eval_steps_per_second": 5.8,
"step": 1673
},
{
"epoch": 69.27835051546391,
"grad_norm": 8.613913536071777,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0911,
"step": 1680
},
{
"epoch": 69.69072164948453,
"grad_norm": 9.535558700561523,
"learning_rate": 1.6435185185185187e-05,
"loss": 0.0916,
"step": 1690
},
{
"epoch": 69.97938144329896,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.566058337688446,
"eval_precision": 0.8522049189345976,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.7731,
"eval_samples_per_second": 194.574,
"eval_steps_per_second": 6.204,
"step": 1697
},
{
"epoch": 70.10309278350516,
"grad_norm": 7.769627571105957,
"learning_rate": 1.6203703703703704e-05,
"loss": 0.1011,
"step": 1700
},
{
"epoch": 70.51546391752578,
"grad_norm": 9.350245475769043,
"learning_rate": 1.597222222222222e-05,
"loss": 0.0896,
"step": 1710
},
{
"epoch": 70.9278350515464,
"grad_norm": 11.536579132080078,
"learning_rate": 1.574074074074074e-05,
"loss": 0.0912,
"step": 1720
},
{
"epoch": 70.96907216494846,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5770707130432129,
"eval_precision": 0.84979303172866,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.817,
"eval_samples_per_second": 189.875,
"eval_steps_per_second": 6.054,
"step": 1721
},
{
"epoch": 71.34020618556701,
"grad_norm": 15.122323989868164,
"learning_rate": 1.550925925925926e-05,
"loss": 0.0995,
"step": 1730
},
{
"epoch": 71.75257731958763,
"grad_norm": 12.938358306884766,
"learning_rate": 1.527777777777778e-05,
"loss": 0.0863,
"step": 1740
},
{
"epoch": 72.0,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5769326686859131,
"eval_precision": 0.8550354692908756,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.8313,
"eval_samples_per_second": 188.386,
"eval_steps_per_second": 6.007,
"step": 1746
},
{
"epoch": 72.16494845360825,
"grad_norm": 6.935812950134277,
"learning_rate": 1.5046296296296297e-05,
"loss": 0.0731,
"step": 1750
},
{
"epoch": 72.57731958762886,
"grad_norm": 10.120232582092285,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.1101,
"step": 1760
},
{
"epoch": 72.98969072164948,
"grad_norm": 5.746927738189697,
"learning_rate": 1.4583333333333335e-05,
"loss": 0.083,
"step": 1770
},
{
"epoch": 72.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5860167145729065,
"eval_precision": 0.8486187988428825,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8602,
"eval_samples_per_second": 185.466,
"eval_steps_per_second": 5.913,
"step": 1770
},
{
"epoch": 73.4020618556701,
"grad_norm": 14.205853462219238,
"learning_rate": 1.4351851851851853e-05,
"loss": 0.1003,
"step": 1780
},
{
"epoch": 73.81443298969072,
"grad_norm": 6.671767711639404,
"learning_rate": 1.412037037037037e-05,
"loss": 0.0839,
"step": 1790
},
{
"epoch": 73.97938144329896,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5647125244140625,
"eval_precision": 0.8550673486786019,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.843,
"eval_samples_per_second": 187.195,
"eval_steps_per_second": 5.969,
"step": 1794
},
{
"epoch": 74.22680412371135,
"grad_norm": 6.19529914855957,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0798,
"step": 1800
},
{
"epoch": 74.63917525773196,
"grad_norm": 13.039739608764648,
"learning_rate": 1.3657407407407408e-05,
"loss": 0.0903,
"step": 1810
},
{
"epoch": 74.96907216494846,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.601210355758667,
"eval_precision": 0.8534831427546733,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.7476,
"eval_samples_per_second": 197.417,
"eval_steps_per_second": 6.294,
"step": 1818
},
{
"epoch": 75.05154639175258,
"grad_norm": 6.386416435241699,
"learning_rate": 1.3425925925925928e-05,
"loss": 0.0872,
"step": 1820
},
{
"epoch": 75.4639175257732,
"grad_norm": 7.484694957733154,
"learning_rate": 1.3194444444444446e-05,
"loss": 0.0751,
"step": 1830
},
{
"epoch": 75.87628865979381,
"grad_norm": 10.781839370727539,
"learning_rate": 1.2962962962962962e-05,
"loss": 0.074,
"step": 1840
},
{
"epoch": 76.0,
"eval_accuracy": 0.8463768115942029,
"eval_loss": 0.6048101186752319,
"eval_precision": 0.8461499789126601,
"eval_recall": 0.8463768115942029,
"eval_runtime": 1.7696,
"eval_samples_per_second": 194.962,
"eval_steps_per_second": 6.216,
"step": 1843
},
{
"epoch": 76.28865979381443,
"grad_norm": 17.32390022277832,
"learning_rate": 1.2731481481481482e-05,
"loss": 0.0943,
"step": 1850
},
{
"epoch": 76.70103092783505,
"grad_norm": 12.162288665771484,
"learning_rate": 1.25e-05,
"loss": 0.0907,
"step": 1860
},
{
"epoch": 76.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5806660056114197,
"eval_precision": 0.8495330403324792,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7482,
"eval_samples_per_second": 197.35,
"eval_steps_per_second": 6.292,
"step": 1867
},
{
"epoch": 77.11340206185567,
"grad_norm": 6.960859298706055,
"learning_rate": 1.2268518518518519e-05,
"loss": 0.0748,
"step": 1870
},
{
"epoch": 77.52577319587628,
"grad_norm": 14.269356727600098,
"learning_rate": 1.2037037037037037e-05,
"loss": 0.0781,
"step": 1880
},
{
"epoch": 77.9381443298969,
"grad_norm": 6.466542720794678,
"learning_rate": 1.1805555555555555e-05,
"loss": 0.0613,
"step": 1890
},
{
"epoch": 77.97938144329896,
"eval_accuracy": 0.8376811594202899,
"eval_loss": 0.5774852633476257,
"eval_precision": 0.8381818122940702,
"eval_recall": 0.8376811594202899,
"eval_runtime": 1.7656,
"eval_samples_per_second": 195.404,
"eval_steps_per_second": 6.23,
"step": 1891
},
{
"epoch": 78.35051546391753,
"grad_norm": 16.949039459228516,
"learning_rate": 1.1574074074074075e-05,
"loss": 0.0783,
"step": 1900
},
{
"epoch": 78.76288659793815,
"grad_norm": 5.50955057144165,
"learning_rate": 1.1342592592592593e-05,
"loss": 0.0964,
"step": 1910
},
{
"epoch": 78.96907216494846,
"eval_accuracy": 0.8666666666666667,
"eval_loss": 0.5758916735649109,
"eval_precision": 0.8675733846947259,
"eval_recall": 0.8666666666666667,
"eval_runtime": 1.7818,
"eval_samples_per_second": 193.62,
"eval_steps_per_second": 6.173,
"step": 1915
},
{
"epoch": 79.17525773195877,
"grad_norm": 7.778840065002441,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.0775,
"step": 1920
},
{
"epoch": 79.58762886597938,
"grad_norm": 10.63167667388916,
"learning_rate": 1.087962962962963e-05,
"loss": 0.0849,
"step": 1930
},
{
"epoch": 80.0,
"grad_norm": 10.529654502868652,
"learning_rate": 1.0648148148148148e-05,
"loss": 0.0735,
"step": 1940
},
{
"epoch": 80.0,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.5961835384368896,
"eval_precision": 0.8565539653910103,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.7657,
"eval_samples_per_second": 195.391,
"eval_steps_per_second": 6.23,
"step": 1940
},
{
"epoch": 80.41237113402062,
"grad_norm": 10.91960334777832,
"learning_rate": 1.0416666666666668e-05,
"loss": 0.0803,
"step": 1950
},
{
"epoch": 80.82474226804123,
"grad_norm": 6.953213691711426,
"learning_rate": 1.0185185185185185e-05,
"loss": 0.0663,
"step": 1960
},
{
"epoch": 80.98969072164948,
"eval_accuracy": 0.8434782608695652,
"eval_loss": 0.5768997669219971,
"eval_precision": 0.8441240738989768,
"eval_recall": 0.8434782608695652,
"eval_runtime": 1.8615,
"eval_samples_per_second": 185.334,
"eval_steps_per_second": 5.909,
"step": 1964
},
{
"epoch": 81.23711340206185,
"grad_norm": 14.6912841796875,
"learning_rate": 9.953703703703704e-06,
"loss": 0.0756,
"step": 1970
},
{
"epoch": 81.64948453608247,
"grad_norm": 11.421167373657227,
"learning_rate": 9.722222222222223e-06,
"loss": 0.0719,
"step": 1980
},
{
"epoch": 81.97938144329896,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5826414823532104,
"eval_precision": 0.8506964547245877,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8427,
"eval_samples_per_second": 187.221,
"eval_steps_per_second": 5.969,
"step": 1988
},
{
"epoch": 82.0618556701031,
"grad_norm": 16.955421447753906,
"learning_rate": 9.490740740740741e-06,
"loss": 0.0756,
"step": 1990
},
{
"epoch": 82.47422680412372,
"grad_norm": 13.900518417358398,
"learning_rate": 9.259259259259259e-06,
"loss": 0.0683,
"step": 2000
},
{
"epoch": 82.88659793814433,
"grad_norm": 9.04283618927002,
"learning_rate": 9.027777777777777e-06,
"loss": 0.0718,
"step": 2010
},
{
"epoch": 82.96907216494846,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.5879714488983154,
"eval_precision": 0.8590052571684228,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.7802,
"eval_samples_per_second": 193.802,
"eval_steps_per_second": 6.179,
"step": 2012
},
{
"epoch": 83.29896907216495,
"grad_norm": 8.817221641540527,
"learning_rate": 8.796296296296297e-06,
"loss": 0.0699,
"step": 2020
},
{
"epoch": 83.71134020618557,
"grad_norm": 9.379308700561523,
"learning_rate": 8.564814814814816e-06,
"loss": 0.0925,
"step": 2030
},
{
"epoch": 84.0,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5986330509185791,
"eval_precision": 0.8512692229678578,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7681,
"eval_samples_per_second": 195.129,
"eval_steps_per_second": 6.221,
"step": 2037
},
{
"epoch": 84.12371134020619,
"grad_norm": 8.215590476989746,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0617,
"step": 2040
},
{
"epoch": 84.5360824742268,
"grad_norm": 5.024844169616699,
"learning_rate": 8.101851851851852e-06,
"loss": 0.0729,
"step": 2050
},
{
"epoch": 84.94845360824742,
"grad_norm": 9.782211303710938,
"learning_rate": 7.87037037037037e-06,
"loss": 0.0621,
"step": 2060
},
{
"epoch": 84.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5914923548698425,
"eval_precision": 0.8496762597563219,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7614,
"eval_samples_per_second": 195.868,
"eval_steps_per_second": 6.245,
"step": 2061
},
{
"epoch": 85.36082474226804,
"grad_norm": 7.3921942710876465,
"learning_rate": 7.63888888888889e-06,
"loss": 0.0621,
"step": 2070
},
{
"epoch": 85.77319587628865,
"grad_norm": 10.206525802612305,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.059,
"step": 2080
},
{
"epoch": 85.97938144329896,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.577899694442749,
"eval_precision": 0.8577329472646936,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8903,
"eval_samples_per_second": 182.511,
"eval_steps_per_second": 5.819,
"step": 2085
},
{
"epoch": 86.18556701030928,
"grad_norm": 18.180044174194336,
"learning_rate": 7.1759259259259266e-06,
"loss": 0.0663,
"step": 2090
},
{
"epoch": 86.5979381443299,
"grad_norm": 10.320213317871094,
"learning_rate": 6.944444444444445e-06,
"loss": 0.0806,
"step": 2100
},
{
"epoch": 86.96907216494846,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.5928123593330383,
"eval_precision": 0.850145540799145,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8068,
"eval_samples_per_second": 190.946,
"eval_steps_per_second": 6.088,
"step": 2109
},
{
"epoch": 87.01030927835052,
"grad_norm": 13.640397071838379,
"learning_rate": 6.712962962962964e-06,
"loss": 0.0581,
"step": 2110
},
{
"epoch": 87.42268041237114,
"grad_norm": 9.787714004516602,
"learning_rate": 6.481481481481481e-06,
"loss": 0.0641,
"step": 2120
},
{
"epoch": 87.83505154639175,
"grad_norm": 7.827996730804443,
"learning_rate": 6.25e-06,
"loss": 0.0617,
"step": 2130
},
{
"epoch": 88.0,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.606200098991394,
"eval_precision": 0.8519519771693684,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.7968,
"eval_samples_per_second": 192.013,
"eval_steps_per_second": 6.122,
"step": 2134
},
{
"epoch": 88.24742268041237,
"grad_norm": 10.409219741821289,
"learning_rate": 6.0185185185185185e-06,
"loss": 0.0677,
"step": 2140
},
{
"epoch": 88.65979381443299,
"grad_norm": 13.120059967041016,
"learning_rate": 5.787037037037038e-06,
"loss": 0.0651,
"step": 2150
},
{
"epoch": 88.98969072164948,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.6067116260528564,
"eval_precision": 0.8518690976003952,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8144,
"eval_samples_per_second": 190.144,
"eval_steps_per_second": 6.063,
"step": 2158
},
{
"epoch": 89.0721649484536,
"grad_norm": 8.974705696105957,
"learning_rate": 5.555555555555556e-06,
"loss": 0.0672,
"step": 2160
},
{
"epoch": 89.48453608247422,
"grad_norm": 13.397907257080078,
"learning_rate": 5.324074074074074e-06,
"loss": 0.0727,
"step": 2170
},
{
"epoch": 89.89690721649484,
"grad_norm": 4.159496784210205,
"learning_rate": 5.092592592592592e-06,
"loss": 0.0754,
"step": 2180
},
{
"epoch": 89.97938144329896,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.6107772588729858,
"eval_precision": 0.8553431503660337,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.7776,
"eval_samples_per_second": 194.084,
"eval_steps_per_second": 6.188,
"step": 2182
},
{
"epoch": 90.30927835051547,
"grad_norm": 11.130279541015625,
"learning_rate": 4.861111111111111e-06,
"loss": 0.079,
"step": 2190
},
{
"epoch": 90.72164948453609,
"grad_norm": 13.203577995300293,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.0682,
"step": 2200
},
{
"epoch": 90.96907216494846,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.618496298789978,
"eval_precision": 0.8488872700953353,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7798,
"eval_samples_per_second": 193.847,
"eval_steps_per_second": 6.181,
"step": 2206
},
{
"epoch": 91.1340206185567,
"grad_norm": 10.04045581817627,
"learning_rate": 4.398148148148149e-06,
"loss": 0.0699,
"step": 2210
},
{
"epoch": 91.54639175257732,
"grad_norm": 2.500128984451294,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0664,
"step": 2220
},
{
"epoch": 91.95876288659794,
"grad_norm": 9.432464599609375,
"learning_rate": 3.935185185185185e-06,
"loss": 0.0763,
"step": 2230
},
{
"epoch": 92.0,
"eval_accuracy": 0.8579710144927536,
"eval_loss": 0.6168191432952881,
"eval_precision": 0.8575139456543875,
"eval_recall": 0.8579710144927536,
"eval_runtime": 1.8002,
"eval_samples_per_second": 191.65,
"eval_steps_per_second": 6.111,
"step": 2231
},
{
"epoch": 92.37113402061856,
"grad_norm": 9.279271125793457,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.0742,
"step": 2240
},
{
"epoch": 92.78350515463917,
"grad_norm": 19.246337890625,
"learning_rate": 3.4722222222222224e-06,
"loss": 0.0703,
"step": 2250
},
{
"epoch": 92.98969072164948,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.6258795261383057,
"eval_precision": 0.8520768323971984,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8416,
"eval_samples_per_second": 187.341,
"eval_steps_per_second": 5.973,
"step": 2255
},
{
"epoch": 93.19587628865979,
"grad_norm": 5.38301420211792,
"learning_rate": 3.2407407407407406e-06,
"loss": 0.0559,
"step": 2260
},
{
"epoch": 93.6082474226804,
"grad_norm": 7.105731964111328,
"learning_rate": 3.0092592592592593e-06,
"loss": 0.0861,
"step": 2270
},
{
"epoch": 93.97938144329896,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.6128158569335938,
"eval_precision": 0.8553431503660337,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.776,
"eval_samples_per_second": 194.252,
"eval_steps_per_second": 6.194,
"step": 2279
},
{
"epoch": 94.02061855670104,
"grad_norm": 14.296255111694336,
"learning_rate": 2.777777777777778e-06,
"loss": 0.089,
"step": 2280
},
{
"epoch": 94.43298969072166,
"grad_norm": 11.694154739379883,
"learning_rate": 2.546296296296296e-06,
"loss": 0.07,
"step": 2290
},
{
"epoch": 94.84536082474227,
"grad_norm": 8.240065574645996,
"learning_rate": 2.3148148148148148e-06,
"loss": 0.0807,
"step": 2300
},
{
"epoch": 94.96907216494846,
"eval_accuracy": 0.855072463768116,
"eval_loss": 0.6139995455741882,
"eval_precision": 0.8546533219302098,
"eval_recall": 0.855072463768116,
"eval_runtime": 1.763,
"eval_samples_per_second": 195.691,
"eval_steps_per_second": 6.239,
"step": 2303
},
{
"epoch": 95.25773195876289,
"grad_norm": 6.740184307098389,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.0814,
"step": 2310
},
{
"epoch": 95.6701030927835,
"grad_norm": 9.714829444885254,
"learning_rate": 1.8518518518518519e-06,
"loss": 0.0621,
"step": 2320
},
{
"epoch": 96.0,
"eval_accuracy": 0.8521739130434782,
"eval_loss": 0.6132925748825073,
"eval_precision": 0.8531657869027159,
"eval_recall": 0.8521739130434782,
"eval_runtime": 1.8081,
"eval_samples_per_second": 190.808,
"eval_steps_per_second": 6.084,
"step": 2328
},
{
"epoch": 96.08247422680412,
"grad_norm": 11.212587356567383,
"learning_rate": 1.6203703703703703e-06,
"loss": 0.065,
"step": 2330
},
{
"epoch": 96.49484536082474,
"grad_norm": 5.428162097930908,
"learning_rate": 1.388888888888889e-06,
"loss": 0.0621,
"step": 2340
},
{
"epoch": 96.90721649484536,
"grad_norm": 15.444799423217773,
"learning_rate": 1.1574074074074074e-06,
"loss": 0.0831,
"step": 2350
},
{
"epoch": 96.98969072164948,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.6100958585739136,
"eval_precision": 0.8507158478342087,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7991,
"eval_samples_per_second": 191.765,
"eval_steps_per_second": 6.114,
"step": 2352
},
{
"epoch": 97.31958762886597,
"grad_norm": 12.789685249328613,
"learning_rate": 9.259259259259259e-07,
"loss": 0.0584,
"step": 2360
},
{
"epoch": 97.73195876288659,
"grad_norm": 9.271283149719238,
"learning_rate": 6.944444444444445e-07,
"loss": 0.0625,
"step": 2370
},
{
"epoch": 97.97938144329896,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.6096817851066589,
"eval_precision": 0.8507158478342087,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.8191,
"eval_samples_per_second": 189.651,
"eval_steps_per_second": 6.047,
"step": 2376
},
{
"epoch": 98.14432989690722,
"grad_norm": 10.486361503601074,
"learning_rate": 4.6296296296296297e-07,
"loss": 0.0563,
"step": 2380
},
{
"epoch": 98.55670103092784,
"grad_norm": 4.260477066040039,
"learning_rate": 2.3148148148148148e-07,
"loss": 0.0648,
"step": 2390
},
{
"epoch": 98.96907216494846,
"grad_norm": 8.932230949401855,
"learning_rate": 0.0,
"loss": 0.0571,
"step": 2400
},
{
"epoch": 98.96907216494846,
"eval_accuracy": 0.8492753623188406,
"eval_loss": 0.6083797812461853,
"eval_precision": 0.8507158478342087,
"eval_recall": 0.8492753623188406,
"eval_runtime": 1.7521,
"eval_samples_per_second": 196.912,
"eval_steps_per_second": 6.278,
"step": 2400
},
{
"epoch": 98.96907216494846,
"step": 2400,
"total_flos": 7.732715563096474e+18,
"train_loss": 0.2344164727628231,
"train_runtime": 4723.8268,
"train_samples_per_second": 65.709,
"train_steps_per_second": 0.508
}
],
"logging_steps": 10,
"max_steps": 2400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.732715563096474e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}