|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 98.96907216494846, |
|
"eval_steps": 500, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 5.916716575622559, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 1.6297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 5.051618576049805, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.613, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9896907216494846, |
|
"eval_accuracy": 0.2927536231884058, |
|
"eval_loss": 1.5833344459533691, |
|
"eval_precision": 0.3247879943590829, |
|
"eval_recall": 0.2927536231884058, |
|
"eval_runtime": 2.9495, |
|
"eval_samples_per_second": 116.97, |
|
"eval_steps_per_second": 3.729, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 4.8794169425964355, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.5792, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 6.336801052093506, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.5494, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.9793814432989691, |
|
"eval_accuracy": 0.3681159420289855, |
|
"eval_loss": 1.4944071769714355, |
|
"eval_precision": 0.440954469667821, |
|
"eval_recall": 0.3681159420289855, |
|
"eval_runtime": 1.7863, |
|
"eval_samples_per_second": 193.135, |
|
"eval_steps_per_second": 6.158, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 8.574434280395508, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 1.5014, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4742268041237114, |
|
"grad_norm": 6.564225673675537, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.4422, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.88659793814433, |
|
"grad_norm": 5.804593086242676, |
|
"learning_rate": 1.4583333333333335e-05, |
|
"loss": 1.3989, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.9690721649484537, |
|
"eval_accuracy": 0.5159420289855072, |
|
"eval_loss": 1.3423842191696167, |
|
"eval_precision": 0.52619860815513, |
|
"eval_recall": 0.5159420289855072, |
|
"eval_runtime": 1.8303, |
|
"eval_samples_per_second": 188.493, |
|
"eval_steps_per_second": 6.01, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.2989690721649483, |
|
"grad_norm": 6.893215656280518, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.2968, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.711340206185567, |
|
"grad_norm": 12.37126350402832, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 1.2238, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6260869565217392, |
|
"eval_loss": 1.1162269115447998, |
|
"eval_precision": 0.6665610702002287, |
|
"eval_recall": 0.6260869565217392, |
|
"eval_runtime": 1.8634, |
|
"eval_samples_per_second": 185.144, |
|
"eval_steps_per_second": 5.903, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 4.123711340206185, |
|
"grad_norm": 6.501392841339111, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 1.1194, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.536082474226804, |
|
"grad_norm": 14.653229713439941, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 1.0499, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.948453608247423, |
|
"grad_norm": 15.2618408203125, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.9585, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.989690721649485, |
|
"eval_accuracy": 0.6985507246376812, |
|
"eval_loss": 0.8966168761253357, |
|
"eval_precision": 0.7013922738306568, |
|
"eval_recall": 0.6985507246376812, |
|
"eval_runtime": 1.8339, |
|
"eval_samples_per_second": 188.12, |
|
"eval_steps_per_second": 5.998, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.360824742268041, |
|
"grad_norm": 12.275806427001953, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 0.8986, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.77319587628866, |
|
"grad_norm": 15.373220443725586, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.8934, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 5.979381443298969, |
|
"eval_accuracy": 0.7507246376811594, |
|
"eval_loss": 0.763816773891449, |
|
"eval_precision": 0.7489666881245252, |
|
"eval_recall": 0.7507246376811594, |
|
"eval_runtime": 1.9332, |
|
"eval_samples_per_second": 178.459, |
|
"eval_steps_per_second": 5.69, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.185567010309279, |
|
"grad_norm": 15.394486427307129, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.8326, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.597938144329897, |
|
"grad_norm": 14.27376937866211, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7589, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.969072164948454, |
|
"eval_accuracy": 0.7652173913043478, |
|
"eval_loss": 0.6776081919670105, |
|
"eval_precision": 0.771906259033061, |
|
"eval_recall": 0.7652173913043478, |
|
"eval_runtime": 1.836, |
|
"eval_samples_per_second": 187.91, |
|
"eval_steps_per_second": 5.991, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 7.010309278350515, |
|
"grad_norm": 21.43760871887207, |
|
"learning_rate": 3.541666666666667e-05, |
|
"loss": 0.7404, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 7.422680412371134, |
|
"grad_norm": 15.207581520080566, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.653, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.835051546391752, |
|
"grad_norm": 25.153663635253906, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 0.6746, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7623188405797101, |
|
"eval_loss": 0.6126735210418701, |
|
"eval_precision": 0.7628428431334807, |
|
"eval_recall": 0.7623188405797101, |
|
"eval_runtime": 1.8501, |
|
"eval_samples_per_second": 186.474, |
|
"eval_steps_per_second": 5.946, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 8.24742268041237, |
|
"grad_norm": 23.2750301361084, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6516, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.65979381443299, |
|
"grad_norm": 21.777841567993164, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.6048, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.989690721649485, |
|
"eval_accuracy": 0.8202898550724638, |
|
"eval_loss": 0.5220813751220703, |
|
"eval_precision": 0.8216835971752063, |
|
"eval_recall": 0.8202898550724638, |
|
"eval_runtime": 1.8243, |
|
"eval_samples_per_second": 189.114, |
|
"eval_steps_per_second": 6.03, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 9.072164948453608, |
|
"grad_norm": 15.630614280700684, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.5723, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.484536082474227, |
|
"grad_norm": 13.571239471435547, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 0.5436, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.896907216494846, |
|
"grad_norm": 24.206087112426758, |
|
"learning_rate": 5e-05, |
|
"loss": 0.531, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.97938144329897, |
|
"eval_accuracy": 0.8115942028985508, |
|
"eval_loss": 0.4930874705314636, |
|
"eval_precision": 0.8203605371226137, |
|
"eval_recall": 0.8115942028985508, |
|
"eval_runtime": 1.788, |
|
"eval_samples_per_second": 192.958, |
|
"eval_steps_per_second": 6.152, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 10.309278350515465, |
|
"grad_norm": 17.16573715209961, |
|
"learning_rate": 4.976851851851852e-05, |
|
"loss": 0.5034, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.721649484536082, |
|
"grad_norm": 19.933942794799805, |
|
"learning_rate": 4.9537037037037035e-05, |
|
"loss": 0.57, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 10.969072164948454, |
|
"eval_accuracy": 0.8318840579710145, |
|
"eval_loss": 0.44795188307762146, |
|
"eval_precision": 0.8344579895060443, |
|
"eval_recall": 0.8318840579710145, |
|
"eval_runtime": 1.8183, |
|
"eval_samples_per_second": 189.733, |
|
"eval_steps_per_second": 6.049, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 11.1340206185567, |
|
"grad_norm": 25.91600799560547, |
|
"learning_rate": 4.930555555555556e-05, |
|
"loss": 0.4791, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.54639175257732, |
|
"grad_norm": 23.493484497070312, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 0.4372, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 11.958762886597938, |
|
"grad_norm": 14.273780822753906, |
|
"learning_rate": 4.8842592592592595e-05, |
|
"loss": 0.4624, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.42139920592308044, |
|
"eval_precision": 0.846014277166443, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7884, |
|
"eval_samples_per_second": 192.914, |
|
"eval_steps_per_second": 6.151, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 12.371134020618557, |
|
"grad_norm": 26.43771743774414, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.4509, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.783505154639176, |
|
"grad_norm": 29.501718521118164, |
|
"learning_rate": 4.837962962962963e-05, |
|
"loss": 0.417, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 12.989690721649485, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.44392213225364685, |
|
"eval_precision": 0.8485676738054103, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.762, |
|
"eval_samples_per_second": 195.797, |
|
"eval_steps_per_second": 6.243, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 13.195876288659793, |
|
"grad_norm": 16.380001068115234, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.4042, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 13.608247422680412, |
|
"grad_norm": 26.098731994628906, |
|
"learning_rate": 4.791666666666667e-05, |
|
"loss": 0.3814, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 13.97938144329897, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.41379421949386597, |
|
"eval_precision": 0.8477774513274812, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7998, |
|
"eval_samples_per_second": 191.689, |
|
"eval_steps_per_second": 6.112, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 14.02061855670103, |
|
"grad_norm": 13.136883735656738, |
|
"learning_rate": 4.768518518518519e-05, |
|
"loss": 0.4209, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 14.43298969072165, |
|
"grad_norm": 18.104930877685547, |
|
"learning_rate": 4.745370370370371e-05, |
|
"loss": 0.3817, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 14.845360824742269, |
|
"grad_norm": 27.79136848449707, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.3737, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 14.969072164948454, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.41388532519340515, |
|
"eval_precision": 0.8466409143288909, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.8854, |
|
"eval_samples_per_second": 182.983, |
|
"eval_steps_per_second": 5.834, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 15.257731958762886, |
|
"grad_norm": 33.14027786254883, |
|
"learning_rate": 4.699074074074074e-05, |
|
"loss": 0.3782, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 15.670103092783505, |
|
"grad_norm": 10.574623107910156, |
|
"learning_rate": 4.675925925925926e-05, |
|
"loss": 0.3971, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.863768115942029, |
|
"eval_loss": 0.4119352400302887, |
|
"eval_precision": 0.8664915871553495, |
|
"eval_recall": 0.863768115942029, |
|
"eval_runtime": 1.8638, |
|
"eval_samples_per_second": 185.11, |
|
"eval_steps_per_second": 5.902, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 16.082474226804123, |
|
"grad_norm": 14.796497344970703, |
|
"learning_rate": 4.652777777777778e-05, |
|
"loss": 0.3227, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 16.49484536082474, |
|
"grad_norm": 13.750545501708984, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.306, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.90721649484536, |
|
"grad_norm": 15.056818962097168, |
|
"learning_rate": 4.6064814814814814e-05, |
|
"loss": 0.343, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 16.989690721649485, |
|
"eval_accuracy": 0.8608695652173913, |
|
"eval_loss": 0.4421471655368805, |
|
"eval_precision": 0.8659298079116737, |
|
"eval_recall": 0.8608695652173913, |
|
"eval_runtime": 1.7876, |
|
"eval_samples_per_second": 192.996, |
|
"eval_steps_per_second": 6.154, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 17.31958762886598, |
|
"grad_norm": 19.41351318359375, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.3383, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 17.7319587628866, |
|
"grad_norm": 22.833810806274414, |
|
"learning_rate": 4.5601851851851854e-05, |
|
"loss": 0.3311, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 17.97938144329897, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.45808833837509155, |
|
"eval_precision": 0.8503668982654489, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8173, |
|
"eval_samples_per_second": 189.846, |
|
"eval_steps_per_second": 6.053, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 18.144329896907216, |
|
"grad_norm": 9.80312442779541, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.301, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 18.556701030927837, |
|
"grad_norm": 17.442903518676758, |
|
"learning_rate": 4.5138888888888894e-05, |
|
"loss": 0.2594, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 18.969072164948454, |
|
"grad_norm": 25.01900863647461, |
|
"learning_rate": 4.490740740740741e-05, |
|
"loss": 0.2652, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 18.969072164948454, |
|
"eval_accuracy": 0.8405797101449275, |
|
"eval_loss": 0.4563068747520447, |
|
"eval_precision": 0.8441116322796441, |
|
"eval_recall": 0.8405797101449275, |
|
"eval_runtime": 1.8121, |
|
"eval_samples_per_second": 190.387, |
|
"eval_steps_per_second": 6.07, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 19.38144329896907, |
|
"grad_norm": 22.951929092407227, |
|
"learning_rate": 4.467592592592593e-05, |
|
"loss": 0.2726, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 19.79381443298969, |
|
"grad_norm": 17.189971923828125, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3026, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.4535578489303589, |
|
"eval_precision": 0.8549145070160367, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8156, |
|
"eval_samples_per_second": 190.019, |
|
"eval_steps_per_second": 6.059, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 20.20618556701031, |
|
"grad_norm": 19.29929542541504, |
|
"learning_rate": 4.4212962962962966e-05, |
|
"loss": 0.2808, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.61855670103093, |
|
"grad_norm": 23.201435089111328, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.2562, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.989690721649485, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.44093257188796997, |
|
"eval_precision": 0.8493084398986088, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.9468, |
|
"eval_samples_per_second": 177.217, |
|
"eval_steps_per_second": 5.65, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 21.030927835051546, |
|
"grad_norm": 12.947028160095215, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.2739, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 21.443298969072163, |
|
"grad_norm": 21.544536590576172, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.2383, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 21.855670103092784, |
|
"grad_norm": 12.224617958068848, |
|
"learning_rate": 4.328703703703704e-05, |
|
"loss": 0.2282, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 21.97938144329897, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.4388555884361267, |
|
"eval_precision": 0.8451190974708183, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.7718, |
|
"eval_samples_per_second": 194.721, |
|
"eval_steps_per_second": 6.208, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 22.2680412371134, |
|
"grad_norm": 17.55919647216797, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.2505, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 22.68041237113402, |
|
"grad_norm": 10.570196151733398, |
|
"learning_rate": 4.282407407407408e-05, |
|
"loss": 0.2374, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 22.969072164948454, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.4452122747898102, |
|
"eval_precision": 0.8589461524849866, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8751, |
|
"eval_samples_per_second": 183.989, |
|
"eval_steps_per_second": 5.866, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 23.09278350515464, |
|
"grad_norm": 25.781587600708008, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.2355, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 23.50515463917526, |
|
"grad_norm": 22.854766845703125, |
|
"learning_rate": 4.236111111111111e-05, |
|
"loss": 0.2553, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 23.917525773195877, |
|
"grad_norm": 15.405595779418945, |
|
"learning_rate": 4.212962962962963e-05, |
|
"loss": 0.216, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.4375264048576355, |
|
"eval_precision": 0.858123097800969, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8051, |
|
"eval_samples_per_second": 191.128, |
|
"eval_steps_per_second": 6.094, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 24.329896907216494, |
|
"grad_norm": 15.453635215759277, |
|
"learning_rate": 4.1898148148148145e-05, |
|
"loss": 0.2019, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 24.742268041237114, |
|
"grad_norm": 12.363275527954102, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.2127, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.989690721649485, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.44218453764915466, |
|
"eval_precision": 0.8587798835624924, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.9062, |
|
"eval_samples_per_second": 180.991, |
|
"eval_steps_per_second": 5.771, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 25.15463917525773, |
|
"grad_norm": 15.13847827911377, |
|
"learning_rate": 4.1435185185185185e-05, |
|
"loss": 0.2301, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 25.567010309278352, |
|
"grad_norm": 20.761062622070312, |
|
"learning_rate": 4.1203703703703705e-05, |
|
"loss": 0.1807, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 25.97938144329897, |
|
"grad_norm": 17.889150619506836, |
|
"learning_rate": 4.0972222222222225e-05, |
|
"loss": 0.2004, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 25.97938144329897, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.46348363161087036, |
|
"eval_precision": 0.8519325944084339, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.7728, |
|
"eval_samples_per_second": 194.609, |
|
"eval_steps_per_second": 6.205, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 26.391752577319586, |
|
"grad_norm": 23.56374168395996, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.2427, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 26.804123711340207, |
|
"grad_norm": 9.772664070129395, |
|
"learning_rate": 4.0509259259259265e-05, |
|
"loss": 0.2029, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 26.969072164948454, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5214529037475586, |
|
"eval_precision": 0.8545500895204992, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.9291, |
|
"eval_samples_per_second": 178.841, |
|
"eval_steps_per_second": 5.702, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 27.216494845360824, |
|
"grad_norm": 14.480449676513672, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.1903, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 27.628865979381445, |
|
"grad_norm": 16.415973663330078, |
|
"learning_rate": 4.00462962962963e-05, |
|
"loss": 0.1794, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.863768115942029, |
|
"eval_loss": 0.47563326358795166, |
|
"eval_precision": 0.8669166767891824, |
|
"eval_recall": 0.863768115942029, |
|
"eval_runtime": 1.7555, |
|
"eval_samples_per_second": 196.529, |
|
"eval_steps_per_second": 6.266, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 28.04123711340206, |
|
"grad_norm": 8.689855575561523, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.1822, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 28.45360824742268, |
|
"grad_norm": 12.505402565002441, |
|
"learning_rate": 3.958333333333333e-05, |
|
"loss": 0.1828, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 28.8659793814433, |
|
"grad_norm": 15.491950988769531, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.1835, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.989690721649485, |
|
"eval_accuracy": 0.8608695652173913, |
|
"eval_loss": 0.4727528393268585, |
|
"eval_precision": 0.8649801117780185, |
|
"eval_recall": 0.8608695652173913, |
|
"eval_runtime": 1.8858, |
|
"eval_samples_per_second": 182.95, |
|
"eval_steps_per_second": 5.833, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 29.278350515463917, |
|
"grad_norm": 16.289226531982422, |
|
"learning_rate": 3.912037037037037e-05, |
|
"loss": 0.1907, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 29.690721649484537, |
|
"grad_norm": 13.304434776306152, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1781, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 29.97938144329897, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.4636934697628021, |
|
"eval_precision": 0.8568131435327558, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.8681, |
|
"eval_samples_per_second": 184.683, |
|
"eval_steps_per_second": 5.888, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 30.103092783505154, |
|
"grad_norm": 6.991786003112793, |
|
"learning_rate": 3.865740740740741e-05, |
|
"loss": 0.1829, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 30.51546391752577, |
|
"grad_norm": 10.514315605163574, |
|
"learning_rate": 3.8425925925925924e-05, |
|
"loss": 0.1627, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 30.927835051546392, |
|
"grad_norm": 9.121224403381348, |
|
"learning_rate": 3.8194444444444444e-05, |
|
"loss": 0.1671, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 30.969072164948454, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.485573947429657, |
|
"eval_precision": 0.8599276434444294, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.9437, |
|
"eval_samples_per_second": 177.497, |
|
"eval_steps_per_second": 5.659, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 31.34020618556701, |
|
"grad_norm": 13.762226104736328, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.1721, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 31.75257731958763, |
|
"grad_norm": 10.415836334228516, |
|
"learning_rate": 3.7731481481481484e-05, |
|
"loss": 0.1762, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5007998943328857, |
|
"eval_precision": 0.8684023473901008, |
|
"eval_recall": 0.8666666666666667, |
|
"eval_runtime": 1.769, |
|
"eval_samples_per_second": 195.026, |
|
"eval_steps_per_second": 6.218, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 32.16494845360825, |
|
"grad_norm": 10.8311767578125, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.1707, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 32.577319587628864, |
|
"grad_norm": 12.070932388305664, |
|
"learning_rate": 3.726851851851852e-05, |
|
"loss": 0.1673, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 32.98969072164948, |
|
"grad_norm": 8.654770851135254, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.1867, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 32.98969072164948, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.5058211088180542, |
|
"eval_precision": 0.8584843785997619, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8394, |
|
"eval_samples_per_second": 187.561, |
|
"eval_steps_per_second": 5.98, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.402061855670105, |
|
"grad_norm": 8.323944091796875, |
|
"learning_rate": 3.6805555555555556e-05, |
|
"loss": 0.1553, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 33.81443298969072, |
|
"grad_norm": 14.134881973266602, |
|
"learning_rate": 3.6574074074074076e-05, |
|
"loss": 0.1409, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 33.97938144329897, |
|
"eval_accuracy": 0.8405797101449275, |
|
"eval_loss": 0.5489646792411804, |
|
"eval_precision": 0.8408524440704116, |
|
"eval_recall": 0.8405797101449275, |
|
"eval_runtime": 1.7738, |
|
"eval_samples_per_second": 194.496, |
|
"eval_steps_per_second": 6.201, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 34.22680412371134, |
|
"grad_norm": 17.74443244934082, |
|
"learning_rate": 3.6342592592592596e-05, |
|
"loss": 0.1498, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 34.63917525773196, |
|
"grad_norm": 14.35798454284668, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.1315, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 34.96907216494845, |
|
"eval_accuracy": 0.8347826086956521, |
|
"eval_loss": 0.528394877910614, |
|
"eval_precision": 0.8356368409524089, |
|
"eval_recall": 0.8347826086956521, |
|
"eval_runtime": 1.8034, |
|
"eval_samples_per_second": 191.304, |
|
"eval_steps_per_second": 6.1, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 35.05154639175258, |
|
"grad_norm": 15.67455005645752, |
|
"learning_rate": 3.587962962962963e-05, |
|
"loss": 0.163, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.4639175257732, |
|
"grad_norm": 6.1969828605651855, |
|
"learning_rate": 3.564814814814815e-05, |
|
"loss": 0.1406, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 35.876288659793815, |
|
"grad_norm": 14.651385307312012, |
|
"learning_rate": 3.541666666666667e-05, |
|
"loss": 0.1315, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.5415348410606384, |
|
"eval_precision": 0.8487979974677805, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7509, |
|
"eval_samples_per_second": 197.042, |
|
"eval_steps_per_second": 6.282, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 36.28865979381443, |
|
"grad_norm": 15.739358901977539, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.1944, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 36.70103092783505, |
|
"grad_norm": 16.889202117919922, |
|
"learning_rate": 3.49537037037037e-05, |
|
"loss": 0.1974, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 36.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.519416332244873, |
|
"eval_precision": 0.8536148561469765, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7833, |
|
"eval_samples_per_second": 193.461, |
|
"eval_steps_per_second": 6.168, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 37.11340206185567, |
|
"grad_norm": 10.011604309082031, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.1605, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.52577319587629, |
|
"grad_norm": 18.694128036499023, |
|
"learning_rate": 3.449074074074074e-05, |
|
"loss": 0.1515, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 37.93814432989691, |
|
"grad_norm": 9.140711784362793, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.1337, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 37.97938144329897, |
|
"eval_accuracy": 0.8608695652173913, |
|
"eval_loss": 0.5088416337966919, |
|
"eval_precision": 0.8602982452483552, |
|
"eval_recall": 0.8608695652173913, |
|
"eval_runtime": 1.7456, |
|
"eval_samples_per_second": 197.634, |
|
"eval_steps_per_second": 6.301, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 38.350515463917525, |
|
"grad_norm": 12.548330307006836, |
|
"learning_rate": 3.402777777777778e-05, |
|
"loss": 0.1439, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 38.76288659793814, |
|
"grad_norm": 12.762455940246582, |
|
"learning_rate": 3.3796296296296295e-05, |
|
"loss": 0.173, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 38.96907216494845, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.4912014305591583, |
|
"eval_precision": 0.867978256170476, |
|
"eval_recall": 0.8666666666666667, |
|
"eval_runtime": 1.8067, |
|
"eval_samples_per_second": 190.96, |
|
"eval_steps_per_second": 6.089, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 39.175257731958766, |
|
"grad_norm": 12.083857536315918, |
|
"learning_rate": 3.3564814814814815e-05, |
|
"loss": 0.1477, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.58762886597938, |
|
"grad_norm": 17.14080238342285, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1285, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 13.190485000610352, |
|
"learning_rate": 3.3101851851851855e-05, |
|
"loss": 0.1409, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5222660899162292, |
|
"eval_precision": 0.8501727809182621, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8482, |
|
"eval_samples_per_second": 186.669, |
|
"eval_steps_per_second": 5.952, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 40.41237113402062, |
|
"grad_norm": 8.88687801361084, |
|
"learning_rate": 3.2870370370370375e-05, |
|
"loss": 0.151, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 40.824742268041234, |
|
"grad_norm": 7.21800422668457, |
|
"learning_rate": 3.263888888888889e-05, |
|
"loss": 0.1379, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 40.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5204349160194397, |
|
"eval_precision": 0.8486749182344644, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8062, |
|
"eval_samples_per_second": 191.006, |
|
"eval_steps_per_second": 6.09, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 41.23711340206186, |
|
"grad_norm": 10.057676315307617, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.1079, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.649484536082475, |
|
"grad_norm": 13.667500495910645, |
|
"learning_rate": 3.217592592592593e-05, |
|
"loss": 0.1437, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 41.97938144329897, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.5860036611557007, |
|
"eval_precision": 0.8550665818648641, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.7468, |
|
"eval_samples_per_second": 197.503, |
|
"eval_steps_per_second": 6.297, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 42.06185567010309, |
|
"grad_norm": 6.985457420349121, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.1521, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 42.47422680412371, |
|
"grad_norm": 16.70668601989746, |
|
"learning_rate": 3.171296296296297e-05, |
|
"loss": 0.1393, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 42.88659793814433, |
|
"grad_norm": 6.907033920288086, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.1022, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 42.96907216494845, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.5460776686668396, |
|
"eval_precision": 0.8491763964495722, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7961, |
|
"eval_samples_per_second": 192.078, |
|
"eval_steps_per_second": 6.124, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 43.29896907216495, |
|
"grad_norm": 9.046392440795898, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.1385, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.71134020618557, |
|
"grad_norm": 10.188021659851074, |
|
"learning_rate": 3.101851851851852e-05, |
|
"loss": 0.1181, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.541079044342041, |
|
"eval_precision": 0.856643419178803, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.7664, |
|
"eval_samples_per_second": 195.31, |
|
"eval_steps_per_second": 6.227, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 44.123711340206185, |
|
"grad_norm": 8.506319046020508, |
|
"learning_rate": 3.0787037037037034e-05, |
|
"loss": 0.1411, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 44.5360824742268, |
|
"grad_norm": 15.423176765441895, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1346, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 44.94845360824742, |
|
"grad_norm": 6.524370193481445, |
|
"learning_rate": 3.0324074074074077e-05, |
|
"loss": 0.1212, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 44.98969072164948, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.5293735861778259, |
|
"eval_precision": 0.8580282602145957, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8173, |
|
"eval_samples_per_second": 189.843, |
|
"eval_steps_per_second": 6.053, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 45.36082474226804, |
|
"grad_norm": 12.142955780029297, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.105, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.77319587628866, |
|
"grad_norm": 11.581314086914062, |
|
"learning_rate": 2.9861111111111113e-05, |
|
"loss": 0.1049, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 45.97938144329897, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.566691517829895, |
|
"eval_precision": 0.8491712997027965, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.799, |
|
"eval_samples_per_second": 191.772, |
|
"eval_steps_per_second": 6.114, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 46.18556701030928, |
|
"grad_norm": 15.353252410888672, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.1335, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 46.597938144329895, |
|
"grad_norm": 11.990909576416016, |
|
"learning_rate": 2.9398148148148146e-05, |
|
"loss": 0.1132, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 46.96907216494845, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.5908281207084656, |
|
"eval_precision": 0.8491182494977805, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.8291, |
|
"eval_samples_per_second": 188.615, |
|
"eval_steps_per_second": 6.014, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 47.01030927835052, |
|
"grad_norm": 7.466699600219727, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.1229, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 47.422680412371136, |
|
"grad_norm": 4.299150466918945, |
|
"learning_rate": 2.8935185185185186e-05, |
|
"loss": 0.1181, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.83505154639175, |
|
"grad_norm": 8.699248313903809, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.1313, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.5995594263076782, |
|
"eval_precision": 0.8581686976058893, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.7851, |
|
"eval_samples_per_second": 193.27, |
|
"eval_steps_per_second": 6.162, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 48.24742268041237, |
|
"grad_norm": 7.394286632537842, |
|
"learning_rate": 2.8472222222222223e-05, |
|
"loss": 0.1287, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 48.65979381443299, |
|
"grad_norm": 10.575745582580566, |
|
"learning_rate": 2.824074074074074e-05, |
|
"loss": 0.1312, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 48.98969072164948, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.542959451675415, |
|
"eval_precision": 0.8607254186783246, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.7426, |
|
"eval_samples_per_second": 197.985, |
|
"eval_steps_per_second": 6.313, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 49.07216494845361, |
|
"grad_norm": 14.257989883422852, |
|
"learning_rate": 2.8009259259259263e-05, |
|
"loss": 0.1341, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 49.48453608247423, |
|
"grad_norm": 9.95071029663086, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.138, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.896907216494846, |
|
"grad_norm": 10.54672622680664, |
|
"learning_rate": 2.75462962962963e-05, |
|
"loss": 0.0996, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 49.97938144329897, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.5776570439338684, |
|
"eval_precision": 0.8561151948364225, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8283, |
|
"eval_samples_per_second": 188.7, |
|
"eval_steps_per_second": 6.017, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 50.30927835051546, |
|
"grad_norm": 9.269867897033691, |
|
"learning_rate": 2.7314814814814816e-05, |
|
"loss": 0.1183, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 50.72164948453608, |
|
"grad_norm": 3.963714361190796, |
|
"learning_rate": 2.7083333333333332e-05, |
|
"loss": 0.1389, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 50.96907216494845, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.5757654905319214, |
|
"eval_precision": 0.8486477905744771, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.8064, |
|
"eval_samples_per_second": 190.984, |
|
"eval_steps_per_second": 6.089, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 51.134020618556704, |
|
"grad_norm": 24.62941551208496, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.1188, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 51.54639175257732, |
|
"grad_norm": 14.212287902832031, |
|
"learning_rate": 2.6620370370370372e-05, |
|
"loss": 0.1257, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.95876288659794, |
|
"grad_norm": 10.230920791625977, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.1079, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.5540273785591125, |
|
"eval_precision": 0.8611434608590304, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.7965, |
|
"eval_samples_per_second": 192.043, |
|
"eval_steps_per_second": 6.123, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 52.371134020618555, |
|
"grad_norm": 12.681902885437012, |
|
"learning_rate": 2.615740740740741e-05, |
|
"loss": 0.0964, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 52.78350515463917, |
|
"grad_norm": 14.907917022705078, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0972, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 52.98969072164948, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5599762797355652, |
|
"eval_precision": 0.8559313253403165, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.8665, |
|
"eval_samples_per_second": 184.836, |
|
"eval_steps_per_second": 5.893, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 53.1958762886598, |
|
"grad_norm": 13.571532249450684, |
|
"learning_rate": 2.5694444444444445e-05, |
|
"loss": 0.1164, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 53.608247422680414, |
|
"grad_norm": 14.119112014770508, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.0985, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.97938144329897, |
|
"eval_accuracy": 0.863768115942029, |
|
"eval_loss": 0.5391947627067566, |
|
"eval_precision": 0.865555829019492, |
|
"eval_recall": 0.863768115942029, |
|
"eval_runtime": 1.8914, |
|
"eval_samples_per_second": 182.408, |
|
"eval_steps_per_second": 5.816, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 54.02061855670103, |
|
"grad_norm": 11.18630599975586, |
|
"learning_rate": 2.5231481481481485e-05, |
|
"loss": 0.1139, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 54.43298969072165, |
|
"grad_norm": 14.511212348937988, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1117, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 54.845360824742265, |
|
"grad_norm": 4.760071277618408, |
|
"learning_rate": 2.4768518518518518e-05, |
|
"loss": 0.1112, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 54.96907216494845, |
|
"eval_accuracy": 0.863768115942029, |
|
"eval_loss": 0.5410789847373962, |
|
"eval_precision": 0.8655836794521399, |
|
"eval_recall": 0.863768115942029, |
|
"eval_runtime": 1.8766, |
|
"eval_samples_per_second": 183.845, |
|
"eval_steps_per_second": 5.862, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 55.25773195876289, |
|
"grad_norm": 8.37569808959961, |
|
"learning_rate": 2.4537037037037038e-05, |
|
"loss": 0.1062, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 55.670103092783506, |
|
"grad_norm": 10.700220108032227, |
|
"learning_rate": 2.4305555555555558e-05, |
|
"loss": 0.1308, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.863768115942029, |
|
"eval_loss": 0.5445396900177002, |
|
"eval_precision": 0.8653666576853845, |
|
"eval_recall": 0.863768115942029, |
|
"eval_runtime": 1.8208, |
|
"eval_samples_per_second": 189.479, |
|
"eval_steps_per_second": 6.041, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 56.08247422680412, |
|
"grad_norm": 19.0463924407959, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.1081, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 56.49484536082474, |
|
"grad_norm": 6.819794654846191, |
|
"learning_rate": 2.3842592592592594e-05, |
|
"loss": 0.1072, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 56.90721649484536, |
|
"grad_norm": 6.308873176574707, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.1005, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 56.98969072164948, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5554308891296387, |
|
"eval_precision": 0.8551462662985753, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.868, |
|
"eval_samples_per_second": 184.69, |
|
"eval_steps_per_second": 5.889, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 57.31958762886598, |
|
"grad_norm": 5.025654315948486, |
|
"learning_rate": 2.337962962962963e-05, |
|
"loss": 0.088, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 57.7319587628866, |
|
"grad_norm": 10.021939277648926, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.0871, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.97938144329897, |
|
"eval_accuracy": 0.8405797101449275, |
|
"eval_loss": 0.5966009497642517, |
|
"eval_precision": 0.8440749450064067, |
|
"eval_recall": 0.8405797101449275, |
|
"eval_runtime": 1.7974, |
|
"eval_samples_per_second": 191.939, |
|
"eval_steps_per_second": 6.12, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 58.144329896907216, |
|
"grad_norm": 16.077518463134766, |
|
"learning_rate": 2.2916666666666667e-05, |
|
"loss": 0.089, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 58.55670103092783, |
|
"grad_norm": 14.556241035461426, |
|
"learning_rate": 2.2685185185185187e-05, |
|
"loss": 0.1072, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 58.96907216494845, |
|
"grad_norm": 9.045204162597656, |
|
"learning_rate": 2.2453703703703703e-05, |
|
"loss": 0.1102, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 58.96907216494845, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.5807223916053772, |
|
"eval_precision": 0.8543040805400182, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8412, |
|
"eval_samples_per_second": 187.376, |
|
"eval_steps_per_second": 5.974, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 59.381443298969074, |
|
"grad_norm": 12.29312515258789, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1021, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 59.79381443298969, |
|
"grad_norm": 13.808602333068848, |
|
"learning_rate": 2.1990740740740743e-05, |
|
"loss": 0.1028, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.5653913021087646, |
|
"eval_precision": 0.8490636359945823, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.8195, |
|
"eval_samples_per_second": 189.615, |
|
"eval_steps_per_second": 6.046, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 60.20618556701031, |
|
"grad_norm": 8.929511070251465, |
|
"learning_rate": 2.175925925925926e-05, |
|
"loss": 0.1103, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 60.618556701030926, |
|
"grad_norm": 14.425239562988281, |
|
"learning_rate": 2.152777777777778e-05, |
|
"loss": 0.107, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 60.98969072164948, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.577854573726654, |
|
"eval_precision": 0.8460752319344831, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.8265, |
|
"eval_samples_per_second": 188.883, |
|
"eval_steps_per_second": 6.022, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 61.03092783505155, |
|
"grad_norm": 10.870781898498535, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.0954, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 61.44329896907217, |
|
"grad_norm": 10.188617706298828, |
|
"learning_rate": 2.1064814814814816e-05, |
|
"loss": 0.0942, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 61.855670103092784, |
|
"grad_norm": 6.4580302238464355, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0848, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.97938144329897, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5842954516410828, |
|
"eval_precision": 0.8569219850916401, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.8368, |
|
"eval_samples_per_second": 187.828, |
|
"eval_steps_per_second": 5.989, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 62.2680412371134, |
|
"grad_norm": 13.236536979675293, |
|
"learning_rate": 2.0601851851851853e-05, |
|
"loss": 0.0993, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 62.68041237113402, |
|
"grad_norm": 11.377030372619629, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.0976, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 62.96907216494845, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.6161760687828064, |
|
"eval_precision": 0.8454310204706964, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.7609, |
|
"eval_samples_per_second": 195.923, |
|
"eval_steps_per_second": 6.247, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 63.09278350515464, |
|
"grad_norm": 9.68355655670166, |
|
"learning_rate": 2.013888888888889e-05, |
|
"loss": 0.0788, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 63.50515463917526, |
|
"grad_norm": 6.282276153564453, |
|
"learning_rate": 1.990740740740741e-05, |
|
"loss": 0.103, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 63.91752577319588, |
|
"grad_norm": 4.893520832061768, |
|
"learning_rate": 1.967592592592593e-05, |
|
"loss": 0.0977, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.5822046995162964, |
|
"eval_precision": 0.8468574730482583, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.8068, |
|
"eval_samples_per_second": 190.942, |
|
"eval_steps_per_second": 6.088, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 64.3298969072165, |
|
"grad_norm": 10.216239929199219, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.1112, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 64.74226804123711, |
|
"grad_norm": 22.551631927490234, |
|
"learning_rate": 1.9212962962962962e-05, |
|
"loss": 0.1256, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 64.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.575657308101654, |
|
"eval_precision": 0.851359361697526, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8317, |
|
"eval_samples_per_second": 188.346, |
|
"eval_steps_per_second": 6.005, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 65.15463917525773, |
|
"grad_norm": 6.853829383850098, |
|
"learning_rate": 1.8981481481481482e-05, |
|
"loss": 0.096, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 65.56701030927834, |
|
"grad_norm": 14.361750602722168, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.0942, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 65.97938144329896, |
|
"grad_norm": 9.966873168945312, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0883, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.97938144329896, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.5716322660446167, |
|
"eval_precision": 0.8466640969128532, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7836, |
|
"eval_samples_per_second": 193.433, |
|
"eval_steps_per_second": 6.167, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 66.3917525773196, |
|
"grad_norm": 9.780498504638672, |
|
"learning_rate": 1.8287037037037038e-05, |
|
"loss": 0.0791, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 66.80412371134021, |
|
"grad_norm": 10.076851844787598, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.0808, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 66.96907216494846, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5726441144943237, |
|
"eval_precision": 0.8562372477793413, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.782, |
|
"eval_samples_per_second": 193.608, |
|
"eval_steps_per_second": 6.173, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 67.21649484536083, |
|
"grad_norm": 10.814988136291504, |
|
"learning_rate": 1.7824074074074075e-05, |
|
"loss": 0.0604, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 67.62886597938144, |
|
"grad_norm": 14.779629707336426, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.1034, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5412786602973938, |
|
"eval_precision": 0.8548742107305042, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.8607, |
|
"eval_samples_per_second": 185.418, |
|
"eval_steps_per_second": 5.912, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 68.04123711340206, |
|
"grad_norm": 7.925902843475342, |
|
"learning_rate": 1.736111111111111e-05, |
|
"loss": 0.098, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 68.45360824742268, |
|
"grad_norm": 8.179915428161621, |
|
"learning_rate": 1.712962962962963e-05, |
|
"loss": 0.0871, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 68.8659793814433, |
|
"grad_norm": 8.375000953674316, |
|
"learning_rate": 1.6898148148148148e-05, |
|
"loss": 0.0845, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 68.98969072164948, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.5826108455657959, |
|
"eval_precision": 0.8476663926581475, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.8967, |
|
"eval_samples_per_second": 181.896, |
|
"eval_steps_per_second": 5.8, |
|
"step": 1673 |
|
}, |
|
{ |
|
"epoch": 69.27835051546391, |
|
"grad_norm": 8.613913536071777, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0911, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 69.69072164948453, |
|
"grad_norm": 9.535558700561523, |
|
"learning_rate": 1.6435185185185187e-05, |
|
"loss": 0.0916, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 69.97938144329896, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.566058337688446, |
|
"eval_precision": 0.8522049189345976, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.7731, |
|
"eval_samples_per_second": 194.574, |
|
"eval_steps_per_second": 6.204, |
|
"step": 1697 |
|
}, |
|
{ |
|
"epoch": 70.10309278350516, |
|
"grad_norm": 7.769627571105957, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.1011, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 70.51546391752578, |
|
"grad_norm": 9.350245475769043, |
|
"learning_rate": 1.597222222222222e-05, |
|
"loss": 0.0896, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 70.9278350515464, |
|
"grad_norm": 11.536579132080078, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.0912, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 70.96907216494846, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5770707130432129, |
|
"eval_precision": 0.84979303172866, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.817, |
|
"eval_samples_per_second": 189.875, |
|
"eval_steps_per_second": 6.054, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 71.34020618556701, |
|
"grad_norm": 15.122323989868164, |
|
"learning_rate": 1.550925925925926e-05, |
|
"loss": 0.0995, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 71.75257731958763, |
|
"grad_norm": 12.938358306884766, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.0863, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5769326686859131, |
|
"eval_precision": 0.8550354692908756, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.8313, |
|
"eval_samples_per_second": 188.386, |
|
"eval_steps_per_second": 6.007, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 72.16494845360825, |
|
"grad_norm": 6.935812950134277, |
|
"learning_rate": 1.5046296296296297e-05, |
|
"loss": 0.0731, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 72.57731958762886, |
|
"grad_norm": 10.120232582092285, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.1101, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 72.98969072164948, |
|
"grad_norm": 5.746927738189697, |
|
"learning_rate": 1.4583333333333335e-05, |
|
"loss": 0.083, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 72.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5860167145729065, |
|
"eval_precision": 0.8486187988428825, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8602, |
|
"eval_samples_per_second": 185.466, |
|
"eval_steps_per_second": 5.913, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 73.4020618556701, |
|
"grad_norm": 14.205853462219238, |
|
"learning_rate": 1.4351851851851853e-05, |
|
"loss": 0.1003, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 73.81443298969072, |
|
"grad_norm": 6.671767711639404, |
|
"learning_rate": 1.412037037037037e-05, |
|
"loss": 0.0839, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 73.97938144329896, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5647125244140625, |
|
"eval_precision": 0.8550673486786019, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.843, |
|
"eval_samples_per_second": 187.195, |
|
"eval_steps_per_second": 5.969, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 74.22680412371135, |
|
"grad_norm": 6.19529914855957, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0798, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 74.63917525773196, |
|
"grad_norm": 13.039739608764648, |
|
"learning_rate": 1.3657407407407408e-05, |
|
"loss": 0.0903, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 74.96907216494846, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.601210355758667, |
|
"eval_precision": 0.8534831427546733, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.7476, |
|
"eval_samples_per_second": 197.417, |
|
"eval_steps_per_second": 6.294, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 75.05154639175258, |
|
"grad_norm": 6.386416435241699, |
|
"learning_rate": 1.3425925925925928e-05, |
|
"loss": 0.0872, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 75.4639175257732, |
|
"grad_norm": 7.484694957733154, |
|
"learning_rate": 1.3194444444444446e-05, |
|
"loss": 0.0751, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 75.87628865979381, |
|
"grad_norm": 10.781839370727539, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.074, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8463768115942029, |
|
"eval_loss": 0.6048101186752319, |
|
"eval_precision": 0.8461499789126601, |
|
"eval_recall": 0.8463768115942029, |
|
"eval_runtime": 1.7696, |
|
"eval_samples_per_second": 194.962, |
|
"eval_steps_per_second": 6.216, |
|
"step": 1843 |
|
}, |
|
{ |
|
"epoch": 76.28865979381443, |
|
"grad_norm": 17.32390022277832, |
|
"learning_rate": 1.2731481481481482e-05, |
|
"loss": 0.0943, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 76.70103092783505, |
|
"grad_norm": 12.162288665771484, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0907, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 76.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5806660056114197, |
|
"eval_precision": 0.8495330403324792, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7482, |
|
"eval_samples_per_second": 197.35, |
|
"eval_steps_per_second": 6.292, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 77.11340206185567, |
|
"grad_norm": 6.960859298706055, |
|
"learning_rate": 1.2268518518518519e-05, |
|
"loss": 0.0748, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 77.52577319587628, |
|
"grad_norm": 14.269356727600098, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.0781, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 77.9381443298969, |
|
"grad_norm": 6.466542720794678, |
|
"learning_rate": 1.1805555555555555e-05, |
|
"loss": 0.0613, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 77.97938144329896, |
|
"eval_accuracy": 0.8376811594202899, |
|
"eval_loss": 0.5774852633476257, |
|
"eval_precision": 0.8381818122940702, |
|
"eval_recall": 0.8376811594202899, |
|
"eval_runtime": 1.7656, |
|
"eval_samples_per_second": 195.404, |
|
"eval_steps_per_second": 6.23, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 78.35051546391753, |
|
"grad_norm": 16.949039459228516, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.0783, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 78.76288659793815, |
|
"grad_norm": 5.50955057144165, |
|
"learning_rate": 1.1342592592592593e-05, |
|
"loss": 0.0964, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 78.96907216494846, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5758916735649109, |
|
"eval_precision": 0.8675733846947259, |
|
"eval_recall": 0.8666666666666667, |
|
"eval_runtime": 1.7818, |
|
"eval_samples_per_second": 193.62, |
|
"eval_steps_per_second": 6.173, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 79.17525773195877, |
|
"grad_norm": 7.778840065002441, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0775, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 79.58762886597938, |
|
"grad_norm": 10.63167667388916, |
|
"learning_rate": 1.087962962962963e-05, |
|
"loss": 0.0849, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 10.529654502868652, |
|
"learning_rate": 1.0648148148148148e-05, |
|
"loss": 0.0735, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.5961835384368896, |
|
"eval_precision": 0.8565539653910103, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.7657, |
|
"eval_samples_per_second": 195.391, |
|
"eval_steps_per_second": 6.23, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 80.41237113402062, |
|
"grad_norm": 10.91960334777832, |
|
"learning_rate": 1.0416666666666668e-05, |
|
"loss": 0.0803, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 80.82474226804123, |
|
"grad_norm": 6.953213691711426, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.0663, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 80.98969072164948, |
|
"eval_accuracy": 0.8434782608695652, |
|
"eval_loss": 0.5768997669219971, |
|
"eval_precision": 0.8441240738989768, |
|
"eval_recall": 0.8434782608695652, |
|
"eval_runtime": 1.8615, |
|
"eval_samples_per_second": 185.334, |
|
"eval_steps_per_second": 5.909, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 81.23711340206185, |
|
"grad_norm": 14.6912841796875, |
|
"learning_rate": 9.953703703703704e-06, |
|
"loss": 0.0756, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 81.64948453608247, |
|
"grad_norm": 11.421167373657227, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.0719, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 81.97938144329896, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5826414823532104, |
|
"eval_precision": 0.8506964547245877, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8427, |
|
"eval_samples_per_second": 187.221, |
|
"eval_steps_per_second": 5.969, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 82.0618556701031, |
|
"grad_norm": 16.955421447753906, |
|
"learning_rate": 9.490740740740741e-06, |
|
"loss": 0.0756, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 82.47422680412372, |
|
"grad_norm": 13.900518417358398, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0683, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 82.88659793814433, |
|
"grad_norm": 9.04283618927002, |
|
"learning_rate": 9.027777777777777e-06, |
|
"loss": 0.0718, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 82.96907216494846, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.5879714488983154, |
|
"eval_precision": 0.8590052571684228, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.7802, |
|
"eval_samples_per_second": 193.802, |
|
"eval_steps_per_second": 6.179, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 83.29896907216495, |
|
"grad_norm": 8.817221641540527, |
|
"learning_rate": 8.796296296296297e-06, |
|
"loss": 0.0699, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 83.71134020618557, |
|
"grad_norm": 9.379308700561523, |
|
"learning_rate": 8.564814814814816e-06, |
|
"loss": 0.0925, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5986330509185791, |
|
"eval_precision": 0.8512692229678578, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7681, |
|
"eval_samples_per_second": 195.129, |
|
"eval_steps_per_second": 6.221, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 84.12371134020619, |
|
"grad_norm": 8.215590476989746, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0617, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 84.5360824742268, |
|
"grad_norm": 5.024844169616699, |
|
"learning_rate": 8.101851851851852e-06, |
|
"loss": 0.0729, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 84.94845360824742, |
|
"grad_norm": 9.782211303710938, |
|
"learning_rate": 7.87037037037037e-06, |
|
"loss": 0.0621, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 84.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5914923548698425, |
|
"eval_precision": 0.8496762597563219, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7614, |
|
"eval_samples_per_second": 195.868, |
|
"eval_steps_per_second": 6.245, |
|
"step": 2061 |
|
}, |
|
{ |
|
"epoch": 85.36082474226804, |
|
"grad_norm": 7.3921942710876465, |
|
"learning_rate": 7.63888888888889e-06, |
|
"loss": 0.0621, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 85.77319587628865, |
|
"grad_norm": 10.206525802612305, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.059, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 85.97938144329896, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.577899694442749, |
|
"eval_precision": 0.8577329472646936, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8903, |
|
"eval_samples_per_second": 182.511, |
|
"eval_steps_per_second": 5.819, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 86.18556701030928, |
|
"grad_norm": 18.180044174194336, |
|
"learning_rate": 7.1759259259259266e-06, |
|
"loss": 0.0663, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 86.5979381443299, |
|
"grad_norm": 10.320213317871094, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.0806, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 86.96907216494846, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.5928123593330383, |
|
"eval_precision": 0.850145540799145, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8068, |
|
"eval_samples_per_second": 190.946, |
|
"eval_steps_per_second": 6.088, |
|
"step": 2109 |
|
}, |
|
{ |
|
"epoch": 87.01030927835052, |
|
"grad_norm": 13.640397071838379, |
|
"learning_rate": 6.712962962962964e-06, |
|
"loss": 0.0581, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 87.42268041237114, |
|
"grad_norm": 9.787714004516602, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.0641, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 87.83505154639175, |
|
"grad_norm": 7.827996730804443, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0617, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.606200098991394, |
|
"eval_precision": 0.8519519771693684, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.7968, |
|
"eval_samples_per_second": 192.013, |
|
"eval_steps_per_second": 6.122, |
|
"step": 2134 |
|
}, |
|
{ |
|
"epoch": 88.24742268041237, |
|
"grad_norm": 10.409219741821289, |
|
"learning_rate": 6.0185185185185185e-06, |
|
"loss": 0.0677, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 88.65979381443299, |
|
"grad_norm": 13.120059967041016, |
|
"learning_rate": 5.787037037037038e-06, |
|
"loss": 0.0651, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 88.98969072164948, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.6067116260528564, |
|
"eval_precision": 0.8518690976003952, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8144, |
|
"eval_samples_per_second": 190.144, |
|
"eval_steps_per_second": 6.063, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 89.0721649484536, |
|
"grad_norm": 8.974705696105957, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0672, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 89.48453608247422, |
|
"grad_norm": 13.397907257080078, |
|
"learning_rate": 5.324074074074074e-06, |
|
"loss": 0.0727, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 89.89690721649484, |
|
"grad_norm": 4.159496784210205, |
|
"learning_rate": 5.092592592592592e-06, |
|
"loss": 0.0754, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 89.97938144329896, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.6107772588729858, |
|
"eval_precision": 0.8553431503660337, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.7776, |
|
"eval_samples_per_second": 194.084, |
|
"eval_steps_per_second": 6.188, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 90.30927835051547, |
|
"grad_norm": 11.130279541015625, |
|
"learning_rate": 4.861111111111111e-06, |
|
"loss": 0.079, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 90.72164948453609, |
|
"grad_norm": 13.203577995300293, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.0682, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 90.96907216494846, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.618496298789978, |
|
"eval_precision": 0.8488872700953353, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7798, |
|
"eval_samples_per_second": 193.847, |
|
"eval_steps_per_second": 6.181, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 91.1340206185567, |
|
"grad_norm": 10.04045581817627, |
|
"learning_rate": 4.398148148148149e-06, |
|
"loss": 0.0699, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 91.54639175257732, |
|
"grad_norm": 2.500128984451294, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0664, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 91.95876288659794, |
|
"grad_norm": 9.432464599609375, |
|
"learning_rate": 3.935185185185185e-06, |
|
"loss": 0.0763, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8579710144927536, |
|
"eval_loss": 0.6168191432952881, |
|
"eval_precision": 0.8575139456543875, |
|
"eval_recall": 0.8579710144927536, |
|
"eval_runtime": 1.8002, |
|
"eval_samples_per_second": 191.65, |
|
"eval_steps_per_second": 6.111, |
|
"step": 2231 |
|
}, |
|
{ |
|
"epoch": 92.37113402061856, |
|
"grad_norm": 9.279271125793457, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0742, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 92.78350515463917, |
|
"grad_norm": 19.246337890625, |
|
"learning_rate": 3.4722222222222224e-06, |
|
"loss": 0.0703, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 92.98969072164948, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.6258795261383057, |
|
"eval_precision": 0.8520768323971984, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8416, |
|
"eval_samples_per_second": 187.341, |
|
"eval_steps_per_second": 5.973, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 93.19587628865979, |
|
"grad_norm": 5.38301420211792, |
|
"learning_rate": 3.2407407407407406e-06, |
|
"loss": 0.0559, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 93.6082474226804, |
|
"grad_norm": 7.105731964111328, |
|
"learning_rate": 3.0092592592592593e-06, |
|
"loss": 0.0861, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 93.97938144329896, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.6128158569335938, |
|
"eval_precision": 0.8553431503660337, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.776, |
|
"eval_samples_per_second": 194.252, |
|
"eval_steps_per_second": 6.194, |
|
"step": 2279 |
|
}, |
|
{ |
|
"epoch": 94.02061855670104, |
|
"grad_norm": 14.296255111694336, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.089, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 94.43298969072166, |
|
"grad_norm": 11.694154739379883, |
|
"learning_rate": 2.546296296296296e-06, |
|
"loss": 0.07, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 94.84536082474227, |
|
"grad_norm": 8.240065574645996, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.0807, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 94.96907216494846, |
|
"eval_accuracy": 0.855072463768116, |
|
"eval_loss": 0.6139995455741882, |
|
"eval_precision": 0.8546533219302098, |
|
"eval_recall": 0.855072463768116, |
|
"eval_runtime": 1.763, |
|
"eval_samples_per_second": 195.691, |
|
"eval_steps_per_second": 6.239, |
|
"step": 2303 |
|
}, |
|
{ |
|
"epoch": 95.25773195876289, |
|
"grad_norm": 6.740184307098389, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"loss": 0.0814, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 95.6701030927835, |
|
"grad_norm": 9.714829444885254, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0621, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8521739130434782, |
|
"eval_loss": 0.6132925748825073, |
|
"eval_precision": 0.8531657869027159, |
|
"eval_recall": 0.8521739130434782, |
|
"eval_runtime": 1.8081, |
|
"eval_samples_per_second": 190.808, |
|
"eval_steps_per_second": 6.084, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 96.08247422680412, |
|
"grad_norm": 11.212587356567383, |
|
"learning_rate": 1.6203703703703703e-06, |
|
"loss": 0.065, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 96.49484536082474, |
|
"grad_norm": 5.428162097930908, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.0621, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 96.90721649484536, |
|
"grad_norm": 15.444799423217773, |
|
"learning_rate": 1.1574074074074074e-06, |
|
"loss": 0.0831, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 96.98969072164948, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.6100958585739136, |
|
"eval_precision": 0.8507158478342087, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7991, |
|
"eval_samples_per_second": 191.765, |
|
"eval_steps_per_second": 6.114, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 97.31958762886597, |
|
"grad_norm": 12.789685249328613, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.0584, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 97.73195876288659, |
|
"grad_norm": 9.271283149719238, |
|
"learning_rate": 6.944444444444445e-07, |
|
"loss": 0.0625, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 97.97938144329896, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.6096817851066589, |
|
"eval_precision": 0.8507158478342087, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.8191, |
|
"eval_samples_per_second": 189.651, |
|
"eval_steps_per_second": 6.047, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 98.14432989690722, |
|
"grad_norm": 10.486361503601074, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"loss": 0.0563, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 98.55670103092784, |
|
"grad_norm": 4.260477066040039, |
|
"learning_rate": 2.3148148148148148e-07, |
|
"loss": 0.0648, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 98.96907216494846, |
|
"grad_norm": 8.932230949401855, |
|
"learning_rate": 0.0, |
|
"loss": 0.0571, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 98.96907216494846, |
|
"eval_accuracy": 0.8492753623188406, |
|
"eval_loss": 0.6083797812461853, |
|
"eval_precision": 0.8507158478342087, |
|
"eval_recall": 0.8492753623188406, |
|
"eval_runtime": 1.7521, |
|
"eval_samples_per_second": 196.912, |
|
"eval_steps_per_second": 6.278, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 98.96907216494846, |
|
"step": 2400, |
|
"total_flos": 7.732715563096474e+18, |
|
"train_loss": 0.2344164727628231, |
|
"train_runtime": 4723.8268, |
|
"train_samples_per_second": 65.709, |
|
"train_steps_per_second": 0.508 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.732715563096474e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|