{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.94413407821229, "global_step": 890, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 9.887640449438202e-05, "loss": 1.3963, "step": 10 }, { "epoch": 0.11, "eval_accuracy": 0.33006536960601807, "eval_loss": 1.3515560626983643, "eval_runtime": 318.9295, "eval_samples_per_second": 0.959, "eval_steps_per_second": 0.241, "step": 10 }, { "epoch": 0.22, "learning_rate": 9.775280898876405e-05, "loss": 1.2981, "step": 20 }, { "epoch": 0.22, "eval_accuracy": 0.30392158031463623, "eval_loss": 1.2864599227905273, "eval_runtime": 533.6299, "eval_samples_per_second": 0.573, "eval_steps_per_second": 0.144, "step": 20 }, { "epoch": 0.34, "learning_rate": 9.662921348314608e-05, "loss": 1.2614, "step": 30 }, { "epoch": 0.34, "eval_accuracy": 0.6535947918891907, "eval_loss": 0.9824705719947815, "eval_runtime": 348.7575, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.221, "step": 30 }, { "epoch": 0.45, "learning_rate": 9.550561797752809e-05, "loss": 0.9419, "step": 40 }, { "epoch": 0.45, "eval_accuracy": 0.49346405267715454, "eval_loss": 1.0978724956512451, "eval_runtime": 312.9714, "eval_samples_per_second": 0.978, "eval_steps_per_second": 0.246, "step": 40 }, { "epoch": 0.56, "learning_rate": 9.438202247191012e-05, "loss": 0.9813, "step": 50 }, { "epoch": 0.56, "eval_accuracy": 0.5196078419685364, "eval_loss": 0.9674614667892456, "eval_runtime": 312.3793, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.246, "step": 50 }, { "epoch": 0.67, "learning_rate": 9.325842696629214e-05, "loss": 0.7973, "step": 60 }, { "epoch": 0.67, "eval_accuracy": 0.5947712659835815, "eval_loss": 1.0033761262893677, "eval_runtime": 316.8558, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.243, "step": 60 }, { "epoch": 0.78, "learning_rate": 9.213483146067416e-05, "loss": 0.9575, "step": 70 }, { "epoch": 0.78, "eval_accuracy": 0.5882353186607361, "eval_loss": 0.8489904403686523, "eval_runtime": 535.4904, "eval_samples_per_second": 0.571, "eval_steps_per_second": 0.144, "step": 70 }, { "epoch": 0.89, "learning_rate": 9.112359550561799e-05, "loss": 0.8752, "step": 80 }, { "epoch": 0.89, "eval_accuracy": 0.6895424723625183, "eval_loss": 0.7460987567901611, "eval_runtime": 597.5484, "eval_samples_per_second": 0.512, "eval_steps_per_second": 0.129, "step": 80 }, { "epoch": 1.01, "learning_rate": 9e-05, "loss": 0.719, "step": 90 }, { "epoch": 1.01, "eval_accuracy": 0.7973856329917908, "eval_loss": 0.5500715374946594, "eval_runtime": 583.2832, "eval_samples_per_second": 0.525, "eval_steps_per_second": 0.132, "step": 90 }, { "epoch": 1.12, "learning_rate": 8.887640449438202e-05, "loss": 0.5311, "step": 100 }, { "epoch": 1.12, "eval_accuracy": 0.7973856329917908, "eval_loss": 0.5389693379402161, "eval_runtime": 590.4987, "eval_samples_per_second": 0.518, "eval_steps_per_second": 0.13, "step": 100 }, { "epoch": 1.23, "learning_rate": 8.775280898876405e-05, "loss": 0.3922, "step": 110 }, { "epoch": 1.23, "eval_accuracy": 0.843137264251709, "eval_loss": 0.42480573058128357, "eval_runtime": 597.2704, "eval_samples_per_second": 0.512, "eval_steps_per_second": 0.129, "step": 110 }, { "epoch": 1.34, "learning_rate": 8.662921348314608e-05, "loss": 0.3043, "step": 120 }, { "epoch": 1.34, "eval_accuracy": 0.8496732115745544, "eval_loss": 0.5262107253074646, "eval_runtime": 618.0607, "eval_samples_per_second": 0.495, "eval_steps_per_second": 0.125, "step": 120 }, { "epoch": 1.45, "learning_rate": 8.550561797752809e-05, "loss": 0.7841, "step": 130 }, { "epoch": 1.45, "eval_accuracy": 0.8202614188194275, "eval_loss": 0.618194580078125, "eval_runtime": 480.4521, "eval_samples_per_second": 0.637, "eval_steps_per_second": 0.16, "step": 130 }, { "epoch": 1.56, "learning_rate": 8.438202247191012e-05, "loss": 0.4881, "step": 140 }, { "epoch": 1.56, "eval_accuracy": 0.8333333134651184, "eval_loss": 0.4706672728061676, "eval_runtime": 546.9776, "eval_samples_per_second": 0.559, "eval_steps_per_second": 0.141, "step": 140 }, { "epoch": 1.68, "learning_rate": 8.325842696629214e-05, "loss": 0.39, "step": 150 }, { "epoch": 1.68, "eval_accuracy": 0.8594771027565002, "eval_loss": 0.4261144995689392, "eval_runtime": 314.3222, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.245, "step": 150 }, { "epoch": 1.79, "learning_rate": 8.213483146067417e-05, "loss": 0.4687, "step": 160 }, { "epoch": 1.79, "eval_accuracy": 0.7745097875595093, "eval_loss": 0.7588664293289185, "eval_runtime": 316.9261, "eval_samples_per_second": 0.966, "eval_steps_per_second": 0.243, "step": 160 }, { "epoch": 1.9, "learning_rate": 8.101123595505618e-05, "loss": 0.4289, "step": 170 }, { "epoch": 1.9, "eval_accuracy": 0.843137264251709, "eval_loss": 0.4307919144630432, "eval_runtime": 299.2563, "eval_samples_per_second": 1.023, "eval_steps_per_second": 0.257, "step": 170 }, { "epoch": 2.01, "learning_rate": 7.988764044943821e-05, "loss": 0.4127, "step": 180 }, { "epoch": 2.01, "eval_accuracy": 0.9117646813392639, "eval_loss": 0.30210039019584656, "eval_runtime": 293.1021, "eval_samples_per_second": 1.044, "eval_steps_per_second": 0.263, "step": 180 }, { "epoch": 2.12, "learning_rate": 7.876404494382022e-05, "loss": 0.3205, "step": 190 }, { "epoch": 2.12, "eval_accuracy": 0.9052287340164185, "eval_loss": 0.3764496445655823, "eval_runtime": 626.3698, "eval_samples_per_second": 0.489, "eval_steps_per_second": 0.123, "step": 190 }, { "epoch": 2.23, "learning_rate": 7.764044943820225e-05, "loss": 0.302, "step": 200 }, { "epoch": 2.23, "eval_accuracy": 0.8169934749603271, "eval_loss": 0.6414448618888855, "eval_runtime": 406.1799, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.19, "step": 200 }, { "epoch": 2.35, "learning_rate": 7.651685393258428e-05, "loss": 0.2767, "step": 210 }, { "epoch": 2.35, "eval_accuracy": 0.8856208920478821, "eval_loss": 0.3875592350959778, "eval_runtime": 358.8222, "eval_samples_per_second": 0.853, "eval_steps_per_second": 0.215, "step": 210 }, { "epoch": 2.46, "learning_rate": 7.53932584269663e-05, "loss": 0.4107, "step": 220 }, { "epoch": 2.46, "eval_accuracy": 0.8104575276374817, "eval_loss": 0.6241660118103027, "eval_runtime": 301.8068, "eval_samples_per_second": 1.014, "eval_steps_per_second": 0.255, "step": 220 }, { "epoch": 2.57, "learning_rate": 7.426966292134831e-05, "loss": 0.4392, "step": 230 }, { "epoch": 2.57, "eval_accuracy": 0.9183006286621094, "eval_loss": 0.2545139491558075, "eval_runtime": 315.8591, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.244, "step": 230 }, { "epoch": 2.68, "learning_rate": 7.314606741573034e-05, "loss": 0.2376, "step": 240 }, { "epoch": 2.68, "eval_accuracy": 0.8496732115745544, "eval_loss": 0.49588432908058167, "eval_runtime": 304.5879, "eval_samples_per_second": 1.005, "eval_steps_per_second": 0.253, "step": 240 }, { "epoch": 2.79, "learning_rate": 7.202247191011237e-05, "loss": 0.4715, "step": 250 }, { "epoch": 2.79, "eval_accuracy": 0.8235294222831726, "eval_loss": 0.6127722859382629, "eval_runtime": 296.1686, "eval_samples_per_second": 1.033, "eval_steps_per_second": 0.26, "step": 250 }, { "epoch": 2.91, "learning_rate": 7.089887640449438e-05, "loss": 0.2753, "step": 260 }, { "epoch": 2.91, "eval_accuracy": 0.898692786693573, "eval_loss": 0.33089637756347656, "eval_runtime": 318.4459, "eval_samples_per_second": 0.961, "eval_steps_per_second": 0.242, "step": 260 }, { "epoch": 3.02, "learning_rate": 6.97752808988764e-05, "loss": 0.2919, "step": 270 }, { "epoch": 3.02, "eval_accuracy": 0.8921568393707275, "eval_loss": 0.41311776638031006, "eval_runtime": 285.5443, "eval_samples_per_second": 1.072, "eval_steps_per_second": 0.27, "step": 270 }, { "epoch": 3.13, "learning_rate": 6.865168539325843e-05, "loss": 0.2222, "step": 280 }, { "epoch": 3.13, "eval_accuracy": 0.898692786693573, "eval_loss": 0.3918479084968567, "eval_runtime": 297.6229, "eval_samples_per_second": 1.028, "eval_steps_per_second": 0.259, "step": 280 }, { "epoch": 3.24, "learning_rate": 6.752808988764046e-05, "loss": 0.0371, "step": 290 }, { "epoch": 3.24, "eval_accuracy": 0.9183006286621094, "eval_loss": 0.28783220052719116, "eval_runtime": 293.7099, "eval_samples_per_second": 1.042, "eval_steps_per_second": 0.262, "step": 290 }, { "epoch": 3.35, "learning_rate": 6.640449438202247e-05, "loss": 0.0172, "step": 300 }, { "epoch": 3.35, "eval_accuracy": 0.9215686321258545, "eval_loss": 0.3087099492549896, "eval_runtime": 317.6569, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.242, "step": 300 }, { "epoch": 3.46, "learning_rate": 6.52808988764045e-05, "loss": 0.1953, "step": 310 }, { "epoch": 3.46, "eval_accuracy": 0.9183006286621094, "eval_loss": 0.29489144682884216, "eval_runtime": 293.4224, "eval_samples_per_second": 1.043, "eval_steps_per_second": 0.262, "step": 310 }, { "epoch": 3.58, "learning_rate": 6.415730337078652e-05, "loss": 0.2093, "step": 320 }, { "epoch": 3.58, "eval_accuracy": 0.8921568393707275, "eval_loss": 0.38903045654296875, "eval_runtime": 313.0985, "eval_samples_per_second": 0.977, "eval_steps_per_second": 0.246, "step": 320 }, { "epoch": 3.69, "learning_rate": 6.303370786516854e-05, "loss": 0.1393, "step": 330 }, { "epoch": 3.69, "eval_accuracy": 0.898692786693573, "eval_loss": 0.34058284759521484, "eval_runtime": 316.0484, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.244, "step": 330 }, { "epoch": 3.8, "learning_rate": 6.191011235955056e-05, "loss": 0.0532, "step": 340 }, { "epoch": 3.8, "eval_accuracy": 0.9313725233078003, "eval_loss": 0.38309353590011597, "eval_runtime": 315.0427, "eval_samples_per_second": 0.971, "eval_steps_per_second": 0.244, "step": 340 }, { "epoch": 3.91, "learning_rate": 6.078651685393258e-05, "loss": 0.2061, "step": 350 }, { "epoch": 3.91, "eval_accuracy": 0.8954248428344727, "eval_loss": 0.43487662076950073, "eval_runtime": 308.5388, "eval_samples_per_second": 0.992, "eval_steps_per_second": 0.25, "step": 350 }, { "epoch": 4.02, "learning_rate": 5.96629213483146e-05, "loss": 0.1755, "step": 360 }, { "epoch": 4.02, "eval_accuracy": 0.8954248428344727, "eval_loss": 0.4112664461135864, "eval_runtime": 314.6158, "eval_samples_per_second": 0.973, "eval_steps_per_second": 0.245, "step": 360 }, { "epoch": 4.13, "learning_rate": 5.853932584269663e-05, "loss": 0.0155, "step": 370 }, { "epoch": 4.13, "eval_accuracy": 0.9084967374801636, "eval_loss": 0.34788793325424194, "eval_runtime": 317.1579, "eval_samples_per_second": 0.965, "eval_steps_per_second": 0.243, "step": 370 }, { "epoch": 4.25, "learning_rate": 5.7415730337078654e-05, "loss": 0.1389, "step": 380 }, { "epoch": 4.25, "eval_accuracy": 0.9248365759849548, "eval_loss": 0.28591012954711914, "eval_runtime": 323.1301, "eval_samples_per_second": 0.947, "eval_steps_per_second": 0.238, "step": 380 }, { "epoch": 4.36, "learning_rate": 5.6292134831460676e-05, "loss": 0.1102, "step": 390 }, { "epoch": 4.36, "eval_accuracy": 0.9183006286621094, "eval_loss": 0.2804703712463379, "eval_runtime": 308.9869, "eval_samples_per_second": 0.99, "eval_steps_per_second": 0.249, "step": 390 }, { "epoch": 4.47, "learning_rate": 5.516853932584269e-05, "loss": 0.0447, "step": 400 }, { "epoch": 4.47, "eval_accuracy": 0.9281045794487, "eval_loss": 0.28759482502937317, "eval_runtime": 325.9228, "eval_samples_per_second": 0.939, "eval_steps_per_second": 0.236, "step": 400 }, { "epoch": 4.58, "learning_rate": 5.4044943820224726e-05, "loss": 0.3047, "step": 410 }, { "epoch": 4.58, "eval_accuracy": 0.9281045794487, "eval_loss": 0.28950873017311096, "eval_runtime": 302.1849, "eval_samples_per_second": 1.013, "eval_steps_per_second": 0.255, "step": 410 }, { "epoch": 4.69, "learning_rate": 5.292134831460674e-05, "loss": 0.2309, "step": 420 }, { "epoch": 4.69, "eval_accuracy": 0.9313725233078003, "eval_loss": 0.20166385173797607, "eval_runtime": 313.1316, "eval_samples_per_second": 0.977, "eval_steps_per_second": 0.246, "step": 420 }, { "epoch": 4.8, "learning_rate": 5.179775280898876e-05, "loss": 0.2097, "step": 430 }, { "epoch": 4.8, "eval_accuracy": 0.8692810535430908, "eval_loss": 0.4348565936088562, "eval_runtime": 328.5983, "eval_samples_per_second": 0.931, "eval_steps_per_second": 0.234, "step": 430 }, { "epoch": 4.92, "learning_rate": 5.0674157303370785e-05, "loss": 0.0094, "step": 440 }, { "epoch": 4.92, "eval_accuracy": 0.898692786693573, "eval_loss": 0.3308834135532379, "eval_runtime": 261.8274, "eval_samples_per_second": 1.169, "eval_steps_per_second": 0.294, "step": 440 }, { "epoch": 5.03, "learning_rate": 4.955056179775281e-05, "loss": 0.0068, "step": 450 }, { "epoch": 5.03, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.22690723836421967, "eval_runtime": 321.9057, "eval_samples_per_second": 0.951, "eval_steps_per_second": 0.239, "step": 450 }, { "epoch": 5.14, "learning_rate": 4.8426966292134836e-05, "loss": 0.0428, "step": 460 }, { "epoch": 5.14, "eval_accuracy": 0.9313725233078003, "eval_loss": 0.24997933208942413, "eval_runtime": 306.0175, "eval_samples_per_second": 1.0, "eval_steps_per_second": 0.252, "step": 460 }, { "epoch": 5.25, "learning_rate": 4.730337078651685e-05, "loss": 0.0555, "step": 470 }, { "epoch": 5.25, "eval_accuracy": 0.8888888955116272, "eval_loss": 0.4563826024532318, "eval_runtime": 304.3479, "eval_samples_per_second": 1.005, "eval_steps_per_second": 0.253, "step": 470 }, { "epoch": 5.36, "learning_rate": 4.617977528089888e-05, "loss": 0.0928, "step": 480 }, { "epoch": 5.36, "eval_accuracy": 0.915032684803009, "eval_loss": 0.3516130745410919, "eval_runtime": 320.668, "eval_samples_per_second": 0.954, "eval_steps_per_second": 0.24, "step": 480 }, { "epoch": 5.47, "learning_rate": 4.50561797752809e-05, "loss": 0.1947, "step": 490 }, { "epoch": 5.47, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.24630288779735565, "eval_runtime": 305.3109, "eval_samples_per_second": 1.002, "eval_steps_per_second": 0.252, "step": 490 }, { "epoch": 5.59, "learning_rate": 4.393258426966292e-05, "loss": 0.0934, "step": 500 }, { "epoch": 5.59, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.18016140162944794, "eval_runtime": 308.018, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.25, "step": 500 }, { "epoch": 5.7, "learning_rate": 4.2808988764044945e-05, "loss": 0.0035, "step": 510 }, { "epoch": 5.7, "eval_accuracy": 0.9411764740943909, "eval_loss": 0.22933033108711243, "eval_runtime": 316.2253, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.243, "step": 510 }, { "epoch": 5.81, "learning_rate": 4.168539325842697e-05, "loss": 0.0038, "step": 520 }, { "epoch": 5.81, "eval_accuracy": 0.9215686321258545, "eval_loss": 0.2865773141384125, "eval_runtime": 306.9042, "eval_samples_per_second": 0.997, "eval_steps_per_second": 0.251, "step": 520 }, { "epoch": 5.92, "learning_rate": 4.056179775280899e-05, "loss": 0.0027, "step": 530 }, { "epoch": 5.92, "eval_accuracy": 0.915032684803009, "eval_loss": 0.3221026659011841, "eval_runtime": 308.5611, "eval_samples_per_second": 0.992, "eval_steps_per_second": 0.25, "step": 530 }, { "epoch": 6.03, "learning_rate": 3.943820224719101e-05, "loss": 0.0586, "step": 540 }, { "epoch": 6.03, "eval_accuracy": 0.9215686321258545, "eval_loss": 0.2714509665966034, "eval_runtime": 307.936, "eval_samples_per_second": 0.994, "eval_steps_per_second": 0.25, "step": 540 }, { "epoch": 6.15, "learning_rate": 3.831460674157303e-05, "loss": 0.003, "step": 550 }, { "epoch": 6.15, "eval_accuracy": 0.9117646813392639, "eval_loss": 0.2935124635696411, "eval_runtime": 322.5288, "eval_samples_per_second": 0.949, "eval_steps_per_second": 0.239, "step": 550 }, { "epoch": 6.26, "learning_rate": 3.719101123595506e-05, "loss": 0.0748, "step": 560 }, { "epoch": 6.26, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.2554876506328583, "eval_runtime": 310.2055, "eval_samples_per_second": 0.986, "eval_steps_per_second": 0.248, "step": 560 }, { "epoch": 6.37, "learning_rate": 3.6179775280898874e-05, "loss": 0.0273, "step": 570 }, { "epoch": 6.37, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.26076748967170715, "eval_runtime": 309.3501, "eval_samples_per_second": 0.989, "eval_steps_per_second": 0.249, "step": 570 }, { "epoch": 6.48, "learning_rate": 3.50561797752809e-05, "loss": 0.0021, "step": 580 }, { "epoch": 6.48, "eval_accuracy": 0.9542483687400818, "eval_loss": 0.2612459659576416, "eval_runtime": 317.8944, "eval_samples_per_second": 0.963, "eval_steps_per_second": 0.242, "step": 580 }, { "epoch": 6.59, "learning_rate": 3.393258426966292e-05, "loss": 0.0042, "step": 590 }, { "epoch": 6.59, "eval_accuracy": 0.9575163125991821, "eval_loss": 0.24474120140075684, "eval_runtime": 308.9228, "eval_samples_per_second": 0.991, "eval_steps_per_second": 0.249, "step": 590 }, { "epoch": 6.7, "learning_rate": 3.2808988764044946e-05, "loss": 0.0274, "step": 600 }, { "epoch": 6.7, "eval_accuracy": 0.9542483687400818, "eval_loss": 0.23039507865905762, "eval_runtime": 311.9509, "eval_samples_per_second": 0.981, "eval_steps_per_second": 0.247, "step": 600 }, { "epoch": 6.82, "learning_rate": 3.168539325842697e-05, "loss": 0.0037, "step": 610 }, { "epoch": 6.82, "eval_accuracy": 0.9542483687400818, "eval_loss": 0.22053539752960205, "eval_runtime": 315.7913, "eval_samples_per_second": 0.969, "eval_steps_per_second": 0.244, "step": 610 }, { "epoch": 6.93, "learning_rate": 3.056179775280899e-05, "loss": 0.0059, "step": 620 }, { "epoch": 6.93, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.26862725615501404, "eval_runtime": 321.9798, "eval_samples_per_second": 0.95, "eval_steps_per_second": 0.239, "step": 620 }, { "epoch": 7.04, "learning_rate": 2.9438202247191012e-05, "loss": 0.002, "step": 630 }, { "epoch": 7.04, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.29074591398239136, "eval_runtime": 311.6792, "eval_samples_per_second": 0.982, "eval_steps_per_second": 0.247, "step": 630 }, { "epoch": 7.15, "learning_rate": 2.8314606741573037e-05, "loss": 0.0214, "step": 640 }, { "epoch": 7.15, "eval_accuracy": 0.9509803652763367, "eval_loss": 0.217881441116333, "eval_runtime": 312.0489, "eval_samples_per_second": 0.981, "eval_steps_per_second": 0.247, "step": 640 }, { "epoch": 7.26, "learning_rate": 2.7191011235955055e-05, "loss": 0.0011, "step": 650 }, { "epoch": 7.26, "eval_accuracy": 0.9444444179534912, "eval_loss": 0.24239015579223633, "eval_runtime": 320.4623, "eval_samples_per_second": 0.955, "eval_steps_per_second": 0.24, "step": 650 }, { "epoch": 7.37, "learning_rate": 2.606741573033708e-05, "loss": 0.1222, "step": 660 }, { "epoch": 7.37, "eval_accuracy": 0.9607843160629272, "eval_loss": 0.22233766317367554, "eval_runtime": 316.2672, "eval_samples_per_second": 0.968, "eval_steps_per_second": 0.243, "step": 660 }, { "epoch": 7.49, "learning_rate": 2.4943820224719103e-05, "loss": 0.0308, "step": 670 }, { "epoch": 7.49, "eval_accuracy": 0.9542483687400818, "eval_loss": 0.23289808630943298, "eval_runtime": 314.263, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.245, "step": 670 }, { "epoch": 7.6, "learning_rate": 2.3820224719101125e-05, "loss": 0.0047, "step": 680 }, { "epoch": 7.6, "eval_accuracy": 0.9444444179534912, "eval_loss": 0.2540358901023865, "eval_runtime": 314.1415, "eval_samples_per_second": 0.974, "eval_steps_per_second": 0.245, "step": 680 }, { "epoch": 7.71, "learning_rate": 2.2696629213483146e-05, "loss": 0.0033, "step": 690 }, { "epoch": 7.71, "eval_accuracy": 0.9379084706306458, "eval_loss": 0.26486942172050476, "eval_runtime": 313.7641, "eval_samples_per_second": 0.975, "eval_steps_per_second": 0.245, "step": 690 }, { "epoch": 7.82, "learning_rate": 2.157303370786517e-05, "loss": 0.0799, "step": 700 }, { "epoch": 7.82, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.2804279029369354, "eval_runtime": 305.6654, "eval_samples_per_second": 1.001, "eval_steps_per_second": 0.252, "step": 700 }, { "epoch": 7.93, "learning_rate": 2.0449438202247194e-05, "loss": 0.0223, "step": 710 }, { "epoch": 7.93, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.2961590886116028, "eval_runtime": 33.833, "eval_samples_per_second": 9.044, "eval_steps_per_second": 2.276, "step": 710 }, { "epoch": 8.04, "learning_rate": 1.9325842696629215e-05, "loss": 0.0065, "step": 720 }, { "epoch": 8.04, "eval_accuracy": 0.9313725233078003, "eval_loss": 0.3002856373786926, "eval_runtime": 28.9416, "eval_samples_per_second": 10.573, "eval_steps_per_second": 2.661, "step": 720 }, { "epoch": 8.16, "learning_rate": 1.8202247191011237e-05, "loss": 0.0032, "step": 730 }, { "epoch": 8.16, "eval_accuracy": 0.9215686321258545, "eval_loss": 0.33132508397102356, "eval_runtime": 28.9077, "eval_samples_per_second": 10.585, "eval_steps_per_second": 2.664, "step": 730 }, { "epoch": 8.27, "learning_rate": 1.707865168539326e-05, "loss": 0.0012, "step": 740 }, { "epoch": 8.27, "eval_accuracy": 0.9117646813392639, "eval_loss": 0.3802509605884552, "eval_runtime": 28.4035, "eval_samples_per_second": 10.773, "eval_steps_per_second": 2.711, "step": 740 }, { "epoch": 8.38, "learning_rate": 1.595505617977528e-05, "loss": 0.0022, "step": 750 }, { "epoch": 8.38, "eval_accuracy": 0.9183006286621094, "eval_loss": 0.3360930383205414, "eval_runtime": 28.4227, "eval_samples_per_second": 10.766, "eval_steps_per_second": 2.709, "step": 750 }, { "epoch": 8.49, "learning_rate": 1.4831460674157305e-05, "loss": 0.0012, "step": 760 }, { "epoch": 8.49, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.29600241780281067, "eval_runtime": 300.6077, "eval_samples_per_second": 1.018, "eval_steps_per_second": 0.256, "step": 760 }, { "epoch": 8.6, "learning_rate": 1.3707865168539327e-05, "loss": 0.0011, "step": 770 }, { "epoch": 8.6, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.2943996489048004, "eval_runtime": 312.1252, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.247, "step": 770 }, { "epoch": 8.72, "learning_rate": 1.258426966292135e-05, "loss": 0.0039, "step": 780 }, { "epoch": 8.72, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.29687556624412537, "eval_runtime": 310.6326, "eval_samples_per_second": 0.985, "eval_steps_per_second": 0.248, "step": 780 }, { "epoch": 8.83, "learning_rate": 1.146067415730337e-05, "loss": 0.0011, "step": 790 }, { "epoch": 8.83, "eval_accuracy": 0.9281045794487, "eval_loss": 0.32291698455810547, "eval_runtime": 312.3321, "eval_samples_per_second": 0.98, "eval_steps_per_second": 0.247, "step": 790 }, { "epoch": 8.94, "learning_rate": 1.0337078651685394e-05, "loss": 0.0011, "step": 800 }, { "epoch": 8.94, "eval_accuracy": 0.9281045794487, "eval_loss": 0.3310171067714691, "eval_runtime": 309.2689, "eval_samples_per_second": 0.989, "eval_steps_per_second": 0.249, "step": 800 }, { "epoch": 9.05, "learning_rate": 9.213483146067416e-06, "loss": 0.0023, "step": 810 }, { "epoch": 9.05, "eval_accuracy": 0.9346405267715454, "eval_loss": 0.3102934658527374, "eval_runtime": 308.188, "eval_samples_per_second": 0.993, "eval_steps_per_second": 0.25, "step": 810 }, { "epoch": 9.16, "learning_rate": 8.089887640449438e-06, "loss": 0.0011, "step": 820 }, { "epoch": 9.16, "eval_accuracy": 0.9444444179534912, "eval_loss": 0.29238083958625793, "eval_runtime": 323.8949, "eval_samples_per_second": 0.945, "eval_steps_per_second": 0.238, "step": 820 }, { "epoch": 9.27, "learning_rate": 6.96629213483146e-06, "loss": 0.0015, "step": 830 }, { "epoch": 9.27, "eval_accuracy": 0.9509803652763367, "eval_loss": 0.282598614692688, "eval_runtime": 298.2173, "eval_samples_per_second": 1.026, "eval_steps_per_second": 0.258, "step": 830 }, { "epoch": 9.39, "learning_rate": 5.842696629213484e-06, "loss": 0.0015, "step": 840 }, { "epoch": 9.39, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.2785097658634186, "eval_runtime": 310.1298, "eval_samples_per_second": 0.987, "eval_steps_per_second": 0.248, "step": 840 }, { "epoch": 9.5, "learning_rate": 4.719101123595506e-06, "loss": 0.0011, "step": 850 }, { "epoch": 9.5, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.2726196050643921, "eval_runtime": 315.4917, "eval_samples_per_second": 0.97, "eval_steps_per_second": 0.244, "step": 850 }, { "epoch": 9.61, "learning_rate": 3.5955056179775286e-06, "loss": 0.0297, "step": 860 }, { "epoch": 9.61, "eval_accuracy": 0.9509803652763367, "eval_loss": 0.26638907194137573, "eval_runtime": 299.8251, "eval_samples_per_second": 1.021, "eval_steps_per_second": 0.257, "step": 860 }, { "epoch": 9.72, "learning_rate": 2.4719101123595505e-06, "loss": 0.0045, "step": 870 }, { "epoch": 9.72, "eval_accuracy": 0.9509803652763367, "eval_loss": 0.26572802662849426, "eval_runtime": 300.0469, "eval_samples_per_second": 1.02, "eval_steps_per_second": 0.257, "step": 870 }, { "epoch": 9.83, "learning_rate": 1.3483146067415732e-06, "loss": 0.0016, "step": 880 }, { "epoch": 9.83, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.2656039297580719, "eval_runtime": 305.8484, "eval_samples_per_second": 1.0, "eval_steps_per_second": 0.252, "step": 880 }, { "epoch": 9.94, "learning_rate": 2.2471910112359554e-07, "loss": 0.0008, "step": 890 }, { "epoch": 9.94, "eval_accuracy": 0.9477124214172363, "eval_loss": 0.26553988456726074, "eval_runtime": 335.2081, "eval_samples_per_second": 0.913, "eval_steps_per_second": 0.23, "step": 890 } ], "max_steps": 890, "num_train_epochs": 10, "total_flos": 1.51047400011648e+18, "trial_name": null, "trial_params": null }