{ "best_metric": 0.8387096774193549, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset-finetuned-subset\\checkpoint-2655", "epoch": 59.00826210826211, "eval_steps": 500, "global_step": 3510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4245014245014246e-06, "loss": 1.5791, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.8490028490028492e-06, "loss": 1.6248, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.273504273504274e-06, "loss": 1.6509, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.6980056980056985e-06, "loss": 1.6033, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.122507122507123e-06, "loss": 1.58, "step": 50 }, { "epoch": 0.02, "eval_accuracy": 0.37327188940092165, "eval_loss": 1.5475504398345947, "eval_runtime": 280.9942, "eval_samples_per_second": 0.772, "eval_steps_per_second": 0.1, "step": 59 }, { "epoch": 1.0, "learning_rate": 8.547008547008548e-06, "loss": 1.5857, "step": 60 }, { "epoch": 1.0, "learning_rate": 9.971509971509972e-06, "loss": 1.5372, "step": 70 }, { "epoch": 1.01, "learning_rate": 1.1396011396011397e-05, "loss": 1.4931, "step": 80 }, { "epoch": 1.01, "learning_rate": 1.282051282051282e-05, "loss": 1.6021, "step": 90 }, { "epoch": 1.01, "learning_rate": 1.4245014245014246e-05, "loss": 1.5149, "step": 100 }, { "epoch": 1.01, "learning_rate": 1.566951566951567e-05, "loss": 1.3617, "step": 110 }, { "epoch": 1.02, "eval_accuracy": 0.4976958525345622, "eval_loss": 1.2123247385025024, "eval_runtime": 282.6781, "eval_samples_per_second": 0.768, "eval_steps_per_second": 0.099, "step": 118 }, { "epoch": 2.0, "learning_rate": 1.7094017094017095e-05, "loss": 1.345, "step": 120 }, { "epoch": 2.0, "learning_rate": 1.8518518518518518e-05, "loss": 1.4019, "step": 130 }, { "epoch": 2.01, "learning_rate": 1.9943019943019945e-05, "loss": 1.2799, "step": 140 }, { "epoch": 2.01, "learning_rate": 2.1367521367521368e-05, "loss": 1.2003, "step": 150 }, { "epoch": 2.01, "learning_rate": 2.2792022792022794e-05, "loss": 1.1992, "step": 160 }, { "epoch": 2.01, "learning_rate": 2.4216524216524217e-05, "loss": 1.1668, "step": 170 }, { "epoch": 2.02, "eval_accuracy": 0.3824884792626728, "eval_loss": 1.3331272602081299, "eval_runtime": 283.1967, "eval_samples_per_second": 0.766, "eval_steps_per_second": 0.099, "step": 177 }, { "epoch": 3.0, "learning_rate": 2.564102564102564e-05, "loss": 1.5354, "step": 180 }, { "epoch": 3.0, "learning_rate": 2.706552706552707e-05, "loss": 1.2036, "step": 190 }, { "epoch": 3.01, "learning_rate": 2.8490028490028492e-05, "loss": 1.252, "step": 200 }, { "epoch": 3.01, "learning_rate": 2.9914529914529915e-05, "loss": 1.1877, "step": 210 }, { "epoch": 3.01, "learning_rate": 3.133903133903134e-05, "loss": 1.3834, "step": 220 }, { "epoch": 3.02, "learning_rate": 3.2763532763532764e-05, "loss": 1.1751, "step": 230 }, { "epoch": 3.02, "eval_accuracy": 0.3686635944700461, "eval_loss": 1.2529356479644775, "eval_runtime": 303.7124, "eval_samples_per_second": 0.714, "eval_steps_per_second": 0.092, "step": 236 }, { "epoch": 4.0, "learning_rate": 3.418803418803419e-05, "loss": 1.1436, "step": 240 }, { "epoch": 4.0, "learning_rate": 3.561253561253561e-05, "loss": 1.2635, "step": 250 }, { "epoch": 4.01, "learning_rate": 3.7037037037037037e-05, "loss": 1.2653, "step": 260 }, { "epoch": 4.01, "learning_rate": 3.846153846153846e-05, "loss": 1.2505, "step": 270 }, { "epoch": 4.01, "learning_rate": 3.988603988603989e-05, "loss": 1.1114, "step": 280 }, { "epoch": 4.02, "learning_rate": 4.131054131054131e-05, "loss": 0.9961, "step": 290 }, { "epoch": 4.02, "eval_accuracy": 0.6451612903225806, "eval_loss": 1.0007940530776978, "eval_runtime": 291.2333, "eval_samples_per_second": 0.745, "eval_steps_per_second": 0.096, "step": 295 }, { "epoch": 5.0, "learning_rate": 4.2735042735042735e-05, "loss": 1.1199, "step": 300 }, { "epoch": 5.0, "learning_rate": 4.415954415954416e-05, "loss": 1.0766, "step": 310 }, { "epoch": 5.01, "learning_rate": 4.558404558404559e-05, "loss": 1.1833, "step": 320 }, { "epoch": 5.01, "learning_rate": 4.700854700854701e-05, "loss": 1.2997, "step": 330 }, { "epoch": 5.01, "learning_rate": 4.8433048433048433e-05, "loss": 1.1507, "step": 340 }, { "epoch": 5.02, "learning_rate": 4.985754985754986e-05, "loss": 1.0562, "step": 350 }, { "epoch": 5.02, "eval_accuracy": 0.5391705069124424, "eval_loss": 1.0534659624099731, "eval_runtime": 288.9126, "eval_samples_per_second": 0.751, "eval_steps_per_second": 0.097, "step": 354 }, { "epoch": 6.0, "learning_rate": 4.985754985754986e-05, "loss": 1.1336, "step": 360 }, { "epoch": 6.0, "learning_rate": 4.9699271921494144e-05, "loss": 1.192, "step": 370 }, { "epoch": 6.01, "learning_rate": 4.9540993985438435e-05, "loss": 1.1785, "step": 380 }, { "epoch": 6.01, "learning_rate": 4.938271604938271e-05, "loss": 1.2818, "step": 390 }, { "epoch": 6.01, "learning_rate": 4.9224438113327004e-05, "loss": 1.179, "step": 400 }, { "epoch": 6.02, "learning_rate": 4.906616017727129e-05, "loss": 1.0189, "step": 410 }, { "epoch": 6.02, "eval_accuracy": 0.6866359447004609, "eval_loss": 0.9321132302284241, "eval_runtime": 300.2588, "eval_samples_per_second": 0.723, "eval_steps_per_second": 0.093, "step": 413 }, { "epoch": 7.0, "learning_rate": 4.890788224121557e-05, "loss": 1.178, "step": 420 }, { "epoch": 7.0, "learning_rate": 4.8749604305159865e-05, "loss": 1.0559, "step": 430 }, { "epoch": 7.01, "learning_rate": 4.859132636910415e-05, "loss": 1.3566, "step": 440 }, { "epoch": 7.01, "learning_rate": 4.8433048433048433e-05, "loss": 1.2278, "step": 450 }, { "epoch": 7.01, "learning_rate": 4.8274770496992725e-05, "loss": 1.1068, "step": 460 }, { "epoch": 7.02, "learning_rate": 4.811649256093701e-05, "loss": 0.8553, "step": 470 }, { "epoch": 7.02, "eval_accuracy": 0.5852534562211982, "eval_loss": 0.895534098148346, "eval_runtime": 301.0743, "eval_samples_per_second": 0.721, "eval_steps_per_second": 0.093, "step": 472 }, { "epoch": 8.0, "learning_rate": 4.7958214624881294e-05, "loss": 0.8541, "step": 480 }, { "epoch": 8.01, "learning_rate": 4.779993668882558e-05, "loss": 1.1329, "step": 490 }, { "epoch": 8.01, "learning_rate": 4.764165875276987e-05, "loss": 1.1688, "step": 500 }, { "epoch": 8.01, "learning_rate": 4.7483380816714154e-05, "loss": 0.8642, "step": 510 }, { "epoch": 8.01, "learning_rate": 4.732510288065844e-05, "loss": 0.9557, "step": 520 }, { "epoch": 8.02, "learning_rate": 4.716682494460272e-05, "loss": 0.8961, "step": 530 }, { "epoch": 8.02, "eval_accuracy": 0.7419354838709677, "eval_loss": 0.7889271974563599, "eval_runtime": 301.0199, "eval_samples_per_second": 0.721, "eval_steps_per_second": 0.093, "step": 531 }, { "epoch": 9.0, "learning_rate": 4.700854700854701e-05, "loss": 1.1113, "step": 540 }, { "epoch": 9.01, "learning_rate": 4.68502690724913e-05, "loss": 1.0851, "step": 550 }, { "epoch": 9.01, "learning_rate": 4.669199113643558e-05, "loss": 1.0467, "step": 560 }, { "epoch": 9.01, "learning_rate": 4.653371320037987e-05, "loss": 0.8215, "step": 570 }, { "epoch": 9.01, "learning_rate": 4.637543526432416e-05, "loss": 0.9789, "step": 580 }, { "epoch": 9.02, "learning_rate": 4.621715732826844e-05, "loss": 1.1626, "step": 590 }, { "epoch": 9.02, "eval_accuracy": 0.6175115207373272, "eval_loss": 0.95318603515625, "eval_runtime": 305.3217, "eval_samples_per_second": 0.711, "eval_steps_per_second": 0.092, "step": 590 }, { "epoch": 10.0, "learning_rate": 4.605887939221273e-05, "loss": 0.8144, "step": 600 }, { "epoch": 10.01, "learning_rate": 4.590060145615702e-05, "loss": 0.7724, "step": 610 }, { "epoch": 10.01, "learning_rate": 4.5742323520101296e-05, "loss": 1.088, "step": 620 }, { "epoch": 10.01, "learning_rate": 4.558404558404559e-05, "loss": 1.074, "step": 630 }, { "epoch": 10.01, "learning_rate": 4.542576764798987e-05, "loss": 0.7951, "step": 640 }, { "epoch": 10.02, "eval_accuracy": 0.5529953917050692, "eval_loss": 1.1165187358856201, "eval_runtime": 303.3614, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.092, "step": 649 }, { "epoch": 11.0, "learning_rate": 4.5267489711934157e-05, "loss": 0.8376, "step": 650 }, { "epoch": 11.0, "learning_rate": 4.510921177587845e-05, "loss": 0.9325, "step": 660 }, { "epoch": 11.01, "learning_rate": 4.4950933839822725e-05, "loss": 0.8498, "step": 670 }, { "epoch": 11.01, "learning_rate": 4.479265590376702e-05, "loss": 0.7743, "step": 680 }, { "epoch": 11.01, "learning_rate": 4.463437796771131e-05, "loss": 1.0063, "step": 690 }, { "epoch": 11.01, "learning_rate": 4.4476100031655586e-05, "loss": 0.9042, "step": 700 }, { "epoch": 11.02, "eval_accuracy": 0.7649769585253456, "eval_loss": 0.7012106776237488, "eval_runtime": 307.8679, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.091, "step": 708 }, { "epoch": 12.0, "learning_rate": 4.431782209559988e-05, "loss": 0.8243, "step": 710 }, { "epoch": 12.0, "learning_rate": 4.415954415954416e-05, "loss": 0.7756, "step": 720 }, { "epoch": 12.01, "learning_rate": 4.4001266223488446e-05, "loss": 0.9731, "step": 730 }, { "epoch": 12.01, "learning_rate": 4.384298828743274e-05, "loss": 0.8979, "step": 740 }, { "epoch": 12.01, "learning_rate": 4.368471035137702e-05, "loss": 0.9586, "step": 750 }, { "epoch": 12.01, "learning_rate": 4.3526432415321306e-05, "loss": 0.8642, "step": 760 }, { "epoch": 12.02, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.7588521838188171, "eval_runtime": 303.2535, "eval_samples_per_second": 0.716, "eval_steps_per_second": 0.092, "step": 767 }, { "epoch": 13.0, "learning_rate": 4.336815447926559e-05, "loss": 0.7979, "step": 770 }, { "epoch": 13.0, "learning_rate": 4.3209876543209875e-05, "loss": 0.8616, "step": 780 }, { "epoch": 13.01, "learning_rate": 4.3051598607154166e-05, "loss": 0.6542, "step": 790 }, { "epoch": 13.01, "learning_rate": 4.289332067109845e-05, "loss": 0.732, "step": 800 }, { "epoch": 13.01, "learning_rate": 4.2735042735042735e-05, "loss": 0.806, "step": 810 }, { "epoch": 13.02, "learning_rate": 4.2576764798987026e-05, "loss": 0.8017, "step": 820 }, { "epoch": 13.02, "eval_accuracy": 0.695852534562212, "eval_loss": 0.7485441565513611, "eval_runtime": 294.6482, "eval_samples_per_second": 0.736, "eval_steps_per_second": 0.095, "step": 826 }, { "epoch": 14.0, "learning_rate": 4.241848686293131e-05, "loss": 0.7243, "step": 830 }, { "epoch": 14.0, "learning_rate": 4.2260208926875595e-05, "loss": 0.7128, "step": 840 }, { "epoch": 14.01, "learning_rate": 4.210193099081988e-05, "loss": 0.876, "step": 850 }, { "epoch": 14.01, "learning_rate": 4.194365305476417e-05, "loss": 0.7118, "step": 860 }, { "epoch": 14.01, "learning_rate": 4.1785375118708455e-05, "loss": 0.7758, "step": 870 }, { "epoch": 14.02, "learning_rate": 4.162709718265274e-05, "loss": 0.7523, "step": 880 }, { "epoch": 14.02, "eval_accuracy": 0.783410138248848, "eval_loss": 0.5616857409477234, "eval_runtime": 286.0674, "eval_samples_per_second": 0.759, "eval_steps_per_second": 0.098, "step": 885 }, { "epoch": 15.0, "learning_rate": 4.1468819246597024e-05, "loss": 0.5866, "step": 890 }, { "epoch": 15.0, "learning_rate": 4.131054131054131e-05, "loss": 0.7429, "step": 900 }, { "epoch": 15.01, "learning_rate": 4.11522633744856e-05, "loss": 0.8891, "step": 910 }, { "epoch": 15.01, "learning_rate": 4.0993985438429884e-05, "loss": 0.8778, "step": 920 }, { "epoch": 15.01, "learning_rate": 4.083570750237417e-05, "loss": 0.673, "step": 930 }, { "epoch": 15.02, "learning_rate": 4.067742956631846e-05, "loss": 0.7223, "step": 940 }, { "epoch": 15.02, "eval_accuracy": 0.6129032258064516, "eval_loss": 1.0343618392944336, "eval_runtime": 282.8856, "eval_samples_per_second": 0.767, "eval_steps_per_second": 0.099, "step": 944 }, { "epoch": 16.0, "learning_rate": 4.0519151630262745e-05, "loss": 0.8074, "step": 950 }, { "epoch": 16.0, "learning_rate": 4.036087369420703e-05, "loss": 0.6108, "step": 960 }, { "epoch": 16.01, "learning_rate": 4.020259575815132e-05, "loss": 0.9762, "step": 970 }, { "epoch": 16.01, "learning_rate": 4.00443178220956e-05, "loss": 0.7148, "step": 980 }, { "epoch": 16.01, "learning_rate": 3.988603988603989e-05, "loss": 0.8286, "step": 990 }, { "epoch": 16.02, "learning_rate": 3.9727761949984174e-05, "loss": 0.6164, "step": 1000 }, { "epoch": 16.02, "eval_accuracy": 0.7419354838709677, "eval_loss": 0.727464497089386, "eval_runtime": 330.7312, "eval_samples_per_second": 0.656, "eval_steps_per_second": 0.085, "step": 1003 }, { "epoch": 17.0, "learning_rate": 3.956948401392846e-05, "loss": 0.7374, "step": 1010 }, { "epoch": 17.0, "learning_rate": 3.941120607787275e-05, "loss": 0.8524, "step": 1020 }, { "epoch": 17.01, "learning_rate": 3.925292814181703e-05, "loss": 0.8531, "step": 1030 }, { "epoch": 17.01, "learning_rate": 3.909465020576132e-05, "loss": 0.6948, "step": 1040 }, { "epoch": 17.01, "learning_rate": 3.893637226970561e-05, "loss": 0.6618, "step": 1050 }, { "epoch": 17.02, "learning_rate": 3.877809433364989e-05, "loss": 0.6892, "step": 1060 }, { "epoch": 17.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.616241991519928, "eval_runtime": 331.0915, "eval_samples_per_second": 0.655, "eval_steps_per_second": 0.085, "step": 1062 }, { "epoch": 18.0, "learning_rate": 3.861981639759418e-05, "loss": 0.7827, "step": 1070 }, { "epoch": 18.01, "learning_rate": 3.846153846153846e-05, "loss": 0.6962, "step": 1080 }, { "epoch": 18.01, "learning_rate": 3.830326052548275e-05, "loss": 0.7623, "step": 1090 }, { "epoch": 18.01, "learning_rate": 3.814498258942704e-05, "loss": 0.6629, "step": 1100 }, { "epoch": 18.01, "learning_rate": 3.798670465337132e-05, "loss": 0.8101, "step": 1110 }, { "epoch": 18.02, "learning_rate": 3.782842671731561e-05, "loss": 0.7865, "step": 1120 }, { "epoch": 18.02, "eval_accuracy": 0.6589861751152074, "eval_loss": 0.7966123819351196, "eval_runtime": 336.073, "eval_samples_per_second": 0.646, "eval_steps_per_second": 0.083, "step": 1121 }, { "epoch": 19.0, "learning_rate": 3.767014878125989e-05, "loss": 0.5888, "step": 1130 }, { "epoch": 19.01, "learning_rate": 3.7511870845204176e-05, "loss": 0.8017, "step": 1140 }, { "epoch": 19.01, "learning_rate": 3.735359290914847e-05, "loss": 0.6144, "step": 1150 }, { "epoch": 19.01, "learning_rate": 3.719531497309275e-05, "loss": 0.715, "step": 1160 }, { "epoch": 19.01, "learning_rate": 3.7037037037037037e-05, "loss": 0.6533, "step": 1170 }, { "epoch": 19.02, "learning_rate": 3.687875910098133e-05, "loss": 0.6387, "step": 1180 }, { "epoch": 19.02, "eval_accuracy": 0.6774193548387096, "eval_loss": 0.7435915470123291, "eval_runtime": 310.5543, "eval_samples_per_second": 0.699, "eval_steps_per_second": 0.09, "step": 1180 }, { "epoch": 20.0, "learning_rate": 3.672048116492561e-05, "loss": 0.6266, "step": 1190 }, { "epoch": 20.01, "learning_rate": 3.65622032288699e-05, "loss": 0.519, "step": 1200 }, { "epoch": 20.01, "learning_rate": 3.640392529281418e-05, "loss": 0.5029, "step": 1210 }, { "epoch": 20.01, "learning_rate": 3.624564735675847e-05, "loss": 1.0188, "step": 1220 }, { "epoch": 20.01, "learning_rate": 3.608736942070276e-05, "loss": 0.6181, "step": 1230 }, { "epoch": 20.02, "eval_accuracy": 0.7373271889400922, "eval_loss": 0.7136892676353455, "eval_runtime": 305.6632, "eval_samples_per_second": 0.71, "eval_steps_per_second": 0.092, "step": 1239 }, { "epoch": 21.0, "learning_rate": 3.592909148464704e-05, "loss": 0.8196, "step": 1240 }, { "epoch": 21.0, "learning_rate": 3.5770813548591326e-05, "loss": 0.7009, "step": 1250 }, { "epoch": 21.01, "learning_rate": 3.561253561253561e-05, "loss": 0.7675, "step": 1260 }, { "epoch": 21.01, "learning_rate": 3.54542576764799e-05, "loss": 0.6991, "step": 1270 }, { "epoch": 21.01, "learning_rate": 3.5295979740424186e-05, "loss": 0.5857, "step": 1280 }, { "epoch": 21.01, "learning_rate": 3.513770180436847e-05, "loss": 0.6085, "step": 1290 }, { "epoch": 21.02, "eval_accuracy": 0.6682027649769585, "eval_loss": 0.9580748081207275, "eval_runtime": 300.6045, "eval_samples_per_second": 0.722, "eval_steps_per_second": 0.093, "step": 1298 }, { "epoch": 22.0, "learning_rate": 3.497942386831276e-05, "loss": 0.738, "step": 1300 }, { "epoch": 22.0, "learning_rate": 3.4821145932257046e-05, "loss": 0.6597, "step": 1310 }, { "epoch": 22.01, "learning_rate": 3.466286799620133e-05, "loss": 0.7283, "step": 1320 }, { "epoch": 22.01, "learning_rate": 3.450459006014562e-05, "loss": 0.8223, "step": 1330 }, { "epoch": 22.01, "learning_rate": 3.43463121240899e-05, "loss": 0.5629, "step": 1340 }, { "epoch": 22.01, "learning_rate": 3.418803418803419e-05, "loss": 0.7109, "step": 1350 }, { "epoch": 22.02, "eval_accuracy": 0.7096774193548387, "eval_loss": 0.7746023535728455, "eval_runtime": 300.4789, "eval_samples_per_second": 0.722, "eval_steps_per_second": 0.093, "step": 1357 }, { "epoch": 23.0, "learning_rate": 3.4029756251978475e-05, "loss": 0.8563, "step": 1360 }, { "epoch": 23.0, "learning_rate": 3.387147831592276e-05, "loss": 0.6669, "step": 1370 }, { "epoch": 23.01, "learning_rate": 3.371320037986705e-05, "loss": 0.786, "step": 1380 }, { "epoch": 23.01, "learning_rate": 3.355492244381133e-05, "loss": 0.5724, "step": 1390 }, { "epoch": 23.01, "learning_rate": 3.339664450775562e-05, "loss": 0.928, "step": 1400 }, { "epoch": 23.02, "learning_rate": 3.323836657169991e-05, "loss": 0.7686, "step": 1410 }, { "epoch": 23.02, "eval_accuracy": 0.7096774193548387, "eval_loss": 0.7969343066215515, "eval_runtime": 303.3779, "eval_samples_per_second": 0.715, "eval_steps_per_second": 0.092, "step": 1416 }, { "epoch": 24.0, "learning_rate": 3.308008863564419e-05, "loss": 0.5478, "step": 1420 }, { "epoch": 24.0, "learning_rate": 3.292181069958848e-05, "loss": 0.7107, "step": 1430 }, { "epoch": 24.01, "learning_rate": 3.2763532763532764e-05, "loss": 0.5742, "step": 1440 }, { "epoch": 24.01, "learning_rate": 3.260525482747705e-05, "loss": 0.5811, "step": 1450 }, { "epoch": 24.01, "learning_rate": 3.244697689142134e-05, "loss": 0.676, "step": 1460 }, { "epoch": 24.02, "learning_rate": 3.2288698955365625e-05, "loss": 0.5995, "step": 1470 }, { "epoch": 24.02, "eval_accuracy": 0.6129032258064516, "eval_loss": 1.0075043439865112, "eval_runtime": 295.2487, "eval_samples_per_second": 0.735, "eval_steps_per_second": 0.095, "step": 1475 }, { "epoch": 25.0, "learning_rate": 3.213042101930991e-05, "loss": 0.6382, "step": 1480 }, { "epoch": 25.0, "learning_rate": 3.1972143083254193e-05, "loss": 0.5384, "step": 1490 }, { "epoch": 25.01, "learning_rate": 3.181386514719848e-05, "loss": 0.7876, "step": 1500 }, { "epoch": 25.01, "learning_rate": 3.165558721114277e-05, "loss": 0.6693, "step": 1510 }, { "epoch": 25.01, "learning_rate": 3.1497309275087054e-05, "loss": 0.5688, "step": 1520 }, { "epoch": 25.02, "learning_rate": 3.133903133903134e-05, "loss": 0.5854, "step": 1530 }, { "epoch": 25.02, "eval_accuracy": 0.7419354838709677, "eval_loss": 0.7388768196105957, "eval_runtime": 335.3249, "eval_samples_per_second": 0.647, "eval_steps_per_second": 0.084, "step": 1534 }, { "epoch": 26.0, "learning_rate": 3.118075340297563e-05, "loss": 0.4072, "step": 1540 }, { "epoch": 26.0, "learning_rate": 3.1022475466919914e-05, "loss": 0.6526, "step": 1550 }, { "epoch": 26.01, "learning_rate": 3.08641975308642e-05, "loss": 0.6973, "step": 1560 }, { "epoch": 26.01, "learning_rate": 3.070591959480848e-05, "loss": 0.8606, "step": 1570 }, { "epoch": 26.01, "learning_rate": 3.0547641658752774e-05, "loss": 0.6417, "step": 1580 }, { "epoch": 26.02, "learning_rate": 3.0389363722697055e-05, "loss": 0.575, "step": 1590 }, { "epoch": 26.02, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.7198013067245483, "eval_runtime": 323.3382, "eval_samples_per_second": 0.671, "eval_steps_per_second": 0.087, "step": 1593 }, { "epoch": 27.0, "learning_rate": 3.0231085786641343e-05, "loss": 0.5309, "step": 1600 }, { "epoch": 27.0, "learning_rate": 3.0072807850585634e-05, "loss": 0.6287, "step": 1610 }, { "epoch": 27.01, "learning_rate": 2.9914529914529915e-05, "loss": 0.7234, "step": 1620 }, { "epoch": 27.01, "learning_rate": 2.9756251978474203e-05, "loss": 0.6081, "step": 1630 }, { "epoch": 27.01, "learning_rate": 2.9597974042418487e-05, "loss": 0.7265, "step": 1640 }, { "epoch": 27.02, "learning_rate": 2.9439696106362775e-05, "loss": 0.7478, "step": 1650 }, { "epoch": 27.02, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.6097679734230042, "eval_runtime": 329.3249, "eval_samples_per_second": 0.659, "eval_steps_per_second": 0.085, "step": 1652 }, { "epoch": 28.0, "learning_rate": 2.9281418170307063e-05, "loss": 0.5044, "step": 1660 }, { "epoch": 28.01, "learning_rate": 2.9123140234251344e-05, "loss": 0.5792, "step": 1670 }, { "epoch": 28.01, "learning_rate": 2.8964862298195632e-05, "loss": 0.5314, "step": 1680 }, { "epoch": 28.01, "learning_rate": 2.880658436213992e-05, "loss": 0.4202, "step": 1690 }, { "epoch": 28.01, "learning_rate": 2.8648306426084204e-05, "loss": 0.4768, "step": 1700 }, { "epoch": 28.02, "learning_rate": 2.8490028490028492e-05, "loss": 0.7204, "step": 1710 }, { "epoch": 28.02, "eval_accuracy": 0.7972350230414746, "eval_loss": 0.6458683013916016, "eval_runtime": 340.899, "eval_samples_per_second": 0.637, "eval_steps_per_second": 0.082, "step": 1711 }, { "epoch": 29.0, "learning_rate": 2.833175055397278e-05, "loss": 0.5258, "step": 1720 }, { "epoch": 29.01, "learning_rate": 2.817347261791706e-05, "loss": 0.7548, "step": 1730 }, { "epoch": 29.01, "learning_rate": 2.8015194681861352e-05, "loss": 0.6974, "step": 1740 }, { "epoch": 29.01, "learning_rate": 2.7856916745805633e-05, "loss": 0.5245, "step": 1750 }, { "epoch": 29.01, "learning_rate": 2.769863880974992e-05, "loss": 0.6382, "step": 1760 }, { "epoch": 29.02, "learning_rate": 2.754036087369421e-05, "loss": 0.4325, "step": 1770 }, { "epoch": 29.02, "eval_accuracy": 0.7511520737327189, "eval_loss": 0.726778507232666, "eval_runtime": 317.8406, "eval_samples_per_second": 0.683, "eval_steps_per_second": 0.088, "step": 1770 }, { "epoch": 30.0, "learning_rate": 2.7382082937638494e-05, "loss": 0.4622, "step": 1780 }, { "epoch": 30.01, "learning_rate": 2.722380500158278e-05, "loss": 0.6414, "step": 1790 }, { "epoch": 30.01, "learning_rate": 2.706552706552707e-05, "loss": 0.72, "step": 1800 }, { "epoch": 30.01, "learning_rate": 2.690724912947135e-05, "loss": 0.5362, "step": 1810 }, { "epoch": 30.01, "learning_rate": 2.6748971193415638e-05, "loss": 0.593, "step": 1820 }, { "epoch": 30.02, "eval_accuracy": 0.7880184331797235, "eval_loss": 0.5901350975036621, "eval_runtime": 316.6009, "eval_samples_per_second": 0.685, "eval_steps_per_second": 0.088, "step": 1829 }, { "epoch": 31.0, "learning_rate": 2.6590693257359926e-05, "loss": 0.4719, "step": 1830 }, { "epoch": 31.0, "learning_rate": 2.643241532130421e-05, "loss": 0.461, "step": 1840 }, { "epoch": 31.01, "learning_rate": 2.62741373852485e-05, "loss": 0.4394, "step": 1850 }, { "epoch": 31.01, "learning_rate": 2.611585944919278e-05, "loss": 0.7129, "step": 1860 }, { "epoch": 31.01, "learning_rate": 2.595758151313707e-05, "loss": 0.6618, "step": 1870 }, { "epoch": 31.01, "learning_rate": 2.579930357708136e-05, "loss": 0.6432, "step": 1880 }, { "epoch": 31.02, "eval_accuracy": 0.7880184331797235, "eval_loss": 0.5924447774887085, "eval_runtime": 310.8109, "eval_samples_per_second": 0.698, "eval_steps_per_second": 0.09, "step": 1888 }, { "epoch": 32.0, "learning_rate": 2.564102564102564e-05, "loss": 0.4229, "step": 1890 }, { "epoch": 32.0, "learning_rate": 2.5482747704969927e-05, "loss": 0.7339, "step": 1900 }, { "epoch": 32.01, "learning_rate": 2.5324469768914215e-05, "loss": 0.6894, "step": 1910 }, { "epoch": 32.01, "learning_rate": 2.51661918328585e-05, "loss": 0.5088, "step": 1920 }, { "epoch": 32.01, "learning_rate": 2.5007913896802788e-05, "loss": 0.4812, "step": 1930 }, { "epoch": 32.01, "learning_rate": 2.4849635960747072e-05, "loss": 0.4821, "step": 1940 }, { "epoch": 32.02, "eval_accuracy": 0.815668202764977, "eval_loss": 0.5730257630348206, "eval_runtime": 290.7811, "eval_samples_per_second": 0.746, "eval_steps_per_second": 0.096, "step": 1947 }, { "epoch": 33.0, "learning_rate": 2.4691358024691357e-05, "loss": 0.4808, "step": 1950 }, { "epoch": 33.0, "learning_rate": 2.4533080088635644e-05, "loss": 0.4051, "step": 1960 }, { "epoch": 33.01, "learning_rate": 2.4374802152579932e-05, "loss": 0.559, "step": 1970 }, { "epoch": 33.01, "learning_rate": 2.4216524216524217e-05, "loss": 0.4306, "step": 1980 }, { "epoch": 33.01, "learning_rate": 2.4058246280468505e-05, "loss": 0.7199, "step": 1990 }, { "epoch": 33.02, "learning_rate": 2.389996834441279e-05, "loss": 0.9189, "step": 2000 }, { "epoch": 33.02, "eval_accuracy": 0.783410138248848, "eval_loss": 0.6241814494132996, "eval_runtime": 285.8334, "eval_samples_per_second": 0.759, "eval_steps_per_second": 0.098, "step": 2006 }, { "epoch": 34.0, "learning_rate": 2.3741690408357077e-05, "loss": 0.5293, "step": 2010 }, { "epoch": 34.0, "learning_rate": 2.358341247230136e-05, "loss": 0.5029, "step": 2020 }, { "epoch": 34.01, "learning_rate": 2.342513453624565e-05, "loss": 0.7058, "step": 2030 }, { "epoch": 34.01, "learning_rate": 2.3266856600189934e-05, "loss": 0.4917, "step": 2040 }, { "epoch": 34.01, "learning_rate": 2.310857866413422e-05, "loss": 0.6182, "step": 2050 }, { "epoch": 34.02, "learning_rate": 2.295030072807851e-05, "loss": 0.6179, "step": 2060 }, { "epoch": 34.02, "eval_accuracy": 0.8018433179723502, "eval_loss": 0.584676206111908, "eval_runtime": 329.7075, "eval_samples_per_second": 0.658, "eval_steps_per_second": 0.085, "step": 2065 }, { "epoch": 35.0, "learning_rate": 2.2792022792022794e-05, "loss": 0.4189, "step": 2070 }, { "epoch": 35.0, "learning_rate": 2.2633744855967078e-05, "loss": 0.3077, "step": 2080 }, { "epoch": 35.01, "learning_rate": 2.2475466919911363e-05, "loss": 0.4646, "step": 2090 }, { "epoch": 35.01, "learning_rate": 2.2317188983855654e-05, "loss": 0.4642, "step": 2100 }, { "epoch": 35.01, "learning_rate": 2.215891104779994e-05, "loss": 0.658, "step": 2110 }, { "epoch": 35.02, "learning_rate": 2.2000633111744223e-05, "loss": 0.5767, "step": 2120 }, { "epoch": 35.02, "eval_accuracy": 0.8248847926267281, "eval_loss": 0.59653639793396, "eval_runtime": 363.307, "eval_samples_per_second": 0.597, "eval_steps_per_second": 0.077, "step": 2124 }, { "epoch": 36.0, "learning_rate": 2.184235517568851e-05, "loss": 0.5256, "step": 2130 }, { "epoch": 36.0, "learning_rate": 2.1684077239632795e-05, "loss": 0.7286, "step": 2140 }, { "epoch": 36.01, "learning_rate": 2.1525799303577083e-05, "loss": 0.5342, "step": 2150 }, { "epoch": 36.01, "learning_rate": 2.1367521367521368e-05, "loss": 0.5483, "step": 2160 }, { "epoch": 36.01, "learning_rate": 2.1209243431465655e-05, "loss": 0.4426, "step": 2170 }, { "epoch": 36.02, "learning_rate": 2.105096549540994e-05, "loss": 0.5298, "step": 2180 }, { "epoch": 36.02, "eval_accuracy": 0.7235023041474654, "eval_loss": 0.7917733788490295, "eval_runtime": 318.2413, "eval_samples_per_second": 0.682, "eval_steps_per_second": 0.088, "step": 2183 }, { "epoch": 37.0, "learning_rate": 2.0892687559354228e-05, "loss": 0.4776, "step": 2190 }, { "epoch": 37.0, "learning_rate": 2.0734409623298512e-05, "loss": 0.4896, "step": 2200 }, { "epoch": 37.01, "learning_rate": 2.05761316872428e-05, "loss": 0.4383, "step": 2210 }, { "epoch": 37.01, "learning_rate": 2.0417853751187084e-05, "loss": 0.5552, "step": 2220 }, { "epoch": 37.01, "learning_rate": 2.0259575815131372e-05, "loss": 0.3671, "step": 2230 }, { "epoch": 37.02, "learning_rate": 2.010129787907566e-05, "loss": 0.5651, "step": 2240 }, { "epoch": 37.02, "eval_accuracy": 0.7327188940092166, "eval_loss": 0.8338220119476318, "eval_runtime": 318.76, "eval_samples_per_second": 0.681, "eval_steps_per_second": 0.088, "step": 2242 }, { "epoch": 38.0, "learning_rate": 1.9943019943019945e-05, "loss": 0.5144, "step": 2250 }, { "epoch": 38.01, "learning_rate": 1.978474200696423e-05, "loss": 0.3722, "step": 2260 }, { "epoch": 38.01, "learning_rate": 1.9626464070908514e-05, "loss": 0.5191, "step": 2270 }, { "epoch": 38.01, "learning_rate": 1.9468186134852805e-05, "loss": 0.5935, "step": 2280 }, { "epoch": 38.01, "learning_rate": 1.930990819879709e-05, "loss": 0.5033, "step": 2290 }, { "epoch": 38.02, "learning_rate": 1.9151630262741374e-05, "loss": 0.9236, "step": 2300 }, { "epoch": 38.02, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.8370778560638428, "eval_runtime": 304.2351, "eval_samples_per_second": 0.713, "eval_steps_per_second": 0.092, "step": 2301 }, { "epoch": 39.0, "learning_rate": 1.899335232668566e-05, "loss": 0.3032, "step": 2310 }, { "epoch": 39.01, "learning_rate": 1.8835074390629946e-05, "loss": 0.6756, "step": 2320 }, { "epoch": 39.01, "learning_rate": 1.8676796454574234e-05, "loss": 0.3696, "step": 2330 }, { "epoch": 39.01, "learning_rate": 1.8518518518518518e-05, "loss": 0.5895, "step": 2340 }, { "epoch": 39.01, "learning_rate": 1.8360240582462806e-05, "loss": 0.6435, "step": 2350 }, { "epoch": 39.02, "learning_rate": 1.820196264640709e-05, "loss": 0.4854, "step": 2360 }, { "epoch": 39.02, "eval_accuracy": 0.7695852534562212, "eval_loss": 0.7114577889442444, "eval_runtime": 292.8077, "eval_samples_per_second": 0.741, "eval_steps_per_second": 0.096, "step": 2360 }, { "epoch": 40.0, "learning_rate": 1.804368471035138e-05, "loss": 0.534, "step": 2370 }, { "epoch": 40.01, "learning_rate": 1.7885406774295663e-05, "loss": 0.5408, "step": 2380 }, { "epoch": 40.01, "learning_rate": 1.772712883823995e-05, "loss": 0.5112, "step": 2390 }, { "epoch": 40.01, "learning_rate": 1.7568850902184235e-05, "loss": 0.4957, "step": 2400 }, { "epoch": 40.01, "learning_rate": 1.7410572966128523e-05, "loss": 0.4837, "step": 2410 }, { "epoch": 40.02, "eval_accuracy": 0.7695852534562212, "eval_loss": 0.6325687170028687, "eval_runtime": 288.9229, "eval_samples_per_second": 0.751, "eval_steps_per_second": 0.097, "step": 2419 }, { "epoch": 41.0, "learning_rate": 1.725229503007281e-05, "loss": 0.6155, "step": 2420 }, { "epoch": 41.0, "learning_rate": 1.7094017094017095e-05, "loss": 0.4444, "step": 2430 }, { "epoch": 41.01, "learning_rate": 1.693573915796138e-05, "loss": 0.5507, "step": 2440 }, { "epoch": 41.01, "learning_rate": 1.6777461221905664e-05, "loss": 0.5443, "step": 2450 }, { "epoch": 41.01, "learning_rate": 1.6619183285849956e-05, "loss": 0.5536, "step": 2460 }, { "epoch": 41.01, "learning_rate": 1.646090534979424e-05, "loss": 0.4142, "step": 2470 }, { "epoch": 41.02, "eval_accuracy": 0.8202764976958525, "eval_loss": 0.6266201138496399, "eval_runtime": 291.4173, "eval_samples_per_second": 0.745, "eval_steps_per_second": 0.096, "step": 2478 }, { "epoch": 42.0, "learning_rate": 1.6302627413738524e-05, "loss": 0.7275, "step": 2480 }, { "epoch": 42.0, "learning_rate": 1.6144349477682812e-05, "loss": 0.425, "step": 2490 }, { "epoch": 42.01, "learning_rate": 1.5986071541627097e-05, "loss": 0.445, "step": 2500 }, { "epoch": 42.01, "learning_rate": 1.5827793605571385e-05, "loss": 0.5725, "step": 2510 }, { "epoch": 42.01, "learning_rate": 1.566951566951567e-05, "loss": 0.619, "step": 2520 }, { "epoch": 42.01, "learning_rate": 1.5511237733459957e-05, "loss": 0.3309, "step": 2530 }, { "epoch": 42.02, "eval_accuracy": 0.8110599078341014, "eval_loss": 0.5805542469024658, "eval_runtime": 347.0673, "eval_samples_per_second": 0.625, "eval_steps_per_second": 0.081, "step": 2537 }, { "epoch": 43.0, "learning_rate": 1.535295979740424e-05, "loss": 0.5073, "step": 2540 }, { "epoch": 43.0, "learning_rate": 1.5194681861348528e-05, "loss": 0.5336, "step": 2550 }, { "epoch": 43.01, "learning_rate": 1.5036403925292817e-05, "loss": 0.4556, "step": 2560 }, { "epoch": 43.01, "learning_rate": 1.4878125989237102e-05, "loss": 0.5441, "step": 2570 }, { "epoch": 43.01, "learning_rate": 1.4719848053181388e-05, "loss": 0.4513, "step": 2580 }, { "epoch": 43.02, "learning_rate": 1.4561570117125672e-05, "loss": 0.3939, "step": 2590 }, { "epoch": 43.02, "eval_accuracy": 0.8248847926267281, "eval_loss": 0.5745505690574646, "eval_runtime": 351.9313, "eval_samples_per_second": 0.617, "eval_steps_per_second": 0.08, "step": 2596 }, { "epoch": 44.0, "learning_rate": 1.440329218106996e-05, "loss": 0.4569, "step": 2600 }, { "epoch": 44.0, "learning_rate": 1.4245014245014246e-05, "loss": 0.3418, "step": 2610 }, { "epoch": 44.01, "learning_rate": 1.408673630895853e-05, "loss": 0.5716, "step": 2620 }, { "epoch": 44.01, "learning_rate": 1.3928458372902817e-05, "loss": 0.4198, "step": 2630 }, { "epoch": 44.01, "learning_rate": 1.3770180436847105e-05, "loss": 0.5132, "step": 2640 }, { "epoch": 44.02, "learning_rate": 1.361190250079139e-05, "loss": 0.4993, "step": 2650 }, { "epoch": 44.02, "eval_accuracy": 0.8387096774193549, "eval_loss": 0.6113536357879639, "eval_runtime": 312.9643, "eval_samples_per_second": 0.693, "eval_steps_per_second": 0.089, "step": 2655 }, { "epoch": 45.0, "learning_rate": 1.3453624564735675e-05, "loss": 0.4347, "step": 2660 }, { "epoch": 45.0, "learning_rate": 1.3295346628679963e-05, "loss": 0.6201, "step": 2670 }, { "epoch": 45.01, "learning_rate": 1.313706869262425e-05, "loss": 0.4937, "step": 2680 }, { "epoch": 45.01, "learning_rate": 1.2978790756568535e-05, "loss": 0.4629, "step": 2690 }, { "epoch": 45.01, "learning_rate": 1.282051282051282e-05, "loss": 0.6889, "step": 2700 }, { "epoch": 45.02, "learning_rate": 1.2662234884457108e-05, "loss": 0.3785, "step": 2710 }, { "epoch": 45.02, "eval_accuracy": 0.8018433179723502, "eval_loss": 0.6317055821418762, "eval_runtime": 311.0444, "eval_samples_per_second": 0.698, "eval_steps_per_second": 0.09, "step": 2714 }, { "epoch": 46.0, "learning_rate": 1.2503956948401394e-05, "loss": 0.3386, "step": 2720 }, { "epoch": 46.0, "learning_rate": 1.2345679012345678e-05, "loss": 0.5517, "step": 2730 }, { "epoch": 46.01, "learning_rate": 1.2187401076289966e-05, "loss": 0.3834, "step": 2740 }, { "epoch": 46.01, "learning_rate": 1.2029123140234252e-05, "loss": 0.4447, "step": 2750 }, { "epoch": 46.01, "learning_rate": 1.1870845204178538e-05, "loss": 0.3444, "step": 2760 }, { "epoch": 46.02, "learning_rate": 1.1712567268122825e-05, "loss": 0.5224, "step": 2770 }, { "epoch": 46.02, "eval_accuracy": 0.7972350230414746, "eval_loss": 0.666746199131012, "eval_runtime": 298.4433, "eval_samples_per_second": 0.727, "eval_steps_per_second": 0.094, "step": 2773 }, { "epoch": 47.0, "learning_rate": 1.155428933206711e-05, "loss": 0.4419, "step": 2780 }, { "epoch": 47.0, "learning_rate": 1.1396011396011397e-05, "loss": 0.3244, "step": 2790 }, { "epoch": 47.01, "learning_rate": 1.1237733459955681e-05, "loss": 0.5273, "step": 2800 }, { "epoch": 47.01, "learning_rate": 1.107945552389997e-05, "loss": 0.3442, "step": 2810 }, { "epoch": 47.01, "learning_rate": 1.0921177587844255e-05, "loss": 0.4701, "step": 2820 }, { "epoch": 47.02, "learning_rate": 1.0762899651788542e-05, "loss": 0.5705, "step": 2830 }, { "epoch": 47.02, "eval_accuracy": 0.7926267281105991, "eval_loss": 0.6382005214691162, "eval_runtime": 298.8237, "eval_samples_per_second": 0.726, "eval_steps_per_second": 0.094, "step": 2832 }, { "epoch": 48.0, "learning_rate": 1.0604621715732828e-05, "loss": 0.5681, "step": 2840 }, { "epoch": 48.01, "learning_rate": 1.0446343779677114e-05, "loss": 0.4655, "step": 2850 }, { "epoch": 48.01, "learning_rate": 1.02880658436214e-05, "loss": 0.5486, "step": 2860 }, { "epoch": 48.01, "learning_rate": 1.0129787907565686e-05, "loss": 0.4268, "step": 2870 }, { "epoch": 48.01, "learning_rate": 9.971509971509972e-06, "loss": 0.5592, "step": 2880 }, { "epoch": 48.02, "learning_rate": 9.813232035454257e-06, "loss": 0.3342, "step": 2890 }, { "epoch": 48.02, "eval_accuracy": 0.815668202764977, "eval_loss": 0.5592498779296875, "eval_runtime": 307.6803, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.091, "step": 2891 }, { "epoch": 49.0, "learning_rate": 9.654954099398545e-06, "loss": 0.4835, "step": 2900 }, { "epoch": 49.01, "learning_rate": 9.49667616334283e-06, "loss": 0.3971, "step": 2910 }, { "epoch": 49.01, "learning_rate": 9.338398227287117e-06, "loss": 0.4908, "step": 2920 }, { "epoch": 49.01, "learning_rate": 9.180120291231403e-06, "loss": 0.4665, "step": 2930 }, { "epoch": 49.01, "learning_rate": 9.02184235517569e-06, "loss": 0.4262, "step": 2940 }, { "epoch": 49.02, "learning_rate": 8.863564419119975e-06, "loss": 0.5044, "step": 2950 }, { "epoch": 49.02, "eval_accuracy": 0.7926267281105991, "eval_loss": 0.574846625328064, "eval_runtime": 296.7758, "eval_samples_per_second": 0.731, "eval_steps_per_second": 0.094, "step": 2950 }, { "epoch": 50.0, "learning_rate": 8.705286483064262e-06, "loss": 0.5795, "step": 2960 }, { "epoch": 50.01, "learning_rate": 8.547008547008548e-06, "loss": 0.6522, "step": 2970 }, { "epoch": 50.01, "learning_rate": 8.388730610952832e-06, "loss": 0.4391, "step": 2980 }, { "epoch": 50.01, "learning_rate": 8.23045267489712e-06, "loss": 0.3763, "step": 2990 }, { "epoch": 50.01, "learning_rate": 8.072174738841406e-06, "loss": 0.3838, "step": 3000 }, { "epoch": 50.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6050373315811157, "eval_runtime": 338.1899, "eval_samples_per_second": 0.642, "eval_steps_per_second": 0.083, "step": 3009 }, { "epoch": 51.0, "learning_rate": 7.913896802785692e-06, "loss": 0.3898, "step": 3010 }, { "epoch": 51.0, "learning_rate": 7.755618866729978e-06, "loss": 0.4642, "step": 3020 }, { "epoch": 51.01, "learning_rate": 7.597340930674264e-06, "loss": 0.4582, "step": 3030 }, { "epoch": 51.01, "learning_rate": 7.439062994618551e-06, "loss": 0.4935, "step": 3040 }, { "epoch": 51.01, "learning_rate": 7.280785058562836e-06, "loss": 0.3878, "step": 3050 }, { "epoch": 51.01, "learning_rate": 7.122507122507123e-06, "loss": 0.5099, "step": 3060 }, { "epoch": 51.02, "eval_accuracy": 0.7603686635944701, "eval_loss": 0.6556740999221802, "eval_runtime": 332.6773, "eval_samples_per_second": 0.652, "eval_steps_per_second": 0.084, "step": 3068 }, { "epoch": 52.0, "learning_rate": 6.964229186451408e-06, "loss": 0.4264, "step": 3070 }, { "epoch": 52.0, "learning_rate": 6.805951250395695e-06, "loss": 0.378, "step": 3080 }, { "epoch": 52.01, "learning_rate": 6.6476733143399815e-06, "loss": 0.4568, "step": 3090 }, { "epoch": 52.01, "learning_rate": 6.489395378284268e-06, "loss": 0.4432, "step": 3100 }, { "epoch": 52.01, "learning_rate": 6.331117442228554e-06, "loss": 0.523, "step": 3110 }, { "epoch": 52.01, "learning_rate": 6.172839506172839e-06, "loss": 0.4335, "step": 3120 }, { "epoch": 52.02, "eval_accuracy": 0.7511520737327189, "eval_loss": 0.7118790149688721, "eval_runtime": 337.6952, "eval_samples_per_second": 0.643, "eval_steps_per_second": 0.083, "step": 3127 }, { "epoch": 53.0, "learning_rate": 6.014561570117126e-06, "loss": 0.3422, "step": 3130 }, { "epoch": 53.0, "learning_rate": 5.856283634061412e-06, "loss": 0.3893, "step": 3140 }, { "epoch": 53.01, "learning_rate": 5.6980056980056985e-06, "loss": 0.407, "step": 3150 }, { "epoch": 53.01, "learning_rate": 5.539727761949985e-06, "loss": 0.561, "step": 3160 }, { "epoch": 53.01, "learning_rate": 5.381449825894271e-06, "loss": 0.2949, "step": 3170 }, { "epoch": 53.02, "learning_rate": 5.223171889838557e-06, "loss": 0.4122, "step": 3180 }, { "epoch": 53.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6562320590019226, "eval_runtime": 360.9203, "eval_samples_per_second": 0.601, "eval_steps_per_second": 0.078, "step": 3186 }, { "epoch": 54.0, "learning_rate": 5.064893953782843e-06, "loss": 0.291, "step": 3190 }, { "epoch": 54.0, "learning_rate": 4.906616017727128e-06, "loss": 0.3532, "step": 3200 }, { "epoch": 54.01, "learning_rate": 4.748338081671415e-06, "loss": 0.3911, "step": 3210 }, { "epoch": 54.01, "learning_rate": 4.5900601456157015e-06, "loss": 0.5269, "step": 3220 }, { "epoch": 54.01, "learning_rate": 4.431782209559988e-06, "loss": 0.3041, "step": 3230 }, { "epoch": 54.02, "learning_rate": 4.273504273504274e-06, "loss": 0.4431, "step": 3240 }, { "epoch": 54.02, "eval_accuracy": 0.7649769585253456, "eval_loss": 0.6700997948646545, "eval_runtime": 319.5418, "eval_samples_per_second": 0.679, "eval_steps_per_second": 0.088, "step": 3245 }, { "epoch": 55.0, "learning_rate": 4.11522633744856e-06, "loss": 0.5306, "step": 3250 }, { "epoch": 55.0, "learning_rate": 3.956948401392846e-06, "loss": 0.5086, "step": 3260 }, { "epoch": 55.01, "learning_rate": 3.798670465337132e-06, "loss": 0.3345, "step": 3270 }, { "epoch": 55.01, "learning_rate": 3.640392529281418e-06, "loss": 0.3607, "step": 3280 }, { "epoch": 55.01, "learning_rate": 3.482114593225704e-06, "loss": 0.4845, "step": 3290 }, { "epoch": 55.02, "learning_rate": 3.3238366571699908e-06, "loss": 0.4536, "step": 3300 }, { "epoch": 55.02, "eval_accuracy": 0.7695852534562212, "eval_loss": 0.6659231781959534, "eval_runtime": 312.5964, "eval_samples_per_second": 0.694, "eval_steps_per_second": 0.09, "step": 3304 }, { "epoch": 56.0, "learning_rate": 3.165558721114277e-06, "loss": 0.3168, "step": 3310 }, { "epoch": 56.0, "learning_rate": 3.007280785058563e-06, "loss": 0.459, "step": 3320 }, { "epoch": 56.01, "learning_rate": 2.8490028490028492e-06, "loss": 0.4485, "step": 3330 }, { "epoch": 56.01, "learning_rate": 2.6907249129471354e-06, "loss": 0.4044, "step": 3340 }, { "epoch": 56.01, "learning_rate": 2.5324469768914215e-06, "loss": 0.4181, "step": 3350 }, { "epoch": 56.02, "learning_rate": 2.3741690408357077e-06, "loss": 0.3867, "step": 3360 }, { "epoch": 56.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6631873846054077, "eval_runtime": 302.7333, "eval_samples_per_second": 0.717, "eval_steps_per_second": 0.092, "step": 3363 }, { "epoch": 57.0, "learning_rate": 2.215891104779994e-06, "loss": 0.4615, "step": 3370 }, { "epoch": 57.0, "learning_rate": 2.05761316872428e-06, "loss": 0.459, "step": 3380 }, { "epoch": 57.01, "learning_rate": 1.899335232668566e-06, "loss": 0.2905, "step": 3390 }, { "epoch": 57.01, "learning_rate": 1.741057296612852e-06, "loss": 0.3776, "step": 3400 }, { "epoch": 57.01, "learning_rate": 1.5827793605571385e-06, "loss": 0.4584, "step": 3410 }, { "epoch": 57.02, "learning_rate": 1.4245014245014246e-06, "loss": 0.3878, "step": 3420 }, { "epoch": 57.02, "eval_accuracy": 0.7741935483870968, "eval_loss": 0.6911003589630127, "eval_runtime": 288.2901, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.097, "step": 3422 }, { "epoch": 58.0, "learning_rate": 1.2662234884457108e-06, "loss": 0.3642, "step": 3430 }, { "epoch": 58.01, "learning_rate": 1.107945552389997e-06, "loss": 0.5532, "step": 3440 }, { "epoch": 58.01, "learning_rate": 9.49667616334283e-07, "loss": 0.3968, "step": 3450 }, { "epoch": 58.01, "learning_rate": 7.913896802785692e-07, "loss": 0.4202, "step": 3460 }, { "epoch": 58.01, "learning_rate": 6.331117442228554e-07, "loss": 0.5291, "step": 3470 }, { "epoch": 58.02, "learning_rate": 4.748338081671415e-07, "loss": 0.3853, "step": 3480 }, { "epoch": 58.02, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6931061148643494, "eval_runtime": 338.901, "eval_samples_per_second": 0.64, "eval_steps_per_second": 0.083, "step": 3481 }, { "epoch": 59.0, "learning_rate": 3.165558721114277e-07, "loss": 0.4615, "step": 3490 }, { "epoch": 59.01, "learning_rate": 1.5827793605571385e-07, "loss": 0.4759, "step": 3500 }, { "epoch": 59.01, "learning_rate": 0.0, "loss": 0.3268, "step": 3510 }, { "epoch": 59.01, "eval_accuracy": 0.7788018433179723, "eval_loss": 0.6913787126541138, "eval_runtime": 307.5387, "eval_samples_per_second": 0.706, "eval_steps_per_second": 0.091, "step": 3510 }, { "epoch": 59.01, "step": 3510, "total_flos": 3.476987046291161e+19, "train_loss": 0.68851605188473, "train_runtime": 67730.7107, "train_samples_per_second": 0.415, "train_steps_per_second": 0.052 }, { "epoch": 59.01, "eval_accuracy": 0.8148148148148148, "eval_loss": 0.567668616771698, "eval_runtime": 331.762, "eval_samples_per_second": 0.651, "eval_steps_per_second": 0.081, "step": 3510 }, { "epoch": 59.01, "eval_accuracy": 0.8148148148148148, "eval_loss": 0.5676685571670532, "eval_runtime": 292.9189, "eval_samples_per_second": 0.737, "eval_steps_per_second": 0.092, "step": 3510 }, { "epoch": 59.01, "eval_accuracy": 0.8165137614678899, "eval_loss": 0.5670480728149414, "eval_runtime": 312.7468, "eval_samples_per_second": 0.697, "eval_steps_per_second": 0.09, "step": 3510 } ], "logging_steps": 10, "max_steps": 3510, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 3.476987046291161e+19, "trial_name": null, "trial_params": null }