{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1523941982912775, "eval_steps": 50, "global_step": 1450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3e-05, "loss": 2.0927, "step": 10 }, { "epoch": 0.02, "learning_rate": 3e-05, "loss": 0.267, "step": 20 }, { "epoch": 0.02, "learning_rate": 3e-05, "loss": 0.1926, "step": 30 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 0.1601, "step": 40 }, { "epoch": 0.04, "learning_rate": 3e-05, "loss": 0.1255, "step": 50 }, { "epoch": 0.04, "eval_accuracy": 0.2330188679245283, "eval_f1_macro": 0.07731831394851697, "eval_f1_micro": 0.2330188679245283, "eval_loss": 0.24590551853179932, "eval_precision_macro": 0.09801465635567799, "eval_precision_micro": 0.2330188679245283, "eval_recall_macro": 0.0939265996231733, "eval_recall_micro": 0.2330188679245283, "eval_runtime": 67.1714, "eval_samples_per_second": 15.781, "eval_steps_per_second": 3.945, "step": 50 }, { "epoch": 0.05, "learning_rate": 3e-05, "loss": 0.6981, "step": 60 }, { "epoch": 0.06, "learning_rate": 3e-05, "loss": 0.1356, "step": 70 }, { "epoch": 0.06, "learning_rate": 3e-05, "loss": 0.0993, "step": 80 }, { "epoch": 0.07, "learning_rate": 3e-05, "loss": 0.1038, "step": 90 }, { "epoch": 0.08, "learning_rate": 3e-05, "loss": 0.1076, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.4075471698113208, "eval_f1_macro": 0.1681261191284492, "eval_f1_micro": 0.4075471698113208, "eval_loss": 0.14505280554294586, "eval_precision_macro": 0.19505399860785297, "eval_precision_micro": 0.4075471698113208, "eval_recall_macro": 0.18462138174503467, "eval_recall_micro": 0.4075471698113208, "eval_runtime": 67.1009, "eval_samples_per_second": 15.797, "eval_steps_per_second": 3.949, "step": 100 }, { "epoch": 0.09, "learning_rate": 3e-05, "loss": 0.331, "step": 110 }, { "epoch": 0.1, "learning_rate": 3e-05, "loss": 0.0809, "step": 120 }, { "epoch": 0.1, "learning_rate": 3e-05, "loss": 0.0812, "step": 130 }, { "epoch": 0.11, "learning_rate": 3e-05, "loss": 0.0601, "step": 140 }, { "epoch": 0.12, "learning_rate": 3e-05, "loss": 0.066, "step": 150 }, { "epoch": 0.12, "eval_accuracy": 0.5386792452830189, "eval_f1_macro": 0.2780127225833117, "eval_f1_micro": 0.5386792452830189, "eval_loss": 0.10953618586063385, "eval_precision_macro": 0.3493311966119182, "eval_precision_micro": 0.5386792452830189, "eval_recall_macro": 0.2871523900319283, "eval_recall_micro": 0.5386792452830189, "eval_runtime": 67.0903, "eval_samples_per_second": 15.8, "eval_steps_per_second": 3.95, "step": 150 }, { "epoch": 0.13, "learning_rate": 3e-05, "loss": 0.2732, "step": 160 }, { "epoch": 0.14, "learning_rate": 3e-05, "loss": 0.0754, "step": 170 }, { "epoch": 0.14, "learning_rate": 3e-05, "loss": 0.0649, "step": 180 }, { "epoch": 0.15, "learning_rate": 3e-05, "loss": 0.0674, "step": 190 }, { "epoch": 0.16, "learning_rate": 3e-05, "loss": 0.0699, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.620754716981132, "eval_f1_macro": 0.3797608124202816, "eval_f1_micro": 0.620754716981132, "eval_loss": 0.09009388834238052, "eval_precision_macro": 0.3837197141355178, "eval_precision_micro": 0.620754716981132, "eval_recall_macro": 0.39915842112719735, "eval_recall_micro": 0.620754716981132, "eval_runtime": 67.0671, "eval_samples_per_second": 15.805, "eval_steps_per_second": 3.951, "step": 200 }, { "epoch": 0.17, "learning_rate": 3e-05, "loss": 0.1946, "step": 210 }, { "epoch": 0.17, "learning_rate": 3e-05, "loss": 0.0657, "step": 220 }, { "epoch": 0.18, "learning_rate": 3e-05, "loss": 0.0547, "step": 230 }, { "epoch": 0.19, "learning_rate": 3e-05, "loss": 0.0615, "step": 240 }, { "epoch": 0.2, "learning_rate": 3e-05, "loss": 0.066, "step": 250 }, { "epoch": 0.2, "eval_accuracy": 0.6103773584905661, "eval_f1_macro": 0.41348516498355786, "eval_f1_micro": 0.6103773584905661, "eval_loss": 0.08832413703203201, "eval_precision_macro": 0.45439839834135715, "eval_precision_micro": 0.6103773584905661, "eval_recall_macro": 0.4312111435526721, "eval_recall_micro": 0.6103773584905661, "eval_runtime": 66.9345, "eval_samples_per_second": 15.836, "eval_steps_per_second": 3.959, "step": 250 }, { "epoch": 0.21, "learning_rate": 3e-05, "loss": 0.1494, "step": 260 }, { "epoch": 0.21, "learning_rate": 3e-05, "loss": 0.0655, "step": 270 }, { "epoch": 0.22, "learning_rate": 3e-05, "loss": 0.06, "step": 280 }, { "epoch": 0.23, "learning_rate": 3e-05, "loss": 0.0616, "step": 290 }, { "epoch": 0.24, "learning_rate": 3e-05, "loss": 0.0452, "step": 300 }, { "epoch": 0.24, "eval_accuracy": 0.6877358490566038, "eval_f1_macro": 0.5091555575082085, "eval_f1_micro": 0.6877358490566038, "eval_loss": 0.08789286762475967, "eval_precision_macro": 0.5649217974276287, "eval_precision_micro": 0.6877358490566038, "eval_recall_macro": 0.513496327466451, "eval_recall_micro": 0.6877358490566038, "eval_runtime": 67.1619, "eval_samples_per_second": 15.783, "eval_steps_per_second": 3.946, "step": 300 }, { "epoch": 0.25, "learning_rate": 3e-05, "loss": 0.1535, "step": 310 }, { "epoch": 0.25, "learning_rate": 3e-05, "loss": 0.0731, "step": 320 }, { "epoch": 0.26, "learning_rate": 3e-05, "loss": 0.044, "step": 330 }, { "epoch": 0.27, "learning_rate": 3e-05, "loss": 0.053, "step": 340 }, { "epoch": 0.28, "learning_rate": 3e-05, "loss": 0.0545, "step": 350 }, { "epoch": 0.28, "eval_accuracy": 0.6764150943396227, "eval_f1_macro": 0.503999030020007, "eval_f1_micro": 0.6764150943396227, "eval_loss": 0.07607663422822952, "eval_precision_macro": 0.5194445629359009, "eval_precision_micro": 0.6764150943396227, "eval_recall_macro": 0.5287937722322651, "eval_recall_micro": 0.6764150943396227, "eval_runtime": 67.2353, "eval_samples_per_second": 15.766, "eval_steps_per_second": 3.941, "step": 350 }, { "epoch": 0.29, "learning_rate": 3e-05, "loss": 0.1543, "step": 360 }, { "epoch": 0.29, "learning_rate": 3e-05, "loss": 0.0609, "step": 370 }, { "epoch": 0.3, "learning_rate": 3e-05, "loss": 0.0479, "step": 380 }, { "epoch": 0.31, "learning_rate": 3e-05, "loss": 0.0532, "step": 390 }, { "epoch": 0.32, "learning_rate": 3e-05, "loss": 0.0647, "step": 400 }, { "epoch": 0.32, "eval_accuracy": 0.7339622641509433, "eval_f1_macro": 0.5492932704438783, "eval_f1_micro": 0.7339622641509433, "eval_loss": 0.06653406471014023, "eval_precision_macro": 0.6193164476598846, "eval_precision_micro": 0.7339622641509433, "eval_recall_macro": 0.5252411264940735, "eval_recall_micro": 0.7339622641509433, "eval_runtime": 67.4334, "eval_samples_per_second": 15.719, "eval_steps_per_second": 3.93, "step": 400 }, { "epoch": 0.33, "learning_rate": 3e-05, "loss": 0.104, "step": 410 }, { "epoch": 0.33, "learning_rate": 3e-05, "loss": 0.0458, "step": 420 }, { "epoch": 0.34, "learning_rate": 3e-05, "loss": 0.0552, "step": 430 }, { "epoch": 0.35, "learning_rate": 3e-05, "loss": 0.0512, "step": 440 }, { "epoch": 0.36, "learning_rate": 3e-05, "loss": 0.056, "step": 450 }, { "epoch": 0.36, "eval_accuracy": 0.7396226415094339, "eval_f1_macro": 0.5671730153967399, "eval_f1_micro": 0.7396226415094339, "eval_loss": 0.05136344954371452, "eval_precision_macro": 0.6096698581228938, "eval_precision_micro": 0.7396226415094339, "eval_recall_macro": 0.5767264087198709, "eval_recall_micro": 0.7396226415094339, "eval_runtime": 66.8962, "eval_samples_per_second": 15.845, "eval_steps_per_second": 3.961, "step": 450 }, { "epoch": 0.37, "learning_rate": 3e-05, "loss": 0.0773, "step": 460 }, { "epoch": 0.37, "learning_rate": 3e-05, "loss": 0.0474, "step": 470 }, { "epoch": 0.38, "learning_rate": 3e-05, "loss": 0.0405, "step": 480 }, { "epoch": 0.39, "learning_rate": 3e-05, "loss": 0.0461, "step": 490 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 0.0513, "step": 500 }, { "epoch": 0.4, "eval_accuracy": 0.7613207547169811, "eval_f1_macro": 0.601977568492687, "eval_f1_micro": 0.761320754716981, "eval_loss": 0.047934673726558685, "eval_precision_macro": 0.638418606498986, "eval_precision_micro": 0.7613207547169811, "eval_recall_macro": 0.6145296570629574, "eval_recall_micro": 0.7613207547169811, "eval_runtime": 66.9411, "eval_samples_per_second": 15.835, "eval_steps_per_second": 3.959, "step": 500 }, { "epoch": 0.41, "learning_rate": 3e-05, "loss": 0.0788, "step": 510 }, { "epoch": 0.41, "learning_rate": 3e-05, "loss": 0.0495, "step": 520 }, { "epoch": 0.42, "learning_rate": 3e-05, "loss": 0.0552, "step": 530 }, { "epoch": 0.43, "learning_rate": 3e-05, "loss": 0.0415, "step": 540 }, { "epoch": 0.44, "learning_rate": 3e-05, "loss": 0.0501, "step": 550 }, { "epoch": 0.44, "eval_accuracy": 0.7509433962264151, "eval_f1_macro": 0.6074975120648255, "eval_f1_micro": 0.7509433962264151, "eval_loss": 0.05019384250044823, "eval_precision_macro": 0.624502704252128, "eval_precision_micro": 0.7509433962264151, "eval_recall_macro": 0.6167049341328479, "eval_recall_micro": 0.7509433962264151, "eval_runtime": 67.3498, "eval_samples_per_second": 15.739, "eval_steps_per_second": 3.935, "step": 550 }, { "epoch": 0.45, "learning_rate": 3e-05, "loss": 0.0633, "step": 560 }, { "epoch": 0.45, "learning_rate": 3e-05, "loss": 0.0484, "step": 570 }, { "epoch": 0.46, "learning_rate": 3e-05, "loss": 0.0418, "step": 580 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 0.0524, "step": 590 }, { "epoch": 0.48, "learning_rate": 3e-05, "loss": 0.0533, "step": 600 }, { "epoch": 0.48, "eval_accuracy": 0.7641509433962265, "eval_f1_macro": 0.607265930345707, "eval_f1_micro": 0.7641509433962265, "eval_loss": 0.048058342188596725, "eval_precision_macro": 0.6499724898555727, "eval_precision_micro": 0.7641509433962265, "eval_recall_macro": 0.6139175086252339, "eval_recall_micro": 0.7641509433962265, "eval_runtime": 66.897, "eval_samples_per_second": 15.845, "eval_steps_per_second": 3.961, "step": 600 }, { "epoch": 0.48, "learning_rate": 3e-05, "loss": 0.0418, "step": 610 }, { "epoch": 0.49, "learning_rate": 3e-05, "loss": 0.0482, "step": 620 }, { "epoch": 0.5, "learning_rate": 3e-05, "loss": 0.0458, "step": 630 }, { "epoch": 0.51, "learning_rate": 3e-05, "loss": 0.0432, "step": 640 }, { "epoch": 0.52, "learning_rate": 3e-05, "loss": 0.0462, "step": 650 }, { "epoch": 0.52, "eval_accuracy": 0.7481132075471698, "eval_f1_macro": 0.5679477471859753, "eval_f1_micro": 0.7481132075471698, "eval_loss": 0.047320980578660965, "eval_precision_macro": 0.5941670973495327, "eval_precision_micro": 0.7481132075471698, "eval_recall_macro": 0.5739727328111488, "eval_recall_micro": 0.7481132075471698, "eval_runtime": 67.2106, "eval_samples_per_second": 15.771, "eval_steps_per_second": 3.943, "step": 650 }, { "epoch": 0.52, "learning_rate": 3e-05, "loss": 0.0668, "step": 660 }, { "epoch": 0.53, "learning_rate": 3e-05, "loss": 0.0501, "step": 670 }, { "epoch": 0.54, "learning_rate": 3e-05, "loss": 0.0366, "step": 680 }, { "epoch": 0.55, "learning_rate": 3e-05, "loss": 0.0374, "step": 690 }, { "epoch": 0.56, "learning_rate": 3e-05, "loss": 0.0496, "step": 700 }, { "epoch": 0.56, "eval_accuracy": 0.7971698113207547, "eval_f1_macro": 0.6517694520426227, "eval_f1_micro": 0.7971698113207546, "eval_loss": 0.04193812981247902, "eval_precision_macro": 0.6678204026981202, "eval_precision_micro": 0.7971698113207547, "eval_recall_macro": 0.6480125227888868, "eval_recall_micro": 0.7971698113207547, "eval_runtime": 67.3982, "eval_samples_per_second": 15.727, "eval_steps_per_second": 3.932, "step": 700 }, { "epoch": 0.56, "learning_rate": 3e-05, "loss": 0.0649, "step": 710 }, { "epoch": 0.57, "learning_rate": 3e-05, "loss": 0.0447, "step": 720 }, { "epoch": 0.58, "learning_rate": 3e-05, "loss": 0.0442, "step": 730 }, { "epoch": 0.59, "learning_rate": 3e-05, "loss": 0.037, "step": 740 }, { "epoch": 0.6, "learning_rate": 3e-05, "loss": 0.0614, "step": 750 }, { "epoch": 0.6, "eval_accuracy": 0.7773584905660378, "eval_f1_macro": 0.6308119664331103, "eval_f1_micro": 0.7773584905660378, "eval_loss": 0.04885416477918625, "eval_precision_macro": 0.6677975283624125, "eval_precision_micro": 0.7773584905660378, "eval_recall_macro": 0.6360471775658058, "eval_recall_micro": 0.7773584905660378, "eval_runtime": 67.7832, "eval_samples_per_second": 15.638, "eval_steps_per_second": 3.91, "step": 750 }, { "epoch": 0.6, "learning_rate": 3e-05, "loss": 0.0649, "step": 760 }, { "epoch": 0.61, "learning_rate": 3e-05, "loss": 0.0426, "step": 770 }, { "epoch": 0.62, "learning_rate": 3e-05, "loss": 0.0347, "step": 780 }, { "epoch": 0.63, "learning_rate": 3e-05, "loss": 0.0414, "step": 790 }, { "epoch": 0.64, "learning_rate": 3e-05, "loss": 0.0468, "step": 800 }, { "epoch": 0.64, "eval_accuracy": 0.7830188679245284, "eval_f1_macro": 0.6493890925237205, "eval_f1_micro": 0.7830188679245284, "eval_loss": 0.044340912252664566, "eval_precision_macro": 0.6435014283226803, "eval_precision_micro": 0.7830188679245284, "eval_recall_macro": 0.6816157451405587, "eval_recall_micro": 0.7830188679245284, "eval_runtime": 67.2351, "eval_samples_per_second": 15.766, "eval_steps_per_second": 3.941, "step": 800 }, { "epoch": 0.64, "learning_rate": 3e-05, "loss": 0.052, "step": 810 }, { "epoch": 0.65, "learning_rate": 3e-05, "loss": 0.0414, "step": 820 }, { "epoch": 0.66, "learning_rate": 3e-05, "loss": 0.0342, "step": 830 }, { "epoch": 0.67, "learning_rate": 3e-05, "loss": 0.0451, "step": 840 }, { "epoch": 0.68, "learning_rate": 3e-05, "loss": 0.0477, "step": 850 }, { "epoch": 0.68, "eval_accuracy": 0.7971698113207547, "eval_f1_macro": 0.6662808099368048, "eval_f1_micro": 0.7971698113207546, "eval_loss": 0.041995830833911896, "eval_precision_macro": 0.7040157648486967, "eval_precision_micro": 0.7971698113207547, "eval_recall_macro": 0.6567342355863813, "eval_recall_micro": 0.7971698113207547, "eval_runtime": 67.3249, "eval_samples_per_second": 15.745, "eval_steps_per_second": 3.936, "step": 850 }, { "epoch": 0.68, "learning_rate": 3e-05, "loss": 0.0468, "step": 860 }, { "epoch": 0.69, "learning_rate": 3e-05, "loss": 0.0461, "step": 870 }, { "epoch": 0.7, "learning_rate": 3e-05, "loss": 0.0436, "step": 880 }, { "epoch": 0.71, "learning_rate": 3e-05, "loss": 0.0369, "step": 890 }, { "epoch": 0.72, "learning_rate": 3e-05, "loss": 0.0519, "step": 900 }, { "epoch": 0.72, "eval_accuracy": 0.7632075471698113, "eval_f1_macro": 0.6291599323302522, "eval_f1_micro": 0.7632075471698113, "eval_loss": 0.04627140238881111, "eval_precision_macro": 0.6519385252086033, "eval_precision_micro": 0.7632075471698113, "eval_recall_macro": 0.6290591814696965, "eval_recall_micro": 0.7632075471698113, "eval_runtime": 67.0228, "eval_samples_per_second": 15.816, "eval_steps_per_second": 3.954, "step": 900 }, { "epoch": 0.72, "learning_rate": 3e-05, "loss": 0.0543, "step": 910 }, { "epoch": 0.73, "learning_rate": 3e-05, "loss": 0.0426, "step": 920 }, { "epoch": 0.74, "learning_rate": 3e-05, "loss": 0.0421, "step": 930 }, { "epoch": 0.75, "learning_rate": 3e-05, "loss": 0.0338, "step": 940 }, { "epoch": 0.76, "learning_rate": 3e-05, "loss": 0.0453, "step": 950 }, { "epoch": 0.76, "eval_accuracy": 0.780188679245283, "eval_f1_macro": 0.6564187596520696, "eval_f1_micro": 0.780188679245283, "eval_loss": 0.042860858142375946, "eval_precision_macro": 0.67574812222591, "eval_precision_micro": 0.780188679245283, "eval_recall_macro": 0.6697872775950671, "eval_recall_micro": 0.780188679245283, "eval_runtime": 67.3483, "eval_samples_per_second": 15.739, "eval_steps_per_second": 3.935, "step": 950 }, { "epoch": 0.76, "learning_rate": 3e-05, "loss": 0.0554, "step": 960 }, { "epoch": 0.77, "learning_rate": 3e-05, "loss": 0.0397, "step": 970 }, { "epoch": 0.78, "learning_rate": 3e-05, "loss": 0.0407, "step": 980 }, { "epoch": 0.79, "learning_rate": 3e-05, "loss": 0.0361, "step": 990 }, { "epoch": 0.79, "learning_rate": 3e-05, "loss": 0.0452, "step": 1000 }, { "epoch": 0.79, "eval_accuracy": 0.7377358490566037, "eval_f1_macro": 0.6049285124615932, "eval_f1_micro": 0.7377358490566037, "eval_loss": 0.047125279903411865, "eval_precision_macro": 0.6181852032037266, "eval_precision_micro": 0.7377358490566037, "eval_recall_macro": 0.6300074429793591, "eval_recall_micro": 0.7377358490566037, "eval_runtime": 66.8035, "eval_samples_per_second": 15.867, "eval_steps_per_second": 3.967, "step": 1000 }, { "epoch": 0.8, "learning_rate": 3e-05, "loss": 0.0482, "step": 1010 }, { "epoch": 0.81, "learning_rate": 3e-05, "loss": 0.0379, "step": 1020 }, { "epoch": 0.82, "learning_rate": 3e-05, "loss": 0.0403, "step": 1030 }, { "epoch": 0.83, "learning_rate": 3e-05, "loss": 0.0471, "step": 1040 }, { "epoch": 0.83, "learning_rate": 3e-05, "loss": 0.0367, "step": 1050 }, { "epoch": 0.83, "eval_accuracy": 0.7981132075471699, "eval_f1_macro": 0.6800660818700823, "eval_f1_micro": 0.79811320754717, "eval_loss": 0.03875497728586197, "eval_precision_macro": 0.6856812225733196, "eval_precision_micro": 0.7981132075471699, "eval_recall_macro": 0.6992476720564776, "eval_recall_micro": 0.7981132075471699, "eval_runtime": 66.8444, "eval_samples_per_second": 15.858, "eval_steps_per_second": 3.964, "step": 1050 }, { "epoch": 0.84, "learning_rate": 3e-05, "loss": 0.0351, "step": 1060 }, { "epoch": 0.85, "learning_rate": 3e-05, "loss": 0.0479, "step": 1070 }, { "epoch": 0.86, "learning_rate": 3e-05, "loss": 0.0421, "step": 1080 }, { "epoch": 0.87, "learning_rate": 3e-05, "loss": 0.0406, "step": 1090 }, { "epoch": 0.87, "learning_rate": 3e-05, "loss": 0.0377, "step": 1100 }, { "epoch": 0.87, "eval_accuracy": 0.8, "eval_f1_macro": 0.6590911576508658, "eval_f1_micro": 0.8000000000000002, "eval_loss": 0.03815627098083496, "eval_precision_macro": 0.6636349851737382, "eval_precision_micro": 0.8, "eval_recall_macro": 0.6697553358712118, "eval_recall_micro": 0.8, "eval_runtime": 66.9434, "eval_samples_per_second": 15.834, "eval_steps_per_second": 3.959, "step": 1100 }, { "epoch": 0.88, "learning_rate": 3e-05, "loss": 0.0365, "step": 1110 }, { "epoch": 0.89, "learning_rate": 3e-05, "loss": 0.0353, "step": 1120 }, { "epoch": 0.9, "learning_rate": 3e-05, "loss": 0.0388, "step": 1130 }, { "epoch": 0.91, "learning_rate": 3e-05, "loss": 0.0358, "step": 1140 }, { "epoch": 0.91, "learning_rate": 3e-05, "loss": 0.0429, "step": 1150 }, { "epoch": 0.91, "eval_accuracy": 0.7952830188679245, "eval_f1_macro": 0.6465609013784224, "eval_f1_micro": 0.7952830188679245, "eval_loss": 0.03976297378540039, "eval_precision_macro": 0.6923924758215005, "eval_precision_micro": 0.7952830188679245, "eval_recall_macro": 0.6441492192889419, "eval_recall_micro": 0.7952830188679245, "eval_runtime": 67.1705, "eval_samples_per_second": 15.781, "eval_steps_per_second": 3.945, "step": 1150 }, { "epoch": 0.92, "learning_rate": 3e-05, "loss": 0.0461, "step": 1160 }, { "epoch": 0.93, "learning_rate": 3e-05, "loss": 0.0434, "step": 1170 }, { "epoch": 0.94, "learning_rate": 3e-05, "loss": 0.0524, "step": 1180 }, { "epoch": 0.95, "learning_rate": 3e-05, "loss": 0.0362, "step": 1190 }, { "epoch": 0.95, "learning_rate": 3e-05, "loss": 0.0451, "step": 1200 }, { "epoch": 0.95, "eval_accuracy": 0.7943396226415095, "eval_f1_macro": 0.6535399936575059, "eval_f1_micro": 0.7943396226415095, "eval_loss": 0.037755727767944336, "eval_precision_macro": 0.6712905678869693, "eval_precision_micro": 0.7943396226415095, "eval_recall_macro": 0.6537773538776073, "eval_recall_micro": 0.7943396226415095, "eval_runtime": 66.9611, "eval_samples_per_second": 15.83, "eval_steps_per_second": 3.958, "step": 1200 }, { "epoch": 0.96, "learning_rate": 3e-05, "loss": 0.0456, "step": 1210 }, { "epoch": 0.97, "learning_rate": 3e-05, "loss": 0.0455, "step": 1220 }, { "epoch": 0.98, "learning_rate": 3e-05, "loss": 0.0409, "step": 1230 }, { "epoch": 0.99, "learning_rate": 3e-05, "loss": 0.037, "step": 1240 }, { "epoch": 0.99, "learning_rate": 3e-05, "loss": 0.0347, "step": 1250 }, { "epoch": 0.99, "eval_accuracy": 0.7839622641509434, "eval_f1_macro": 0.6330944207402169, "eval_f1_micro": 0.7839622641509434, "eval_loss": 0.041340529918670654, "eval_precision_macro": 0.6735372413807635, "eval_precision_micro": 0.7839622641509434, "eval_recall_macro": 0.6450299050285588, "eval_recall_micro": 0.7839622641509434, "eval_runtime": 66.9053, "eval_samples_per_second": 15.843, "eval_steps_per_second": 3.961, "step": 1250 }, { "epoch": 1.0, "learning_rate": 3e-05, "loss": 0.0421, "step": 1260 }, { "epoch": 1.01, "learning_rate": 3e-05, "loss": 0.041, "step": 1270 }, { "epoch": 1.02, "learning_rate": 3e-05, "loss": 0.033, "step": 1280 }, { "epoch": 1.03, "learning_rate": 3e-05, "loss": 0.036, "step": 1290 }, { "epoch": 1.03, "learning_rate": 3e-05, "loss": 0.0378, "step": 1300 }, { "epoch": 1.03, "eval_accuracy": 0.8047169811320755, "eval_f1_macro": 0.6488791804614907, "eval_f1_micro": 0.8047169811320755, "eval_loss": 0.037683386355638504, "eval_precision_macro": 0.7109359814450084, "eval_precision_micro": 0.8047169811320755, "eval_recall_macro": 0.6387082579227776, "eval_recall_micro": 0.8047169811320755, "eval_runtime": 67.3206, "eval_samples_per_second": 15.746, "eval_steps_per_second": 3.936, "step": 1300 }, { "epoch": 1.04, "learning_rate": 3e-05, "loss": 0.0343, "step": 1310 }, { "epoch": 1.05, "learning_rate": 3e-05, "loss": 0.0321, "step": 1320 }, { "epoch": 1.06, "learning_rate": 3e-05, "loss": 0.031, "step": 1330 }, { "epoch": 1.06, "learning_rate": 3e-05, "loss": 0.039, "step": 1340 }, { "epoch": 1.07, "learning_rate": 3e-05, "loss": 0.0357, "step": 1350 }, { "epoch": 1.07, "eval_accuracy": 0.8028301886792453, "eval_f1_macro": 0.6648963473667772, "eval_f1_micro": 0.8028301886792453, "eval_loss": 0.03860827535390854, "eval_precision_macro": 0.6898539099210392, "eval_precision_micro": 0.8028301886792453, "eval_recall_macro": 0.6558796396655843, "eval_recall_micro": 0.8028301886792453, "eval_runtime": 67.0656, "eval_samples_per_second": 15.805, "eval_steps_per_second": 3.951, "step": 1350 }, { "epoch": 1.08, "learning_rate": 3e-05, "loss": 0.0445, "step": 1360 }, { "epoch": 1.09, "learning_rate": 3e-05, "loss": 0.0375, "step": 1370 }, { "epoch": 1.1, "learning_rate": 3e-05, "loss": 0.0375, "step": 1380 }, { "epoch": 1.1, "learning_rate": 3e-05, "loss": 0.0333, "step": 1390 }, { "epoch": 1.11, "learning_rate": 3e-05, "loss": 0.0418, "step": 1400 }, { "epoch": 1.11, "eval_accuracy": 0.7962264150943397, "eval_f1_macro": 0.6910242491250081, "eval_f1_micro": 0.7962264150943396, "eval_loss": 0.0368194542825222, "eval_precision_macro": 0.7114033533579757, "eval_precision_micro": 0.7962264150943397, "eval_recall_macro": 0.6942176996685531, "eval_recall_micro": 0.7962264150943397, "eval_runtime": 66.8832, "eval_samples_per_second": 15.849, "eval_steps_per_second": 3.962, "step": 1400 }, { "epoch": 1.12, "learning_rate": 3e-05, "loss": 0.0414, "step": 1410 }, { "epoch": 1.13, "learning_rate": 3e-05, "loss": 0.0357, "step": 1420 }, { "epoch": 1.14, "learning_rate": 3e-05, "loss": 0.0272, "step": 1430 }, { "epoch": 1.14, "learning_rate": 3e-05, "loss": 0.0323, "step": 1440 }, { "epoch": 1.15, "learning_rate": 3e-05, "loss": 0.0293, "step": 1450 }, { "epoch": 1.15, "eval_accuracy": 0.8141509433962264, "eval_f1_macro": 0.7097996478763092, "eval_f1_micro": 0.8141509433962264, "eval_loss": 0.035770244896411896, "eval_precision_macro": 0.7222302630120379, "eval_precision_micro": 0.8141509433962264, "eval_recall_macro": 0.7125706602249756, "eval_recall_micro": 0.8141509433962264, "eval_runtime": 67.0694, "eval_samples_per_second": 15.805, "eval_steps_per_second": 3.951, "step": 1450 }, { "epoch": 1.15, "step": 1450, "total_flos": 3.612646182806976e+17, "train_loss": 0.07879953698865298, "train_runtime": 5948.326, "train_samples_per_second": 3.9, "train_steps_per_second": 0.244 } ], "logging_steps": 10, "max_steps": 1450, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 250, "total_flos": 3.612646182806976e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }