{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 52190, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.000999980839241234, "loss": 0.7834, "step": 1 }, { "epoch": 0.05, "learning_rate": 0.0009899980839241235, "loss": 0.7384, "step": 522 }, { "epoch": 0.1, "learning_rate": 0.000979996167848247, "loss": 0.7597, "step": 1044 }, { "epoch": 0.15, "learning_rate": 0.0009699942517723703, "loss": 0.7401, "step": 1566 }, { "epoch": 0.2, "learning_rate": 0.0009599923356964936, "loss": 0.7495, "step": 2088 }, { "epoch": 0.25, "learning_rate": 0.000949990419620617, "loss": 0.7017, "step": 2610 }, { "epoch": 0.3, "learning_rate": 0.0009399885035447404, "loss": 0.7398, "step": 3132 }, { "epoch": 0.35, "learning_rate": 0.0009299865874688638, "loss": 0.6949, "step": 3654 }, { "epoch": 0.4, "learning_rate": 0.0009199846713929871, "loss": 0.6335, "step": 4176 }, { "epoch": 0.45, "learning_rate": 0.0009099827553171107, "loss": 0.648, "step": 4698 }, { "epoch": 0.5, "learning_rate": 0.000899980839241234, "loss": 0.6445, "step": 5220 }, { "epoch": 0.55, "learning_rate": 0.0008899789231653573, "loss": 0.6429, "step": 5742 }, { "epoch": 0.6, "learning_rate": 0.0008799770070894808, "loss": 0.6316, "step": 6264 }, { "epoch": 0.65, "learning_rate": 0.0008699750910136041, "loss": 0.6066, "step": 6786 }, { "epoch": 0.7, "learning_rate": 0.0008599731749377275, "loss": 0.6397, "step": 7308 }, { "epoch": 0.75, "learning_rate": 0.0008499712588618509, "loss": 0.6243, "step": 7830 }, { "epoch": 0.8, "learning_rate": 0.0008399693427859744, "loss": 0.6271, "step": 8352 }, { "epoch": 0.85, "learning_rate": 0.0008299674267100977, "loss": 0.614, "step": 8874 }, { "epoch": 0.9, "learning_rate": 0.0008199655106342211, "loss": 0.6358, "step": 9396 }, { "epoch": 0.95, "learning_rate": 0.0008099635945583445, "loss": 0.633, "step": 9918 }, { "epoch": 1.0, "eval_Macro F1": 0.5744881635875178, "eval_Macro Precision": 0.6541570568719827, "eval_Macro Recall": 0.564346253205228, "eval_Micro F1": 0.826105701864693, "eval_Micro Precision": 0.826105701864693, "eval_Micro Recall": 0.826105701864693, "eval_Weighted F1": 0.7913544313349856, "eval_Weighted Precision": 0.7843732666581203, "eval_Weighted Recall": 0.826105701864693, "eval_accuracy": 0.826105701864693, "eval_loss": 0.5608153343200684, "eval_runtime": 2136.2944, "eval_samples_per_second": 13.029, "eval_steps_per_second": 1.629, "step": 10438 }, { "epoch": 1.0, "learning_rate": 0.0007999616784824679, "loss": 0.6139, "step": 10440 }, { "epoch": 1.05, "learning_rate": 0.0007899597624065913, "loss": 0.5945, "step": 10962 }, { "epoch": 1.1, "learning_rate": 0.0007799578463307148, "loss": 0.6018, "step": 11484 }, { "epoch": 1.15, "learning_rate": 0.0007699559302548381, "loss": 0.597, "step": 12006 }, { "epoch": 1.2, "learning_rate": 0.0007599540141789615, "loss": 0.5891, "step": 12528 }, { "epoch": 1.25, "learning_rate": 0.0007499520981030848, "loss": 0.6139, "step": 13050 }, { "epoch": 1.3, "learning_rate": 0.0007399501820272084, "loss": 0.5878, "step": 13572 }, { "epoch": 1.35, "learning_rate": 0.0007299482659513317, "loss": 0.6183, "step": 14094 }, { "epoch": 1.4, "learning_rate": 0.0007199463498754551, "loss": 0.6038, "step": 14616 }, { "epoch": 1.45, "learning_rate": 0.0007099444337995785, "loss": 0.6506, "step": 15138 }, { "epoch": 1.5, "learning_rate": 0.0006999425177237019, "loss": 0.6063, "step": 15660 }, { "epoch": 1.55, "learning_rate": 0.0006899406016478252, "loss": 0.5931, "step": 16182 }, { "epoch": 1.6, "learning_rate": 0.0006799386855719488, "loss": 0.5828, "step": 16704 }, { "epoch": 1.65, "learning_rate": 0.0006699367694960721, "loss": 0.5912, "step": 17226 }, { "epoch": 1.7, "learning_rate": 0.0006599348534201954, "loss": 0.6105, "step": 17748 }, { "epoch": 1.75, "learning_rate": 0.0006499329373443188, "loss": 0.5893, "step": 18270 }, { "epoch": 1.8, "learning_rate": 0.0006399310212684422, "loss": 0.611, "step": 18792 }, { "epoch": 1.85, "learning_rate": 0.0006299291051925656, "loss": 0.6371, "step": 19314 }, { "epoch": 1.9, "learning_rate": 0.000619927189116689, "loss": 0.5925, "step": 19836 }, { "epoch": 1.95, "learning_rate": 0.0006099252730408125, "loss": 0.6029, "step": 20358 }, { "epoch": 2.0, "eval_Macro F1": 0.5060450221643912, "eval_Macro Precision": 0.6929448637916937, "eval_Macro Recall": 0.5239477514921818, "eval_Micro F1": 0.8330758452197032, "eval_Micro Precision": 0.8330758452197032, "eval_Micro Recall": 0.8330758452197032, "eval_Weighted F1": 0.7724263350491593, "eval_Weighted Precision": 0.7892310862523967, "eval_Weighted Recall": 0.8330758452197032, "eval_accuracy": 0.8330758452197032, "eval_loss": 0.6489848494529724, "eval_runtime": 1320.9895, "eval_samples_per_second": 21.07, "eval_steps_per_second": 2.634, "step": 20876 }, { "epoch": 2.0, "learning_rate": 0.0005999233569649358, "loss": 0.5882, "step": 20880 }, { "epoch": 2.05, "learning_rate": 0.0005899214408890592, "loss": 0.5831, "step": 21402 }, { "epoch": 2.1, "learning_rate": 0.0005799195248131826, "loss": 0.5755, "step": 21924 }, { "epoch": 2.15, "learning_rate": 0.000569917608737306, "loss": 0.5822, "step": 22446 }, { "epoch": 2.2, "learning_rate": 0.0005599156926614294, "loss": 0.5632, "step": 22968 }, { "epoch": 2.25, "learning_rate": 0.0005499137765855528, "loss": 0.5756, "step": 23490 }, { "epoch": 2.3, "learning_rate": 0.0005399118605096762, "loss": 0.6018, "step": 24012 }, { "epoch": 2.35, "learning_rate": 0.0005299099444337996, "loss": 0.5978, "step": 24534 }, { "epoch": 2.4, "learning_rate": 0.0005199080283579229, "loss": 0.5767, "step": 25056 }, { "epoch": 2.45, "learning_rate": 0.0005099061122820464, "loss": 0.5538, "step": 25578 }, { "epoch": 2.5, "learning_rate": 0.0004999041962061698, "loss": 0.5621, "step": 26100 }, { "epoch": 2.55, "learning_rate": 0.0004899022801302932, "loss": 0.594, "step": 26622 }, { "epoch": 2.6, "learning_rate": 0.00047990036405441656, "loss": 0.5616, "step": 27144 }, { "epoch": 2.65, "learning_rate": 0.00046989844797853996, "loss": 0.5444, "step": 27666 }, { "epoch": 2.7, "learning_rate": 0.00045989653190266336, "loss": 0.5558, "step": 28188 }, { "epoch": 2.75, "learning_rate": 0.00044989461582678675, "loss": 0.5415, "step": 28710 }, { "epoch": 2.8, "learning_rate": 0.0004398926997509101, "loss": 0.5452, "step": 29232 }, { "epoch": 2.85, "learning_rate": 0.00042989078367503355, "loss": 0.5595, "step": 29754 }, { "epoch": 2.9, "learning_rate": 0.00041988886759915695, "loss": 0.565, "step": 30276 }, { "epoch": 2.95, "learning_rate": 0.0004098869515232803, "loss": 0.5478, "step": 30798 }, { "epoch": 3.0, "eval_Macro F1": 0.6188634906559709, "eval_Macro Precision": 0.6783819638851676, "eval_Macro Recall": 0.6003398559173716, "eval_Micro F1": 0.8304530593180757, "eval_Micro Precision": 0.8304530593180757, "eval_Micro Recall": 0.8304530593180757, "eval_Weighted F1": 0.8070780134214183, "eval_Weighted Precision": 0.8001960025950923, "eval_Weighted Recall": 0.8304530593180757, "eval_accuracy": 0.8304530593180757, "eval_loss": 0.5508156418800354, "eval_runtime": 1285.8765, "eval_samples_per_second": 21.645, "eval_steps_per_second": 2.706, "step": 31314 }, { "epoch": 3.0, "learning_rate": 0.00039988503544740374, "loss": 0.5593, "step": 31320 }, { "epoch": 3.05, "learning_rate": 0.0003898831193715271, "loss": 0.5348, "step": 31842 }, { "epoch": 3.1, "learning_rate": 0.0003798812032956505, "loss": 0.5459, "step": 32364 }, { "epoch": 3.15, "learning_rate": 0.00036987928721977394, "loss": 0.5544, "step": 32886 }, { "epoch": 3.2, "learning_rate": 0.0003598773711438973, "loss": 0.5442, "step": 33408 }, { "epoch": 3.25, "learning_rate": 0.00034987545506802074, "loss": 0.542, "step": 33930 }, { "epoch": 3.3, "learning_rate": 0.0003398735389921441, "loss": 0.5325, "step": 34452 }, { "epoch": 3.35, "learning_rate": 0.0003298716229162675, "loss": 0.5525, "step": 34974 }, { "epoch": 3.4, "learning_rate": 0.00031986970684039093, "loss": 0.5547, "step": 35496 }, { "epoch": 3.45, "learning_rate": 0.0003098677907645143, "loss": 0.5484, "step": 36018 }, { "epoch": 3.5, "learning_rate": 0.00029986587468863767, "loss": 0.5368, "step": 36540 }, { "epoch": 3.55, "learning_rate": 0.00028986395861276107, "loss": 0.5487, "step": 37062 }, { "epoch": 3.6, "learning_rate": 0.00027986204253688447, "loss": 0.5403, "step": 37584 }, { "epoch": 3.65, "learning_rate": 0.00026986012646100787, "loss": 0.5459, "step": 38106 }, { "epoch": 3.7, "learning_rate": 0.00025985821038513126, "loss": 0.5376, "step": 38628 }, { "epoch": 3.75, "learning_rate": 0.00024985629430925466, "loss": 0.5186, "step": 39150 }, { "epoch": 3.8, "learning_rate": 0.00023985437823337806, "loss": 0.5371, "step": 39672 }, { "epoch": 3.85, "learning_rate": 0.00022985246215750143, "loss": 0.5413, "step": 40194 }, { "epoch": 3.9, "learning_rate": 0.00021985054608162483, "loss": 0.5328, "step": 40716 }, { "epoch": 3.95, "learning_rate": 0.00020984863000574823, "loss": 0.513, "step": 41238 }, { "epoch": 4.0, "eval_Macro F1": 0.6224111150035051, "eval_Macro Precision": 0.6915564589610266, "eval_Macro Recall": 0.60231662265339, "eval_Micro F1": 0.8347285596234686, "eval_Micro Precision": 0.8347285596234686, "eval_Micro Recall": 0.8347285596234686, "eval_Weighted F1": 0.8100695837354521, "eval_Weighted Precision": 0.8049325371288767, "eval_Weighted Recall": 0.8347285596234686, "eval_accuracy": 0.8347285596234686, "eval_loss": 0.5459285974502563, "eval_runtime": 1277.4842, "eval_samples_per_second": 21.787, "eval_steps_per_second": 2.724, "step": 41752 }, { "epoch": 4.0, "learning_rate": 0.00019984671392987163, "loss": 0.5375, "step": 41760 }, { "epoch": 4.05, "learning_rate": 0.00018984479785399502, "loss": 0.5505, "step": 42282 }, { "epoch": 4.1, "learning_rate": 0.00017984288177811842, "loss": 0.5258, "step": 42804 }, { "epoch": 4.15, "learning_rate": 0.00016984096570224182, "loss": 0.5132, "step": 43326 }, { "epoch": 4.2, "learning_rate": 0.0001598390496263652, "loss": 0.5182, "step": 43848 }, { "epoch": 4.25, "learning_rate": 0.00014983713355048862, "loss": 0.5147, "step": 44370 }, { "epoch": 4.3, "learning_rate": 0.00013983521747461201, "loss": 0.5364, "step": 44892 }, { "epoch": 4.35, "learning_rate": 0.00012983330139873538, "loss": 0.53, "step": 45414 }, { "epoch": 4.4, "learning_rate": 0.00011983138532285878, "loss": 0.5288, "step": 45936 }, { "epoch": 4.45, "learning_rate": 0.00010982946924698218, "loss": 0.5087, "step": 46458 }, { "epoch": 4.5, "learning_rate": 9.982755317110558e-05, "loss": 0.5101, "step": 46980 }, { "epoch": 4.55, "learning_rate": 8.982563709522898e-05, "loss": 0.5235, "step": 47502 }, { "epoch": 4.6, "learning_rate": 7.982372101935238e-05, "loss": 0.5143, "step": 48024 }, { "epoch": 4.65, "learning_rate": 6.982180494347576e-05, "loss": 0.5299, "step": 48546 }, { "epoch": 4.7, "learning_rate": 5.981988886759916e-05, "loss": 0.5362, "step": 49068 }, { "epoch": 4.75, "learning_rate": 4.981797279172255e-05, "loss": 0.5186, "step": 49590 }, { "epoch": 4.8, "learning_rate": 3.981605671584595e-05, "loss": 0.5256, "step": 50112 }, { "epoch": 4.85, "learning_rate": 2.9814140639969346e-05, "loss": 0.5233, "step": 50634 }, { "epoch": 4.9, "learning_rate": 1.9812224564092737e-05, "loss": 0.5263, "step": 51156 }, { "epoch": 4.95, "learning_rate": 9.810308488216133e-06, "loss": 0.5288, "step": 51678 }, { "epoch": 5.0, "eval_Macro F1": 0.6307998158823078, "eval_Macro Precision": 0.7029273840489014, "eval_Macro Recall": 0.6089557840149206, "eval_Micro F1": 0.8381417741529839, "eval_Micro Precision": 0.8381417741529839, "eval_Micro Recall": 0.8381417741529839, "eval_Weighted F1": 0.8141748808079556, "eval_Weighted Precision": 0.810121498718634, "eval_Weighted Recall": 0.8381417741529839, "eval_accuracy": 0.8381417741529839, "eval_loss": 0.5336272716522217, "eval_runtime": 1277.7048, "eval_samples_per_second": 21.784, "eval_steps_per_second": 2.724, "step": 52190 }, { "epoch": 5.0, "step": 52190, "total_flos": 2.1040687845486864e+16, "train_loss": 0.5799241374931839, "train_runtime": 32322.6365, "train_samples_per_second": 12.916, "train_steps_per_second": 1.615 } ], "max_steps": 52190, "num_train_epochs": 5, "total_flos": 2.1040687845486864e+16, "trial_name": null, "trial_params": null }