{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 52190,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.000999980839241234,
      "loss": 0.7834,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0009899980839241235,
      "loss": 0.7384,
      "step": 522
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.000979996167848247,
      "loss": 0.7597,
      "step": 1044
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0009699942517723703,
      "loss": 0.7401,
      "step": 1566
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0009599923356964936,
      "loss": 0.7495,
      "step": 2088
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.000949990419620617,
      "loss": 0.7017,
      "step": 2610
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0009399885035447404,
      "loss": 0.7398,
      "step": 3132
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.0009299865874688638,
      "loss": 0.6949,
      "step": 3654
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0009199846713929871,
      "loss": 0.6335,
      "step": 4176
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0009099827553171107,
      "loss": 0.648,
      "step": 4698
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.000899980839241234,
      "loss": 0.6445,
      "step": 5220
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.0008899789231653573,
      "loss": 0.6429,
      "step": 5742
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.0008799770070894808,
      "loss": 0.6316,
      "step": 6264
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0008699750910136041,
      "loss": 0.6066,
      "step": 6786
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.0008599731749377275,
      "loss": 0.6397,
      "step": 7308
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.0008499712588618509,
      "loss": 0.6243,
      "step": 7830
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0008399693427859744,
      "loss": 0.6271,
      "step": 8352
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0008299674267100977,
      "loss": 0.614,
      "step": 8874
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0008199655106342211,
      "loss": 0.6358,
      "step": 9396
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0008099635945583445,
      "loss": 0.633,
      "step": 9918
    },
    {
      "epoch": 1.0,
      "eval_Macro F1": 0.5744881635875178,
      "eval_Macro Precision": 0.6541570568719827,
      "eval_Macro Recall": 0.564346253205228,
      "eval_Micro F1": 0.826105701864693,
      "eval_Micro Precision": 0.826105701864693,
      "eval_Micro Recall": 0.826105701864693,
      "eval_Weighted F1": 0.7913544313349856,
      "eval_Weighted Precision": 0.7843732666581203,
      "eval_Weighted Recall": 0.826105701864693,
      "eval_accuracy": 0.826105701864693,
      "eval_loss": 0.5608153343200684,
      "eval_runtime": 2136.2944,
      "eval_samples_per_second": 13.029,
      "eval_steps_per_second": 1.629,
      "step": 10438
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0007999616784824679,
      "loss": 0.6139,
      "step": 10440
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.0007899597624065913,
      "loss": 0.5945,
      "step": 10962
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.0007799578463307148,
      "loss": 0.6018,
      "step": 11484
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.0007699559302548381,
      "loss": 0.597,
      "step": 12006
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0007599540141789615,
      "loss": 0.5891,
      "step": 12528
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.0007499520981030848,
      "loss": 0.6139,
      "step": 13050
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.0007399501820272084,
      "loss": 0.5878,
      "step": 13572
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.0007299482659513317,
      "loss": 0.6183,
      "step": 14094
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.0007199463498754551,
      "loss": 0.6038,
      "step": 14616
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.0007099444337995785,
      "loss": 0.6506,
      "step": 15138
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0006999425177237019,
      "loss": 0.6063,
      "step": 15660
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.0006899406016478252,
      "loss": 0.5931,
      "step": 16182
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0006799386855719488,
      "loss": 0.5828,
      "step": 16704
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.0006699367694960721,
      "loss": 0.5912,
      "step": 17226
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0006599348534201954,
      "loss": 0.6105,
      "step": 17748
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0006499329373443188,
      "loss": 0.5893,
      "step": 18270
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.0006399310212684422,
      "loss": 0.611,
      "step": 18792
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.0006299291051925656,
      "loss": 0.6371,
      "step": 19314
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.000619927189116689,
      "loss": 0.5925,
      "step": 19836
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.0006099252730408125,
      "loss": 0.6029,
      "step": 20358
    },
    {
      "epoch": 2.0,
      "eval_Macro F1": 0.5060450221643912,
      "eval_Macro Precision": 0.6929448637916937,
      "eval_Macro Recall": 0.5239477514921818,
      "eval_Micro F1": 0.8330758452197032,
      "eval_Micro Precision": 0.8330758452197032,
      "eval_Micro Recall": 0.8330758452197032,
      "eval_Weighted F1": 0.7724263350491593,
      "eval_Weighted Precision": 0.7892310862523967,
      "eval_Weighted Recall": 0.8330758452197032,
      "eval_accuracy": 0.8330758452197032,
      "eval_loss": 0.6489848494529724,
      "eval_runtime": 1320.9895,
      "eval_samples_per_second": 21.07,
      "eval_steps_per_second": 2.634,
      "step": 20876
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0005999233569649358,
      "loss": 0.5882,
      "step": 20880
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.0005899214408890592,
      "loss": 0.5831,
      "step": 21402
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.0005799195248131826,
      "loss": 0.5755,
      "step": 21924
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.000569917608737306,
      "loss": 0.5822,
      "step": 22446
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.0005599156926614294,
      "loss": 0.5632,
      "step": 22968
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.0005499137765855528,
      "loss": 0.5756,
      "step": 23490
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.0005399118605096762,
      "loss": 0.6018,
      "step": 24012
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.0005299099444337996,
      "loss": 0.5978,
      "step": 24534
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0005199080283579229,
      "loss": 0.5767,
      "step": 25056
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.0005099061122820464,
      "loss": 0.5538,
      "step": 25578
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.0004999041962061698,
      "loss": 0.5621,
      "step": 26100
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0004899022801302932,
      "loss": 0.594,
      "step": 26622
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.00047990036405441656,
      "loss": 0.5616,
      "step": 27144
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.00046989844797853996,
      "loss": 0.5444,
      "step": 27666
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.00045989653190266336,
      "loss": 0.5558,
      "step": 28188
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.00044989461582678675,
      "loss": 0.5415,
      "step": 28710
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.0004398926997509101,
      "loss": 0.5452,
      "step": 29232
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.00042989078367503355,
      "loss": 0.5595,
      "step": 29754
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00041988886759915695,
      "loss": 0.565,
      "step": 30276
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.0004098869515232803,
      "loss": 0.5478,
      "step": 30798
    },
    {
      "epoch": 3.0,
      "eval_Macro F1": 0.6188634906559709,
      "eval_Macro Precision": 0.6783819638851676,
      "eval_Macro Recall": 0.6003398559173716,
      "eval_Micro F1": 0.8304530593180757,
      "eval_Micro Precision": 0.8304530593180757,
      "eval_Micro Recall": 0.8304530593180757,
      "eval_Weighted F1": 0.8070780134214183,
      "eval_Weighted Precision": 0.8001960025950923,
      "eval_Weighted Recall": 0.8304530593180757,
      "eval_accuracy": 0.8304530593180757,
      "eval_loss": 0.5508156418800354,
      "eval_runtime": 1285.8765,
      "eval_samples_per_second": 21.645,
      "eval_steps_per_second": 2.706,
      "step": 31314
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.00039988503544740374,
      "loss": 0.5593,
      "step": 31320
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.0003898831193715271,
      "loss": 0.5348,
      "step": 31842
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.0003798812032956505,
      "loss": 0.5459,
      "step": 32364
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.00036987928721977394,
      "loss": 0.5544,
      "step": 32886
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.0003598773711438973,
      "loss": 0.5442,
      "step": 33408
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.00034987545506802074,
      "loss": 0.542,
      "step": 33930
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.0003398735389921441,
      "loss": 0.5325,
      "step": 34452
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.0003298716229162675,
      "loss": 0.5525,
      "step": 34974
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.00031986970684039093,
      "loss": 0.5547,
      "step": 35496
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.0003098677907645143,
      "loss": 0.5484,
      "step": 36018
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.00029986587468863767,
      "loss": 0.5368,
      "step": 36540
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.00028986395861276107,
      "loss": 0.5487,
      "step": 37062
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.00027986204253688447,
      "loss": 0.5403,
      "step": 37584
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00026986012646100787,
      "loss": 0.5459,
      "step": 38106
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.00025985821038513126,
      "loss": 0.5376,
      "step": 38628
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.00024985629430925466,
      "loss": 0.5186,
      "step": 39150
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.00023985437823337806,
      "loss": 0.5371,
      "step": 39672
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.00022985246215750143,
      "loss": 0.5413,
      "step": 40194
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.00021985054608162483,
      "loss": 0.5328,
      "step": 40716
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.00020984863000574823,
      "loss": 0.513,
      "step": 41238
    },
    {
      "epoch": 4.0,
      "eval_Macro F1": 0.6224111150035051,
      "eval_Macro Precision": 0.6915564589610266,
      "eval_Macro Recall": 0.60231662265339,
      "eval_Micro F1": 0.8347285596234686,
      "eval_Micro Precision": 0.8347285596234686,
      "eval_Micro Recall": 0.8347285596234686,
      "eval_Weighted F1": 0.8100695837354521,
      "eval_Weighted Precision": 0.8049325371288767,
      "eval_Weighted Recall": 0.8347285596234686,
      "eval_accuracy": 0.8347285596234686,
      "eval_loss": 0.5459285974502563,
      "eval_runtime": 1277.4842,
      "eval_samples_per_second": 21.787,
      "eval_steps_per_second": 2.724,
      "step": 41752
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00019984671392987163,
      "loss": 0.5375,
      "step": 41760
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.00018984479785399502,
      "loss": 0.5505,
      "step": 42282
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.00017984288177811842,
      "loss": 0.5258,
      "step": 42804
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.00016984096570224182,
      "loss": 0.5132,
      "step": 43326
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.0001598390496263652,
      "loss": 0.5182,
      "step": 43848
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.00014983713355048862,
      "loss": 0.5147,
      "step": 44370
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.00013983521747461201,
      "loss": 0.5364,
      "step": 44892
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.00012983330139873538,
      "loss": 0.53,
      "step": 45414
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.00011983138532285878,
      "loss": 0.5288,
      "step": 45936
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.00010982946924698218,
      "loss": 0.5087,
      "step": 46458
    },
    {
      "epoch": 4.5,
      "learning_rate": 9.982755317110558e-05,
      "loss": 0.5101,
      "step": 46980
    },
    {
      "epoch": 4.55,
      "learning_rate": 8.982563709522898e-05,
      "loss": 0.5235,
      "step": 47502
    },
    {
      "epoch": 4.6,
      "learning_rate": 7.982372101935238e-05,
      "loss": 0.5143,
      "step": 48024
    },
    {
      "epoch": 4.65,
      "learning_rate": 6.982180494347576e-05,
      "loss": 0.5299,
      "step": 48546
    },
    {
      "epoch": 4.7,
      "learning_rate": 5.981988886759916e-05,
      "loss": 0.5362,
      "step": 49068
    },
    {
      "epoch": 4.75,
      "learning_rate": 4.981797279172255e-05,
      "loss": 0.5186,
      "step": 49590
    },
    {
      "epoch": 4.8,
      "learning_rate": 3.981605671584595e-05,
      "loss": 0.5256,
      "step": 50112
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.9814140639969346e-05,
      "loss": 0.5233,
      "step": 50634
    },
    {
      "epoch": 4.9,
      "learning_rate": 1.9812224564092737e-05,
      "loss": 0.5263,
      "step": 51156
    },
    {
      "epoch": 4.95,
      "learning_rate": 9.810308488216133e-06,
      "loss": 0.5288,
      "step": 51678
    },
    {
      "epoch": 5.0,
      "eval_Macro F1": 0.6307998158823078,
      "eval_Macro Precision": 0.7029273840489014,
      "eval_Macro Recall": 0.6089557840149206,
      "eval_Micro F1": 0.8381417741529839,
      "eval_Micro Precision": 0.8381417741529839,
      "eval_Micro Recall": 0.8381417741529839,
      "eval_Weighted F1": 0.8141748808079556,
      "eval_Weighted Precision": 0.810121498718634,
      "eval_Weighted Recall": 0.8381417741529839,
      "eval_accuracy": 0.8381417741529839,
      "eval_loss": 0.5336272716522217,
      "eval_runtime": 1277.7048,
      "eval_samples_per_second": 21.784,
      "eval_steps_per_second": 2.724,
      "step": 52190
    },
    {
      "epoch": 5.0,
      "step": 52190,
      "total_flos": 2.1040687845486864e+16,
      "train_loss": 0.5799241374931839,
      "train_runtime": 32322.6365,
      "train_samples_per_second": 12.916,
      "train_steps_per_second": 1.615
    }
  ],
  "max_steps": 52190,
  "num_train_epochs": 5,
  "total_flos": 2.1040687845486864e+16,
  "trial_name": null,
  "trial_params": null
}