{ "best_metric": 0.7120954003407156, "best_model_checkpoint": "Distil4\\checkpoint-40348", "epoch": 100.0, "global_step": 52400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.950095419847329e-06, "loss": 0.4749, "step": 523 }, { "epoch": 1.0, "eval_accuracy": 0.4026503567787971, "eval_f1": 0.5392528424472117, "eval_loss": 0.4015094041824341, "eval_roc_auc": 0.6891349750981157, "eval_runtime": 9.4449, "eval_samples_per_second": 103.866, "eval_steps_per_second": 6.988, "step": 524 }, { "epoch": 2.0, "learning_rate": 4.900190839694656e-06, "loss": 0.3845, "step": 1046 }, { "epoch": 2.0, "eval_accuracy": 0.5168195718654435, "eval_f1": 0.6353754940711462, "eval_loss": 0.36452510952949524, "eval_roc_auc": 0.747088659888458, "eval_runtime": 9.9987, "eval_samples_per_second": 98.113, "eval_steps_per_second": 6.601, "step": 1048 }, { "epoch": 2.99, "learning_rate": 4.850286259541985e-06, "loss": 0.3475, "step": 1569 }, { "epoch": 3.0, "eval_accuracy": 0.5310907237512742, "eval_f1": 0.6470871449205585, "eval_loss": 0.3549170196056366, "eval_roc_auc": 0.7563264556700557, "eval_runtime": 10.07, "eval_samples_per_second": 97.418, "eval_steps_per_second": 6.554, "step": 1572 }, { "epoch": 3.99, "learning_rate": 4.800381679389313e-06, "loss": 0.326, "step": 2092 }, { "epoch": 4.0, "eval_accuracy": 0.5545361875637105, "eval_f1": 0.674766355140187, "eval_loss": 0.3448670506477356, "eval_roc_auc": 0.7760477152234284, "eval_runtime": 10.1448, "eval_samples_per_second": 96.7, "eval_steps_per_second": 6.506, "step": 2096 }, { "epoch": 4.99, "learning_rate": 4.750477099236642e-06, "loss": 0.3083, "step": 2615 }, { "epoch": 5.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.6812674743709226, "eval_loss": 0.34419235587120056, "eval_roc_auc": 0.7803114456863509, "eval_runtime": 10.1716, "eval_samples_per_second": 96.445, "eval_steps_per_second": 6.489, "step": 2620 }, { "epoch": 5.99, "learning_rate": 4.700572519083969e-06, "loss": 0.2952, "step": 3138 }, { "epoch": 6.0, "eval_accuracy": 0.5718654434250765, "eval_f1": 0.6965452847805789, "eval_loss": 0.3325030505657196, "eval_roc_auc": 0.789289320878566, "eval_runtime": 10.2795, "eval_samples_per_second": 95.433, "eval_steps_per_second": 6.421, "step": 3144 }, { "epoch": 6.99, "learning_rate": 4.650667938931298e-06, "loss": 0.2799, "step": 3661 }, { "epoch": 7.0, "eval_accuracy": 0.5728848114169215, "eval_f1": 0.6865116279069767, "eval_loss": 0.34394344687461853, "eval_roc_auc": 0.7837168070505612, "eval_runtime": 10.3609, "eval_samples_per_second": 94.683, "eval_steps_per_second": 6.37, "step": 3668 }, { "epoch": 7.98, "learning_rate": 4.600763358778627e-06, "loss": 0.2626, "step": 4184 }, { "epoch": 8.0, "eval_accuracy": 0.5800203873598369, "eval_f1": 0.692876965772433, "eval_loss": 0.33631590008735657, "eval_roc_auc": 0.7883041472539074, "eval_runtime": 9.1916, "eval_samples_per_second": 106.728, "eval_steps_per_second": 7.18, "step": 4192 }, { "epoch": 8.98, "learning_rate": 4.550858778625955e-06, "loss": 0.2529, "step": 4707 }, { "epoch": 9.0, "eval_accuracy": 0.5749235474006116, "eval_f1": 0.6857670979667283, "eval_loss": 0.34780624508857727, "eval_roc_auc": 0.7840966468522641, "eval_runtime": 8.7207, "eval_samples_per_second": 112.491, "eval_steps_per_second": 7.568, "step": 4716 }, { "epoch": 9.98, "learning_rate": 4.500954198473283e-06, "loss": 0.237, "step": 5230 }, { "epoch": 10.0, "eval_accuracy": 0.5667686034658511, "eval_f1": 0.68721251149954, "eval_loss": 0.34721097350120544, "eval_roc_auc": 0.7855741204011842, "eval_runtime": 8.7533, "eval_samples_per_second": 112.072, "eval_steps_per_second": 7.54, "step": 5240 }, { "epoch": 10.98, "learning_rate": 4.451049618320611e-06, "loss": 0.2223, "step": 5753 }, { "epoch": 11.0, "eval_accuracy": 0.5657492354740061, "eval_f1": 0.6845698680018207, "eval_loss": 0.3609465956687927, "eval_roc_auc": 0.785313626035666, "eval_runtime": 8.7365, "eval_samples_per_second": 112.288, "eval_steps_per_second": 7.555, "step": 5764 }, { "epoch": 11.98, "learning_rate": 4.40114503816794e-06, "loss": 0.2117, "step": 6276 }, { "epoch": 12.0, "eval_accuracy": 0.581039755351682, "eval_f1": 0.6981818181818182, "eval_loss": 0.36286601424217224, "eval_roc_auc": 0.7939185467398039, "eval_runtime": 8.7731, "eval_samples_per_second": 111.819, "eval_steps_per_second": 7.523, "step": 6288 }, { "epoch": 12.98, "learning_rate": 4.351240458015267e-06, "loss": 0.2, "step": 6799 }, { "epoch": 13.0, "eval_accuracy": 0.5851172273190621, "eval_f1": 0.7062999112688554, "eval_loss": 0.3647877275943756, "eval_roc_auc": 0.8024597782929012, "eval_runtime": 8.7204, "eval_samples_per_second": 112.495, "eval_steps_per_second": 7.568, "step": 6812 }, { "epoch": 13.97, "learning_rate": 4.301335877862596e-06, "loss": 0.1909, "step": 7322 }, { "epoch": 14.0, "eval_accuracy": 0.5698267074413863, "eval_f1": 0.7012058954890575, "eval_loss": 0.3725411891937256, "eval_roc_auc": 0.7982735076082716, "eval_runtime": 8.798, "eval_samples_per_second": 111.503, "eval_steps_per_second": 7.502, "step": 7336 }, { "epoch": 14.97, "learning_rate": 4.2514312977099246e-06, "loss": 0.1782, "step": 7845 }, { "epoch": 15.0, "eval_accuracy": 0.5800203873598369, "eval_f1": 0.7042128603104212, "eval_loss": 0.37900835275650024, "eval_roc_auc": 0.8012003396754722, "eval_runtime": 8.7328, "eval_samples_per_second": 112.335, "eval_steps_per_second": 7.558, "step": 7860 }, { "epoch": 15.97, "learning_rate": 4.2015267175572526e-06, "loss": 0.1653, "step": 8368 }, { "epoch": 16.0, "eval_accuracy": 0.5739041794087666, "eval_f1": 0.7040141155712396, "eval_loss": 0.3948748707771301, "eval_roc_auc": 0.8018475591563196, "eval_runtime": 8.6978, "eval_samples_per_second": 112.788, "eval_steps_per_second": 7.588, "step": 8384 }, { "epoch": 16.97, "learning_rate": 4.1516221374045806e-06, "loss": 0.156, "step": 8891 }, { "epoch": 17.0, "eval_accuracy": 0.5769622833843018, "eval_f1": 0.7074468085106383, "eval_loss": 0.39961278438568115, "eval_roc_auc": 0.8033181473916137, "eval_runtime": 8.6958, "eval_samples_per_second": 112.813, "eval_steps_per_second": 7.59, "step": 8908 }, { "epoch": 17.97, "learning_rate": 4.1017175572519085e-06, "loss": 0.1478, "step": 9414 }, { "epoch": 18.0, "eval_accuracy": 0.5575942915392457, "eval_f1": 0.6918402777777779, "eval_loss": 0.41351279616355896, "eval_roc_auc": 0.79633816070322, "eval_runtime": 8.7585, "eval_samples_per_second": 112.006, "eval_steps_per_second": 7.536, "step": 9432 }, { "epoch": 18.96, "learning_rate": 4.051812977099237e-06, "loss": 0.1362, "step": 9937 }, { "epoch": 19.0, "eval_accuracy": 0.5616717635066258, "eval_f1": 0.7035263387026557, "eval_loss": 0.4201168119907379, "eval_roc_auc": 0.8034656078584379, "eval_runtime": 8.7893, "eval_samples_per_second": 111.613, "eval_steps_per_second": 7.509, "step": 9956 }, { "epoch": 19.96, "learning_rate": 4.001908396946565e-06, "loss": 0.1316, "step": 10460 }, { "epoch": 20.0, "eval_accuracy": 0.54638124362895, "eval_f1": 0.6954270923209663, "eval_loss": 0.4302760362625122, "eval_roc_auc": 0.7995323724495651, "eval_runtime": 8.7776, "eval_samples_per_second": 111.762, "eval_steps_per_second": 7.519, "step": 10480 }, { "epoch": 20.96, "learning_rate": 3.952003816793893e-06, "loss": 0.1189, "step": 10983 }, { "epoch": 21.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.6955767562879446, "eval_loss": 0.4442458152770996, "eval_roc_auc": 0.7988851529687178, "eval_runtime": 9.0585, "eval_samples_per_second": 108.296, "eval_steps_per_second": 7.286, "step": 11004 }, { "epoch": 21.96, "learning_rate": 3.902099236641222e-06, "loss": 0.1153, "step": 11506 }, { "epoch": 22.0, "eval_accuracy": 0.5800203873598369, "eval_f1": 0.7025121198765976, "eval_loss": 0.4538831412792206, "eval_roc_auc": 0.8010173050882469, "eval_runtime": 8.6472, "eval_samples_per_second": 113.447, "eval_steps_per_second": 7.633, "step": 11528 }, { "epoch": 22.96, "learning_rate": 3.85219465648855e-06, "loss": 0.1037, "step": 12029 }, { "epoch": 23.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.7012430347192455, "eval_loss": 0.4749497175216675, "eval_roc_auc": 0.804281517523123, "eval_runtime": 8.6607, "eval_samples_per_second": 113.27, "eval_steps_per_second": 7.621, "step": 12052 }, { "epoch": 23.95, "learning_rate": 3.802290076335878e-06, "loss": 0.0968, "step": 12552 }, { "epoch": 24.0, "eval_accuracy": 0.5606523955147809, "eval_f1": 0.7047124945957631, "eval_loss": 0.479137659072876, "eval_roc_auc": 0.8052666911477818, "eval_runtime": 8.7017, "eval_samples_per_second": 112.737, "eval_steps_per_second": 7.585, "step": 12576 }, { "epoch": 24.95, "learning_rate": 3.752385496183206e-06, "loss": 0.0923, "step": 13075 }, { "epoch": 25.0, "eval_accuracy": 0.5555555555555556, "eval_f1": 0.6873362445414847, "eval_loss": 0.50052809715271, "eval_roc_auc": 0.7925810745679466, "eval_runtime": 8.625, "eval_samples_per_second": 113.739, "eval_steps_per_second": 7.652, "step": 13100 }, { "epoch": 25.95, "learning_rate": 3.7024809160305346e-06, "loss": 0.0846, "step": 13598 }, { "epoch": 26.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.6971770744225834, "eval_loss": 0.5201558470726013, "eval_roc_auc": 0.801924445158477, "eval_runtime": 8.6427, "eval_samples_per_second": 113.506, "eval_steps_per_second": 7.636, "step": 13624 }, { "epoch": 26.95, "learning_rate": 3.6525763358778626e-06, "loss": 0.0807, "step": 14121 }, { "epoch": 27.0, "eval_accuracy": 0.5688073394495413, "eval_f1": 0.7090358841331603, "eval_loss": 0.52068030834198, "eval_roc_auc": 0.8080810630924239, "eval_runtime": 8.6305, "eval_samples_per_second": 113.667, "eval_steps_per_second": 7.647, "step": 14148 }, { "epoch": 27.95, "learning_rate": 3.602671755725191e-06, "loss": 0.0748, "step": 14644 }, { "epoch": 28.0, "eval_accuracy": 0.5728848114169215, "eval_f1": 0.7102321582115221, "eval_loss": 0.5269947052001953, "eval_roc_auc": 0.8097203415115557, "eval_runtime": 8.6597, "eval_samples_per_second": 113.284, "eval_steps_per_second": 7.622, "step": 14672 }, { "epoch": 28.94, "learning_rate": 3.5527671755725195e-06, "loss": 0.0727, "step": 15167 }, { "epoch": 29.0, "eval_accuracy": 0.528032619775739, "eval_f1": 0.6827004219409282, "eval_loss": 0.5663571953773499, "eval_roc_auc": 0.7942691239585964, "eval_runtime": 8.6961, "eval_samples_per_second": 112.809, "eval_steps_per_second": 7.59, "step": 15196 }, { "epoch": 29.94, "learning_rate": 3.5028625954198474e-06, "loss": 0.0685, "step": 15690 }, { "epoch": 30.0, "eval_accuracy": 0.5586136595310908, "eval_f1": 0.696027633851468, "eval_loss": 0.5751686096191406, "eval_roc_auc": 0.7997997521287095, "eval_runtime": 8.6393, "eval_samples_per_second": 113.551, "eval_steps_per_second": 7.639, "step": 15720 }, { "epoch": 30.94, "learning_rate": 3.452958015267176e-06, "loss": 0.0629, "step": 16213 }, { "epoch": 31.0, "eval_accuracy": 0.5596330275229358, "eval_f1": 0.7002606429192008, "eval_loss": 0.5864209532737732, "eval_roc_auc": 0.8016714098827202, "eval_runtime": 8.6544, "eval_samples_per_second": 113.352, "eval_steps_per_second": 7.626, "step": 16244 }, { "epoch": 31.94, "learning_rate": 3.403053435114504e-06, "loss": 0.0586, "step": 16736 }, { "epoch": 32.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7002992731936725, "eval_loss": 0.5988386273384094, "eval_roc_auc": 0.8040422528746184, "eval_runtime": 8.6401, "eval_samples_per_second": 113.54, "eval_steps_per_second": 7.639, "step": 16768 }, { "epoch": 32.94, "learning_rate": 3.3531488549618323e-06, "loss": 0.0573, "step": 17259 }, { "epoch": 33.0, "eval_accuracy": 0.5596330275229358, "eval_f1": 0.7038148306900984, "eval_loss": 0.5976923108100891, "eval_roc_auc": 0.8059701406899085, "eval_runtime": 10.028, "eval_samples_per_second": 97.827, "eval_steps_per_second": 6.582, "step": 17292 }, { "epoch": 33.94, "learning_rate": 3.3032442748091603e-06, "loss": 0.0549, "step": 17782 }, { "epoch": 34.0, "eval_accuracy": 0.5759429153924567, "eval_f1": 0.7030091583078936, "eval_loss": 0.6293498873710632, "eval_roc_auc": 0.8028746184388699, "eval_runtime": 9.2788, "eval_samples_per_second": 105.725, "eval_steps_per_second": 7.113, "step": 17816 }, { "epoch": 34.93, "learning_rate": 3.2533396946564887e-06, "loss": 0.0503, "step": 18305 }, { "epoch": 35.0, "eval_accuracy": 0.5667686034658511, "eval_f1": 0.7081545064377682, "eval_loss": 0.630769670009613, "eval_roc_auc": 0.8086227077643385, "eval_runtime": 9.6294, "eval_samples_per_second": 101.876, "eval_steps_per_second": 6.854, "step": 18340 }, { "epoch": 35.93, "learning_rate": 3.203435114503817e-06, "loss": 0.0477, "step": 18828 }, { "epoch": 36.0, "eval_accuracy": 0.527013251783894, "eval_f1": 0.6998341625207297, "eval_loss": 0.6515944004058838, "eval_roc_auc": 0.8083547543090588, "eval_runtime": 10.0399, "eval_samples_per_second": 97.71, "eval_steps_per_second": 6.574, "step": 18864 }, { "epoch": 36.93, "learning_rate": 3.153530534351145e-06, "loss": 0.0464, "step": 19351 }, { "epoch": 37.0, "eval_accuracy": 0.5484199796126402, "eval_f1": 0.7017984107068171, "eval_loss": 0.6635262370109558, "eval_roc_auc": 0.8083478689954327, "eval_runtime": 9.7697, "eval_samples_per_second": 100.412, "eval_steps_per_second": 6.756, "step": 19388 }, { "epoch": 37.93, "learning_rate": 3.1036259541984735e-06, "loss": 0.0436, "step": 19874 }, { "epoch": 38.0, "eval_accuracy": 0.563710499490316, "eval_f1": 0.7022834984920293, "eval_loss": 0.6706992387771606, "eval_roc_auc": 0.8041971724312043, "eval_runtime": 9.7315, "eval_samples_per_second": 100.807, "eval_steps_per_second": 6.782, "step": 19912 }, { "epoch": 38.93, "learning_rate": 3.0537213740458015e-06, "loss": 0.0394, "step": 20397 }, { "epoch": 39.0, "eval_accuracy": 0.527013251783894, "eval_f1": 0.6885798567214496, "eval_loss": 0.7015945911407471, "eval_roc_auc": 0.7983710495513071, "eval_runtime": 9.8867, "eval_samples_per_second": 99.224, "eval_steps_per_second": 6.676, "step": 20436 }, { "epoch": 39.92, "learning_rate": 3.00381679389313e-06, "loss": 0.038, "step": 20920 }, { "epoch": 40.0, "eval_accuracy": 0.5372069317023446, "eval_f1": 0.6997894736842106, "eval_loss": 0.7073464393615723, "eval_roc_auc": 0.8059839113171605, "eval_runtime": 9.9063, "eval_samples_per_second": 99.028, "eval_steps_per_second": 6.662, "step": 20960 }, { "epoch": 40.92, "learning_rate": 2.953912213740458e-06, "loss": 0.036, "step": 21443 }, { "epoch": 41.0, "eval_accuracy": 0.5474006116207951, "eval_f1": 0.6956521739130435, "eval_loss": 0.7225540280342102, "eval_roc_auc": 0.7999896720295611, "eval_runtime": 9.951, "eval_samples_per_second": 98.583, "eval_steps_per_second": 6.632, "step": 21484 }, { "epoch": 41.92, "learning_rate": 2.9040076335877863e-06, "loss": 0.0341, "step": 21966 }, { "epoch": 42.0, "eval_accuracy": 0.564729867482161, "eval_f1": 0.7089262613195343, "eval_loss": 0.7202900052070618, "eval_roc_auc": 0.8084046728328477, "eval_runtime": 9.6425, "eval_samples_per_second": 101.737, "eval_steps_per_second": 6.845, "step": 22008 }, { "epoch": 42.92, "learning_rate": 2.8541030534351148e-06, "loss": 0.0323, "step": 22489 }, { "epoch": 43.0, "eval_accuracy": 0.5606523955147809, "eval_f1": 0.7040552200172563, "eval_loss": 0.7239031791687012, "eval_roc_auc": 0.8051611163388492, "eval_runtime": 9.9111, "eval_samples_per_second": 98.98, "eval_steps_per_second": 6.659, "step": 22532 }, { "epoch": 43.92, "learning_rate": 2.8041984732824428e-06, "loss": 0.0302, "step": 23012 }, { "epoch": 44.0, "eval_accuracy": 0.5535168195718655, "eval_f1": 0.6988466467321658, "eval_loss": 0.7510971426963806, "eval_roc_auc": 0.8032119988065456, "eval_runtime": 9.9666, "eval_samples_per_second": 98.429, "eval_steps_per_second": 6.622, "step": 23056 }, { "epoch": 44.91, "learning_rate": 2.754293893129771e-06, "loss": 0.0286, "step": 23535 }, { "epoch": 45.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7002132196162046, "eval_loss": 0.7605226635932922, "eval_roc_auc": 0.8043658626150422, "eval_runtime": 9.9921, "eval_samples_per_second": 98.178, "eval_steps_per_second": 6.605, "step": 23580 }, { "epoch": 45.91, "learning_rate": 2.704389312977099e-06, "loss": 0.0275, "step": 24058 }, { "epoch": 46.0, "eval_accuracy": 0.5484199796126402, "eval_f1": 0.6999573196756296, "eval_loss": 0.774695634841919, "eval_roc_auc": 0.8040703679052581, "eval_runtime": 9.9221, "eval_samples_per_second": 98.87, "eval_steps_per_second": 6.652, "step": 24104 }, { "epoch": 46.91, "learning_rate": 2.6544847328244276e-06, "loss": 0.026, "step": 24581 }, { "epoch": 47.0, "eval_accuracy": 0.5372069317023446, "eval_f1": 0.6884827879303017, "eval_loss": 0.7950236797332764, "eval_roc_auc": 0.7971047256202519, "eval_runtime": 10.0992, "eval_samples_per_second": 97.137, "eval_steps_per_second": 6.535, "step": 24628 }, { "epoch": 47.91, "learning_rate": 2.6045801526717556e-06, "loss": 0.0247, "step": 25104 }, { "epoch": 48.0, "eval_accuracy": 0.5474006116207951, "eval_f1": 0.6989293361884368, "eval_loss": 0.8052034974098206, "eval_roc_auc": 0.802888389066122, "eval_runtime": 10.0437, "eval_samples_per_second": 97.673, "eval_steps_per_second": 6.571, "step": 25152 }, { "epoch": 48.91, "learning_rate": 2.554675572519084e-06, "loss": 0.0223, "step": 25627 }, { "epoch": 49.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7021276595744681, "eval_loss": 0.8275096416473389, "eval_roc_auc": 0.8059489109728948, "eval_runtime": 9.9609, "eval_samples_per_second": 98.485, "eval_steps_per_second": 6.626, "step": 25676 }, { "epoch": 49.9, "learning_rate": 2.5047709923664124e-06, "loss": 0.0239, "step": 26150 }, { "epoch": 50.0, "eval_accuracy": 0.5382262996941896, "eval_f1": 0.6999168744804655, "eval_loss": 0.8239336609840393, "eval_roc_auc": 0.8080311445686351, "eval_runtime": 9.9615, "eval_samples_per_second": 98.479, "eval_steps_per_second": 6.626, "step": 26200 }, { "epoch": 50.9, "learning_rate": 2.4548664122137404e-06, "loss": 0.023, "step": 26673 }, { "epoch": 51.0, "eval_accuracy": 0.5484199796126402, "eval_f1": 0.7054823629409264, "eval_loss": 0.8209096193313599, "eval_roc_auc": 0.8083622133988203, "eval_runtime": 10.0023, "eval_samples_per_second": 98.078, "eval_steps_per_second": 6.598, "step": 26724 }, { "epoch": 51.9, "learning_rate": 2.404961832061069e-06, "loss": 0.0199, "step": 27196 }, { "epoch": 52.0, "eval_accuracy": 0.5484199796126402, "eval_f1": 0.7072438919845693, "eval_loss": 0.828449010848999, "eval_roc_auc": 0.8082216382456221, "eval_runtime": 10.055, "eval_samples_per_second": 97.563, "eval_steps_per_second": 6.564, "step": 27248 }, { "epoch": 52.9, "learning_rate": 2.3550572519083973e-06, "loss": 0.0197, "step": 27719 }, { "epoch": 53.0, "eval_accuracy": 0.563710499490316, "eval_f1": 0.7059333044608056, "eval_loss": 0.8516786098480225, "eval_roc_auc": 0.8058014505060705, "eval_runtime": 10.0044, "eval_samples_per_second": 98.057, "eval_steps_per_second": 6.597, "step": 27772 }, { "epoch": 53.9, "learning_rate": 2.3051526717557252e-06, "loss": 0.0168, "step": 28242 }, { "epoch": 54.0, "eval_accuracy": 0.5351681957186545, "eval_f1": 0.699581589958159, "eval_loss": 0.8833754658699036, "eval_roc_auc": 0.8067929356682197, "eval_runtime": 10.0433, "eval_samples_per_second": 97.678, "eval_steps_per_second": 6.572, "step": 28296 }, { "epoch": 54.9, "learning_rate": 2.2552480916030537e-06, "loss": 0.018, "step": 28765 }, { "epoch": 55.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7060839760068551, "eval_loss": 0.8679118156433105, "eval_roc_auc": 0.8075250740171215, "eval_runtime": 9.9308, "eval_samples_per_second": 98.784, "eval_steps_per_second": 6.646, "step": 28820 }, { "epoch": 55.89, "learning_rate": 2.2053435114503817e-06, "loss": 0.0169, "step": 29288 }, { "epoch": 56.0, "eval_accuracy": 0.5575942915392457, "eval_f1": 0.7040417209908736, "eval_loss": 0.8795809149742126, "eval_roc_auc": 0.8040565972780059, "eval_runtime": 9.9865, "eval_samples_per_second": 98.232, "eval_steps_per_second": 6.609, "step": 29344 }, { "epoch": 56.89, "learning_rate": 2.15543893129771e-06, "loss": 0.0168, "step": 29811 }, { "epoch": 57.0, "eval_accuracy": 0.5433231396534148, "eval_f1": 0.6988879384088965, "eval_loss": 0.9083885550498962, "eval_roc_auc": 0.8030501939363338, "eval_runtime": 10.0982, "eval_samples_per_second": 97.146, "eval_steps_per_second": 6.536, "step": 29868 }, { "epoch": 57.89, "learning_rate": 2.105534351145038e-06, "loss": 0.0156, "step": 30334 }, { "epoch": 58.0, "eval_accuracy": 0.5606523955147809, "eval_f1": 0.710651142733937, "eval_loss": 0.9000456929206848, "eval_roc_auc": 0.8095304216107044, "eval_runtime": 10.0091, "eval_samples_per_second": 98.011, "eval_steps_per_second": 6.594, "step": 30392 }, { "epoch": 58.89, "learning_rate": 2.055629770992367e-06, "loss": 0.0138, "step": 30857 }, { "epoch": 59.0, "eval_accuracy": 0.5382262996941896, "eval_f1": 0.6970849176172369, "eval_loss": 0.9262450337409973, "eval_roc_auc": 0.8036761837001675, "eval_runtime": 9.8719, "eval_samples_per_second": 99.373, "eval_steps_per_second": 6.686, "step": 30916 }, { "epoch": 59.89, "learning_rate": 2.005725190839695e-06, "loss": 0.0139, "step": 31380 }, { "epoch": 60.0, "eval_accuracy": 0.5596330275229358, "eval_f1": 0.7065868263473054, "eval_loss": 0.923125684261322, "eval_roc_auc": 0.8081160634366895, "eval_runtime": 10.0204, "eval_samples_per_second": 97.901, "eval_steps_per_second": 6.587, "step": 31440 }, { "epoch": 60.88, "learning_rate": 1.955820610687023e-06, "loss": 0.0155, "step": 31903 }, { "epoch": 61.0, "eval_accuracy": 0.5596330275229358, "eval_f1": 0.7081545064377682, "eval_loss": 0.9300869107246399, "eval_roc_auc": 0.8086227077643385, "eval_runtime": 9.9625, "eval_samples_per_second": 98.47, "eval_steps_per_second": 6.625, "step": 31964 }, { "epoch": 61.88, "learning_rate": 1.9059160305343513e-06, "loss": 0.0149, "step": 32426 }, { "epoch": 62.0, "eval_accuracy": 0.5504587155963303, "eval_f1": 0.7104930467762326, "eval_loss": 0.9461256861686707, "eval_roc_auc": 0.8130057836634459, "eval_runtime": 10.0052, "eval_samples_per_second": 98.049, "eval_steps_per_second": 6.597, "step": 32488 }, { "epoch": 62.88, "learning_rate": 1.8560114503816795e-06, "loss": 0.0124, "step": 32949 }, { "epoch": 63.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.7081380485726461, "eval_loss": 0.9584424495697021, "eval_roc_auc": 0.809727226825182, "eval_runtime": 9.989, "eval_samples_per_second": 98.208, "eval_steps_per_second": 6.607, "step": 33012 }, { "epoch": 63.88, "learning_rate": 1.8061068702290077e-06, "loss": 0.011, "step": 33472 }, { "epoch": 64.0, "eval_accuracy": 0.5545361875637105, "eval_f1": 0.7051226861816616, "eval_loss": 0.9570773243904114, "eval_roc_auc": 0.8061812903077735, "eval_runtime": 10.0472, "eval_samples_per_second": 97.639, "eval_steps_per_second": 6.569, "step": 33536 }, { "epoch": 64.88, "learning_rate": 1.756202290076336e-06, "loss": 0.0114, "step": 33995 }, { "epoch": 65.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.7043701799485862, "eval_loss": 0.9560405015945435, "eval_roc_auc": 0.8063993252392647, "eval_runtime": 10.0628, "eval_samples_per_second": 97.488, "eval_steps_per_second": 6.559, "step": 34060 }, { "epoch": 65.87, "learning_rate": 1.7062977099236644e-06, "loss": 0.011, "step": 34518 }, { "epoch": 66.0, "eval_accuracy": 0.5504587155963303, "eval_f1": 0.7023809523809524, "eval_loss": 0.9797949194908142, "eval_roc_auc": 0.8062444056826787, "eval_runtime": 10.0681, "eval_samples_per_second": 97.436, "eval_steps_per_second": 6.555, "step": 34584 }, { "epoch": 66.87, "learning_rate": 1.6563931297709926e-06, "loss": 0.0107, "step": 35041 }, { "epoch": 67.0, "eval_accuracy": 0.545361875637105, "eval_f1": 0.6986301369863014, "eval_loss": 0.9825329780578613, "eval_roc_auc": 0.8027546992265496, "eval_runtime": 10.0516, "eval_samples_per_second": 97.596, "eval_steps_per_second": 6.566, "step": 35108 }, { "epoch": 67.87, "learning_rate": 1.6064885496183208e-06, "loss": 0.0091, "step": 35564 }, { "epoch": 68.0, "eval_accuracy": 0.5606523955147809, "eval_f1": 0.7040552200172563, "eval_loss": 0.9886102676391602, "eval_roc_auc": 0.8051611163388492, "eval_runtime": 10.0044, "eval_samples_per_second": 98.056, "eval_steps_per_second": 6.597, "step": 35632 }, { "epoch": 68.87, "learning_rate": 1.556583969465649e-06, "loss": 0.0095, "step": 36087 }, { "epoch": 69.0, "eval_accuracy": 0.5392456676860347, "eval_f1": 0.7075, "eval_loss": 1.007102131843567, "eval_roc_auc": 0.8127734043285672, "eval_runtime": 10.0072, "eval_samples_per_second": 98.029, "eval_steps_per_second": 6.595, "step": 36156 }, { "epoch": 69.87, "learning_rate": 1.5066793893129772e-06, "loss": 0.0088, "step": 36610 }, { "epoch": 70.0, "eval_accuracy": 0.5443425076452599, "eval_f1": 0.7051336444633007, "eval_loss": 1.004128098487854, "eval_roc_auc": 0.8083903284294599, "eval_runtime": 10.0021, "eval_samples_per_second": 98.079, "eval_steps_per_second": 6.599, "step": 36680 }, { "epoch": 70.86, "learning_rate": 1.4567748091603054e-06, "loss": 0.0102, "step": 37133 }, { "epoch": 71.0, "eval_accuracy": 0.5474006116207951, "eval_f1": 0.7023354564755838, "eval_loss": 1.023705244064331, "eval_roc_auc": 0.8064062105528907, "eval_runtime": 10.0301, "eval_samples_per_second": 97.806, "eval_steps_per_second": 6.58, "step": 37204 }, { "epoch": 71.86, "learning_rate": 1.4068702290076336e-06, "loss": 0.0086, "step": 37656 }, { "epoch": 72.0, "eval_accuracy": 0.5606523955147809, "eval_f1": 0.7097887020267357, "eval_loss": 1.0078336000442505, "eval_roc_auc": 0.808967547221776, "eval_runtime": 10.0138, "eval_samples_per_second": 97.964, "eval_steps_per_second": 6.591, "step": 37728 }, { "epoch": 72.86, "learning_rate": 1.356965648854962e-06, "loss": 0.0084, "step": 38179 }, { "epoch": 73.0, "eval_accuracy": 0.5504587155963303, "eval_f1": 0.7039249146757679, "eval_loss": 1.0251305103302002, "eval_roc_auc": 0.8067510500103279, "eval_runtime": 9.9979, "eval_samples_per_second": 98.12, "eval_steps_per_second": 6.601, "step": 38252 }, { "epoch": 73.86, "learning_rate": 1.3070610687022902e-06, "loss": 0.0084, "step": 38702 }, { "epoch": 74.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.7060839760068551, "eval_loss": 1.0233700275421143, "eval_roc_auc": 0.8075250740171215, "eval_runtime": 10.0267, "eval_samples_per_second": 97.839, "eval_steps_per_second": 6.582, "step": 38776 }, { "epoch": 74.86, "learning_rate": 1.2571564885496184e-06, "loss": 0.0076, "step": 39225 }, { "epoch": 75.0, "eval_accuracy": 0.5433231396534148, "eval_f1": 0.7029787234042555, "eval_loss": 1.0505975484848022, "eval_roc_auc": 0.8065117853618233, "eval_runtime": 9.9951, "eval_samples_per_second": 98.148, "eval_steps_per_second": 6.603, "step": 39300 }, { "epoch": 75.85, "learning_rate": 1.2072519083969466e-06, "loss": 0.0089, "step": 39748 }, { "epoch": 76.0, "eval_accuracy": 0.5575942915392457, "eval_f1": 0.7084398976982098, "eval_loss": 1.0305790901184082, "eval_roc_auc": 0.8098609166647541, "eval_runtime": 10.0015, "eval_samples_per_second": 98.086, "eval_steps_per_second": 6.599, "step": 39824 }, { "epoch": 76.85, "learning_rate": 1.1573473282442748e-06, "loss": 0.0074, "step": 40271 }, { "epoch": 77.0, "eval_accuracy": 0.5514780835881753, "eval_f1": 0.7120954003407156, "eval_loss": 1.037413477897644, "eval_roc_auc": 0.8124079089302518, "eval_runtime": 10.0466, "eval_samples_per_second": 97.645, "eval_steps_per_second": 6.569, "step": 40348 }, { "epoch": 77.85, "learning_rate": 1.1074427480916033e-06, "loss": 0.0064, "step": 40794 }, { "epoch": 78.0, "eval_accuracy": 0.563710499490316, "eval_f1": 0.7064418504107222, "eval_loss": 1.0435516834259033, "eval_roc_auc": 0.8063924399256387, "eval_runtime": 9.9844, "eval_samples_per_second": 98.254, "eval_steps_per_second": 6.61, "step": 40872 }, { "epoch": 78.85, "learning_rate": 1.0575381679389315e-06, "loss": 0.0079, "step": 41317 }, { "epoch": 79.0, "eval_accuracy": 0.5535168195718655, "eval_f1": 0.7080479452054794, "eval_loss": 1.0525715351104736, "eval_roc_auc": 0.8089463175047622, "eval_runtime": 10.0182, "eval_samples_per_second": 97.922, "eval_steps_per_second": 6.588, "step": 41396 }, { "epoch": 79.85, "learning_rate": 1.0076335877862597e-06, "loss": 0.0059, "step": 41840 }, { "epoch": 80.0, "eval_accuracy": 0.545361875637105, "eval_f1": 0.7050359712230215, "eval_loss": 1.0556447505950928, "eval_roc_auc": 0.8087139381698836, "eval_runtime": 9.9919, "eval_samples_per_second": 98.18, "eval_steps_per_second": 6.605, "step": 41920 }, { "epoch": 80.85, "learning_rate": 9.577290076335879e-07, "loss": 0.0063, "step": 42363 }, { "epoch": 81.0, "eval_accuracy": 0.5412844036697247, "eval_f1": 0.7034834324553951, "eval_loss": 1.0627212524414062, "eval_roc_auc": 0.8071027747813913, "eval_runtime": 10.1039, "eval_samples_per_second": 97.091, "eval_steps_per_second": 6.532, "step": 42444 }, { "epoch": 81.84, "learning_rate": 9.078244274809162e-07, "loss": 0.0064, "step": 42886 }, { "epoch": 82.0, "eval_accuracy": 0.5382262996941896, "eval_f1": 0.7048903878583473, "eval_loss": 1.0669602155685425, "eval_roc_auc": 0.8091993527805191, "eval_runtime": 10.2231, "eval_samples_per_second": 95.96, "eval_steps_per_second": 6.456, "step": 42968 }, { "epoch": 82.84, "learning_rate": 8.579198473282444e-07, "loss": 0.0064, "step": 43409 }, { "epoch": 83.0, "eval_accuracy": 0.5575942915392457, "eval_f1": 0.70824434002563, "eval_loss": 1.0584640502929688, "eval_roc_auc": 0.8094036170847583, "eval_runtime": 10.0727, "eval_samples_per_second": 97.392, "eval_steps_per_second": 6.552, "step": 43492 }, { "epoch": 83.84, "learning_rate": 8.080152671755725e-07, "loss": 0.006, "step": 43932 }, { "epoch": 84.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7100340136054422, "eval_loss": 1.0683461427688599, "eval_roc_auc": 0.8113102751830346, "eval_runtime": 10.0298, "eval_samples_per_second": 97.809, "eval_steps_per_second": 6.58, "step": 44016 }, { "epoch": 84.84, "learning_rate": 7.581106870229009e-07, "loss": 0.0056, "step": 44455 }, { "epoch": 85.0, "eval_accuracy": 0.5484199796126402, "eval_f1": 0.7041294167730949, "eval_loss": 1.0729014873504639, "eval_roc_auc": 0.8072083495903238, "eval_runtime": 10.0817, "eval_samples_per_second": 97.305, "eval_steps_per_second": 6.547, "step": 44540 }, { "epoch": 85.84, "learning_rate": 7.082061068702291e-07, "loss": 0.0063, "step": 44978 }, { "epoch": 86.0, "eval_accuracy": 0.564729867482161, "eval_f1": 0.7094274644855791, "eval_loss": 1.0694997310638428, "eval_roc_auc": 0.8089956622524155, "eval_runtime": 10.0647, "eval_samples_per_second": 97.469, "eval_steps_per_second": 6.558, "step": 45064 }, { "epoch": 86.83, "learning_rate": 6.583015267175573e-07, "loss": 0.0051, "step": 45501 }, { "epoch": 87.0, "eval_accuracy": 0.5555555555555556, "eval_f1": 0.7070967741935484, "eval_loss": 1.0698884725570679, "eval_roc_auc": 0.8076025337954145, "eval_runtime": 10.0544, "eval_samples_per_second": 97.569, "eval_steps_per_second": 6.564, "step": 45588 }, { "epoch": 87.83, "learning_rate": 6.083969465648855e-07, "loss": 0.0059, "step": 46024 }, { "epoch": 88.0, "eval_accuracy": 0.5494393476044852, "eval_f1": 0.7032119914346895, "eval_loss": 1.073889970779419, "eval_roc_auc": 0.8057027610107641, "eval_runtime": 10.0443, "eval_samples_per_second": 97.668, "eval_steps_per_second": 6.571, "step": 46112 }, { "epoch": 88.83, "learning_rate": 5.584923664122137e-07, "loss": 0.0067, "step": 46547 }, { "epoch": 89.0, "eval_accuracy": 0.5535168195718655, "eval_f1": 0.7074422583404619, "eval_loss": 1.0765037536621094, "eval_roc_auc": 0.808678937825618, "eval_runtime": 10.0208, "eval_samples_per_second": 97.897, "eval_steps_per_second": 6.586, "step": 46636 }, { "epoch": 89.83, "learning_rate": 5.085877862595421e-07, "loss": 0.0055, "step": 47070 }, { "epoch": 90.0, "eval_accuracy": 0.5423037716615698, "eval_f1": 0.705531914893617, "eval_loss": 1.0747418403625488, "eval_roc_auc": 0.8082004085286085, "eval_runtime": 10.0179, "eval_samples_per_second": 97.924, "eval_steps_per_second": 6.588, "step": 47160 }, { "epoch": 90.83, "learning_rate": 4.586832061068703e-07, "loss": 0.0041, "step": 47593 }, { "epoch": 91.0, "eval_accuracy": 0.5524974515800204, "eval_f1": 0.7061855670103093, "eval_loss": 1.077362060546875, "eval_roc_auc": 0.8072014642766979, "eval_runtime": 9.974, "eval_samples_per_second": 98.356, "eval_steps_per_second": 6.617, "step": 47684 }, { "epoch": 91.82, "learning_rate": 4.0877862595419847e-07, "loss": 0.0051, "step": 48116 }, { "epoch": 92.0, "eval_accuracy": 0.5565749235474006, "eval_f1": 0.706792777300086, "eval_loss": 1.0821139812469482, "eval_roc_auc": 0.8074688439558423, "eval_runtime": 9.9576, "eval_samples_per_second": 98.517, "eval_steps_per_second": 6.628, "step": 48208 }, { "epoch": 92.82, "learning_rate": 3.588740458015268e-07, "loss": 0.0053, "step": 48639 }, { "epoch": 93.0, "eval_accuracy": 0.5535168195718655, "eval_f1": 0.7076526225279449, "eval_loss": 1.0813453197479248, "eval_roc_auc": 0.8080317183447706, "eval_runtime": 10.0101, "eval_samples_per_second": 98.001, "eval_steps_per_second": 6.593, "step": 48732 }, { "epoch": 93.82, "learning_rate": 3.08969465648855e-07, "loss": 0.0052, "step": 49162 }, { "epoch": 94.0, "eval_accuracy": 0.5494393476044852, "eval_f1": 0.7078364565587735, "eval_loss": 1.0872832536697388, "eval_roc_auc": 0.8095935369856098, "eval_runtime": 10.0385, "eval_samples_per_second": 97.724, "eval_steps_per_second": 6.575, "step": 49256 }, { "epoch": 94.82, "learning_rate": 2.5906488549618325e-07, "loss": 0.0049, "step": 49685 }, { "epoch": 95.0, "eval_accuracy": 0.5504587155963303, "eval_f1": 0.7095681625740897, "eval_loss": 1.0950355529785156, "eval_roc_auc": 0.8116619999540979, "eval_runtime": 10.0471, "eval_samples_per_second": 97.64, "eval_steps_per_second": 6.569, "step": 49780 }, { "epoch": 95.82, "learning_rate": 2.0916030534351148e-07, "loss": 0.0055, "step": 50208 }, { "epoch": 96.0, "eval_accuracy": 0.54638124362895, "eval_f1": 0.7064846416382252, "eval_loss": 1.091185450553894, "eval_roc_auc": 0.8084396731771133, "eval_runtime": 10.0192, "eval_samples_per_second": 97.912, "eval_steps_per_second": 6.587, "step": 50304 }, { "epoch": 96.81, "learning_rate": 1.5925572519083971e-07, "loss": 0.0048, "step": 50731 }, { "epoch": 97.0, "eval_accuracy": 0.5555555555555556, "eval_f1": 0.70926243567753, "eval_loss": 1.0918930768966675, "eval_roc_auc": 0.809481076863051, "eval_runtime": 10.1024, "eval_samples_per_second": 97.106, "eval_steps_per_second": 6.533, "step": 50828 }, { "epoch": 97.81, "learning_rate": 1.0935114503816793e-07, "loss": 0.0052, "step": 51254 }, { "epoch": 98.0, "eval_accuracy": 0.5514780835881753, "eval_f1": 0.7080479452054794, "eval_loss": 1.0924346446990967, "eval_roc_auc": 0.8089463175047622, "eval_runtime": 10.042, "eval_samples_per_second": 97.689, "eval_steps_per_second": 6.572, "step": 51352 }, { "epoch": 98.81, "learning_rate": 5.9446564885496193e-08, "loss": 0.0041, "step": 51777 }, { "epoch": 99.0, "eval_accuracy": 0.5504587155963303, "eval_f1": 0.70824434002563, "eval_loss": 1.0939857959747314, "eval_roc_auc": 0.8094036170847583, "eval_runtime": 10.0515, "eval_samples_per_second": 97.598, "eval_steps_per_second": 6.566, "step": 51876 }, { "epoch": 99.81, "learning_rate": 9.541984732824428e-09, "loss": 0.0043, "step": 52300 }, { "epoch": 100.0, "eval_accuracy": 0.5514780835881753, "eval_f1": 0.7073378839590444, "eval_loss": 1.0944114923477173, "eval_roc_auc": 0.8090025475660416, "eval_runtime": 10.0289, "eval_samples_per_second": 97.818, "eval_steps_per_second": 6.581, "step": 52400 } ], "max_steps": 52400, "num_train_epochs": 100, "total_flos": 1.03939481527296e+17, "trial_name": null, "trial_params": null }