{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "global_step": 26299, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 1.923951481044907e-05, "loss": 0.0205, "step": 1000 }, { "epoch": 0.27, "eval_EG_f1": 0.9960332310455804, "eval_ET_f1": 0.9988771614641814, "eval_TE_f1": 0.9952833720146739, "eval_loss": 0.0021067976485937834, "eval_overall_accuracy": 0.9995180550845189, "eval_overall_f1": 0.9967312922623949, "eval_overall_precision": 0.9966566866267466, "eval_overall_recall": 0.9968059090682237, "eval_runtime": 16.2281, "eval_samples_per_second": 411.569, "eval_steps_per_second": 25.758, "step": 1000 }, { "epoch": 0.53, "learning_rate": 1.8479029620898135e-05, "loss": 0.0027, "step": 2000 }, { "epoch": 0.53, "eval_EG_f1": 0.9955841628620612, "eval_ET_f1": 0.9992513849378649, "eval_TE_f1": 0.995433789954338, "eval_loss": 0.0024818568490445614, "eval_overall_accuracy": 0.9993931064027275, "eval_overall_f1": 0.9967563251659264, "eval_overall_precision": 0.9966568534504266, "eval_overall_recall": 0.9968558167390328, "eval_runtime": 16.4446, "eval_samples_per_second": 406.152, "eval_steps_per_second": 25.419, "step": 2000 }, { "epoch": 0.8, "learning_rate": 1.77185444313472e-05, "loss": 0.0018, "step": 3000 }, { "epoch": 0.8, "eval_EG_f1": 0.9987274496594056, "eval_ET_f1": 0.999700553975146, "eval_TE_f1": 0.9982783142450782, "eval_loss": 0.0007141608512029052, "eval_overall_accuracy": 0.9998393516948396, "eval_overall_f1": 0.9989020860365306, "eval_overall_precision": 0.9988522381356355, "eval_overall_recall": 0.9989519389130109, "eval_runtime": 16.3629, "eval_samples_per_second": 408.181, "eval_steps_per_second": 25.546, "step": 3000 }, { "epoch": 1.06, "learning_rate": 1.695805924179627e-05, "loss": 0.0015, "step": 4000 }, { "epoch": 1.06, "eval_EG_f1": 0.9991765850737331, "eval_ET_f1": 0.9992513849378649, "eval_TE_f1": 0.9983530468633028, "eval_loss": 0.0010974227916449308, "eval_overall_accuracy": 0.9997858022597862, "eval_overall_f1": 0.9989270118527761, "eval_overall_precision": 0.9989020860365306, "eval_overall_recall": 0.9989519389130109, "eval_runtime": 16.7722, "eval_samples_per_second": 398.219, "eval_steps_per_second": 24.922, "step": 4000 }, { "epoch": 1.33, "learning_rate": 1.6197574052245332e-05, "loss": 0.0005, "step": 5000 }, { "epoch": 1.33, "eval_EG_f1": 0.9982030548068285, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9979038778260219, "eval_loss": 0.0007138435612432659, "eval_overall_accuracy": 0.9998125769773129, "eval_overall_f1": 0.998652425633859, "eval_overall_precision": 0.998702271025705, "eval_overall_recall": 0.9986025852173479, "eval_runtime": 16.5358, "eval_samples_per_second": 403.912, "eval_steps_per_second": 25.279, "step": 5000 }, { "epoch": 1.6, "learning_rate": 1.54370888626944e-05, "loss": 0.0008, "step": 6000 }, { "epoch": 1.6, "eval_EG_f1": 0.9983530468633028, "eval_ET_f1": 0.999550830962719, "eval_TE_f1": 0.9979038778260219, "eval_loss": 0.0009068112121894956, "eval_overall_accuracy": 0.9997947271656284, "eval_overall_f1": 0.9986025852173479, "eval_overall_precision": 0.9986025852173479, "eval_overall_recall": 0.9986025852173479, "eval_runtime": 16.3597, "eval_samples_per_second": 408.259, "eval_steps_per_second": 25.551, "step": 6000 }, { "epoch": 1.86, "learning_rate": 1.4676603673143466e-05, "loss": 0.0009, "step": 7000 }, { "epoch": 1.86, "eval_EG_f1": 0.9989519389130109, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9988022159005839, "eval_loss": 0.0010577181819826365, "eval_overall_accuracy": 0.9998482766006819, "eval_overall_f1": 0.999201477267056, "eval_overall_precision": 0.999201477267056, "eval_overall_recall": 0.999201477267056, "eval_runtime": 16.3447, "eval_samples_per_second": 408.634, "eval_steps_per_second": 25.574, "step": 7000 }, { "epoch": 2.13, "learning_rate": 1.3916118483592534e-05, "loss": 0.0005, "step": 8000 }, { "epoch": 2.13, "eval_EG_f1": 0.9991765850737331, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9991765850737331, "eval_loss": 0.0005780701176263392, "eval_overall_accuracy": 0.9998839762240508, "eval_overall_f1": 0.9994011378381076, "eval_overall_precision": 0.9993512650331853, "eval_overall_recall": 0.999451015621101, "eval_runtime": 16.6712, "eval_samples_per_second": 400.631, "eval_steps_per_second": 25.073, "step": 8000 }, { "epoch": 2.4, "learning_rate": 1.31556332940416e-05, "loss": 0.0002, "step": 9000 }, { "epoch": 2.4, "eval_EG_f1": 0.9988771614641814, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9985777378546299, "eval_loss": 0.0005696099251508713, "eval_overall_accuracy": 0.999892901129893, "eval_overall_f1": 0.9991017067571614, "eval_overall_precision": 0.9990518488946554, "eval_overall_recall": 0.999151569596247, "eval_runtime": 16.4448, "eval_samples_per_second": 406.147, "eval_steps_per_second": 25.418, "step": 9000 }, { "epoch": 2.66, "learning_rate": 1.2395148104490666e-05, "loss": 0.0005, "step": 10000 }, { "epoch": 2.66, "eval_EG_f1": 0.9989519389130109, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9988771614641814, "eval_loss": 0.0004689108463935554, "eval_overall_accuracy": 0.999892901129893, "eval_overall_f1": 0.9992264504054896, "eval_overall_precision": 0.9992015171174768, "eval_overall_recall": 0.9992513849378649, "eval_runtime": 16.3326, "eval_samples_per_second": 408.938, "eval_steps_per_second": 25.593, "step": 10000 }, { "epoch": 2.93, "learning_rate": 1.1634662914939731e-05, "loss": 0.0002, "step": 11000 }, { "epoch": 2.93, "eval_EG_f1": 0.9988771614641814, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9990268732689572, "eval_loss": 0.0005850127199664712, "eval_overall_accuracy": 0.9999286007532621, "eval_overall_f1": 0.9993013274777922, "eval_overall_precision": 0.9992514596536753, "eval_overall_recall": 0.999351200279483, "eval_runtime": 16.4437, "eval_samples_per_second": 406.175, "eval_steps_per_second": 25.42, "step": 11000 }, { "epoch": 3.19, "learning_rate": 1.08741777253888e-05, "loss": 0.0008, "step": 12000 }, { "epoch": 3.19, "eval_EG_f1": 0.9992513849378649, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.0004198316019028425, "eval_overall_accuracy": 0.9999464505649466, "eval_overall_f1": 0.99950092329191, "eval_overall_precision": 0.99950092329191, "eval_overall_recall": 0.99950092329191, "eval_runtime": 16.4892, "eval_samples_per_second": 405.052, "eval_steps_per_second": 25.35, "step": 12000 }, { "epoch": 3.46, "learning_rate": 1.0113692535837865e-05, "loss": 0.0002, "step": 13000 }, { "epoch": 3.46, "eval_EG_f1": 0.9991016619254379, "eval_ET_f1": 0.999700553975146, "eval_TE_f1": 0.9989519389130109, "eval_loss": 0.0011158857960253954, "eval_overall_accuracy": 0.9998482766006819, "eval_overall_f1": 0.9992513849378649, "eval_overall_precision": 0.9992513849378649, "eval_overall_recall": 0.9992513849378649, "eval_runtime": 16.7537, "eval_samples_per_second": 398.659, "eval_steps_per_second": 24.95, "step": 13000 }, { "epoch": 3.73, "learning_rate": 9.353207346286931e-06, "loss": 0.0003, "step": 14000 }, { "epoch": 3.73, "eval_EG_f1": 0.9988022159005839, "eval_ET_f1": 0.999850276987573, "eval_TE_f1": 0.9986524928881569, "eval_loss": 0.0010174426715821028, "eval_overall_accuracy": 0.9998393516948396, "eval_overall_f1": 0.9991016619254379, "eval_overall_precision": 0.9991016619254379, "eval_overall_recall": 0.9991016619254379, "eval_runtime": 16.5225, "eval_samples_per_second": 404.237, "eval_steps_per_second": 25.299, "step": 14000 }, { "epoch": 3.99, "learning_rate": 8.592722156735998e-06, "loss": 0.0, "step": 15000 }, { "epoch": 3.99, "eval_EG_f1": 0.9992513849378649, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.001382750691846013, "eval_overall_accuracy": 0.9999018260357353, "eval_overall_f1": 0.99950092329191, "eval_overall_precision": 0.99950092329191, "eval_overall_recall": 0.99950092329191, "eval_runtime": 16.3741, "eval_samples_per_second": 407.899, "eval_steps_per_second": 25.528, "step": 15000 }, { "epoch": 4.26, "learning_rate": 7.832236967185065e-06, "loss": 0.0002, "step": 16000 }, { "epoch": 4.26, "eval_EG_f1": 0.9990268732689572, "eval_ET_f1": 0.9997754322928364, "eval_TE_f1": 0.9991016619254379, "eval_loss": 0.0008425627020187676, "eval_overall_accuracy": 0.9998839762240508, "eval_overall_f1": 0.9993013274777922, "eval_overall_precision": 0.9992514596536753, "eval_overall_recall": 0.999351200279483, "eval_runtime": 16.4088, "eval_samples_per_second": 407.037, "eval_steps_per_second": 25.474, "step": 16000 }, { "epoch": 4.52, "learning_rate": 7.071751777634132e-06, "loss": 0.0001, "step": 17000 }, { "epoch": 4.52, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 0.999700553975146, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.000884207256603986, "eval_overall_accuracy": 0.9999196758474198, "eval_overall_f1": 0.999451015621101, "eval_overall_precision": 0.999451015621101, "eval_overall_recall": 0.999451015621101, "eval_runtime": 16.6905, "eval_samples_per_second": 400.169, "eval_steps_per_second": 25.044, "step": 17000 }, { "epoch": 4.79, "learning_rate": 6.311266588083198e-06, "loss": 0.0005, "step": 18000 }, { "epoch": 4.79, "eval_EG_f1": 0.9992513849378649, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.0005574871320277452, "eval_overall_accuracy": 0.9999375256591043, "eval_overall_f1": 0.99950092329191, "eval_overall_precision": 0.99950092329191, "eval_overall_recall": 0.99950092329191, "eval_runtime": 16.5015, "eval_samples_per_second": 404.752, "eval_steps_per_second": 25.331, "step": 18000 }, { "epoch": 5.06, "learning_rate": 5.5507813985322644e-06, "loss": 0.0, "step": 19000 }, { "epoch": 5.06, "eval_EG_f1": 0.9989519389130109, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9989519389130109, "eval_loss": 0.0005674651474691927, "eval_overall_accuracy": 0.9999018260357353, "eval_overall_f1": 0.999301292608674, "eval_overall_precision": 0.999301292608674, "eval_overall_recall": 0.999301292608674, "eval_runtime": 16.4647, "eval_samples_per_second": 405.657, "eval_steps_per_second": 25.388, "step": 19000 }, { "epoch": 5.32, "learning_rate": 4.79029620898133e-06, "loss": 0.0001, "step": 20000 }, { "epoch": 5.32, "eval_EG_f1": 0.9992513849378649, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.0006091786199249327, "eval_overall_accuracy": 0.9999375256591043, "eval_overall_f1": 0.99950092329191, "eval_overall_precision": 0.99950092329191, "eval_overall_recall": 0.99950092329191, "eval_runtime": 16.4349, "eval_samples_per_second": 406.391, "eval_steps_per_second": 25.434, "step": 20000 }, { "epoch": 5.59, "learning_rate": 4.029811019430397e-06, "loss": 0.0, "step": 21000 }, { "epoch": 5.59, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9994011079502919, "eval_loss": 0.0006266526179388165, "eval_overall_accuracy": 0.9999553754707888, "eval_overall_f1": 0.9996007386335279, "eval_overall_precision": 0.9996007386335279, "eval_overall_recall": 0.9996007386335279, "eval_runtime": 16.6877, "eval_samples_per_second": 400.236, "eval_steps_per_second": 25.048, "step": 21000 }, { "epoch": 5.86, "learning_rate": 3.2693258298794635e-06, "loss": 0.0, "step": 22000 }, { "epoch": 5.86, "eval_EG_f1": 0.9992513849378649, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9992513849378649, "eval_loss": 0.0006635423633269966, "eval_overall_accuracy": 0.9999375256591043, "eval_overall_f1": 0.99950092329191, "eval_overall_precision": 0.99950092329191, "eval_overall_recall": 0.99950092329191, "eval_runtime": 16.716, "eval_samples_per_second": 399.557, "eval_steps_per_second": 25.006, "step": 22000 }, { "epoch": 6.12, "learning_rate": 2.50884064032853e-06, "loss": 0.0, "step": 23000 }, { "epoch": 6.12, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9994011079502919, "eval_loss": 0.0006512191030196846, "eval_overall_accuracy": 0.9999464505649466, "eval_overall_f1": 0.9996007386335279, "eval_overall_precision": 0.9996007386335279, "eval_overall_recall": 0.9996007386335279, "eval_runtime": 16.3818, "eval_samples_per_second": 407.709, "eval_steps_per_second": 25.516, "step": 23000 }, { "epoch": 6.39, "learning_rate": 1.7483554507775962e-06, "loss": 0.0, "step": 24000 }, { "epoch": 6.39, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9994011079502919, "eval_loss": 0.000729710329324007, "eval_overall_accuracy": 0.9999464505649466, "eval_overall_f1": 0.9996007386335279, "eval_overall_precision": 0.9996007386335279, "eval_overall_recall": 0.9996007386335279, "eval_runtime": 16.5641, "eval_samples_per_second": 403.222, "eval_steps_per_second": 25.235, "step": 24000 }, { "epoch": 6.65, "learning_rate": 9.878702612266627e-07, "loss": 0.0001, "step": 25000 }, { "epoch": 6.65, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9994011079502919, "eval_loss": 0.0005905419238843024, "eval_overall_accuracy": 0.9999464505649466, "eval_overall_f1": 0.9996007386335279, "eval_overall_precision": 0.9996007386335279, "eval_overall_recall": 0.9996007386335279, "eval_runtime": 16.5058, "eval_samples_per_second": 404.647, "eval_steps_per_second": 25.325, "step": 25000 }, { "epoch": 6.92, "learning_rate": 2.2738507167572915e-07, "loss": 0.0001, "step": 26000 }, { "epoch": 6.92, "eval_EG_f1": 0.9994011079502919, "eval_ET_f1": 1.0, "eval_TE_f1": 0.9994011079502919, "eval_loss": 0.0006466144695878029, "eval_overall_accuracy": 0.9999464505649466, "eval_overall_f1": 0.9996007386335279, "eval_overall_precision": 0.9996007386335279, "eval_overall_recall": 0.9996007386335279, "eval_runtime": 16.8702, "eval_samples_per_second": 395.906, "eval_steps_per_second": 24.777, "step": 26000 } ], "max_steps": 26299, "num_train_epochs": 7, "total_flos": 6405072201643752.0, "trial_name": null, "trial_params": null }