{ "best_metric": 0.685027729264332, "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-2024", "epoch": 25.0, "eval_steps": 500, "global_step": 2300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.2555762427123016, "eval_loss": 1.1453216075897217, "eval_runtime": 7.2906, "eval_samples_per_second": 12.619, "eval_steps_per_second": 3.155, "step": 92 }, { "epoch": 1.0869565217391304, "grad_norm": 27.595176696777344, "learning_rate": 8.695652173913044e-06, "loss": 1.2432, "step": 100 }, { "epoch": 2.0, "eval_f1": 0.22693092661056924, "eval_loss": 1.1348090171813965, "eval_runtime": 7.2682, "eval_samples_per_second": 12.658, "eval_steps_per_second": 3.164, "step": 184 }, { "epoch": 2.1739130434782608, "grad_norm": 10.296772003173828, "learning_rate": 1.739130434782609e-05, "loss": 1.1447, "step": 200 }, { "epoch": 3.0, "eval_f1": 0.2623751468860165, "eval_loss": 1.194385051727295, "eval_runtime": 7.2397, "eval_samples_per_second": 12.708, "eval_steps_per_second": 3.177, "step": 276 }, { "epoch": 3.260869565217391, "grad_norm": 8.205018997192383, "learning_rate": 1.932367149758454e-05, "loss": 1.0924, "step": 300 }, { "epoch": 4.0, "eval_f1": 0.46725195094760313, "eval_loss": 0.9543380737304688, "eval_runtime": 7.257, "eval_samples_per_second": 12.677, "eval_steps_per_second": 3.169, "step": 368 }, { "epoch": 4.3478260869565215, "grad_norm": 11.217053413391113, "learning_rate": 1.8357487922705315e-05, "loss": 0.9918, "step": 400 }, { "epoch": 5.0, "eval_f1": 0.48917610522469857, "eval_loss": 1.0969688892364502, "eval_runtime": 7.2942, "eval_samples_per_second": 12.613, "eval_steps_per_second": 3.153, "step": 460 }, { "epoch": 5.434782608695652, "grad_norm": 17.887250900268555, "learning_rate": 1.739130434782609e-05, "loss": 0.8981, "step": 500 }, { "epoch": 6.0, "eval_f1": 0.5668988939449298, "eval_loss": 1.2105615139007568, "eval_runtime": 7.3182, "eval_samples_per_second": 12.571, "eval_steps_per_second": 3.143, "step": 552 }, { "epoch": 6.521739130434782, "grad_norm": 26.001848220825195, "learning_rate": 1.6425120772946863e-05, "loss": 0.9261, "step": 600 }, { "epoch": 7.0, "eval_f1": 0.6268890955591723, "eval_loss": 0.8952301740646362, "eval_runtime": 7.2679, "eval_samples_per_second": 12.658, "eval_steps_per_second": 3.165, "step": 644 }, { "epoch": 7.608695652173913, "grad_norm": 21.24846649169922, "learning_rate": 1.5458937198067633e-05, "loss": 0.8208, "step": 700 }, { "epoch": 8.0, "eval_f1": 0.5480383816552855, "eval_loss": 1.1408698558807373, "eval_runtime": 7.2908, "eval_samples_per_second": 12.619, "eval_steps_per_second": 3.155, "step": 736 }, { "epoch": 8.695652173913043, "grad_norm": 45.331424713134766, "learning_rate": 1.4492753623188407e-05, "loss": 0.645, "step": 800 }, { "epoch": 9.0, "eval_f1": 0.5747727830999755, "eval_loss": 1.728603482246399, "eval_runtime": 7.2891, "eval_samples_per_second": 12.622, "eval_steps_per_second": 3.155, "step": 828 }, { "epoch": 9.782608695652174, "grad_norm": 5.040703296661377, "learning_rate": 1.352657004830918e-05, "loss": 0.6745, "step": 900 }, { "epoch": 10.0, "eval_f1": 0.6229138501039525, "eval_loss": 1.4749873876571655, "eval_runtime": 7.3224, "eval_samples_per_second": 12.564, "eval_steps_per_second": 3.141, "step": 920 }, { "epoch": 10.869565217391305, "grad_norm": 69.3198013305664, "learning_rate": 1.2560386473429953e-05, "loss": 0.5947, "step": 1000 }, { "epoch": 11.0, "eval_f1": 0.662759974441483, "eval_loss": 1.709149718284607, "eval_runtime": 7.2845, "eval_samples_per_second": 12.63, "eval_steps_per_second": 3.157, "step": 1012 }, { "epoch": 11.956521739130435, "grad_norm": 127.6854476928711, "learning_rate": 1.1594202898550726e-05, "loss": 0.517, "step": 1100 }, { "epoch": 12.0, "eval_f1": 0.617952852218392, "eval_loss": 3.094771385192871, "eval_runtime": 7.2912, "eval_samples_per_second": 12.618, "eval_steps_per_second": 3.155, "step": 1104 }, { "epoch": 13.0, "eval_f1": 0.6671065935820338, "eval_loss": 2.594010829925537, "eval_runtime": 7.3078, "eval_samples_per_second": 12.589, "eval_steps_per_second": 3.147, "step": 1196 }, { "epoch": 13.043478260869565, "grad_norm": 5.261804580688477, "learning_rate": 1.0628019323671499e-05, "loss": 0.4901, "step": 1200 }, { "epoch": 14.0, "eval_f1": 0.6347571828121027, "eval_loss": 2.9827301502227783, "eval_runtime": 7.2649, "eval_samples_per_second": 12.664, "eval_steps_per_second": 3.166, "step": 1288 }, { "epoch": 14.130434782608695, "grad_norm": 12.201338768005371, "learning_rate": 9.66183574879227e-06, "loss": 0.2048, "step": 1300 }, { "epoch": 15.0, "eval_f1": 0.6483390799764528, "eval_loss": 3.1022789478302, "eval_runtime": 7.3193, "eval_samples_per_second": 12.569, "eval_steps_per_second": 3.142, "step": 1380 }, { "epoch": 15.217391304347826, "grad_norm": 0.27592945098876953, "learning_rate": 8.695652173913044e-06, "loss": 0.1188, "step": 1400 }, { "epoch": 16.0, "eval_f1": 0.6331910652276784, "eval_loss": 3.7382447719573975, "eval_runtime": 7.3015, "eval_samples_per_second": 12.6, "eval_steps_per_second": 3.15, "step": 1472 }, { "epoch": 16.304347826086957, "grad_norm": 0.04186500236392021, "learning_rate": 7.729468599033817e-06, "loss": 0.1236, "step": 1500 }, { "epoch": 17.0, "eval_f1": 0.6535368679296716, "eval_loss": 3.5499069690704346, "eval_runtime": 7.3103, "eval_samples_per_second": 12.585, "eval_steps_per_second": 3.146, "step": 1564 }, { "epoch": 17.391304347826086, "grad_norm": 0.7084795236587524, "learning_rate": 6.76328502415459e-06, "loss": 0.0828, "step": 1600 }, { "epoch": 18.0, "eval_f1": 0.6533094924399271, "eval_loss": 3.6281237602233887, "eval_runtime": 7.3293, "eval_samples_per_second": 12.552, "eval_steps_per_second": 3.138, "step": 1656 }, { "epoch": 18.47826086956522, "grad_norm": 0.0006457903073169291, "learning_rate": 5.797101449275363e-06, "loss": 0.0479, "step": 1700 }, { "epoch": 19.0, "eval_f1": 0.6820012787723785, "eval_loss": 3.2783100605010986, "eval_runtime": 7.3182, "eval_samples_per_second": 12.571, "eval_steps_per_second": 3.143, "step": 1748 }, { "epoch": 19.565217391304348, "grad_norm": 0.09326278418302536, "learning_rate": 4.830917874396135e-06, "loss": 0.0081, "step": 1800 }, { "epoch": 20.0, "eval_f1": 0.6675544814093237, "eval_loss": 3.6055147647857666, "eval_runtime": 7.3242, "eval_samples_per_second": 12.561, "eval_steps_per_second": 3.14, "step": 1840 }, { "epoch": 20.652173913043477, "grad_norm": 0.002304959110915661, "learning_rate": 3.864734299516908e-06, "loss": 0.031, "step": 1900 }, { "epoch": 21.0, "eval_f1": 0.6777883740084095, "eval_loss": 3.6255407333374023, "eval_runtime": 7.3281, "eval_samples_per_second": 12.554, "eval_steps_per_second": 3.139, "step": 1932 }, { "epoch": 21.73913043478261, "grad_norm": 0.0011914765927940607, "learning_rate": 2.8985507246376816e-06, "loss": 0.0008, "step": 2000 }, { "epoch": 22.0, "eval_f1": 0.685027729264332, "eval_loss": 3.6279380321502686, "eval_runtime": 7.2477, "eval_samples_per_second": 12.694, "eval_steps_per_second": 3.173, "step": 2024 }, { "epoch": 22.82608695652174, "grad_norm": 0.00998605228960514, "learning_rate": 1.932367149758454e-06, "loss": 0.0, "step": 2100 }, { "epoch": 23.0, "eval_f1": 0.6675544814093237, "eval_loss": 3.6375534534454346, "eval_runtime": 7.3379, "eval_samples_per_second": 12.538, "eval_steps_per_second": 3.134, "step": 2116 }, { "epoch": 23.91304347826087, "grad_norm": 0.0006493396940641105, "learning_rate": 9.66183574879227e-07, "loss": 0.0001, "step": 2200 }, { "epoch": 24.0, "eval_f1": 0.6675544814093237, "eval_loss": 3.64399790763855, "eval_runtime": 7.3823, "eval_samples_per_second": 12.462, "eval_steps_per_second": 3.116, "step": 2208 }, { "epoch": 25.0, "grad_norm": 6.634181318077026e-06, "learning_rate": 0.0, "loss": 0.0, "step": 2300 }, { "epoch": 25.0, "eval_f1": 0.6675544814093237, "eval_loss": 3.65128493309021, "eval_runtime": 7.4088, "eval_samples_per_second": 12.418, "eval_steps_per_second": 3.104, "step": 2300 } ], "logging_steps": 100, "max_steps": 2300, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.579010926545e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }