|
{ |
|
"best_metric": 0.685027729264332, |
|
"best_model_checkpoint": "modernbert-medical-classifier/checkpoint-2024", |
|
"epoch": 25.0, |
|
"eval_steps": 500, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.2555762427123016, |
|
"eval_loss": 1.1453216075897217, |
|
"eval_runtime": 7.2906, |
|
"eval_samples_per_second": 12.619, |
|
"eval_steps_per_second": 3.155, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.0869565217391304, |
|
"grad_norm": 27.595176696777344, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 1.2432, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.22693092661056924, |
|
"eval_loss": 1.1348090171813965, |
|
"eval_runtime": 7.2682, |
|
"eval_samples_per_second": 12.658, |
|
"eval_steps_per_second": 3.164, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.1739130434782608, |
|
"grad_norm": 10.296772003173828, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 1.1447, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.2623751468860165, |
|
"eval_loss": 1.194385051727295, |
|
"eval_runtime": 7.2397, |
|
"eval_samples_per_second": 12.708, |
|
"eval_steps_per_second": 3.177, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 3.260869565217391, |
|
"grad_norm": 8.205018997192383, |
|
"learning_rate": 1.932367149758454e-05, |
|
"loss": 1.0924, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.46725195094760313, |
|
"eval_loss": 0.9543380737304688, |
|
"eval_runtime": 7.257, |
|
"eval_samples_per_second": 12.677, |
|
"eval_steps_per_second": 3.169, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 4.3478260869565215, |
|
"grad_norm": 11.217053413391113, |
|
"learning_rate": 1.8357487922705315e-05, |
|
"loss": 0.9918, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.48917610522469857, |
|
"eval_loss": 1.0969688892364502, |
|
"eval_runtime": 7.2942, |
|
"eval_samples_per_second": 12.613, |
|
"eval_steps_per_second": 3.153, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.434782608695652, |
|
"grad_norm": 17.887250900268555, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.8981, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.5668988939449298, |
|
"eval_loss": 1.2105615139007568, |
|
"eval_runtime": 7.3182, |
|
"eval_samples_per_second": 12.571, |
|
"eval_steps_per_second": 3.143, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 6.521739130434782, |
|
"grad_norm": 26.001848220825195, |
|
"learning_rate": 1.6425120772946863e-05, |
|
"loss": 0.9261, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.6268890955591723, |
|
"eval_loss": 0.8952301740646362, |
|
"eval_runtime": 7.2679, |
|
"eval_samples_per_second": 12.658, |
|
"eval_steps_per_second": 3.165, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 7.608695652173913, |
|
"grad_norm": 21.24846649169922, |
|
"learning_rate": 1.5458937198067633e-05, |
|
"loss": 0.8208, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.5480383816552855, |
|
"eval_loss": 1.1408698558807373, |
|
"eval_runtime": 7.2908, |
|
"eval_samples_per_second": 12.619, |
|
"eval_steps_per_second": 3.155, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 8.695652173913043, |
|
"grad_norm": 45.331424713134766, |
|
"learning_rate": 1.4492753623188407e-05, |
|
"loss": 0.645, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.5747727830999755, |
|
"eval_loss": 1.728603482246399, |
|
"eval_runtime": 7.2891, |
|
"eval_samples_per_second": 12.622, |
|
"eval_steps_per_second": 3.155, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 9.782608695652174, |
|
"grad_norm": 5.040703296661377, |
|
"learning_rate": 1.352657004830918e-05, |
|
"loss": 0.6745, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.6229138501039525, |
|
"eval_loss": 1.4749873876571655, |
|
"eval_runtime": 7.3224, |
|
"eval_samples_per_second": 12.564, |
|
"eval_steps_per_second": 3.141, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.869565217391305, |
|
"grad_norm": 69.3198013305664, |
|
"learning_rate": 1.2560386473429953e-05, |
|
"loss": 0.5947, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.662759974441483, |
|
"eval_loss": 1.709149718284607, |
|
"eval_runtime": 7.2845, |
|
"eval_samples_per_second": 12.63, |
|
"eval_steps_per_second": 3.157, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 11.956521739130435, |
|
"grad_norm": 127.6854476928711, |
|
"learning_rate": 1.1594202898550726e-05, |
|
"loss": 0.517, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.617952852218392, |
|
"eval_loss": 3.094771385192871, |
|
"eval_runtime": 7.2912, |
|
"eval_samples_per_second": 12.618, |
|
"eval_steps_per_second": 3.155, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.6671065935820338, |
|
"eval_loss": 2.594010829925537, |
|
"eval_runtime": 7.3078, |
|
"eval_samples_per_second": 12.589, |
|
"eval_steps_per_second": 3.147, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 13.043478260869565, |
|
"grad_norm": 5.261804580688477, |
|
"learning_rate": 1.0628019323671499e-05, |
|
"loss": 0.4901, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.6347571828121027, |
|
"eval_loss": 2.9827301502227783, |
|
"eval_runtime": 7.2649, |
|
"eval_samples_per_second": 12.664, |
|
"eval_steps_per_second": 3.166, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 14.130434782608695, |
|
"grad_norm": 12.201338768005371, |
|
"learning_rate": 9.66183574879227e-06, |
|
"loss": 0.2048, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.6483390799764528, |
|
"eval_loss": 3.1022789478302, |
|
"eval_runtime": 7.3193, |
|
"eval_samples_per_second": 12.569, |
|
"eval_steps_per_second": 3.142, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.217391304347826, |
|
"grad_norm": 0.27592945098876953, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.1188, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.6331910652276784, |
|
"eval_loss": 3.7382447719573975, |
|
"eval_runtime": 7.3015, |
|
"eval_samples_per_second": 12.6, |
|
"eval_steps_per_second": 3.15, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 16.304347826086957, |
|
"grad_norm": 0.04186500236392021, |
|
"learning_rate": 7.729468599033817e-06, |
|
"loss": 0.1236, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.6535368679296716, |
|
"eval_loss": 3.5499069690704346, |
|
"eval_runtime": 7.3103, |
|
"eval_samples_per_second": 12.585, |
|
"eval_steps_per_second": 3.146, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 17.391304347826086, |
|
"grad_norm": 0.7084795236587524, |
|
"learning_rate": 6.76328502415459e-06, |
|
"loss": 0.0828, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.6533094924399271, |
|
"eval_loss": 3.6281237602233887, |
|
"eval_runtime": 7.3293, |
|
"eval_samples_per_second": 12.552, |
|
"eval_steps_per_second": 3.138, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 18.47826086956522, |
|
"grad_norm": 0.0006457903073169291, |
|
"learning_rate": 5.797101449275363e-06, |
|
"loss": 0.0479, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.6820012787723785, |
|
"eval_loss": 3.2783100605010986, |
|
"eval_runtime": 7.3182, |
|
"eval_samples_per_second": 12.571, |
|
"eval_steps_per_second": 3.143, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 19.565217391304348, |
|
"grad_norm": 0.09326278418302536, |
|
"learning_rate": 4.830917874396135e-06, |
|
"loss": 0.0081, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.6675544814093237, |
|
"eval_loss": 3.6055147647857666, |
|
"eval_runtime": 7.3242, |
|
"eval_samples_per_second": 12.561, |
|
"eval_steps_per_second": 3.14, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.652173913043477, |
|
"grad_norm": 0.002304959110915661, |
|
"learning_rate": 3.864734299516908e-06, |
|
"loss": 0.031, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.6777883740084095, |
|
"eval_loss": 3.6255407333374023, |
|
"eval_runtime": 7.3281, |
|
"eval_samples_per_second": 12.554, |
|
"eval_steps_per_second": 3.139, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 21.73913043478261, |
|
"grad_norm": 0.0011914765927940607, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"loss": 0.0008, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.685027729264332, |
|
"eval_loss": 3.6279380321502686, |
|
"eval_runtime": 7.2477, |
|
"eval_samples_per_second": 12.694, |
|
"eval_steps_per_second": 3.173, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 22.82608695652174, |
|
"grad_norm": 0.00998605228960514, |
|
"learning_rate": 1.932367149758454e-06, |
|
"loss": 0.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.6675544814093237, |
|
"eval_loss": 3.6375534534454346, |
|
"eval_runtime": 7.3379, |
|
"eval_samples_per_second": 12.538, |
|
"eval_steps_per_second": 3.134, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 23.91304347826087, |
|
"grad_norm": 0.0006493396940641105, |
|
"learning_rate": 9.66183574879227e-07, |
|
"loss": 0.0001, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.6675544814093237, |
|
"eval_loss": 3.64399790763855, |
|
"eval_runtime": 7.3823, |
|
"eval_samples_per_second": 12.462, |
|
"eval_steps_per_second": 3.116, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 6.634181318077026e-06, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_f1": 0.6675544814093237, |
|
"eval_loss": 3.65128493309021, |
|
"eval_runtime": 7.4088, |
|
"eval_samples_per_second": 12.418, |
|
"eval_steps_per_second": 3.104, |
|
"step": 2300 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.579010926545e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|