|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 36.0,
  "global_step": 972,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.2262943855309169e-05,
      "loss": 2.057,
      "step": 27
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6772454727151596,
      "eval_loss": 1.7236038446426392,
      "eval_runtime": 13.5733,
      "eval_samples_per_second": 32.711,
      "eval_steps_per_second": 0.147,
      "step": 27
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.4841962570206113e-05,
      "loss": 1.7092,
      "step": 54
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6982139066622192,
      "eval_loss": 1.5525641441345215,
      "eval_runtime": 12.8991,
      "eval_samples_per_second": 34.421,
      "eval_steps_per_second": 0.155,
      "step": 54
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.6350591807078892e-05,
      "loss": 1.5646,
      "step": 81
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7100154712905392,
      "eval_loss": 1.452789068222046,
      "eval_runtime": 12.9607,
      "eval_samples_per_second": 34.258,
      "eval_steps_per_second": 0.154,
      "step": 81
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.7420981285103056e-05,
      "loss": 1.4688,
      "step": 108
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7290463925156171,
      "eval_loss": 1.3419641256332397,
      "eval_runtime": 12.9711,
      "eval_samples_per_second": 34.23,
      "eval_steps_per_second": 0.154,
      "step": 108
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.825123986666868e-05,
      "loss": 1.3785,
      "step": 135
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.74070063507858,
      "eval_loss": 1.2742513418197632,
      "eval_runtime": 12.944,
      "eval_samples_per_second": 34.302,
      "eval_steps_per_second": 0.155,
      "step": 135
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.892961052197583e-05,
      "loss": 1.3459,
      "step": 162
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.739266862170088,
      "eval_loss": 1.2691913843154907,
      "eval_runtime": 12.9659,
      "eval_samples_per_second": 34.244,
      "eval_steps_per_second": 0.154,
      "step": 162
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9503164738653782e-05,
      "loss": 1.3059,
      "step": 189
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7480331638828371,
      "eval_loss": 1.2231497764587402,
      "eval_runtime": 12.921,
      "eval_samples_per_second": 34.363,
      "eval_steps_per_second": 0.155,
      "step": 189
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 1.2666,
      "step": 216
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7594158570229099,
      "eval_loss": 1.151406168937683,
      "eval_runtime": 12.9461,
      "eval_samples_per_second": 34.296,
      "eval_steps_per_second": 0.154,
      "step": 216
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.2463,
      "step": 243
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7459728430463017,
      "eval_loss": 1.2034211158752441,
      "eval_runtime": 13.0359,
      "eval_samples_per_second": 34.06,
      "eval_steps_per_second": 0.153,
      "step": 243
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.2276,
      "step": 270
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7586074755335456,
      "eval_loss": 1.1566089391708374,
      "eval_runtime": 12.9245,
      "eval_samples_per_second": 34.353,
      "eval_steps_per_second": 0.155,
      "step": 270
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.189,
      "step": 297
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7611567732115677,
      "eval_loss": 1.1319142580032349,
      "eval_runtime": 12.9698,
      "eval_samples_per_second": 34.233,
      "eval_steps_per_second": 0.154,
      "step": 297
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.1568,
      "step": 324
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7545374996471618,
      "eval_loss": 1.1536731719970703,
      "eval_runtime": 12.9366,
      "eval_samples_per_second": 34.321,
      "eval_steps_per_second": 0.155,
      "step": 324
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.1447,
      "step": 351
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7683153013910355,
      "eval_loss": 1.0927276611328125,
      "eval_runtime": 12.9899,
      "eval_samples_per_second": 34.18,
      "eval_steps_per_second": 0.154,
      "step": 351
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.1262,
      "step": 378
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7699539058709365,
      "eval_loss": 1.0704097747802734,
      "eval_runtime": 12.9791,
      "eval_samples_per_second": 34.209,
      "eval_steps_per_second": 0.154,
      "step": 378
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.1173,
      "step": 405
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.774438983954053,
      "eval_loss": 1.0296632051467896,
      "eval_runtime": 12.9361,
      "eval_samples_per_second": 34.323,
      "eval_steps_per_second": 0.155,
      "step": 405
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.0997,
      "step": 432
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7737643712219984,
      "eval_loss": 1.0550481081008911,
      "eval_runtime": 12.9715,
      "eval_samples_per_second": 34.229,
      "eval_steps_per_second": 0.154,
      "step": 432
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.0962,
      "step": 459
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7746747253401614,
      "eval_loss": 1.0655121803283691,
      "eval_runtime": 12.9954,
      "eval_samples_per_second": 34.166,
      "eval_steps_per_second": 0.154,
      "step": 459
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.0864,
      "step": 486
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7735955893309492,
      "eval_loss": 1.0611152648925781,
      "eval_runtime": 12.9386,
      "eval_samples_per_second": 34.316,
      "eval_steps_per_second": 0.155,
      "step": 486
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.0817,
      "step": 513
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7697911607576493,
      "eval_loss": 1.0739043951034546,
      "eval_runtime": 13.0147,
      "eval_samples_per_second": 34.115,
      "eval_steps_per_second": 0.154,
      "step": 513
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.0615,
      "step": 540
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7779283925151024,
      "eval_loss": 1.0259206295013428,
      "eval_runtime": 12.248,
      "eval_samples_per_second": 36.251,
      "eval_steps_per_second": 0.163,
      "step": 540
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.0337,
      "step": 567
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7820505322259913,
      "eval_loss": 1.0050867795944214,
      "eval_runtime": 13.0151,
      "eval_samples_per_second": 34.114,
      "eval_steps_per_second": 0.154,
      "step": 567
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.0248,
      "step": 594
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7823669101512009,
      "eval_loss": 0.9815566539764404,
      "eval_runtime": 12.966,
      "eval_samples_per_second": 34.243,
      "eval_steps_per_second": 0.154,
      "step": 594
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.0078,
      "step": 621
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7890613318979696,
      "eval_loss": 0.9701399207115173,
      "eval_runtime": 12.9372,
      "eval_samples_per_second": 34.32,
      "eval_steps_per_second": 0.155,
      "step": 621
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.0161,
      "step": 648
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7868592237542407,
      "eval_loss": 0.9783701300621033,
      "eval_runtime": 12.9845,
      "eval_samples_per_second": 34.195,
      "eval_steps_per_second": 0.154,
      "step": 648
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0005,
      "step": 675
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.782170183167169,
      "eval_loss": 0.9962915182113647,
      "eval_runtime": 13.0152,
      "eval_samples_per_second": 34.114,
      "eval_steps_per_second": 0.154,
      "step": 675
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0008,
      "step": 702
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.790520909757887,
      "eval_loss": 0.9529848694801331,
      "eval_runtime": 13.0266,
      "eval_samples_per_second": 34.084,
      "eval_steps_per_second": 0.154,
      "step": 702
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 0.9961,
      "step": 729
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7787046824557895,
      "eval_loss": 1.0195859670639038,
      "eval_runtime": 12.9955,
      "eval_samples_per_second": 34.166,
      "eval_steps_per_second": 0.154,
      "step": 729
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 0.9834,
      "step": 756
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7917842772205873,
      "eval_loss": 0.9555456638336182,
      "eval_runtime": 12.2326,
      "eval_samples_per_second": 36.296,
      "eval_steps_per_second": 0.163,
      "step": 756
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 0.9647,
      "step": 783
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7914557776443338,
      "eval_loss": 0.9375360608100891,
      "eval_runtime": 13.0096,
      "eval_samples_per_second": 34.129,
      "eval_steps_per_second": 0.154,
      "step": 783
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 0.967,
      "step": 810
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.793787977110495,
      "eval_loss": 0.9494355320930481,
      "eval_runtime": 13.0239,
      "eval_samples_per_second": 34.091,
      "eval_steps_per_second": 0.154,
      "step": 810
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 0.9625,
      "step": 837
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7860187306097597,
      "eval_loss": 0.9812522530555725,
      "eval_runtime": 12.989,
      "eval_samples_per_second": 34.183,
      "eval_steps_per_second": 0.154,
      "step": 837
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 0.9578,
      "step": 864
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7956582591346297,
      "eval_loss": 0.9389752149581909,
      "eval_runtime": 13.0492,
      "eval_samples_per_second": 34.025,
      "eval_steps_per_second": 0.153,
      "step": 864
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 0.9462,
      "step": 891
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.79146801197472,
      "eval_loss": 0.9519514441490173,
      "eval_runtime": 12.965,
      "eval_samples_per_second": 34.246,
      "eval_steps_per_second": 0.154,
      "step": 891
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 0.9468,
      "step": 918
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7949606757937664,
      "eval_loss": 0.922423243522644,
      "eval_runtime": 12.9466,
      "eval_samples_per_second": 34.295,
      "eval_steps_per_second": 0.154,
      "step": 918
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 0.9357,
      "step": 945
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.8009954921111946,
      "eval_loss": 0.908001184463501,
      "eval_runtime": 12.9778,
      "eval_samples_per_second": 34.212,
      "eval_steps_per_second": 0.154,
      "step": 945
    },
    {
      "epoch": 36.0,
      "learning_rate": 2e-05,
      "loss": 0.9328,
      "step": 972
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7935578330893118,
      "eval_loss": 0.9237804412841797,
      "eval_runtime": 12.9467,
      "eval_samples_per_second": 34.294,
      "eval_steps_per_second": 0.154,
      "step": 972
    }
  ],
  "max_steps": 1080,
  "num_train_epochs": 40,
  "total_flos": 302668861931520.0,
  "trial_name": null,
  "trial_params": null
}