{ "best_metric": null, "best_model_checkpoint": null, "epoch": 36.0, "global_step": 972, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.2262943855309169e-05, "loss": 2.057, "step": 27 }, { "epoch": 1.0, "eval_accuracy": 0.6772454727151596, "eval_loss": 1.7236038446426392, "eval_runtime": 13.5733, "eval_samples_per_second": 32.711, "eval_steps_per_second": 0.147, "step": 27 }, { "epoch": 2.0, "learning_rate": 1.4841962570206113e-05, "loss": 1.7092, "step": 54 }, { "epoch": 2.0, "eval_accuracy": 0.6982139066622192, "eval_loss": 1.5525641441345215, "eval_runtime": 12.8991, "eval_samples_per_second": 34.421, "eval_steps_per_second": 0.155, "step": 54 }, { "epoch": 3.0, "learning_rate": 1.6350591807078892e-05, "loss": 1.5646, "step": 81 }, { "epoch": 3.0, "eval_accuracy": 0.7100154712905392, "eval_loss": 1.452789068222046, "eval_runtime": 12.9607, "eval_samples_per_second": 34.258, "eval_steps_per_second": 0.154, "step": 81 }, { "epoch": 4.0, "learning_rate": 1.7420981285103056e-05, "loss": 1.4688, "step": 108 }, { "epoch": 4.0, "eval_accuracy": 0.7290463925156171, "eval_loss": 1.3419641256332397, "eval_runtime": 12.9711, "eval_samples_per_second": 34.23, "eval_steps_per_second": 0.154, "step": 108 }, { "epoch": 5.0, "learning_rate": 1.825123986666868e-05, "loss": 1.3785, "step": 135 }, { "epoch": 5.0, "eval_accuracy": 0.74070063507858, "eval_loss": 1.2742513418197632, "eval_runtime": 12.944, "eval_samples_per_second": 34.302, "eval_steps_per_second": 0.155, "step": 135 }, { "epoch": 6.0, "learning_rate": 1.892961052197583e-05, "loss": 1.3459, "step": 162 }, { "epoch": 6.0, "eval_accuracy": 0.739266862170088, "eval_loss": 1.2691913843154907, "eval_runtime": 12.9659, "eval_samples_per_second": 34.244, "eval_steps_per_second": 0.154, "step": 162 }, { "epoch": 7.0, "learning_rate": 1.9503164738653782e-05, "loss": 1.3059, "step": 189 }, { "epoch": 7.0, "eval_accuracy": 0.7480331638828371, "eval_loss": 1.2231497764587402, "eval_runtime": 12.921, "eval_samples_per_second": 34.363, "eval_steps_per_second": 0.155, "step": 189 }, { "epoch": 8.0, "learning_rate": 1.9999999999999998e-05, "loss": 1.2666, "step": 216 }, { "epoch": 8.0, "eval_accuracy": 0.7594158570229099, "eval_loss": 1.151406168937683, "eval_runtime": 12.9461, "eval_samples_per_second": 34.296, "eval_steps_per_second": 0.154, "step": 216 }, { "epoch": 9.0, "learning_rate": 2e-05, "loss": 1.2463, "step": 243 }, { "epoch": 9.0, "eval_accuracy": 0.7459728430463017, "eval_loss": 1.2034211158752441, "eval_runtime": 13.0359, "eval_samples_per_second": 34.06, "eval_steps_per_second": 0.153, "step": 243 }, { "epoch": 10.0, "learning_rate": 2e-05, "loss": 1.2276, "step": 270 }, { "epoch": 10.0, "eval_accuracy": 0.7586074755335456, "eval_loss": 1.1566089391708374, "eval_runtime": 12.9245, "eval_samples_per_second": 34.353, "eval_steps_per_second": 0.155, "step": 270 }, { "epoch": 11.0, "learning_rate": 2e-05, "loss": 1.189, "step": 297 }, { "epoch": 11.0, "eval_accuracy": 0.7611567732115677, "eval_loss": 1.1319142580032349, "eval_runtime": 12.9698, "eval_samples_per_second": 34.233, "eval_steps_per_second": 0.154, "step": 297 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 1.1568, "step": 324 }, { "epoch": 12.0, "eval_accuracy": 0.7545374996471618, "eval_loss": 1.1536731719970703, "eval_runtime": 12.9366, "eval_samples_per_second": 34.321, "eval_steps_per_second": 0.155, "step": 324 }, { "epoch": 13.0, "learning_rate": 2e-05, "loss": 1.1447, "step": 351 }, { "epoch": 13.0, "eval_accuracy": 0.7683153013910355, "eval_loss": 1.0927276611328125, "eval_runtime": 12.9899, "eval_samples_per_second": 34.18, "eval_steps_per_second": 0.154, "step": 351 }, { "epoch": 14.0, "learning_rate": 2e-05, "loss": 1.1262, "step": 378 }, { "epoch": 14.0, "eval_accuracy": 0.7699539058709365, "eval_loss": 1.0704097747802734, "eval_runtime": 12.9791, "eval_samples_per_second": 34.209, "eval_steps_per_second": 0.154, "step": 378 }, { "epoch": 15.0, "learning_rate": 2e-05, "loss": 1.1173, "step": 405 }, { "epoch": 15.0, "eval_accuracy": 0.774438983954053, "eval_loss": 1.0296632051467896, "eval_runtime": 12.9361, "eval_samples_per_second": 34.323, "eval_steps_per_second": 0.155, "step": 405 }, { "epoch": 16.0, "learning_rate": 2e-05, "loss": 1.0997, "step": 432 }, { "epoch": 16.0, "eval_accuracy": 0.7737643712219984, "eval_loss": 1.0550481081008911, "eval_runtime": 12.9715, "eval_samples_per_second": 34.229, "eval_steps_per_second": 0.154, "step": 432 }, { "epoch": 17.0, "learning_rate": 2e-05, "loss": 1.0962, "step": 459 }, { "epoch": 17.0, "eval_accuracy": 0.7746747253401614, "eval_loss": 1.0655121803283691, "eval_runtime": 12.9954, "eval_samples_per_second": 34.166, "eval_steps_per_second": 0.154, "step": 459 }, { "epoch": 18.0, "learning_rate": 2e-05, "loss": 1.0864, "step": 486 }, { "epoch": 18.0, "eval_accuracy": 0.7735955893309492, "eval_loss": 1.0611152648925781, "eval_runtime": 12.9386, "eval_samples_per_second": 34.316, "eval_steps_per_second": 0.155, "step": 486 }, { "epoch": 19.0, "learning_rate": 2e-05, "loss": 1.0817, "step": 513 }, { "epoch": 19.0, "eval_accuracy": 0.7697911607576493, "eval_loss": 1.0739043951034546, "eval_runtime": 13.0147, "eval_samples_per_second": 34.115, "eval_steps_per_second": 0.154, "step": 513 }, { "epoch": 20.0, "learning_rate": 2e-05, "loss": 1.0615, "step": 540 }, { "epoch": 20.0, "eval_accuracy": 0.7779283925151024, "eval_loss": 1.0259206295013428, "eval_runtime": 12.248, "eval_samples_per_second": 36.251, "eval_steps_per_second": 0.163, "step": 540 }, { "epoch": 21.0, "learning_rate": 2e-05, "loss": 1.0337, "step": 567 }, { "epoch": 21.0, "eval_accuracy": 0.7820505322259913, "eval_loss": 1.0050867795944214, "eval_runtime": 13.0151, "eval_samples_per_second": 34.114, "eval_steps_per_second": 0.154, "step": 567 }, { "epoch": 22.0, "learning_rate": 2e-05, "loss": 1.0248, "step": 594 }, { "epoch": 22.0, "eval_accuracy": 0.7823669101512009, "eval_loss": 0.9815566539764404, "eval_runtime": 12.966, "eval_samples_per_second": 34.243, "eval_steps_per_second": 0.154, "step": 594 }, { "epoch": 23.0, "learning_rate": 2e-05, "loss": 1.0078, "step": 621 }, { "epoch": 23.0, "eval_accuracy": 0.7890613318979696, "eval_loss": 0.9701399207115173, "eval_runtime": 12.9372, "eval_samples_per_second": 34.32, "eval_steps_per_second": 0.155, "step": 621 }, { "epoch": 24.0, "learning_rate": 2e-05, "loss": 1.0161, "step": 648 }, { "epoch": 24.0, "eval_accuracy": 0.7868592237542407, "eval_loss": 0.9783701300621033, "eval_runtime": 12.9845, "eval_samples_per_second": 34.195, "eval_steps_per_second": 0.154, "step": 648 }, { "epoch": 25.0, "learning_rate": 2e-05, "loss": 1.0005, "step": 675 }, { "epoch": 25.0, "eval_accuracy": 0.782170183167169, "eval_loss": 0.9962915182113647, "eval_runtime": 13.0152, "eval_samples_per_second": 34.114, "eval_steps_per_second": 0.154, "step": 675 }, { "epoch": 26.0, "learning_rate": 2e-05, "loss": 1.0008, "step": 702 }, { "epoch": 26.0, "eval_accuracy": 0.790520909757887, "eval_loss": 0.9529848694801331, "eval_runtime": 13.0266, "eval_samples_per_second": 34.084, "eval_steps_per_second": 0.154, "step": 702 }, { "epoch": 27.0, "learning_rate": 2e-05, "loss": 0.9961, "step": 729 }, { "epoch": 27.0, "eval_accuracy": 0.7787046824557895, "eval_loss": 1.0195859670639038, "eval_runtime": 12.9955, "eval_samples_per_second": 34.166, "eval_steps_per_second": 0.154, "step": 729 }, { "epoch": 28.0, "learning_rate": 2e-05, "loss": 0.9834, "step": 756 }, { "epoch": 28.0, "eval_accuracy": 0.7917842772205873, "eval_loss": 0.9555456638336182, "eval_runtime": 12.2326, "eval_samples_per_second": 36.296, "eval_steps_per_second": 0.163, "step": 756 }, { "epoch": 29.0, "learning_rate": 2e-05, "loss": 0.9647, "step": 783 }, { "epoch": 29.0, "eval_accuracy": 0.7914557776443338, "eval_loss": 0.9375360608100891, "eval_runtime": 13.0096, "eval_samples_per_second": 34.129, "eval_steps_per_second": 0.154, "step": 783 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.967, "step": 810 }, { "epoch": 30.0, "eval_accuracy": 0.793787977110495, "eval_loss": 0.9494355320930481, "eval_runtime": 13.0239, "eval_samples_per_second": 34.091, "eval_steps_per_second": 0.154, "step": 810 }, { "epoch": 31.0, "learning_rate": 2e-05, "loss": 0.9625, "step": 837 }, { "epoch": 31.0, "eval_accuracy": 0.7860187306097597, "eval_loss": 0.9812522530555725, "eval_runtime": 12.989, "eval_samples_per_second": 34.183, "eval_steps_per_second": 0.154, "step": 837 }, { "epoch": 32.0, "learning_rate": 2e-05, "loss": 0.9578, "step": 864 }, { "epoch": 32.0, "eval_accuracy": 0.7956582591346297, "eval_loss": 0.9389752149581909, "eval_runtime": 13.0492, "eval_samples_per_second": 34.025, "eval_steps_per_second": 0.153, "step": 864 }, { "epoch": 33.0, "learning_rate": 2e-05, "loss": 0.9462, "step": 891 }, { "epoch": 33.0, "eval_accuracy": 0.79146801197472, "eval_loss": 0.9519514441490173, "eval_runtime": 12.965, "eval_samples_per_second": 34.246, "eval_steps_per_second": 0.154, "step": 891 }, { "epoch": 34.0, "learning_rate": 2e-05, "loss": 0.9468, "step": 918 }, { "epoch": 34.0, "eval_accuracy": 0.7949606757937664, "eval_loss": 0.922423243522644, "eval_runtime": 12.9466, "eval_samples_per_second": 34.295, "eval_steps_per_second": 0.154, "step": 918 }, { "epoch": 35.0, "learning_rate": 2e-05, "loss": 0.9357, "step": 945 }, { "epoch": 35.0, "eval_accuracy": 0.8009954921111946, "eval_loss": 0.908001184463501, "eval_runtime": 12.9778, "eval_samples_per_second": 34.212, "eval_steps_per_second": 0.154, "step": 945 }, { "epoch": 36.0, "learning_rate": 2e-05, "loss": 0.9328, "step": 972 }, { "epoch": 36.0, "eval_accuracy": 0.7935578330893118, "eval_loss": 0.9237804412841797, "eval_runtime": 12.9467, "eval_samples_per_second": 34.294, "eval_steps_per_second": 0.154, "step": 972 } ], "max_steps": 1080, "num_train_epochs": 40, "total_flos": 302668861931520.0, "trial_name": null, "trial_params": null }