{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.149321266968325, "eval_steps": 1000, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.9201030731201172, "eval_runtime": 10.5666, "eval_samples_per_second": 20.915, "eval_steps_per_second": 5.3, "step": 221 }, { "epoch": 2.0, "eval_loss": 1.8042283058166504, "eval_runtime": 10.5219, "eval_samples_per_second": 21.004, "eval_steps_per_second": 5.322, "step": 442 }, { "epoch": 2.26, "learning_rate": 4.622926093514329e-05, "loss": 2.1534, "step": 500 }, { "epoch": 3.0, "eval_loss": 1.7254605293273926, "eval_runtime": 10.5492, "eval_samples_per_second": 20.95, "eval_steps_per_second": 5.308, "step": 663 }, { "epoch": 4.0, "eval_loss": 1.653130054473877, "eval_runtime": 10.5858, "eval_samples_per_second": 20.877, "eval_steps_per_second": 5.29, "step": 884 }, { "epoch": 4.52, "learning_rate": 4.2458521870286574e-05, "loss": 1.8808, "step": 1000 }, { "epoch": 5.0, "eval_loss": 1.5984195470809937, "eval_runtime": 10.5172, "eval_samples_per_second": 21.013, "eval_steps_per_second": 5.325, "step": 1105 }, { "epoch": 6.0, "eval_loss": 1.5462396144866943, "eval_runtime": 10.5813, "eval_samples_per_second": 20.886, "eval_steps_per_second": 5.292, "step": 1326 }, { "epoch": 6.79, "learning_rate": 3.868778280542987e-05, "loss": 1.7597, "step": 1500 }, { "epoch": 7.0, "eval_loss": 1.4970717430114746, "eval_runtime": 10.5617, "eval_samples_per_second": 20.925, "eval_steps_per_second": 5.302, "step": 1547 }, { "epoch": 8.0, "eval_loss": 1.4608112573623657, "eval_runtime": 10.5482, "eval_samples_per_second": 20.952, "eval_steps_per_second": 5.309, "step": 1768 }, { "epoch": 9.0, "eval_loss": 1.426717758178711, "eval_runtime": 10.5377, "eval_samples_per_second": 20.972, "eval_steps_per_second": 5.314, "step": 1989 }, { "epoch": 9.05, "learning_rate": 3.491704374057315e-05, "loss": 1.673, "step": 2000 }, { "epoch": 10.0, "eval_loss": 1.3964918851852417, "eval_runtime": 10.5931, "eval_samples_per_second": 20.863, "eval_steps_per_second": 5.286, "step": 2210 }, { "epoch": 11.0, "eval_loss": 1.3659923076629639, "eval_runtime": 10.5424, "eval_samples_per_second": 20.963, "eval_steps_per_second": 5.312, "step": 2431 }, { "epoch": 11.31, "learning_rate": 3.114630467571644e-05, "loss": 1.6087, "step": 2500 }, { "epoch": 12.0, "eval_loss": 1.3398691415786743, "eval_runtime": 10.5482, "eval_samples_per_second": 20.951, "eval_steps_per_second": 5.309, "step": 2652 }, { "epoch": 13.0, "eval_loss": 1.3123878240585327, "eval_runtime": 10.4901, "eval_samples_per_second": 21.067, "eval_steps_per_second": 5.338, "step": 2873 }, { "epoch": 13.57, "learning_rate": 2.737556561085973e-05, "loss": 1.5523, "step": 3000 }, { "epoch": 14.0, "eval_loss": 1.2912379503250122, "eval_runtime": 10.5302, "eval_samples_per_second": 20.987, "eval_steps_per_second": 5.318, "step": 3094 }, { "epoch": 15.0, "eval_loss": 1.2690876722335815, "eval_runtime": 10.5127, "eval_samples_per_second": 21.022, "eval_steps_per_second": 5.327, "step": 3315 }, { "epoch": 15.84, "learning_rate": 2.3604826546003017e-05, "loss": 1.5072, "step": 3500 }, { "epoch": 16.0, "eval_loss": 1.2487969398498535, "eval_runtime": 10.5677, "eval_samples_per_second": 20.913, "eval_steps_per_second": 5.299, "step": 3536 }, { "epoch": 17.0, "eval_loss": 1.2326879501342773, "eval_runtime": 10.5424, "eval_samples_per_second": 20.963, "eval_steps_per_second": 5.312, "step": 3757 }, { "epoch": 18.0, "eval_loss": 1.2160953283309937, "eval_runtime": 10.5427, "eval_samples_per_second": 20.962, "eval_steps_per_second": 5.312, "step": 3978 }, { "epoch": 18.1, "learning_rate": 1.9834087481146303e-05, "loss": 1.4711, "step": 4000 }, { "epoch": 19.0, "eval_loss": 1.2029471397399902, "eval_runtime": 10.5308, "eval_samples_per_second": 20.986, "eval_steps_per_second": 5.318, "step": 4199 }, { "epoch": 20.0, "eval_loss": 1.1921287775039673, "eval_runtime": 10.5502, "eval_samples_per_second": 20.948, "eval_steps_per_second": 5.308, "step": 4420 }, { "epoch": 20.36, "learning_rate": 1.6063348416289596e-05, "loss": 1.4329, "step": 4500 }, { "epoch": 21.0, "eval_loss": 1.1807990074157715, "eval_runtime": 10.5791, "eval_samples_per_second": 20.89, "eval_steps_per_second": 5.293, "step": 4641 }, { "epoch": 22.0, "eval_loss": 1.170788288116455, "eval_runtime": 10.5201, "eval_samples_per_second": 21.007, "eval_steps_per_second": 5.323, "step": 4862 }, { "epoch": 22.62, "learning_rate": 1.229260935143288e-05, "loss": 1.4091, "step": 5000 }, { "epoch": 23.0, "eval_loss": 1.1616638898849487, "eval_runtime": 10.541, "eval_samples_per_second": 20.966, "eval_steps_per_second": 5.313, "step": 5083 }, { "epoch": 24.0, "eval_loss": 1.1520771980285645, "eval_runtime": 10.5678, "eval_samples_per_second": 20.913, "eval_steps_per_second": 5.299, "step": 5304 }, { "epoch": 24.89, "learning_rate": 8.52187028657617e-06, "loss": 1.392, "step": 5500 }, { "epoch": 25.0, "eval_loss": 1.1455986499786377, "eval_runtime": 10.5773, "eval_samples_per_second": 20.894, "eval_steps_per_second": 5.294, "step": 5525 }, { "epoch": 26.0, "eval_loss": 1.1402668952941895, "eval_runtime": 10.5575, "eval_samples_per_second": 20.933, "eval_steps_per_second": 5.304, "step": 5746 }, { "epoch": 27.0, "eval_loss": 1.1367387771606445, "eval_runtime": 10.5633, "eval_samples_per_second": 20.921, "eval_steps_per_second": 5.301, "step": 5967 }, { "epoch": 27.15, "learning_rate": 4.751131221719457e-06, "loss": 1.3771, "step": 6000 } ], "logging_steps": 500, "max_steps": 6630, "num_train_epochs": 30, "save_steps": 1000, "total_flos": 9484103172096000.0, "trial_name": null, "trial_params": null }