{ "best_metric": null, "best_model_checkpoint": null, "epoch": 59.927739863508634, "eval_steps": 1866, "global_step": 18660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.992773986350863, "grad_norm": 7.660736560821533, "learning_rate": 9.001071811361202e-06, "loss": 2.077, "step": 1866 }, { "epoch": 5.992773986350863, "eval_accuracy": 0.07665166078598369, "eval_loss": 3.0592968463897705, "eval_runtime": 32.9334, "eval_samples_per_second": 249.564, "eval_steps_per_second": 12.48, "step": 1866 }, { "epoch": 11.985547972701726, "grad_norm": 8.02175521850586, "learning_rate": 8.0021436227224e-06, "loss": 1.8747, "step": 3732 }, { "epoch": 11.985547972701726, "eval_accuracy": 0.07878087358559435, "eval_loss": 3.1968600749969482, "eval_runtime": 33.6195, "eval_samples_per_second": 244.471, "eval_steps_per_second": 12.225, "step": 3732 }, { "epoch": 17.97832195905259, "grad_norm": 7.991235733032227, "learning_rate": 7.002143622722402e-06, "loss": 1.7613, "step": 5598 }, { "epoch": 17.97832195905259, "eval_accuracy": 0.07819280528855903, "eval_loss": 3.2275424003601074, "eval_runtime": 33.0021, "eval_samples_per_second": 249.045, "eval_steps_per_second": 12.454, "step": 5598 }, { "epoch": 23.97109594540345, "grad_norm": 4.696638584136963, "learning_rate": 6.002679528403002e-06, "loss": 1.703, "step": 7464 }, { "epoch": 23.97109594540345, "eval_accuracy": 0.07881129091130308, "eval_loss": 3.367746114730835, "eval_runtime": 33.0083, "eval_samples_per_second": 248.998, "eval_steps_per_second": 12.451, "step": 7464 }, { "epoch": 29.963869931754317, "grad_norm": 2.74943208694458, "learning_rate": 5.0037513397642025e-06, "loss": 1.676, "step": 9330 }, { "epoch": 29.963869931754317, "eval_accuracy": 0.07835503102567222, "eval_loss": 3.4368343353271484, "eval_runtime": 32.8486, "eval_samples_per_second": 250.208, "eval_steps_per_second": 12.512, "step": 9330 }, { "epoch": 35.95664391810518, "grad_norm": 4.24375057220459, "learning_rate": 4.003751339764202e-06, "loss": 1.6495, "step": 11196 }, { "epoch": 35.95664391810518, "eval_accuracy": 0.07829419637425478, "eval_loss": 3.5519556999206543, "eval_runtime": 32.9918, "eval_samples_per_second": 249.123, "eval_steps_per_second": 12.458, "step": 11196 }, { "epoch": 41.94941790445604, "grad_norm": 3.4726574420928955, "learning_rate": 3.004287245444802e-06, "loss": 1.6449, "step": 13062 }, { "epoch": 41.94941790445604, "eval_accuracy": 0.07814645507795526, "eval_loss": 3.556215763092041, "eval_runtime": 32.6442, "eval_samples_per_second": 251.776, "eval_steps_per_second": 12.59, "step": 13062 }, { "epoch": 47.9421918908069, "grad_norm": 3.2187790870666504, "learning_rate": 2.004287245444802e-06, "loss": 1.6293, "step": 14928 }, { "epoch": 47.9421918908069, "eval_accuracy": 0.07750334590582796, "eval_loss": 3.621753215789795, "eval_runtime": 32.8748, "eval_samples_per_second": 250.009, "eval_steps_per_second": 12.502, "step": 14928 }, { "epoch": 53.934965877157765, "grad_norm": 1.5753388404846191, "learning_rate": 1.0048231511254019e-06, "loss": 1.6301, "step": 16794 }, { "epoch": 53.934965877157765, "eval_accuracy": 0.07704370631734057, "eval_loss": 3.7435097694396973, "eval_runtime": 32.6254, "eval_samples_per_second": 251.92, "eval_steps_per_second": 12.598, "step": 16794 }, { "epoch": 59.927739863508634, "grad_norm": 1.8606178760528564, "learning_rate": 5.359056806002144e-09, "loss": 1.6232, "step": 18660 }, { "epoch": 59.927739863508634, "eval_accuracy": 0.07645698990144786, "eval_loss": 3.775934934616089, "eval_runtime": 32.9764, "eval_samples_per_second": 249.239, "eval_steps_per_second": 12.463, "step": 18660 }, { "epoch": 59.927739863508634, "step": 18660, "total_flos": 1.4754079994623488e+18, "train_loss": 1.726910066757938, "train_runtime": 33547.8653, "train_samples_per_second": 111.342, "train_steps_per_second": 0.556 } ], "logging_steps": 1866, "max_steps": 18660, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4754079994623488e+18, "train_batch_size": 25, "trial_name": null, "trial_params": null }