{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.7, "eval_steps": 3, "global_step": 27, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_loss": 1.4759451150894165, "eval_runtime": 1.7224, "eval_samples_per_second": 9.87, "eval_steps_per_second": 1.742, "step": 1 }, { "epoch": 0.3, "grad_norm": 1.6763296127319336, "learning_rate": 3e-05, "loss": 1.4056, "step": 3 }, { "epoch": 0.3, "eval_loss": 1.4381709098815918, "eval_runtime": 1.2057, "eval_samples_per_second": 14.1, "eval_steps_per_second": 2.488, "step": 3 }, { "epoch": 0.6, "grad_norm": 1.269885778427124, "learning_rate": 6e-05, "loss": 1.3039, "step": 6 }, { "epoch": 0.6, "eval_loss": 1.2148373126983643, "eval_runtime": 1.2078, "eval_samples_per_second": 14.075, "eval_steps_per_second": 2.484, "step": 6 }, { "epoch": 0.9, "grad_norm": 0.949137806892395, "learning_rate": 9e-05, "loss": 1.1307, "step": 9 }, { "epoch": 0.9, "eval_loss": 1.1424036026000977, "eval_runtime": 1.2138, "eval_samples_per_second": 14.006, "eval_steps_per_second": 2.472, "step": 9 }, { "epoch": 1.2, "grad_norm": 0.9828763008117676, "learning_rate": 9.755282581475769e-05, "loss": 1.0364, "step": 12 }, { "epoch": 1.2, "eval_loss": 1.1044687032699585, "eval_runtime": 1.2122, "eval_samples_per_second": 14.024, "eval_steps_per_second": 2.475, "step": 12 }, { "epoch": 1.5, "grad_norm": 0.9127976298332214, "learning_rate": 8.535533905932738e-05, "loss": 0.8639, "step": 15 }, { "epoch": 1.5, "eval_loss": 1.0964289903640747, "eval_runtime": 1.2142, "eval_samples_per_second": 14.001, "eval_steps_per_second": 2.471, "step": 15 }, { "epoch": 1.8, "grad_norm": 1.0147470235824585, "learning_rate": 6.545084971874738e-05, "loss": 0.8137, "step": 18 }, { "epoch": 1.8, "eval_loss": 1.1079833507537842, "eval_runtime": 1.2155, "eval_samples_per_second": 13.986, "eval_steps_per_second": 2.468, "step": 18 }, { "epoch": 2.1, "grad_norm": 0.8224146962165833, "learning_rate": 4.2178276747988446e-05, "loss": 0.7955, "step": 21 }, { "epoch": 2.1, "eval_loss": 1.1106747388839722, "eval_runtime": 1.2161, "eval_samples_per_second": 13.979, "eval_steps_per_second": 2.467, "step": 21 }, { "epoch": 2.4, "grad_norm": 0.9233430624008179, "learning_rate": 2.061073738537635e-05, "loss": 0.6105, "step": 24 }, { "epoch": 2.4, "eval_loss": 1.1175633668899536, "eval_runtime": 1.2153, "eval_samples_per_second": 13.988, "eval_steps_per_second": 2.468, "step": 24 }, { "epoch": 2.7, "grad_norm": 0.8103647232055664, "learning_rate": 5.449673790581611e-06, "loss": 0.5713, "step": 27 }, { "epoch": 2.7, "eval_loss": 1.127324104309082, "eval_runtime": 1.2229, "eval_samples_per_second": 13.902, "eval_steps_per_second": 2.453, "step": 27 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.794665429008384e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }