{ "best_metric": 3.8871757984161377, "best_model_checkpoint": "./models/gpt_test9/checkpoint-10000", "epoch": 4.0, "global_step": 10592, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 2.830188679245283e-05, "loss": 3.311, "step": 1000 }, { "epoch": 0.38, "eval_loss": 4.033947467803955, "eval_runtime": 6.7632, "eval_samples_per_second": 63.432, "eval_steps_per_second": 3.992, "step": 1000 }, { "epoch": 0.76, "learning_rate": 2.7041544271926144e-05, "loss": 3.1133, "step": 2000 }, { "epoch": 0.76, "eval_loss": 3.9776785373687744, "eval_runtime": 6.7558, "eval_samples_per_second": 63.501, "eval_steps_per_second": 3.997, "step": 2000 }, { "epoch": 1.13, "learning_rate": 2.3894250944187998e-05, "loss": 2.9875, "step": 3000 }, { "epoch": 1.13, "eval_loss": 3.9545652866363525, "eval_runtime": 6.7588, "eval_samples_per_second": 63.472, "eval_steps_per_second": 3.995, "step": 3000 }, { "epoch": 1.51, "learning_rate": 2.0746957616449853e-05, "loss": 2.8697, "step": 4000 }, { "epoch": 1.51, "eval_loss": 3.9269347190856934, "eval_runtime": 6.7568, "eval_samples_per_second": 63.491, "eval_steps_per_second": 3.996, "step": 4000 }, { "epoch": 1.89, "learning_rate": 1.759966428871171e-05, "loss": 2.8669, "step": 5000 }, { "epoch": 1.89, "eval_loss": 3.915926933288574, "eval_runtime": 6.7665, "eval_samples_per_second": 63.401, "eval_steps_per_second": 3.99, "step": 5000 }, { "epoch": 2.27, "learning_rate": 1.4452370960973563e-05, "loss": 2.7308, "step": 6000 }, { "epoch": 2.27, "eval_loss": 3.9065561294555664, "eval_runtime": 6.7494, "eval_samples_per_second": 63.562, "eval_steps_per_second": 4.0, "step": 6000 }, { "epoch": 2.64, "learning_rate": 1.1305077633235418e-05, "loss": 2.709, "step": 7000 }, { "epoch": 2.64, "eval_loss": 3.899543285369873, "eval_runtime": 6.761, "eval_samples_per_second": 63.452, "eval_steps_per_second": 3.993, "step": 7000 }, { "epoch": 3.02, "learning_rate": 8.157784305497272e-06, "loss": 2.6979, "step": 8000 }, { "epoch": 3.02, "eval_loss": 3.8976171016693115, "eval_runtime": 6.7563, "eval_samples_per_second": 63.496, "eval_steps_per_second": 3.996, "step": 8000 }, { "epoch": 3.4, "learning_rate": 5.010490977759127e-06, "loss": 2.5878, "step": 9000 }, { "epoch": 3.4, "eval_loss": 3.897768020629883, "eval_runtime": 6.7533, "eval_samples_per_second": 63.524, "eval_steps_per_second": 3.998, "step": 9000 }, { "epoch": 3.78, "learning_rate": 1.863197650020982e-06, "loss": 2.5824, "step": 10000 }, { "epoch": 3.78, "eval_loss": 3.8871757984161377, "eval_runtime": 6.7573, "eval_samples_per_second": 63.487, "eval_steps_per_second": 3.996, "step": 10000 }, { "epoch": 4.0, "step": 10592, "total_flos": 4.4280638078976e+16, "train_loss": 2.831424217569864, "train_runtime": 8708.1966, "train_samples_per_second": 19.461, "train_steps_per_second": 1.216 } ], "max_steps": 10592, "num_train_epochs": 4, "total_flos": 4.4280638078976e+16, "trial_name": null, "trial_params": null }