{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.005858230814294083, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005858230814294083, "grad_norm": 9.5940523147583, "learning_rate": 2e-05, "loss": 15.9668, "step": 1 }, { "epoch": 0.0005858230814294083, "eval_loss": 16.182086944580078, "eval_runtime": 23.0092, "eval_samples_per_second": 31.248, "eval_steps_per_second": 15.646, "step": 1 }, { "epoch": 0.0011716461628588166, "grad_norm": 8.103241920471191, "learning_rate": 4e-05, "loss": 15.8215, "step": 2 }, { "epoch": 0.0017574692442882249, "grad_norm": 8.939263343811035, "learning_rate": 6e-05, "loss": 15.952, "step": 3 }, { "epoch": 0.0017574692442882249, "eval_loss": 16.123308181762695, "eval_runtime": 21.6338, "eval_samples_per_second": 33.235, "eval_steps_per_second": 16.641, "step": 3 }, { "epoch": 0.0023432923257176333, "grad_norm": 9.193857192993164, "learning_rate": 8e-05, "loss": 16.0648, "step": 4 }, { "epoch": 0.0029291154071470417, "grad_norm": 9.71928596496582, "learning_rate": 0.0001, "loss": 16.0317, "step": 5 }, { "epoch": 0.0035149384885764497, "grad_norm": 10.246051788330078, "learning_rate": 0.00012, "loss": 15.2528, "step": 6 }, { "epoch": 0.0035149384885764497, "eval_loss": 14.887479782104492, "eval_runtime": 21.7432, "eval_samples_per_second": 33.068, "eval_steps_per_second": 16.557, "step": 6 }, { "epoch": 0.004100761570005858, "grad_norm": 13.074706077575684, "learning_rate": 0.00014, "loss": 15.5862, "step": 7 }, { "epoch": 0.0046865846514352666, "grad_norm": 14.782737731933594, "learning_rate": 0.00016, "loss": 13.6667, "step": 8 }, { "epoch": 0.005272407732864675, "grad_norm": 14.997851371765137, "learning_rate": 0.00018, "loss": 12.6867, "step": 9 }, { "epoch": 0.005272407732864675, "eval_loss": 10.434623718261719, "eval_runtime": 21.7497, "eval_samples_per_second": 33.058, "eval_steps_per_second": 16.552, "step": 9 }, { "epoch": 0.005858230814294083, "grad_norm": 17.834928512573242, "learning_rate": 0.0002, "loss": 10.6756, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1849564248145920.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }