{ "best_metric": 1.9583721160888672, "best_model_checkpoint": "./outputs/checkpoint-900", "epoch": 1.2784090909090908, "eval_steps": 100, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.4609, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.123192071914673, "eval_runtime": 56.1237, "eval_samples_per_second": 26.584, "eval_steps_per_second": 3.332, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.2635, "step": 200 }, { "epoch": 0.28, "eval_loss": 2.083603858947754, "eval_runtime": 53.713, "eval_samples_per_second": 27.777, "eval_steps_per_second": 3.481, "step": 200 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.2344, "step": 300 }, { "epoch": 0.43, "eval_loss": 2.054030656814575, "eval_runtime": 53.5493, "eval_samples_per_second": 27.862, "eval_steps_per_second": 3.492, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.2009, "step": 400 }, { "epoch": 0.57, "eval_loss": 2.038727045059204, "eval_runtime": 53.5913, "eval_samples_per_second": 27.84, "eval_steps_per_second": 3.489, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.1811, "step": 500 }, { "epoch": 0.71, "eval_loss": 2.0150375366210938, "eval_runtime": 53.7081, "eval_samples_per_second": 27.78, "eval_steps_per_second": 3.482, "step": 500 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.1648, "step": 600 }, { "epoch": 0.85, "eval_loss": 1.9949842691421509, "eval_runtime": 53.6059, "eval_samples_per_second": 27.833, "eval_steps_per_second": 3.488, "step": 600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.1446, "step": 700 }, { "epoch": 0.99, "eval_loss": 1.9850085973739624, "eval_runtime": 53.5892, "eval_samples_per_second": 27.841, "eval_steps_per_second": 3.49, "step": 700 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.1122, "step": 800 }, { "epoch": 1.14, "eval_loss": 1.9744948148727417, "eval_runtime": 53.6175, "eval_samples_per_second": 27.827, "eval_steps_per_second": 3.488, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.0852, "step": 900 }, { "epoch": 1.28, "eval_loss": 1.9583721160888672, "eval_runtime": 53.7484, "eval_samples_per_second": 27.759, "eval_steps_per_second": 3.479, "step": 900 } ], "logging_steps": 100, "max_steps": 2112, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.726008577316864e+16, "trial_name": null, "trial_params": null }