{ "best_metric": 1.929431438446045, "best_model_checkpoint": "./outputs/checkpoint-1200", "epoch": 1.7045454545454546, "eval_steps": 100, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.4609, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.123192071914673, "eval_runtime": 56.1237, "eval_samples_per_second": 26.584, "eval_steps_per_second": 3.332, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.2635, "step": 200 }, { "epoch": 0.28, "eval_loss": 2.083603858947754, "eval_runtime": 53.713, "eval_samples_per_second": 27.777, "eval_steps_per_second": 3.481, "step": 200 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.2344, "step": 300 }, { "epoch": 0.43, "eval_loss": 2.054030656814575, "eval_runtime": 53.5493, "eval_samples_per_second": 27.862, "eval_steps_per_second": 3.492, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.2009, "step": 400 }, { "epoch": 0.57, "eval_loss": 2.038727045059204, "eval_runtime": 53.5913, "eval_samples_per_second": 27.84, "eval_steps_per_second": 3.489, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.1811, "step": 500 }, { "epoch": 0.71, "eval_loss": 2.0150375366210938, "eval_runtime": 53.7081, "eval_samples_per_second": 27.78, "eval_steps_per_second": 3.482, "step": 500 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.1648, "step": 600 }, { "epoch": 0.85, "eval_loss": 1.9949842691421509, "eval_runtime": 53.6059, "eval_samples_per_second": 27.833, "eval_steps_per_second": 3.488, "step": 600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.1446, "step": 700 }, { "epoch": 0.99, "eval_loss": 1.9850085973739624, "eval_runtime": 53.5892, "eval_samples_per_second": 27.841, "eval_steps_per_second": 3.49, "step": 700 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.1122, "step": 800 }, { "epoch": 1.14, "eval_loss": 1.9744948148727417, "eval_runtime": 53.6175, "eval_samples_per_second": 27.827, "eval_steps_per_second": 3.488, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.0852, "step": 900 }, { "epoch": 1.28, "eval_loss": 1.9583721160888672, "eval_runtime": 53.7484, "eval_samples_per_second": 27.759, "eval_steps_per_second": 3.479, "step": 900 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.0848, "step": 1000 }, { "epoch": 1.42, "eval_loss": 1.9483006000518799, "eval_runtime": 53.6139, "eval_samples_per_second": 27.829, "eval_steps_per_second": 3.488, "step": 1000 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.091, "step": 1100 }, { "epoch": 1.56, "eval_loss": 1.936788558959961, "eval_runtime": 53.6874, "eval_samples_per_second": 27.791, "eval_steps_per_second": 3.483, "step": 1100 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.0684, "step": 1200 }, { "epoch": 1.7, "eval_loss": 1.929431438446045, "eval_runtime": 53.7712, "eval_samples_per_second": 27.747, "eval_steps_per_second": 3.478, "step": 1200 } ], "logging_steps": 100, "max_steps": 2112, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 3.634363552684032e+16, "trial_name": null, "trial_params": null }