{ "best_metric": 1.8635917901992798, "best_model_checkpoint": "./outputs/checkpoint-2100", "epoch": 2.9829545454545454, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0002, "loss": 2.4609, "step": 100 }, { "epoch": 0.14, "eval_loss": 2.123192071914673, "eval_runtime": 56.1237, "eval_samples_per_second": 26.584, "eval_steps_per_second": 3.332, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 2.2635, "step": 200 }, { "epoch": 0.28, "eval_loss": 2.083603858947754, "eval_runtime": 53.713, "eval_samples_per_second": 27.777, "eval_steps_per_second": 3.481, "step": 200 }, { "epoch": 0.43, "learning_rate": 0.0002, "loss": 2.2344, "step": 300 }, { "epoch": 0.43, "eval_loss": 2.054030656814575, "eval_runtime": 53.5493, "eval_samples_per_second": 27.862, "eval_steps_per_second": 3.492, "step": 300 }, { "epoch": 0.57, "learning_rate": 0.0002, "loss": 2.2009, "step": 400 }, { "epoch": 0.57, "eval_loss": 2.038727045059204, "eval_runtime": 53.5913, "eval_samples_per_second": 27.84, "eval_steps_per_second": 3.489, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 2.1811, "step": 500 }, { "epoch": 0.71, "eval_loss": 2.0150375366210938, "eval_runtime": 53.7081, "eval_samples_per_second": 27.78, "eval_steps_per_second": 3.482, "step": 500 }, { "epoch": 0.85, "learning_rate": 0.0002, "loss": 2.1648, "step": 600 }, { "epoch": 0.85, "eval_loss": 1.9949842691421509, "eval_runtime": 53.6059, "eval_samples_per_second": 27.833, "eval_steps_per_second": 3.488, "step": 600 }, { "epoch": 0.99, "learning_rate": 0.0002, "loss": 2.1446, "step": 700 }, { "epoch": 0.99, "eval_loss": 1.9850085973739624, "eval_runtime": 53.5892, "eval_samples_per_second": 27.841, "eval_steps_per_second": 3.49, "step": 700 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 2.1122, "step": 800 }, { "epoch": 1.14, "eval_loss": 1.9744948148727417, "eval_runtime": 53.6175, "eval_samples_per_second": 27.827, "eval_steps_per_second": 3.488, "step": 800 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 2.0852, "step": 900 }, { "epoch": 1.28, "eval_loss": 1.9583721160888672, "eval_runtime": 53.7484, "eval_samples_per_second": 27.759, "eval_steps_per_second": 3.479, "step": 900 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 2.0848, "step": 1000 }, { "epoch": 1.42, "eval_loss": 1.9483006000518799, "eval_runtime": 53.6139, "eval_samples_per_second": 27.829, "eval_steps_per_second": 3.488, "step": 1000 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 2.091, "step": 1100 }, { "epoch": 1.56, "eval_loss": 1.936788558959961, "eval_runtime": 53.6874, "eval_samples_per_second": 27.791, "eval_steps_per_second": 3.483, "step": 1100 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 2.0684, "step": 1200 }, { "epoch": 1.7, "eval_loss": 1.929431438446045, "eval_runtime": 53.7712, "eval_samples_per_second": 27.747, "eval_steps_per_second": 3.478, "step": 1200 }, { "epoch": 1.85, "learning_rate": 0.0002, "loss": 2.0524, "step": 1300 }, { "epoch": 1.85, "eval_loss": 1.9179173707962036, "eval_runtime": 53.6365, "eval_samples_per_second": 27.817, "eval_steps_per_second": 3.486, "step": 1300 }, { "epoch": 1.99, "learning_rate": 0.0002, "loss": 2.0562, "step": 1400 }, { "epoch": 1.99, "eval_loss": 1.9093526601791382, "eval_runtime": 53.6029, "eval_samples_per_second": 27.834, "eval_steps_per_second": 3.489, "step": 1400 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 2.0075, "step": 1500 }, { "epoch": 2.13, "eval_loss": 1.9054597616195679, "eval_runtime": 53.6616, "eval_samples_per_second": 27.804, "eval_steps_per_second": 3.485, "step": 1500 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 2.0119, "step": 1600 }, { "epoch": 2.27, "eval_loss": 1.8929505348205566, "eval_runtime": 53.7348, "eval_samples_per_second": 27.766, "eval_steps_per_second": 3.48, "step": 1600 }, { "epoch": 2.41, "learning_rate": 0.0002, "loss": 1.9964, "step": 1700 }, { "epoch": 2.41, "eval_loss": 1.8908874988555908, "eval_runtime": 53.6745, "eval_samples_per_second": 27.797, "eval_steps_per_second": 3.484, "step": 1700 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 1.9869, "step": 1800 }, { "epoch": 2.56, "eval_loss": 1.8806322813034058, "eval_runtime": 53.6914, "eval_samples_per_second": 27.788, "eval_steps_per_second": 3.483, "step": 1800 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 2.0004, "step": 1900 }, { "epoch": 2.7, "eval_loss": 1.877378225326538, "eval_runtime": 53.6054, "eval_samples_per_second": 27.833, "eval_steps_per_second": 3.488, "step": 1900 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.991, "step": 2000 }, { "epoch": 2.84, "eval_loss": 1.8717212677001953, "eval_runtime": 53.7679, "eval_samples_per_second": 27.749, "eval_steps_per_second": 3.478, "step": 2000 }, { "epoch": 2.98, "learning_rate": 0.0002, "loss": 1.9898, "step": 2100 }, { "epoch": 2.98, "eval_loss": 1.8635917901992798, "eval_runtime": 53.6308, "eval_samples_per_second": 27.82, "eval_steps_per_second": 3.487, "step": 2100 } ], "logging_steps": 100, "max_steps": 2112, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 6.361597443704832e+16, "trial_name": null, "trial_params": null }