{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.292993630573248, "eval_steps": 100, "global_step": 1616, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.64, "learning_rate": 2e-05, "loss": 1.1355, "step": 100 }, { "epoch": 0.64, "eval_loss": 1.157366156578064, "eval_runtime": 30.4555, "eval_samples_per_second": 65.67, "eval_steps_per_second": 1.051, "step": 100 }, { "epoch": 1.27, "learning_rate": 2e-05, "loss": 0.881, "step": 200 }, { "epoch": 1.27, "eval_loss": 1.3262789249420166, "eval_runtime": 29.1383, "eval_samples_per_second": 68.638, "eval_steps_per_second": 1.098, "step": 200 }, { "epoch": 1.91, "learning_rate": 2e-05, "loss": 0.5496, "step": 300 }, { "epoch": 1.91, "eval_loss": 1.3176809549331665, "eval_runtime": 28.8909, "eval_samples_per_second": 69.226, "eval_steps_per_second": 1.108, "step": 300 }, { "epoch": 2.55, "learning_rate": 2e-05, "loss": 0.2597, "step": 400 }, { "epoch": 2.55, "eval_loss": 1.5806535482406616, "eval_runtime": 29.1193, "eval_samples_per_second": 68.683, "eval_steps_per_second": 1.099, "step": 400 }, { "epoch": 3.18, "learning_rate": 2e-05, "loss": 0.1877, "step": 500 }, { "epoch": 3.18, "eval_loss": 1.7526657581329346, "eval_runtime": 29.1196, "eval_samples_per_second": 68.682, "eval_steps_per_second": 1.099, "step": 500 }, { "epoch": 3.82, "learning_rate": 2e-05, "loss": 0.1158, "step": 600 }, { "epoch": 3.82, "eval_loss": 1.7486767768859863, "eval_runtime": 29.2706, "eval_samples_per_second": 68.328, "eval_steps_per_second": 1.093, "step": 600 }, { "epoch": 4.46, "learning_rate": 2e-05, "loss": 0.0855, "step": 700 }, { "epoch": 4.46, "eval_loss": 1.873838186264038, "eval_runtime": 29.2292, "eval_samples_per_second": 68.425, "eval_steps_per_second": 1.095, "step": 700 }, { "epoch": 5.1, "learning_rate": 2e-05, "loss": 0.0645, "step": 800 }, { "epoch": 5.1, "eval_loss": 1.9275007247924805, "eval_runtime": 29.013, "eval_samples_per_second": 68.935, "eval_steps_per_second": 1.103, "step": 800 }, { "epoch": 5.73, "learning_rate": 2e-05, "loss": 0.0518, "step": 900 }, { "epoch": 5.73, "eval_loss": 1.9070993661880493, "eval_runtime": 30.5536, "eval_samples_per_second": 65.459, "eval_steps_per_second": 1.047, "step": 900 }, { "epoch": 6.37, "learning_rate": 2e-05, "loss": 0.0464, "step": 1000 }, { "epoch": 6.37, "eval_loss": 1.9601927995681763, "eval_runtime": 28.9827, "eval_samples_per_second": 69.007, "eval_steps_per_second": 1.104, "step": 1000 }, { "epoch": 7.01, "learning_rate": 2e-05, "loss": 0.0367, "step": 1100 }, { "epoch": 7.01, "eval_loss": 1.973179817199707, "eval_runtime": 29.0672, "eval_samples_per_second": 68.806, "eval_steps_per_second": 1.101, "step": 1100 }, { "epoch": 7.64, "learning_rate": 2e-05, "loss": 0.0288, "step": 1200 }, { "epoch": 7.64, "eval_loss": 2.0399632453918457, "eval_runtime": 29.0049, "eval_samples_per_second": 68.954, "eval_steps_per_second": 1.103, "step": 1200 }, { "epoch": 8.28, "learning_rate": 2e-05, "loss": 0.0265, "step": 1300 }, { "epoch": 8.28, "eval_loss": 2.0276734828948975, "eval_runtime": 28.9115, "eval_samples_per_second": 69.177, "eval_steps_per_second": 1.107, "step": 1300 }, { "epoch": 8.92, "learning_rate": 2e-05, "loss": 0.0287, "step": 1400 }, { "epoch": 8.92, "eval_loss": 2.049071788787842, "eval_runtime": 29.0231, "eval_samples_per_second": 68.911, "eval_steps_per_second": 1.103, "step": 1400 }, { "epoch": 9.55, "learning_rate": 2e-05, "loss": 0.0195, "step": 1500 }, { "epoch": 9.55, "eval_loss": 2.043515205383301, "eval_runtime": 29.1891, "eval_samples_per_second": 68.519, "eval_steps_per_second": 1.096, "step": 1500 }, { "epoch": 10.19, "learning_rate": 2e-05, "loss": 0.0215, "step": 1600 }, { "epoch": 10.19, "eval_loss": 2.101804733276367, "eval_runtime": 28.9478, "eval_samples_per_second": 69.09, "eval_steps_per_second": 1.105, "step": 1600 } ], "logging_steps": 100, "max_steps": 1616, "num_train_epochs": 11, "save_steps": 808, "total_flos": 0.0, "trial_name": null, "trial_params": null }