{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 5940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.2936731278896332, "eval_runtime": 114.2823, "eval_samples_per_second": 41.502, "eval_steps_per_second": 0.656, "step": 297 }, { "epoch": 1.68, "learning_rate": 4.57912457912458e-05, "loss": 0.5604, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.21221154928207397, "eval_runtime": 121.2766, "eval_samples_per_second": 39.109, "eval_steps_per_second": 0.618, "step": 594 }, { "epoch": 3.0, "eval_loss": 0.05269444361329079, "eval_runtime": 119.2093, "eval_samples_per_second": 39.787, "eval_steps_per_second": 0.629, "step": 891 }, { "epoch": 3.37, "learning_rate": 4.158249158249159e-05, "loss": 0.1318, "step": 1000 }, { "epoch": 4.0, "eval_loss": 0.014130596071481705, "eval_runtime": 183.4661, "eval_samples_per_second": 25.852, "eval_steps_per_second": 0.409, "step": 1188 }, { "epoch": 5.0, "eval_loss": 0.00778605230152607, "eval_runtime": 130.1216, "eval_samples_per_second": 36.451, "eval_steps_per_second": 0.576, "step": 1485 }, { "epoch": 5.05, "learning_rate": 3.7373737373737376e-05, "loss": 0.0552, "step": 1500 }, { "epoch": 6.0, "eval_loss": 0.004267835058271885, "eval_runtime": 130.782, "eval_samples_per_second": 36.266, "eval_steps_per_second": 0.573, "step": 1782 }, { "epoch": 6.73, "learning_rate": 3.3164983164983165e-05, "loss": 0.0197, "step": 2000 }, { "epoch": 7.0, "eval_loss": 0.002914531622081995, "eval_runtime": 126.9043, "eval_samples_per_second": 37.375, "eval_steps_per_second": 0.591, "step": 2079 }, { "epoch": 8.0, "eval_loss": 0.0024584291968494654, "eval_runtime": 129.977, "eval_samples_per_second": 36.491, "eval_steps_per_second": 0.577, "step": 2376 }, { "epoch": 8.42, "learning_rate": 2.8956228956228958e-05, "loss": 0.0104, "step": 2500 }, { "epoch": 9.0, "eval_loss": 0.0008079797262325883, "eval_runtime": 124.2007, "eval_samples_per_second": 38.188, "eval_steps_per_second": 0.604, "step": 2673 }, { "epoch": 10.0, "eval_loss": 0.0014460551319643855, "eval_runtime": 124.7091, "eval_samples_per_second": 38.033, "eval_steps_per_second": 0.601, "step": 2970 }, { "epoch": 10.1, "learning_rate": 2.474747474747475e-05, "loss": 0.0062, "step": 3000 }, { "epoch": 11.0, "eval_loss": 0.0016354549443349242, "eval_runtime": 126.5389, "eval_samples_per_second": 37.483, "eval_steps_per_second": 0.593, "step": 3267 }, { "epoch": 11.78, "learning_rate": 2.0538720538720542e-05, "loss": 0.0045, "step": 3500 }, { "epoch": 12.0, "eval_loss": 0.000995765090920031, "eval_runtime": 131.3803, "eval_samples_per_second": 36.101, "eval_steps_per_second": 0.571, "step": 3564 }, { "epoch": 13.0, "eval_loss": 0.00040818448178470135, "eval_runtime": 136.6658, "eval_samples_per_second": 34.705, "eval_steps_per_second": 0.549, "step": 3861 }, { "epoch": 13.47, "learning_rate": 1.632996632996633e-05, "loss": 0.0031, "step": 4000 }, { "epoch": 14.0, "eval_loss": 0.0001601761905476451, "eval_runtime": 128.2083, "eval_samples_per_second": 36.994, "eval_steps_per_second": 0.585, "step": 4158 }, { "epoch": 15.0, "eval_loss": 0.0001224653678946197, "eval_runtime": 104.2251, "eval_samples_per_second": 45.507, "eval_steps_per_second": 0.72, "step": 4455 }, { "epoch": 15.15, "learning_rate": 1.2121212121212122e-05, "loss": 0.0017, "step": 4500 }, { "epoch": 16.0, "eval_loss": 0.00017290345567744225, "eval_runtime": 131.2825, "eval_samples_per_second": 36.128, "eval_steps_per_second": 0.571, "step": 4752 }, { "epoch": 16.84, "learning_rate": 7.912457912457913e-06, "loss": 0.0012, "step": 5000 }, { "epoch": 17.0, "eval_loss": 4.626844383892603e-05, "eval_runtime": 98.0039, "eval_samples_per_second": 48.396, "eval_steps_per_second": 0.765, "step": 5049 }, { "epoch": 18.0, "eval_loss": 7.748394273221493e-05, "eval_runtime": 114.2754, "eval_samples_per_second": 41.505, "eval_steps_per_second": 0.656, "step": 5346 }, { "epoch": 18.52, "learning_rate": 3.7037037037037037e-06, "loss": 0.0009, "step": 5500 }, { "epoch": 19.0, "eval_loss": 3.669047146104276e-05, "eval_runtime": 103.8038, "eval_samples_per_second": 45.692, "eval_steps_per_second": 0.723, "step": 5643 }, { "epoch": 20.0, "eval_loss": 3.5578090319177136e-05, "eval_runtime": 99.0675, "eval_samples_per_second": 47.876, "eval_steps_per_second": 0.757, "step": 5940 } ], "logging_steps": 500, "max_steps": 5940, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 785921954204160.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }