{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.18691588785046728, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003738317757009346, "eval_loss": 3.356201171875, "eval_runtime": 39.4512, "eval_samples_per_second": 11.432, "eval_steps_per_second": 1.445, "step": 1 }, { "epoch": 0.011214953271028037, "grad_norm": 10.225142478942871, "learning_rate": 3e-05, "loss": 3.4561, "step": 3 }, { "epoch": 0.018691588785046728, "eval_loss": 2.506544589996338, "eval_runtime": 40.0711, "eval_samples_per_second": 11.255, "eval_steps_per_second": 1.422, "step": 5 }, { "epoch": 0.022429906542056073, "grad_norm": 6.831135272979736, "learning_rate": 6e-05, "loss": 2.8683, "step": 6 }, { "epoch": 0.03364485981308411, "grad_norm": 6.806216716766357, "learning_rate": 9e-05, "loss": 2.2714, "step": 9 }, { "epoch": 0.037383177570093455, "eval_loss": 1.89436936378479, "eval_runtime": 40.0997, "eval_samples_per_second": 11.247, "eval_steps_per_second": 1.421, "step": 10 }, { "epoch": 0.044859813084112146, "grad_norm": 5.123374938964844, "learning_rate": 9.938441702975689e-05, "loss": 1.986, "step": 12 }, { "epoch": 0.056074766355140186, "grad_norm": 4.615382671356201, "learning_rate": 9.619397662556435e-05, "loss": 1.7308, "step": 15 }, { "epoch": 0.056074766355140186, "eval_loss": 1.6895991563796997, "eval_runtime": 40.1021, "eval_samples_per_second": 11.246, "eval_steps_per_second": 1.421, "step": 15 }, { "epoch": 0.06728971962616823, "grad_norm": 4.685133457183838, "learning_rate": 9.045084971874738e-05, "loss": 1.6272, "step": 18 }, { "epoch": 0.07476635514018691, "eval_loss": 1.5995190143585205, "eval_runtime": 40.0882, "eval_samples_per_second": 11.25, "eval_steps_per_second": 1.422, "step": 20 }, { "epoch": 0.07850467289719626, "grad_norm": 4.204235553741455, "learning_rate": 8.247240241650918e-05, "loss": 1.6219, "step": 21 }, { "epoch": 0.08971962616822429, "grad_norm": 3.952303647994995, "learning_rate": 7.269952498697734e-05, "loss": 1.5429, "step": 24 }, { "epoch": 0.09345794392523364, "eval_loss": 1.5367388725280762, "eval_runtime": 40.085, "eval_samples_per_second": 11.251, "eval_steps_per_second": 1.422, "step": 25 }, { "epoch": 0.10093457943925234, "grad_norm": 3.8612864017486572, "learning_rate": 6.167226819279528e-05, "loss": 1.5595, "step": 27 }, { "epoch": 0.11214953271028037, "grad_norm": 5.475238800048828, "learning_rate": 5e-05, "loss": 1.4384, "step": 30 }, { "epoch": 0.11214953271028037, "eval_loss": 1.50581693649292, "eval_runtime": 40.1174, "eval_samples_per_second": 11.242, "eval_steps_per_second": 1.421, "step": 30 }, { "epoch": 0.1233644859813084, "grad_norm": 4.102995872497559, "learning_rate": 3.832773180720475e-05, "loss": 1.5612, "step": 33 }, { "epoch": 0.1308411214953271, "eval_loss": 1.483141303062439, "eval_runtime": 40.2029, "eval_samples_per_second": 11.218, "eval_steps_per_second": 1.418, "step": 35 }, { "epoch": 0.13457943925233645, "grad_norm": 3.9326908588409424, "learning_rate": 2.7300475013022663e-05, "loss": 1.4843, "step": 36 }, { "epoch": 0.14579439252336449, "grad_norm": 3.5036063194274902, "learning_rate": 1.7527597583490822e-05, "loss": 1.4234, "step": 39 }, { "epoch": 0.14953271028037382, "eval_loss": 1.4650609493255615, "eval_runtime": 40.1268, "eval_samples_per_second": 11.239, "eval_steps_per_second": 1.42, "step": 40 }, { "epoch": 0.15700934579439252, "grad_norm": 3.635544538497925, "learning_rate": 9.549150281252633e-06, "loss": 1.4576, "step": 42 }, { "epoch": 0.16822429906542055, "grad_norm": 3.66231632232666, "learning_rate": 3.8060233744356633e-06, "loss": 1.3813, "step": 45 }, { "epoch": 0.16822429906542055, "eval_loss": 1.4580384492874146, "eval_runtime": 40.1352, "eval_samples_per_second": 11.237, "eval_steps_per_second": 1.42, "step": 45 }, { "epoch": 0.17943925233644858, "grad_norm": 4.305198669433594, "learning_rate": 6.15582970243117e-07, "loss": 1.4448, "step": 48 }, { "epoch": 0.18691588785046728, "eval_loss": 1.4549893140792847, "eval_runtime": 40.1439, "eval_samples_per_second": 11.235, "eval_steps_per_second": 1.42, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.81641131753472e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }