{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0064525495636463355, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.226274781823168e-05, "eval_loss": 3.6124956607818604, "eval_runtime": 307.4909, "eval_samples_per_second": 42.444, "eval_steps_per_second": 21.223, "step": 1 }, { "epoch": 0.0001613137390911584, "grad_norm": 0.14286714792251587, "learning_rate": 5e-05, "loss": 3.2497, "step": 5 }, { "epoch": 0.0003226274781823168, "grad_norm": 0.14805500209331512, "learning_rate": 0.0001, "loss": 3.3692, "step": 10 }, { "epoch": 0.0004839412172734752, "grad_norm": 0.18735414743423462, "learning_rate": 9.98292246503335e-05, "loss": 3.4622, "step": 15 }, { "epoch": 0.0006452549563646336, "grad_norm": 0.22555524110794067, "learning_rate": 9.931806517013612e-05, "loss": 3.3926, "step": 20 }, { "epoch": 0.0008065686954557919, "grad_norm": 0.27350375056266785, "learning_rate": 9.847001329696653e-05, "loss": 3.4236, "step": 25 }, { "epoch": 0.0009678824345469504, "grad_norm": 0.33609363436698914, "learning_rate": 9.729086208503174e-05, "loss": 3.6329, "step": 30 }, { "epoch": 0.0011291961736381087, "grad_norm": 0.38991883397102356, "learning_rate": 9.578866633275288e-05, "loss": 3.707, "step": 35 }, { "epoch": 0.0012905099127292672, "grad_norm": 0.4257192015647888, "learning_rate": 9.397368756032445e-05, "loss": 3.7489, "step": 40 }, { "epoch": 0.0014518236518204256, "grad_norm": 0.4441923499107361, "learning_rate": 9.185832391312644e-05, "loss": 3.5653, "step": 45 }, { "epoch": 0.0016131373909115839, "grad_norm": 0.6182786226272583, "learning_rate": 8.945702546981969e-05, "loss": 3.6585, "step": 50 }, { "epoch": 0.0016131373909115839, "eval_loss": 3.442338705062866, "eval_runtime": 308.2071, "eval_samples_per_second": 42.345, "eval_steps_per_second": 21.174, "step": 50 }, { "epoch": 0.0017744511300027423, "grad_norm": 0.18179060518741608, "learning_rate": 8.678619553365659e-05, "loss": 3.2418, "step": 55 }, { "epoch": 0.0019357648690939008, "grad_norm": 0.2533986568450928, "learning_rate": 8.386407858128706e-05, "loss": 3.2397, "step": 60 }, { "epoch": 0.0020970786081850592, "grad_norm": 0.24411137402057648, "learning_rate": 8.07106356344834e-05, "loss": 3.2814, "step": 65 }, { "epoch": 0.0022583923472762175, "grad_norm": 0.24782365560531616, "learning_rate": 7.734740790612136e-05, "loss": 3.2308, "step": 70 }, { "epoch": 0.002419706086367376, "grad_norm": 0.24265138804912567, "learning_rate": 7.379736965185368e-05, "loss": 3.2418, "step": 75 }, { "epoch": 0.0025810198254585344, "grad_norm": 0.29976701736450195, "learning_rate": 7.008477123264848e-05, "loss": 3.4099, "step": 80 }, { "epoch": 0.0027423335645496926, "grad_norm": 0.36268311738967896, "learning_rate": 6.623497346023418e-05, "loss": 3.3549, "step": 85 }, { "epoch": 0.0029036473036408513, "grad_norm": 0.36004823446273804, "learning_rate": 6.227427435703997e-05, "loss": 3.3432, "step": 90 }, { "epoch": 0.0030649610427320095, "grad_norm": 0.526280403137207, "learning_rate": 5.8229729514036705e-05, "loss": 3.5244, "step": 95 }, { "epoch": 0.0032262747818231677, "grad_norm": 0.8874719738960266, "learning_rate": 5.4128967273616625e-05, "loss": 3.7889, "step": 100 }, { "epoch": 0.0032262747818231677, "eval_loss": 3.360200881958008, "eval_runtime": 307.9178, "eval_samples_per_second": 42.385, "eval_steps_per_second": 21.194, "step": 100 }, { "epoch": 0.0033875885209143264, "grad_norm": 0.23607748746871948, "learning_rate": 5e-05, "loss": 3.2003, "step": 105 }, { "epoch": 0.0035489022600054846, "grad_norm": 0.26606330275535583, "learning_rate": 4.5871032726383386e-05, "loss": 3.0856, "step": 110 }, { "epoch": 0.003710215999096643, "grad_norm": 0.2770606577396393, "learning_rate": 4.17702704859633e-05, "loss": 3.2055, "step": 115 }, { "epoch": 0.0038715297381878015, "grad_norm": 0.25630635023117065, "learning_rate": 3.772572564296005e-05, "loss": 3.3092, "step": 120 }, { "epoch": 0.00403284347727896, "grad_norm": 0.27507317066192627, "learning_rate": 3.3765026539765834e-05, "loss": 3.3265, "step": 125 }, { "epoch": 0.0041941572163701184, "grad_norm": 0.28478625416755676, "learning_rate": 2.991522876735154e-05, "loss": 3.3682, "step": 130 }, { "epoch": 0.004355470955461276, "grad_norm": 0.3185293972492218, "learning_rate": 2.6202630348146324e-05, "loss": 3.3184, "step": 135 }, { "epoch": 0.004516784694552435, "grad_norm": 0.37928304076194763, "learning_rate": 2.2652592093878666e-05, "loss": 3.4225, "step": 140 }, { "epoch": 0.004678098433643594, "grad_norm": 0.5270272493362427, "learning_rate": 1.928936436551661e-05, "loss": 3.3142, "step": 145 }, { "epoch": 0.004839412172734752, "grad_norm": 1.016218662261963, "learning_rate": 1.6135921418712956e-05, "loss": 3.6347, "step": 150 }, { "epoch": 0.004839412172734752, "eval_loss": 3.3451364040374756, "eval_runtime": 307.2314, "eval_samples_per_second": 42.479, "eval_steps_per_second": 21.241, "step": 150 }, { "epoch": 0.00500072591182591, "grad_norm": 0.20172300934791565, "learning_rate": 1.3213804466343421e-05, "loss": 3.1708, "step": 155 }, { "epoch": 0.005162039650917069, "grad_norm": 0.23905648291110992, "learning_rate": 1.0542974530180327e-05, "loss": 3.1856, "step": 160 }, { "epoch": 0.005323353390008227, "grad_norm": 0.2721221148967743, "learning_rate": 8.141676086873572e-06, "loss": 3.2851, "step": 165 }, { "epoch": 0.005484667129099385, "grad_norm": 0.25581252574920654, "learning_rate": 6.026312439675552e-06, "loss": 3.2723, "step": 170 }, { "epoch": 0.005645980868190544, "grad_norm": 0.2833029329776764, "learning_rate": 4.2113336672471245e-06, "loss": 3.3305, "step": 175 }, { "epoch": 0.0058072946072817025, "grad_norm": 0.3108144700527191, "learning_rate": 2.7091379149682685e-06, "loss": 3.4338, "step": 180 }, { "epoch": 0.00596860834637286, "grad_norm": 0.3516363501548767, "learning_rate": 1.5299867030334814e-06, "loss": 3.2965, "step": 185 }, { "epoch": 0.006129922085464019, "grad_norm": 0.39249828457832336, "learning_rate": 6.819348298638839e-07, "loss": 3.4179, "step": 190 }, { "epoch": 0.006291235824555178, "grad_norm": 0.44656136631965637, "learning_rate": 1.7077534966650766e-07, "loss": 3.2208, "step": 195 }, { "epoch": 0.0064525495636463355, "grad_norm": 0.875480592250824, "learning_rate": 0.0, "loss": 3.6721, "step": 200 }, { "epoch": 0.0064525495636463355, "eval_loss": 3.3430535793304443, "eval_runtime": 308.6144, "eval_samples_per_second": 42.289, "eval_steps_per_second": 21.146, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1072035764895744.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }