{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.014216914574114553, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.108457287057277e-05, "eval_loss": 1.6025950908660889, "eval_runtime": 203.3782, "eval_samples_per_second": 29.128, "eval_steps_per_second": 14.564, "step": 1 }, { "epoch": 0.00035542286435286385, "grad_norm": 0.32894086837768555, "learning_rate": 5e-05, "loss": 1.3078, "step": 5 }, { "epoch": 0.0007108457287057277, "grad_norm": 0.5316031575202942, "learning_rate": 0.0001, "loss": 1.3399, "step": 10 }, { "epoch": 0.0010662685930585914, "grad_norm": 0.22600068151950836, "learning_rate": 9.98292246503335e-05, "loss": 1.3483, "step": 15 }, { "epoch": 0.0014216914574114554, "grad_norm": 0.32068952918052673, "learning_rate": 9.931806517013612e-05, "loss": 1.3313, "step": 20 }, { "epoch": 0.0017771143217643192, "grad_norm": 0.31301984190940857, "learning_rate": 9.847001329696653e-05, "loss": 1.2896, "step": 25 }, { "epoch": 0.0021325371861171828, "grad_norm": 0.47712332010269165, "learning_rate": 9.729086208503174e-05, "loss": 1.2427, "step": 30 }, { "epoch": 0.0024879600504700465, "grad_norm": 0.4070054590702057, "learning_rate": 9.578866633275288e-05, "loss": 1.2394, "step": 35 }, { "epoch": 0.0028433829148229108, "grad_norm": 0.5145549178123474, "learning_rate": 9.397368756032445e-05, "loss": 1.2066, "step": 40 }, { "epoch": 0.0031988057791757746, "grad_norm": 0.5820503234863281, "learning_rate": 9.185832391312644e-05, "loss": 1.0762, "step": 45 }, { "epoch": 0.0035542286435286384, "grad_norm": 0.6279653310775757, "learning_rate": 8.945702546981969e-05, "loss": 1.0131, "step": 50 }, { "epoch": 0.0035542286435286384, "eval_loss": 1.0860192775726318, "eval_runtime": 203.332, "eval_samples_per_second": 29.135, "eval_steps_per_second": 14.567, "step": 50 }, { "epoch": 0.003909651507881502, "grad_norm": 0.34148985147476196, "learning_rate": 8.678619553365659e-05, "loss": 1.1582, "step": 55 }, { "epoch": 0.0042650743722343655, "grad_norm": 0.2676335573196411, "learning_rate": 8.386407858128706e-05, "loss": 1.0721, "step": 60 }, { "epoch": 0.004620497236587229, "grad_norm": 0.27544882893562317, "learning_rate": 8.07106356344834e-05, "loss": 1.1066, "step": 65 }, { "epoch": 0.004975920100940093, "grad_norm": 0.3091588616371155, "learning_rate": 7.734740790612136e-05, "loss": 1.0596, "step": 70 }, { "epoch": 0.005331342965292957, "grad_norm": 0.3925229609012604, "learning_rate": 7.379736965185368e-05, "loss": 1.0647, "step": 75 }, { "epoch": 0.0056867658296458215, "grad_norm": 0.47207650542259216, "learning_rate": 7.008477123264848e-05, "loss": 0.957, "step": 80 }, { "epoch": 0.006042188693998685, "grad_norm": 0.4618014991283417, "learning_rate": 6.623497346023418e-05, "loss": 0.8818, "step": 85 }, { "epoch": 0.006397611558351549, "grad_norm": 0.6277055740356445, "learning_rate": 6.227427435703997e-05, "loss": 0.8484, "step": 90 }, { "epoch": 0.006753034422704413, "grad_norm": 0.5092412233352661, "learning_rate": 5.8229729514036705e-05, "loss": 0.7792, "step": 95 }, { "epoch": 0.007108457287057277, "grad_norm": 0.8425192832946777, "learning_rate": 5.4128967273616625e-05, "loss": 0.8068, "step": 100 }, { "epoch": 0.007108457287057277, "eval_loss": 0.9189969301223755, "eval_runtime": 203.1123, "eval_samples_per_second": 29.166, "eval_steps_per_second": 14.583, "step": 100 }, { "epoch": 0.0074638801514101405, "grad_norm": 0.5134663581848145, "learning_rate": 5e-05, "loss": 1.0384, "step": 105 }, { "epoch": 0.007819303015763003, "grad_norm": 0.35175126791000366, "learning_rate": 4.5871032726383386e-05, "loss": 0.9458, "step": 110 }, { "epoch": 0.008174725880115868, "grad_norm": 0.4536231756210327, "learning_rate": 4.17702704859633e-05, "loss": 1.077, "step": 115 }, { "epoch": 0.008530148744468731, "grad_norm": 0.41004303097724915, "learning_rate": 3.772572564296005e-05, "loss": 1.0068, "step": 120 }, { "epoch": 0.008885571608821596, "grad_norm": 0.47319185733795166, "learning_rate": 3.3765026539765834e-05, "loss": 0.9159, "step": 125 }, { "epoch": 0.009240994473174459, "grad_norm": 0.748140811920166, "learning_rate": 2.991522876735154e-05, "loss": 0.9558, "step": 130 }, { "epoch": 0.009596417337527323, "grad_norm": 0.7021692395210266, "learning_rate": 2.6202630348146324e-05, "loss": 0.9694, "step": 135 }, { "epoch": 0.009951840201880186, "grad_norm": 0.5735881328582764, "learning_rate": 2.2652592093878666e-05, "loss": 0.6431, "step": 140 }, { "epoch": 0.01030726306623305, "grad_norm": 0.7242112755775452, "learning_rate": 1.928936436551661e-05, "loss": 0.7265, "step": 145 }, { "epoch": 0.010662685930585914, "grad_norm": 1.036838412284851, "learning_rate": 1.6135921418712956e-05, "loss": 0.8044, "step": 150 }, { "epoch": 0.010662685930585914, "eval_loss": 0.8387618660926819, "eval_runtime": 203.5878, "eval_samples_per_second": 29.098, "eval_steps_per_second": 14.549, "step": 150 }, { "epoch": 0.011018108794938778, "grad_norm": 0.48848214745521545, "learning_rate": 1.3213804466343421e-05, "loss": 1.0428, "step": 155 }, { "epoch": 0.011373531659291643, "grad_norm": 0.38314196467399597, "learning_rate": 1.0542974530180327e-05, "loss": 0.8563, "step": 160 }, { "epoch": 0.011728954523644506, "grad_norm": 0.46951794624328613, "learning_rate": 8.141676086873572e-06, "loss": 1.0063, "step": 165 }, { "epoch": 0.01208437738799737, "grad_norm": 0.4432726502418518, "learning_rate": 6.026312439675552e-06, "loss": 0.9649, "step": 170 }, { "epoch": 0.012439800252350234, "grad_norm": 0.5523079037666321, "learning_rate": 4.2113336672471245e-06, "loss": 0.8765, "step": 175 }, { "epoch": 0.012795223116703098, "grad_norm": 0.8644940853118896, "learning_rate": 2.7091379149682685e-06, "loss": 0.7888, "step": 180 }, { "epoch": 0.013150645981055961, "grad_norm": 0.7664920091629028, "learning_rate": 1.5299867030334814e-06, "loss": 0.7794, "step": 185 }, { "epoch": 0.013506068845408826, "grad_norm": 0.6504991054534912, "learning_rate": 6.819348298638839e-07, "loss": 0.6936, "step": 190 }, { "epoch": 0.013861491709761689, "grad_norm": 0.7600428462028503, "learning_rate": 1.7077534966650766e-07, "loss": 0.6089, "step": 195 }, { "epoch": 0.014216914574114553, "grad_norm": 1.446298599243164, "learning_rate": 0.0, "loss": 0.7815, "step": 200 }, { "epoch": 0.014216914574114553, "eval_loss": 0.8283114433288574, "eval_runtime": 203.69, "eval_samples_per_second": 29.083, "eval_steps_per_second": 14.542, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.59228597633024e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }