{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013888888888888888, "grad_norm": 2.69645357131958, "learning_rate": 2.5e-05, "loss": 6.4485, "step": 1 }, { "epoch": 0.06944444444444445, "grad_norm": 3.235274076461792, "learning_rate": 0.000125, "loss": 6.3994, "step": 5 }, { "epoch": 0.1388888888888889, "grad_norm": 3.4826488494873047, "learning_rate": 0.0001995184726672197, "loss": 5.6587, "step": 10 }, { "epoch": 0.20833333333333334, "grad_norm": 4.541069984436035, "learning_rate": 0.00019415440651830208, "loss": 4.6657, "step": 15 }, { "epoch": 0.2777777777777778, "grad_norm": 4.04740047454834, "learning_rate": 0.00018314696123025454, "loss": 3.7298, "step": 20 }, { "epoch": 0.3472222222222222, "grad_norm": 1.9904128313064575, "learning_rate": 0.00016715589548470185, "loss": 3.0269, "step": 25 }, { "epoch": 0.4166666666666667, "grad_norm": 1.2599273920059204, "learning_rate": 0.0001471396736825998, "loss": 2.541, "step": 30 }, { "epoch": 0.4861111111111111, "grad_norm": 0.848343014717102, "learning_rate": 0.0001242980179903264, "loss": 2.2349, "step": 35 }, { "epoch": 0.5555555555555556, "grad_norm": 0.7984316349029541, "learning_rate": 0.0001, "loss": 2.0682, "step": 40 }, { "epoch": 0.625, "grad_norm": 0.6735253930091858, "learning_rate": 7.570198200967362e-05, "loss": 1.9724, "step": 45 }, { "epoch": 0.6944444444444444, "grad_norm": 0.6062337160110474, "learning_rate": 5.286032631740023e-05, "loss": 1.9033, "step": 50 }, { "epoch": 0.7638888888888888, "grad_norm": 0.5750789046287537, "learning_rate": 3.2844104515298155e-05, "loss": 1.8667, "step": 55 }, { "epoch": 0.8333333333333334, "grad_norm": 0.6575501561164856, "learning_rate": 1.6853038769745467e-05, "loss": 1.8513, "step": 60 }, { "epoch": 0.9027777777777778, "grad_norm": 0.5935372710227966, "learning_rate": 5.8455934816979305e-06, "loss": 1.8311, "step": 65 }, { "epoch": 0.9722222222222222, "grad_norm": 0.527962327003479, "learning_rate": 4.815273327803182e-07, "loss": 1.8348, "step": 70 }, { "epoch": 1.0, "eval_loss": 2.020827293395996, "eval_runtime": 1.1461, "eval_samples_per_second": 3.49, "eval_steps_per_second": 0.873, "step": 72 }, { "epoch": 1.0, "step": 72, "total_flos": 7.280931693081395e+17, "train_loss": 2.938576486375597, "train_runtime": 504.3487, "train_samples_per_second": 36.431, "train_steps_per_second": 0.143 } ], "logging_steps": 5, "max_steps": 72, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.280931693081395e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }