{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09913258983890955, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004956629491945477, "grad_norm": 57.75039291381836, "learning_rate": 1e-05, "loss": 17.0424, "step": 1 }, { "epoch": 0.004956629491945477, "eval_loss": 1.0527280569076538, "eval_runtime": 74.2544, "eval_samples_per_second": 4.579, "eval_steps_per_second": 2.289, "step": 1 }, { "epoch": 0.009913258983890954, "grad_norm": 68.40726470947266, "learning_rate": 2e-05, "loss": 15.6664, "step": 2 }, { "epoch": 0.01486988847583643, "grad_norm": 69.10187530517578, "learning_rate": 3e-05, "loss": 15.8254, "step": 3 }, { "epoch": 0.01982651796778191, "grad_norm": 54.53337097167969, "learning_rate": 4e-05, "loss": 15.9839, "step": 4 }, { "epoch": 0.024783147459727387, "grad_norm": 51.78641891479492, "learning_rate": 5e-05, "loss": 15.6566, "step": 5 }, { "epoch": 0.024783147459727387, "eval_loss": 0.9015545845031738, "eval_runtime": 23.2714, "eval_samples_per_second": 14.61, "eval_steps_per_second": 7.305, "step": 5 }, { "epoch": 0.02973977695167286, "grad_norm": 44.10242462158203, "learning_rate": 6e-05, "loss": 14.3549, "step": 6 }, { "epoch": 0.03469640644361834, "grad_norm": 43.91465759277344, "learning_rate": 7e-05, "loss": 14.9588, "step": 7 }, { "epoch": 0.03965303593556382, "grad_norm": 40.00635528564453, "learning_rate": 8e-05, "loss": 14.169, "step": 8 }, { "epoch": 0.04460966542750929, "grad_norm": 28.840185165405273, "learning_rate": 9e-05, "loss": 12.7394, "step": 9 }, { "epoch": 0.04956629491945477, "grad_norm": 26.274221420288086, "learning_rate": 0.0001, "loss": 12.2512, "step": 10 }, { "epoch": 0.04956629491945477, "eval_loss": 0.8085873126983643, "eval_runtime": 23.137, "eval_samples_per_second": 14.695, "eval_steps_per_second": 7.348, "step": 10 }, { "epoch": 0.05452292441140025, "grad_norm": 27.2174072265625, "learning_rate": 9.755282581475769e-05, "loss": 12.8522, "step": 11 }, { "epoch": 0.05947955390334572, "grad_norm": 24.871192932128906, "learning_rate": 9.045084971874738e-05, "loss": 12.846, "step": 12 }, { "epoch": 0.0644361833952912, "grad_norm": 24.186389923095703, "learning_rate": 7.938926261462366e-05, "loss": 12.4232, "step": 13 }, { "epoch": 0.06939281288723669, "grad_norm": 24.49249839782715, "learning_rate": 6.545084971874738e-05, "loss": 12.1087, "step": 14 }, { "epoch": 0.07434944237918216, "grad_norm": 23.352947235107422, "learning_rate": 5e-05, "loss": 11.0825, "step": 15 }, { "epoch": 0.07434944237918216, "eval_loss": 0.7625203132629395, "eval_runtime": 23.206, "eval_samples_per_second": 14.651, "eval_steps_per_second": 7.326, "step": 15 }, { "epoch": 0.07930607187112763, "grad_norm": 22.63839340209961, "learning_rate": 3.4549150281252636e-05, "loss": 12.5134, "step": 16 }, { "epoch": 0.08426270136307311, "grad_norm": 22.474016189575195, "learning_rate": 2.061073738537635e-05, "loss": 11.1247, "step": 17 }, { "epoch": 0.08921933085501858, "grad_norm": 22.08704376220703, "learning_rate": 9.549150281252633e-06, "loss": 11.216, "step": 18 }, { "epoch": 0.09417596034696406, "grad_norm": 21.62132453918457, "learning_rate": 2.4471741852423237e-06, "loss": 12.0317, "step": 19 }, { "epoch": 0.09913258983890955, "grad_norm": 22.85521125793457, "learning_rate": 0.0, "loss": 12.7622, "step": 20 }, { "epoch": 0.09913258983890955, "eval_loss": 0.7505396008491516, "eval_runtime": 23.3853, "eval_samples_per_second": 14.539, 
"eval_steps_per_second": 7.27, "step": 20 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.402137439371264e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }