{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.026928471248246846, "eval_steps": 6, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011220196353436186, "grad_norm": 0.6813791394233704, "learning_rate": 2e-05, "loss": 2.1705, "step": 1 }, { "epoch": 0.0011220196353436186, "eval_loss": 2.0811240673065186, "eval_runtime": 12.7097, "eval_samples_per_second": 29.584, "eval_steps_per_second": 14.792, "step": 1 }, { "epoch": 0.002244039270687237, "grad_norm": 0.4890865683555603, "learning_rate": 4e-05, "loss": 1.9746, "step": 2 }, { "epoch": 0.0033660589060308557, "grad_norm": 0.8481997847557068, "learning_rate": 6e-05, "loss": 2.2087, "step": 3 }, { "epoch": 0.004488078541374474, "grad_norm": 0.40311843156814575, "learning_rate": 8e-05, "loss": 2.0627, "step": 4 }, { "epoch": 0.005610098176718092, "grad_norm": 0.5440235733985901, "learning_rate": 0.0001, "loss": 2.1454, "step": 5 }, { "epoch": 0.006732117812061711, "grad_norm": 0.3888591229915619, "learning_rate": 0.00012, "loss": 2.366, "step": 6 }, { "epoch": 0.006732117812061711, "eval_loss": 2.0609025955200195, "eval_runtime": 12.7241, "eval_samples_per_second": 29.55, "eval_steps_per_second": 14.775, "step": 6 }, { "epoch": 0.00785413744740533, "grad_norm": 0.6480532288551331, "learning_rate": 0.00014, "loss": 2.5427, "step": 7 }, { "epoch": 0.008976157082748949, "grad_norm": 0.6572309136390686, "learning_rate": 0.00016, "loss": 2.3367, "step": 8 }, { "epoch": 0.010098176718092567, "grad_norm": 0.4551832973957062, "learning_rate": 0.00018, "loss": 1.9394, "step": 9 }, { "epoch": 0.011220196353436185, "grad_norm": 0.6103149652481079, "learning_rate": 0.0002, "loss": 2.0257, "step": 10 }, { "epoch": 0.012342215988779803, "grad_norm": 0.7382659912109375, "learning_rate": 0.00019749279121818235, "loss": 1.862, "step": 11 }, { "epoch": 0.013464235624123423, "grad_norm": 0.4550475478172302, "learning_rate": 0.0001900968867902419, "loss": 2.2914, "step": 12 }, { "epoch": 0.013464235624123423, "eval_loss": 1.8999028205871582, "eval_runtime": 13.0197, "eval_samples_per_second": 28.879, "eval_steps_per_second": 14.44, "step": 12 }, { "epoch": 0.014586255259467041, "grad_norm": 0.44409334659576416, "learning_rate": 0.000178183148246803, "loss": 1.6056, "step": 13 }, { "epoch": 0.01570827489481066, "grad_norm": 0.7820721864700317, "learning_rate": 0.00016234898018587337, "loss": 1.696, "step": 14 }, { "epoch": 0.016830294530154277, "grad_norm": 0.9811131358146667, "learning_rate": 0.00014338837391175582, "loss": 1.5621, "step": 15 }, { "epoch": 0.017952314165497897, "grad_norm": 0.8004341721534729, "learning_rate": 0.00012225209339563145, "loss": 1.8898, "step": 16 }, { "epoch": 0.019074333800841514, "grad_norm": 0.6704489588737488, "learning_rate": 0.0001, "loss": 1.8404, "step": 17 }, { "epoch": 0.020196353436185133, "grad_norm": 0.6039404273033142, "learning_rate": 7.774790660436858e-05, "loss": 1.8087, "step": 18 }, { "epoch": 0.020196353436185133, "eval_loss": 1.8163129091262817, "eval_runtime": 12.9895, "eval_samples_per_second": 28.946, "eval_steps_per_second": 14.473, "step": 18 }, { "epoch": 0.021318373071528753, "grad_norm": 0.5095430016517639, "learning_rate": 5.6611626088244194e-05, "loss": 1.6128, "step": 19 }, { "epoch": 0.02244039270687237, "grad_norm": 0.6485509872436523, "learning_rate": 3.7651019814126654e-05, "loss": 1.885, "step": 20 }, { "epoch": 0.02356241234221599, "grad_norm": 0.6329189538955688, "learning_rate": 2.181685175319702e-05, "loss": 1.9701, "step": 21 }, { "epoch": 0.024684431977559606, "grad_norm": 0.5708733201026917, "learning_rate": 9.903113209758096e-06, "loss": 2.4112, "step": 22 }, { "epoch": 0.025806451612903226, "grad_norm": 0.7266702055931091, "learning_rate": 2.5072087818176382e-06, "loss": 1.5506, "step": 23 }, { "epoch": 0.026928471248246846, "grad_norm": 0.7287759780883789, "learning_rate": 0.0, "loss": 2.0879, "step": 24 }, { "epoch": 0.026928471248246846, "eval_loss": 1.8048760890960693, "eval_runtime": 12.9656, "eval_samples_per_second": 29.0, "eval_steps_per_second": 14.5, "step": 24 } ], "logging_steps": 1, "max_steps": 24, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1679736258428928.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }