{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16129032258064516, "grad_norm": 1.0336285829544067, "learning_rate": 7.692307692307693e-05, "loss": 1.4909, "step": 10 }, { "epoch": 0.3225806451612903, "grad_norm": 0.3877207636833191, "learning_rate": 9.902193239806635e-05, "loss": 1.2682, "step": 20 }, { "epoch": 0.4838709677419355, "grad_norm": 0.36371299624443054, "learning_rate": 9.432328436130493e-05, "loss": 1.1908, "step": 30 }, { "epoch": 0.6451612903225806, "grad_norm": 0.27169424295425415, "learning_rate": 8.609780469772623e-05, "loss": 1.1151, "step": 40 }, { "epoch": 0.8064516129032258, "grad_norm": 0.32646557688713074, "learning_rate": 7.500000000000001e-05, "loss": 1.1586, "step": 50 }, { "epoch": 0.967741935483871, "grad_norm": 0.28673896193504333, "learning_rate": 6.191292957115825e-05, "loss": 1.1778, "step": 60 }, { "epoch": 1.129032258064516, "grad_norm": 0.36622354388237, "learning_rate": 4.78779398401926e-05, "loss": 1.1185, "step": 70 }, { "epoch": 1.2903225806451613, "grad_norm": 0.6359225511550903, "learning_rate": 3.401180377143774e-05, "loss": 1.1429, "step": 80 }, { "epoch": 1.4516129032258065, "grad_norm": 0.3551563024520874, "learning_rate": 2.141785853707607e-05, "loss": 1.0632, "step": 90 }, { "epoch": 1.6129032258064515, "grad_norm": 0.38739389181137085, "learning_rate": 1.1098212284078036e-05, "loss": 1.0262, "step": 100 }, { "epoch": 1.7741935483870968, "grad_norm": 0.685509443283081, "learning_rate": 3.87400575837657e-06, "loss": 1.0784, "step": 110 }, { "epoch": 1.935483870967742, "grad_norm": 0.3741980791091919, "learning_rate": 3.2007361901485455e-07, "loss": 1.0823, "step": 120 }, { "epoch": 2.0, "step": 124, "total_flos": 3.3413325815545856e+16, "train_loss": 1.1585943698883057, "train_runtime": 205.3538, "train_samples_per_second": 9.554, "train_steps_per_second": 0.604 } ], "logging_steps": 10, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.3413325815545856e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }