{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 945, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.0049484120681881905, "eval_runtime": 86.2189, "eval_samples_per_second": 11.598, "eval_steps_per_second": 0.731, "step": 63 }, { "epoch": 2.0, "eval_loss": 0.0015708731953054667, "eval_runtime": 82.6736, "eval_samples_per_second": 12.096, "eval_steps_per_second": 0.762, "step": 126 }, { "epoch": 3.0, "eval_loss": 0.0008609762298874557, "eval_runtime": 86.3582, "eval_samples_per_second": 11.58, "eval_steps_per_second": 0.73, "step": 189 }, { "epoch": 4.0, "eval_loss": 0.0005577785195782781, "eval_runtime": 83.2641, "eval_samples_per_second": 12.01, "eval_steps_per_second": 0.757, "step": 252 }, { "epoch": 5.0, "eval_loss": 0.0003976120497100055, "eval_runtime": 120.3456, "eval_samples_per_second": 8.309, "eval_steps_per_second": 0.523, "step": 315 }, { "epoch": 6.0, "eval_loss": 0.00029855340835638344, "eval_runtime": 1114.9335, "eval_samples_per_second": 0.897, "eval_steps_per_second": 0.057, "step": 378 }, { "epoch": 7.0, "eval_loss": 0.00023258940200321376, "eval_runtime": 1077.4431, "eval_samples_per_second": 0.928, "eval_steps_per_second": 0.058, "step": 441 }, { "epoch": 7.936507936507937, "grad_norm": 0.00615890184417367, "learning_rate": 9.417989417989418e-06, "loss": 0.0229, "step": 500 }, { "epoch": 8.0, "eval_loss": 0.0001905104873003438, "eval_runtime": 92.7233, "eval_samples_per_second": 10.785, "eval_steps_per_second": 0.679, "step": 504 }, { "epoch": 9.0, "eval_loss": 0.00016125467664096504, "eval_runtime": 157.6217, "eval_samples_per_second": 6.344, "eval_steps_per_second": 0.4, "step": 567 }, { "epoch": 10.0, "eval_loss": 0.00014133936201687902, "eval_runtime": 253.9397, "eval_samples_per_second": 3.938, "eval_steps_per_second": 0.248, "step": 630 }, { "epoch": 11.0, "eval_loss": 0.00012699734361376613, "eval_runtime": 290.6194, "eval_samples_per_second": 3.441, "eval_steps_per_second": 0.217, "step": 693 }, { "epoch": 12.0, "eval_loss": 0.00011713778803823516, "eval_runtime": 89.5033, "eval_samples_per_second": 11.173, "eval_steps_per_second": 0.704, "step": 756 }, { "epoch": 13.0, "eval_loss": 0.00011058501695515588, "eval_runtime": 81.9569, "eval_samples_per_second": 12.202, "eval_steps_per_second": 0.769, "step": 819 }, { "epoch": 14.0, "eval_loss": 0.00010690685303416103, "eval_runtime": 82.071, "eval_samples_per_second": 12.185, "eval_steps_per_second": 0.768, "step": 882 } ], "logging_steps": 500, "max_steps": 945, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1987010979840000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }