{ "best_metric": 1.016390323638916, "best_model_checkpoint": "/home/ankur/projects/llm_test/Akshay_Work/mistral_12B_snippets_v1/checkpoint-5149", "epoch": 6.998640512720916, "eval_steps": 3000, "global_step": 12012, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9998057875315596, "eval_loss": 1.1702903509140015, "eval_runtime": 1084.2637, "eval_samples_per_second": 2.364, "eval_steps_per_second": 2.364, "step": 1716 }, { "epoch": 1.747912215964265, "grad_norm": 0.8725250363349915, "learning_rate": 0.00015795285251073526, "loss": 1.1644, "step": 3000 }, { "epoch": 1.9996115750631192, "eval_loss": 1.0563772916793823, "eval_runtime": 1084.6886, "eval_samples_per_second": 2.363, "eval_steps_per_second": 2.363, "step": 3432 }, { "epoch": 3.0, "eval_loss": 1.016390323638916, "eval_runtime": 1085.9027, "eval_samples_per_second": 2.36, "eval_steps_per_second": 2.36, "step": 5149 }, { "epoch": 3.4958244319285297, "grad_norm": 0.9269574880599976, "learning_rate": 0.00010540706335991588, "loss": 0.6414, "step": 6000 }, { "epoch": 3.9998057875315594, "eval_loss": 1.0506994724273682, "eval_runtime": 1098.0815, "eval_samples_per_second": 2.334, "eval_steps_per_second": 2.334, "step": 6865 }, { "epoch": 4.999611575063119, "eval_loss": 1.1407289505004883, "eval_runtime": 1094.1268, "eval_samples_per_second": 2.343, "eval_steps_per_second": 2.343, "step": 8581 }, { "epoch": 5.243736647892795, "grad_norm": 1.0000396966934204, "learning_rate": 5.2843747261414424e-05, "loss": 0.3137, "step": 9000 }, { "epoch": 6.0, "eval_loss": 1.2437316179275513, "eval_runtime": 1079.3569, "eval_samples_per_second": 2.375, "eval_steps_per_second": 2.375, "step": 10298 }, { "epoch": 6.991648863857059, "grad_norm": 0.6523720026016235, "learning_rate": 2.804311629129787e-07, "loss": 0.1744, "step": 12000 }, { "epoch": 6.998640512720916, "eval_loss": 1.377004623413086, "eval_runtime": 1079.4922, "eval_samples_per_second": 2.374, "eval_steps_per_second": 2.374, "step": 12012 } ], "logging_steps": 3000, "max_steps": 12012, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4877646120458547e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }