{ "best_metric": 0.8413172364234924, "best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-200", "epoch": 1.3125512715340442, "eval_steps": 200, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21875854525567404, "grad_norm": 0.41011762619018555, "learning_rate": 0.00029856911617379416, "loss": 0.817, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.8413172364234924, "eval_runtime": 2018.9188, "eval_samples_per_second": 23.339, "eval_steps_per_second": 0.73, "step": 200 }, { "epoch": 0.4375170905113481, "grad_norm": 0.5003934502601624, "learning_rate": 0.0002896017519370078, "loss": 0.381, "step": 400 }, { "epoch": 0.4375170905113481, "eval_loss": 0.8805686831474304, "eval_runtime": 2017.6263, "eval_samples_per_second": 23.354, "eval_steps_per_second": 0.73, "step": 400 }, { "epoch": 0.6562756357670222, "grad_norm": 0.671746551990509, "learning_rate": 0.0002728756302319302, "loss": 0.3025, "step": 600 }, { "epoch": 0.6562756357670222, "eval_loss": 1.017870545387268, "eval_runtime": 2018.3325, "eval_samples_per_second": 23.346, "eval_steps_per_second": 0.73, "step": 600 }, { "epoch": 0.8750341810226961, "grad_norm": 0.7479655146598816, "learning_rate": 0.00024932035201194605, "loss": 0.1879, "step": 800 }, { "epoch": 0.8750341810226961, "eval_loss": 1.1555241346359253, "eval_runtime": 2018.658, "eval_samples_per_second": 23.342, "eval_steps_per_second": 0.73, "step": 800 }, { "epoch": 1.0937927262783702, "grad_norm": 0.6166426539421082, "learning_rate": 0.00022024506768721243, "loss": 0.0744, "step": 1000 }, { "epoch": 1.0937927262783702, "eval_loss": 1.2785167694091797, "eval_runtime": 2018.1701, "eval_samples_per_second": 23.348, "eval_steps_per_second": 0.73, "step": 1000 }, { "epoch": 1.3125512715340442, "grad_norm": 0.3912193179130554, "learning_rate": 0.0001872657174323126, "loss": 0.0231, "step": 1200 }, { "epoch": 1.3125512715340442, "eval_loss": 1.336869478225708, "eval_runtime": 2018.0932, "eval_samples_per_second": 23.349, "eval_steps_per_second": 0.73, "step": 1200 }, { "epoch": 1.3125512715340442, "step": 1200, "total_flos": 1.7087206139898102e+18, "train_loss": 0.2976297422250112, "train_runtime": 65664.4367, "train_samples_per_second": 10.693, "train_steps_per_second": 0.042 } ], "logging_steps": 200, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7087206139898102e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }