{ "best_metric": 1.0858668088912964, "best_model_checkpoint": "/home/khalid/Documents/github_rep/bigscience/data/processed/50/bloom-560m_my_bitfit_100000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 0.39631741854686253, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9e-05, "loss": 1.3263, "step": 2500 }, { "epoch": 0.08, "learning_rate": 8e-05, "loss": 1.1812, "step": 5000 }, { "epoch": 0.08, "eval_loss": 1.1644270420074463, "eval_runtime": 2656.1309, "eval_samples_per_second": 9.443, "eval_steps_per_second": 4.722, "step": 5000 }, { "epoch": 0.12, "learning_rate": 7e-05, "loss": 1.1436, "step": 7500 }, { "epoch": 0.16, "learning_rate": 6e-05, "loss": 1.1212, "step": 10000 }, { "epoch": 0.16, "eval_loss": 1.1189237833023071, "eval_runtime": 2658.6123, "eval_samples_per_second": 9.434, "eval_steps_per_second": 4.717, "step": 10000 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 1.1092, "step": 12500 }, { "epoch": 0.24, "learning_rate": 4e-05, "loss": 1.0977, "step": 15000 }, { "epoch": 0.24, "eval_loss": 1.0991963148117065, "eval_runtime": 2656.572, "eval_samples_per_second": 9.441, "eval_steps_per_second": 4.721, "step": 15000 }, { "epoch": 0.28, "learning_rate": 3e-05, "loss": 1.0908, "step": 17500 }, { "epoch": 0.32, "learning_rate": 2e-05, "loss": 1.0856, "step": 20000 }, { "epoch": 0.32, "eval_loss": 1.0892250537872314, "eval_runtime": 2657.3128, "eval_samples_per_second": 9.438, "eval_steps_per_second": 4.719, "step": 20000 }, { "epoch": 0.36, "learning_rate": 1e-05, "loss": 1.0825, "step": 22500 }, { "epoch": 0.4, "learning_rate": 0.0, "loss": 1.08, "step": 25000 }, { "epoch": 0.4, "eval_loss": 1.0858668088912964, "eval_runtime": 2656.2605, "eval_samples_per_second": 9.442, "eval_steps_per_second": 4.721, "step": 25000 }, { "epoch": 0.4, "step": 25000, "total_flos": 3.714827943936e+17, "train_loss": 1.1317986328125, "train_runtime": 62833.7719, "train_samples_per_second": 3.183, "train_steps_per_second": 0.398 } ], "max_steps": 25000, "num_train_epochs": 1, "total_flos": 3.714827943936e+17, "trial_name": null, "trial_params": null }