{ "best_metric": 1.251373291015625, "best_model_checkpoint": "./saved_checkpoints/ethical/mistral/checkpoint-50", "epoch": 3.000749962501875, "eval_steps": 50, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "eval_loss": 1.251373291015625, "eval_runtime": 4559.2323, "eval_samples_per_second": 2.193, "eval_steps_per_second": 0.366, "step": 50 }, { "epoch": 0.12, "learning_rate": 9.842414742769675e-06, "loss": 1.2609, "step": 100 }, { "epoch": 0.12, "eval_loss": 1.2726035118103027, "eval_runtime": 4449.7033, "eval_samples_per_second": 2.247, "eval_steps_per_second": 0.375, "step": 100 }, { "epoch": 0.18, "eval_loss": 1.2832562923431396, "eval_runtime": 4453.6157, "eval_samples_per_second": 2.245, "eval_steps_per_second": 0.374, "step": 150 }, { "epoch": 0.24, "learning_rate": 1e-05, "loss": 1.2778, "step": 200 }, { "epoch": 0.24, "eval_loss": 1.2775572538375854, "eval_runtime": 4472.9332, "eval_samples_per_second": 2.236, "eval_steps_per_second": 0.373, "step": 200 }, { "epoch": 0.3, "eval_loss": 1.271767258644104, "eval_runtime": 4524.5689, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.368, "step": 250 }, { "epoch": 0.36, "learning_rate": 1e-05, "loss": 1.2742, "step": 300 }, { "epoch": 0.36, "eval_loss": 1.2736577987670898, "eval_runtime": 4552.8861, "eval_samples_per_second": 2.196, "eval_steps_per_second": 0.366, "step": 300 }, { "epoch": 0.42, "eval_loss": 1.2683041095733643, "eval_runtime": 4481.4789, "eval_samples_per_second": 2.231, "eval_steps_per_second": 0.372, "step": 350 }, { "epoch": 0.48, "learning_rate": 1e-05, "loss": 1.2707, "step": 400 }, { "epoch": 0.48, "eval_loss": 1.2667808532714844, "eval_runtime": 4512.9429, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.369, "step": 400 }, { "epoch": 0.54, "eval_loss": 1.2664167881011963, "eval_runtime": 4503.4031, "eval_samples_per_second": 2.221, "eval_steps_per_second": 0.37, "step": 450 }, { "epoch": 0.6, "learning_rate": 1e-05, "loss": 1.2633, "step": 500 }, { "epoch": 0.6, "eval_loss": 1.2625447511672974, "eval_runtime": 4523.2077, "eval_samples_per_second": 2.211, "eval_steps_per_second": 0.369, "step": 500 }, { "epoch": 0.66, "eval_loss": 1.2571557760238647, "eval_runtime": 4492.8054, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.371, "step": 550 }, { "epoch": 0.72, "learning_rate": 1e-05, "loss": 1.2605, "step": 600 }, { "epoch": 0.72, "eval_loss": 1.2611154317855835, "eval_runtime": 4465.1303, "eval_samples_per_second": 2.24, "eval_steps_per_second": 0.373, "step": 600 }, { "epoch": 0.78, "eval_loss": 1.261015772819519, "eval_runtime": 4573.2523, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.365, "step": 650 }, { "epoch": 0.84, "learning_rate": 1e-05, "loss": 1.2533, "step": 700 }, { "epoch": 0.84, "eval_loss": 1.2556573152542114, "eval_runtime": 4450.2442, "eval_samples_per_second": 2.247, "eval_steps_per_second": 0.375, "step": 700 }, { "epoch": 0.9, "eval_loss": 1.2533992528915405, "eval_runtime": 4528.9739, "eval_samples_per_second": 2.208, "eval_steps_per_second": 0.368, "step": 750 }, { "epoch": 0.96, "learning_rate": 1e-05, "loss": 1.2519, "step": 800 }, { "epoch": 0.96, "eval_loss": 1.2552576065063477, "eval_runtime": 4553.5444, "eval_samples_per_second": 2.196, "eval_steps_per_second": 0.366, "step": 800 }, { "epoch": 1.02, "eval_loss": 1.3253833055496216, "eval_runtime": 4454.7877, "eval_samples_per_second": 2.245, "eval_steps_per_second": 0.374, "step": 850 }, { "epoch": 1.08, "learning_rate": 1e-05, "loss": 0.7228, "step": 900 }, { "epoch": 1.08, "eval_loss": 1.3570994138717651, "eval_runtime": 4515.2615, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.369, "step": 900 }, { "epoch": 1.14, "eval_loss": 1.3670397996902466, "eval_runtime": 4433.2773, "eval_samples_per_second": 2.256, "eval_steps_per_second": 0.376, "step": 950 }, { "epoch": 1.2, "learning_rate": 1e-05, "loss": 0.7433, "step": 1000 }, { "epoch": 1.2, "eval_loss": 1.3700823783874512, "eval_runtime": 4573.0757, "eval_samples_per_second": 2.187, "eval_steps_per_second": 0.365, "step": 1000 }, { "epoch": 1.26, "eval_loss": 1.36603844165802, "eval_runtime": 4525.6502, "eval_samples_per_second": 2.21, "eval_steps_per_second": 0.368, "step": 1050 }, { "epoch": 1.32, "learning_rate": 1e-05, "loss": 0.758, "step": 1100 }, { "epoch": 1.32, "eval_loss": 1.3708640336990356, "eval_runtime": 4539.0904, "eval_samples_per_second": 2.203, "eval_steps_per_second": 0.367, "step": 1100 }, { "epoch": 1.38, "eval_loss": 1.3683034181594849, "eval_runtime": 4412.3439, "eval_samples_per_second": 2.266, "eval_steps_per_second": 0.378, "step": 1150 }, { "epoch": 1.44, "learning_rate": 1e-05, "loss": 0.7668, "step": 1200 }, { "epoch": 1.44, "eval_loss": 1.3628712892532349, "eval_runtime": 4468.3564, "eval_samples_per_second": 2.238, "eval_steps_per_second": 0.373, "step": 1200 }, { "epoch": 1.5, "eval_loss": 1.371540904045105, "eval_runtime": 4466.1065, "eval_samples_per_second": 2.239, "eval_steps_per_second": 0.373, "step": 1250 }, { "epoch": 1.56, "learning_rate": 1e-05, "loss": 0.7754, "step": 1300 }, { "epoch": 1.56, "eval_loss": 1.37712562084198, "eval_runtime": 4422.9951, "eval_samples_per_second": 2.261, "eval_steps_per_second": 0.377, "step": 1300 }, { "epoch": 1.62, "eval_loss": 1.3581366539001465, "eval_runtime": 4440.3874, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.375, "step": 1350 }, { "epoch": 1.68, "learning_rate": 1e-05, "loss": 0.7827, "step": 1400 }, { "epoch": 1.68, "eval_loss": 1.3591225147247314, "eval_runtime": 4566.96, "eval_samples_per_second": 2.19, "eval_steps_per_second": 0.365, "step": 1400 }, { "epoch": 1.74, "eval_loss": 1.3655798435211182, "eval_runtime": 4518.9623, "eval_samples_per_second": 2.213, "eval_steps_per_second": 0.369, "step": 1450 }, { "epoch": 1.8, "learning_rate": 1e-05, "loss": 0.7928, "step": 1500 }, { "epoch": 1.8, "eval_loss": 1.3691887855529785, "eval_runtime": 4550.0865, "eval_samples_per_second": 2.198, "eval_steps_per_second": 0.366, "step": 1500 }, { "epoch": 1.86, "eval_loss": 1.3695429563522339, "eval_runtime": 4559.5122, "eval_samples_per_second": 2.193, "eval_steps_per_second": 0.366, "step": 1550 }, { "epoch": 1.92, "learning_rate": 1e-05, "loss": 0.7998, "step": 1600 }, { "epoch": 1.92, "eval_loss": 1.3639189004898071, "eval_runtime": 4389.9035, "eval_samples_per_second": 2.278, "eval_steps_per_second": 0.38, "step": 1600 }, { "epoch": 1.98, "eval_loss": 1.3616678714752197, "eval_runtime": 4562.3432, "eval_samples_per_second": 2.192, "eval_steps_per_second": 0.365, "step": 1650 }, { "epoch": 2.04, "learning_rate": 1e-05, "loss": 0.5729, "step": 1700 }, { "epoch": 2.04, "eval_loss": 1.5398352146148682, "eval_runtime": 4441.3682, "eval_samples_per_second": 2.252, "eval_steps_per_second": 0.375, "step": 1700 }, { "epoch": 2.1, "eval_loss": 1.562680721282959, "eval_runtime": 4492.8359, "eval_samples_per_second": 2.226, "eval_steps_per_second": 0.371, "step": 1750 }, { "epoch": 2.16, "learning_rate": 1e-05, "loss": 0.4759, "step": 1800 }, { "epoch": 2.16, "eval_loss": 1.5819048881530762, "eval_runtime": 4460.5449, "eval_samples_per_second": 2.242, "eval_steps_per_second": 0.374, "step": 1800 }, { "epoch": 2.22, "eval_loss": 1.5582789182662964, "eval_runtime": 4553.8843, "eval_samples_per_second": 2.196, "eval_steps_per_second": 0.366, "step": 1850 }, { "epoch": 2.28, "learning_rate": 1e-05, "loss": 0.4857, "step": 1900 }, { "epoch": 2.28, "eval_loss": 1.54148530960083, "eval_runtime": 4406.6155, "eval_samples_per_second": 2.269, "eval_steps_per_second": 0.378, "step": 1900 }, { "epoch": 2.34, "eval_loss": 1.564630150794983, "eval_runtime": 4557.1018, "eval_samples_per_second": 2.194, "eval_steps_per_second": 0.366, "step": 1950 }, { "epoch": 2.4, "learning_rate": 1e-05, "loss": 0.4921, "step": 2000 }, { "epoch": 2.4, "eval_loss": 1.5717904567718506, "eval_runtime": 4430.1088, "eval_samples_per_second": 2.257, "eval_steps_per_second": 0.376, "step": 2000 }, { "epoch": 2.46, "eval_loss": 1.5744233131408691, "eval_runtime": 4555.2722, "eval_samples_per_second": 2.195, "eval_steps_per_second": 0.366, "step": 2050 }, { "epoch": 2.52, "learning_rate": 1e-05, "loss": 0.5034, "step": 2100 }, { "epoch": 2.52, "eval_loss": 1.5743005275726318, "eval_runtime": 4515.3853, "eval_samples_per_second": 2.215, "eval_steps_per_second": 0.369, "step": 2100 }, { "epoch": 2.58, "eval_loss": 1.5678597688674927, "eval_runtime": 4504.4169, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.37, "step": 2150 }, { "epoch": 2.64, "learning_rate": 1e-05, "loss": 0.5071, "step": 2200 }, { "epoch": 2.64, "eval_loss": 1.5610437393188477, "eval_runtime": 4513.3469, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.369, "step": 2200 }, { "epoch": 2.7, "eval_loss": 1.544044852256775, "eval_runtime": 4583.4964, "eval_samples_per_second": 2.182, "eval_steps_per_second": 0.364, "step": 2250 }, { "epoch": 2.76, "learning_rate": 1e-05, "loss": 0.5117, "step": 2300 }, { "epoch": 2.76, "eval_loss": 1.570798397064209, "eval_runtime": 4561.3159, "eval_samples_per_second": 2.192, "eval_steps_per_second": 0.365, "step": 2300 }, { "epoch": 2.82, "eval_loss": 1.5632375478744507, "eval_runtime": 4405.4774, "eval_samples_per_second": 2.27, "eval_steps_per_second": 0.378, "step": 2350 }, { "epoch": 2.88, "learning_rate": 1e-05, "loss": 0.5191, "step": 2400 }, { "epoch": 2.88, "eval_loss": 1.5500311851501465, "eval_runtime": 4512.5389, "eval_samples_per_second": 2.216, "eval_steps_per_second": 0.369, "step": 2400 }, { "epoch": 2.94, "eval_loss": 1.571359395980835, "eval_runtime": 4476.4923, "eval_samples_per_second": 2.234, "eval_steps_per_second": 0.372, "step": 2450 }, { "epoch": 3.0, "learning_rate": 1e-05, "loss": 0.52, "step": 2500 }, { "epoch": 3.0, "eval_loss": 1.581729531288147, "eval_runtime": 4430.015, "eval_samples_per_second": 2.257, "eval_steps_per_second": 0.376, "step": 2500 } ], "logging_steps": 100, "max_steps": 1000000, "num_input_tokens_seen": 0, "num_train_epochs": 1201, "save_steps": 50, "total_flos": 27154959237120.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }