{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.474576271186441, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 9.994067796610171e-06, "loss": 2.3903, "step": 10 }, { "epoch": 0.17, "learning_rate": 9.986440677966102e-06, "loss": 1.2215, "step": 20 }, { "epoch": 0.25, "learning_rate": 9.977966101694917e-06, "loss": 1.1136, "step": 30 }, { "epoch": 0.34, "learning_rate": 9.96949152542373e-06, "loss": 1.1659, "step": 40 }, { "epoch": 0.42, "learning_rate": 9.961016949152543e-06, "loss": 0.9895, "step": 50 }, { "epoch": 0.51, "learning_rate": 9.953389830508475e-06, "loss": 1.2496, "step": 60 }, { "epoch": 0.59, "learning_rate": 9.944915254237288e-06, "loss": 1.058, "step": 70 }, { "epoch": 0.68, "learning_rate": 9.936440677966102e-06, "loss": 0.8977, "step": 80 }, { "epoch": 0.76, "learning_rate": 9.927966101694915e-06, "loss": 1.0084, "step": 90 }, { "epoch": 0.85, "learning_rate": 9.91949152542373e-06, "loss": 1.1805, "step": 100 }, { "epoch": 0.93, "learning_rate": 9.911016949152543e-06, "loss": 0.971, "step": 110 }, { "epoch": 1.02, "learning_rate": 9.902542372881356e-06, "loss": 0.8166, "step": 120 }, { "epoch": 1.1, "learning_rate": 9.89406779661017e-06, "loss": 0.713, "step": 130 }, { "epoch": 1.19, "learning_rate": 9.885593220338984e-06, "loss": 0.7842, "step": 140 }, { "epoch": 1.27, "learning_rate": 9.877118644067798e-06, "loss": 0.6229, "step": 150 }, { "epoch": 1.36, "learning_rate": 9.86949152542373e-06, "loss": 0.9259, "step": 160 }, { "epoch": 1.44, "learning_rate": 9.861016949152544e-06, "loss": 0.8035, "step": 170 }, { "epoch": 1.53, "learning_rate": 9.852542372881356e-06, "loss": 1.7852, "step": 180 }, { "epoch": 1.61, "learning_rate": 9.844067796610171e-06, "loss": 0.79, "step": 190 }, { "epoch": 1.69, "learning_rate": 9.835593220338984e-06, "loss": 0.8361, "step": 200 }, { "epoch": 1.78, "learning_rate": 9.827118644067797e-06, "loss": 0.789, "step": 210 }, { "epoch": 1.86, "learning_rate": 9.818644067796612e-06, "loss": 0.9851, "step": 220 }, { "epoch": 1.95, "learning_rate": 9.810169491525425e-06, "loss": 0.7422, "step": 230 }, { "epoch": 2.03, "learning_rate": 9.801694915254238e-06, "loss": 0.7643, "step": 240 }, { "epoch": 2.12, "learning_rate": 9.79322033898305e-06, "loss": 0.6639, "step": 250 }, { "epoch": 2.2, "learning_rate": 9.784745762711865e-06, "loss": 0.5698, "step": 260 }, { "epoch": 2.29, "learning_rate": 9.776271186440678e-06, "loss": 0.5661, "step": 270 }, { "epoch": 2.37, "learning_rate": 9.767796610169491e-06, "loss": 0.8881, "step": 280 }, { "epoch": 2.46, "learning_rate": 9.759322033898306e-06, "loss": 0.5563, "step": 290 }, { "epoch": 2.54, "learning_rate": 9.750847457627119e-06, "loss": 0.7705, "step": 300 }, { "epoch": 2.63, "learning_rate": 9.742372881355932e-06, "loss": 0.5988, "step": 310 }, { "epoch": 2.71, "learning_rate": 9.733898305084747e-06, "loss": 0.7516, "step": 320 }, { "epoch": 2.8, "learning_rate": 9.72542372881356e-06, "loss": 1.0188, "step": 330 }, { "epoch": 2.88, "learning_rate": 9.717796610169492e-06, "loss": 1.6122, "step": 340 }, { "epoch": 2.97, "learning_rate": 9.709322033898307e-06, "loss": 1.5343, "step": 350 }, { "epoch": 3.05, "learning_rate": 9.70084745762712e-06, "loss": 0.8529, "step": 360 }, { "epoch": 3.14, "learning_rate": 9.692372881355932e-06, "loss": 0.7866, "step": 370 }, { "epoch": 3.22, "learning_rate": 9.683898305084747e-06, "loss": 0.8368, "step": 380 }, { "epoch": 3.31, "learning_rate": 9.67542372881356e-06, "loss": 0.6965, "step": 390 }, { "epoch": 3.39, "learning_rate": 9.666949152542375e-06, "loss": 0.9716, "step": 400 }, { "epoch": 3.47, "learning_rate": 9.659322033898307e-06, "loss": 0.8647, "step": 410 }, { "epoch": 3.56, "learning_rate": 9.65084745762712e-06, "loss": 0.7379, "step": 420 }, { "epoch": 3.64, "learning_rate": 9.642372881355933e-06, "loss": 0.5591, "step": 430 }, { "epoch": 3.73, "learning_rate": 9.633898305084746e-06, "loss": 0.5949, "step": 440 }, { "epoch": 3.81, "learning_rate": 9.62542372881356e-06, "loss": 0.6818, "step": 450 }, { "epoch": 3.9, "learning_rate": 9.616949152542374e-06, "loss": 0.607, "step": 460 }, { "epoch": 3.98, "learning_rate": 9.608474576271187e-06, "loss": 0.6291, "step": 470 }, { "epoch": 4.07, "learning_rate": 9.600000000000001e-06, "loss": 0.3769, "step": 480 }, { "epoch": 4.15, "learning_rate": 9.591525423728814e-06, "loss": 0.5266, "step": 490 }, { "epoch": 4.24, "learning_rate": 9.583050847457627e-06, "loss": 0.6024, "step": 500 }, { "epoch": 4.32, "learning_rate": 9.57457627118644e-06, "loss": 0.5602, "step": 510 }, { "epoch": 4.41, "learning_rate": 9.566101694915255e-06, "loss": 0.6886, "step": 520 }, { "epoch": 4.49, "learning_rate": 9.557627118644068e-06, "loss": 0.6933, "step": 530 }, { "epoch": 4.58, "learning_rate": 9.549152542372883e-06, "loss": 0.4223, "step": 540 }, { "epoch": 4.66, "learning_rate": 9.540677966101696e-06, "loss": 0.4604, "step": 550 }, { "epoch": 4.75, "learning_rate": 9.532203389830508e-06, "loss": 0.3966, "step": 560 }, { "epoch": 4.83, "learning_rate": 9.523728813559323e-06, "loss": 0.4864, "step": 570 }, { "epoch": 4.92, "learning_rate": 9.515254237288136e-06, "loss": 0.6848, "step": 580 }, { "epoch": 5.0, "learning_rate": 9.506779661016949e-06, "loss": 0.488, "step": 590 }, { "epoch": 5.08, "learning_rate": 9.498305084745764e-06, "loss": 0.5127, "step": 600 }, { "epoch": 5.17, "learning_rate": 9.489830508474577e-06, "loss": 0.4368, "step": 610 }, { "epoch": 5.25, "learning_rate": 9.481355932203391e-06, "loss": 0.4431, "step": 620 }, { "epoch": 5.34, "learning_rate": 9.472881355932204e-06, "loss": 0.7604, "step": 630 }, { "epoch": 5.42, "learning_rate": 9.464406779661017e-06, "loss": 0.4196, "step": 640 }, { "epoch": 5.51, "learning_rate": 9.455932203389832e-06, "loss": 0.5376, "step": 650 }, { "epoch": 5.59, "learning_rate": 9.447457627118645e-06, "loss": 0.4184, "step": 660 }, { "epoch": 5.68, "learning_rate": 9.43898305084746e-06, "loss": 0.3217, "step": 670 }, { "epoch": 5.76, "learning_rate": 9.430508474576273e-06, "loss": 0.4748, "step": 680 }, { "epoch": 5.85, "learning_rate": 9.422033898305086e-06, "loss": 0.6145, "step": 690 }, { "epoch": 5.93, "learning_rate": 9.413559322033899e-06, "loss": 0.3761, "step": 700 }, { "epoch": 6.02, "learning_rate": 9.405084745762713e-06, "loss": 0.3156, "step": 710 }, { "epoch": 6.1, "learning_rate": 9.396610169491526e-06, "loss": 0.4638, "step": 720 }, { "epoch": 6.19, "learning_rate": 9.38813559322034e-06, "loss": 0.488, "step": 730 }, { "epoch": 6.27, "learning_rate": 9.379661016949152e-06, "loss": 0.5872, "step": 740 }, { "epoch": 6.36, "learning_rate": 9.371186440677967e-06, "loss": 0.5414, "step": 750 }, { "epoch": 6.44, "learning_rate": 9.36271186440678e-06, "loss": 0.3205, "step": 760 }, { "epoch": 6.53, "learning_rate": 9.354237288135593e-06, "loss": 0.4158, "step": 770 }, { "epoch": 6.61, "learning_rate": 9.346610169491525e-06, "loss": 0.3371, "step": 780 }, { "epoch": 6.69, "learning_rate": 9.33813559322034e-06, "loss": 0.4453, "step": 790 }, { "epoch": 6.78, "learning_rate": 9.329661016949153e-06, "loss": 0.4018, "step": 800 }, { "epoch": 6.86, "learning_rate": 9.321186440677967e-06, "loss": 0.4046, "step": 810 }, { "epoch": 6.95, "learning_rate": 9.31271186440678e-06, "loss": 0.438, "step": 820 }, { "epoch": 7.03, "learning_rate": 9.304237288135593e-06, "loss": 0.4067, "step": 830 }, { "epoch": 7.12, "learning_rate": 9.295762711864408e-06, "loss": 0.3016, "step": 840 }, { "epoch": 7.2, "learning_rate": 9.287288135593221e-06, "loss": 0.3699, "step": 850 }, { "epoch": 7.29, "learning_rate": 9.278813559322036e-06, "loss": 0.3857, "step": 860 }, { "epoch": 7.37, "learning_rate": 9.270338983050849e-06, "loss": 0.4237, "step": 870 }, { "epoch": 7.46, "learning_rate": 9.261864406779662e-06, "loss": 0.3207, "step": 880 }, { "epoch": 7.54, "learning_rate": 9.253389830508476e-06, "loss": 0.2815, "step": 890 }, { "epoch": 7.63, "learning_rate": 9.24491525423729e-06, "loss": 0.2975, "step": 900 }, { "epoch": 7.71, "learning_rate": 9.236440677966102e-06, "loss": 0.3149, "step": 910 }, { "epoch": 7.8, "learning_rate": 9.227966101694917e-06, "loss": 0.3816, "step": 920 }, { "epoch": 7.88, "learning_rate": 9.21949152542373e-06, "loss": 0.3822, "step": 930 }, { "epoch": 7.97, "learning_rate": 9.211016949152543e-06, "loss": 0.3699, "step": 940 }, { "epoch": 8.05, "learning_rate": 9.202542372881356e-06, "loss": 0.2304, "step": 950 }, { "epoch": 8.14, "learning_rate": 9.19406779661017e-06, "loss": 0.3732, "step": 960 }, { "epoch": 8.22, "learning_rate": 9.185593220338984e-06, "loss": 0.3458, "step": 970 }, { "epoch": 8.31, "learning_rate": 9.177118644067797e-06, "loss": 0.3233, "step": 980 }, { "epoch": 8.39, "learning_rate": 9.168644067796611e-06, "loss": 0.3938, "step": 990 }, { "epoch": 8.47, "learning_rate": 9.160169491525424e-06, "loss": 0.3358, "step": 1000 } ], "logging_steps": 10, "max_steps": 11800, "num_train_epochs": 100, "save_steps": 1000, "total_flos": 0.0, "trial_name": null, "trial_params": null }