{ "best_metric": 0.6951560974121094, "best_model_checkpoint": "output/morgenshtern/checkpoint-294", "epoch": 3.0, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.8622670741210196e-05, "loss": 0.9101, "step": 5 }, { "epoch": 0.1, "learning_rate": 5.902924774844624e-05, "loss": 0.8731, "step": 10 }, { "epoch": 0.15, "learning_rate": 6.96668542977361e-05, "loss": 0.9581, "step": 15 }, { "epoch": 0.2, "learning_rate": 8.027870792255626e-05, "loss": 0.8474, "step": 20 }, { "epoch": 0.25, "learning_rate": 9.060864780933411e-05, "loss": 0.9838, "step": 25 }, { "epoch": 0.3, "learning_rate": 0.00010040731829419309, "loss": 1.012, "step": 30 }, { "epoch": 0.35, "learning_rate": 0.00010943818808937948, "loss": 0.8702, "step": 35 }, { "epoch": 0.4, "learning_rate": 0.00011748325994051893, "loss": 0.9656, "step": 40 }, { "epoch": 0.45, "learning_rate": 0.00012434833288861775, "loss": 0.9745, "step": 45 }, { "epoch": 0.5, "learning_rate": 0.0001298676901104138, "loss": 0.8875, "step": 50 }, { "epoch": 0.54, "learning_rate": 0.00013390809917674526, "loss": 0.9281, "step": 55 }, { "epoch": 0.59, "learning_rate": 0.00013637202816621594, "loss": 0.932, "step": 60 }, { "epoch": 0.64, "learning_rate": 0.0001372, "loss": 1.071, "step": 65 }, { "epoch": 0.69, "learning_rate": 0.000136372028166216, "loss": 0.8787, "step": 70 }, { "epoch": 0.74, "learning_rate": 0.00013390809917674537, "loss": 0.9074, "step": 75 }, { "epoch": 0.79, "learning_rate": 0.00012986769011041397, "loss": 1.0496, "step": 80 }, { "epoch": 0.84, "learning_rate": 0.00012434833288861794, "loss": 0.9943, "step": 85 }, { "epoch": 0.89, "learning_rate": 0.00011748325994051916, "loss": 1.0026, "step": 90 }, { "epoch": 0.94, "learning_rate": 0.00010943818808937974, "loss": 1.0201, "step": 95 }, { "epoch": 0.99, "learning_rate": 0.00010040731829419337, "loss": 0.9215, "step": 100 }, { "epoch": 1.0, "eval_loss": 0.8902494311332703, "eval_runtime": 5.4222, "eval_samples_per_second": 22.684, "eval_steps_per_second": 2.951, "step": 101 }, { "epoch": 1.04, "learning_rate": 9.060864780933398e-05, "loss": 0.9922, "step": 105 }, { "epoch": 1.09, "learning_rate": 8.027870792255707e-05, "loss": 0.8911, "step": 110 }, { "epoch": 1.14, "learning_rate": 6.966685429773643e-05, "loss": 0.9184, "step": 115 }, { "epoch": 1.19, "learning_rate": 5.902924774844707e-05, "loss": 0.8339, "step": 120 }, { "epoch": 1.24, "learning_rate": 4.862267074121052e-05, "loss": 0.791, "step": 125 }, { "epoch": 1.29, "learning_rate": 3.869832889258939e-05, "loss": 0.8984, "step": 130 }, { "epoch": 1.34, "learning_rate": 2.9495787086535028e-05, "loss": 0.7645, "step": 135 }, { "epoch": 1.39, "learning_rate": 2.1237186588777798e-05, "loss": 0.8448, "step": 140 }, { "epoch": 1.44, "learning_rate": 1.4121882752050083e-05, "loss": 0.8783, "step": 145 }, { "epoch": 1.49, "learning_rate": 8.321632753190209e-06, "loss": 0.8214, "step": 150 }, { "epoch": 1.53, "learning_rate": 3.976449525958718e-06, "loss": 0.7646, "step": 155 }, { "epoch": 1.58, "learning_rate": 1.1912219719526667e-06, "loss": 0.8915, "step": 160 }, { "epoch": 1.63, "learning_rate": 3.318303476960258e-08, "loss": 0.7759, "step": 165 }, { "epoch": 1.68, "learning_rate": 5.302867558791814e-07, "loss": 0.9129, "step": 170 }, { "epoch": 1.73, "learning_rate": 2.670533488647443e-06, "loss": 0.8839, "step": 175 }, { "epoch": 1.78, "learning_rate": 6.402259559252401e-06, "loss": 0.9542, "step": 180 }, { "epoch": 1.83, "learning_rate": 1.1635384382334973e-05, "loss": 0.891, "step": 185 }, { "epoch": 1.88, "learning_rate": 1.824358492710126e-05, "loss": 0.7246, "step": 190 }, { "epoch": 1.93, "learning_rate": 2.6067345044190458e-05, "loss": 0.7276, "step": 195 }, { "epoch": 1.98, "learning_rate": 3.491780604522984e-05, "loss": 0.8327, "step": 200 }, { "epoch": 2.0, "eval_loss": 0.8691701889038086, "eval_runtime": 5.4146, "eval_samples_per_second": 22.717, "eval_steps_per_second": 2.955, "step": 202 }, { "epoch": 2.09, "learning_rate": 0.00013663679494827462, "loss": 0.8056, "step": 205 }, { "epoch": 2.14, "learning_rate": 0.00013436461471831498, "loss": 0.8153, "step": 210 }, { "epoch": 2.19, "learning_rate": 0.00013040646433810593, "loss": 0.8439, "step": 215 }, { "epoch": 2.24, "learning_rate": 0.00012486381666535154, "loss": 0.8468, "step": 220 }, { "epoch": 2.3, "learning_rate": 0.00011787876541670438, "loss": 0.9434, "step": 225 }, { "epoch": 2.35, "learning_rate": 0.00010963038239169752, "loss": 0.9117, "step": 230 }, { "epoch": 2.4, "learning_rate": 0.0001003301267105212, "loss": 0.9685, "step": 235 }, { "epoch": 2.45, "learning_rate": 9.021642375642094e-05, "loss": 0.8183, "step": 240 }, { "epoch": 2.5, "learning_rate": 7.954855279929014e-05, "loss": 0.8455, "step": 245 }, { "epoch": 2.55, "learning_rate": 6.860000000000005e-05, "loss": 0.8961, "step": 250 }, { "epoch": 2.6, "learning_rate": 5.765144720071096e-05, "loss": 0.9542, "step": 255 }, { "epoch": 2.65, "learning_rate": 4.698357624357919e-05, "loss": 0.9393, "step": 260 }, { "epoch": 2.7, "learning_rate": 3.686987328947893e-05, "loss": 0.9105, "step": 265 }, { "epoch": 2.76, "learning_rate": 2.7569617608302584e-05, "loss": 0.9638, "step": 270 }, { "epoch": 2.81, "learning_rate": 1.932123458329638e-05, "loss": 0.8269, "step": 275 }, { "epoch": 2.86, "learning_rate": 1.233618333464853e-05, "loss": 0.9693, "step": 280 }, { "epoch": 2.91, "learning_rate": 6.7935356618941304e-06, "loss": 0.8474, "step": 285 }, { "epoch": 2.96, "learning_rate": 2.8353852816850615e-06, "loss": 0.8138, "step": 290 }, { "epoch": 3.0, "eval_loss": 0.6951560974121094, "eval_runtime": 6.9159, "eval_samples_per_second": 20.677, "eval_steps_per_second": 2.603, "step": 294 } ], "max_steps": 294, "num_train_epochs": 3, "total_flos": 305711677440000.0, "trial_name": null, "trial_params": null }