|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 570, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08771929824561403, |
|
"grad_norm": 0.2171279639005661, |
|
"learning_rate": 1.3877192982456139e-05, |
|
"loss": 2.3087, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 0.21565403044223785, |
|
"learning_rate": 1.375438596491228e-05, |
|
"loss": 2.2512, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2631578947368421, |
|
"grad_norm": 0.24440069496631622, |
|
"learning_rate": 1.3631578947368421e-05, |
|
"loss": 2.1928, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.24185337126255035, |
|
"learning_rate": 1.350877192982456e-05, |
|
"loss": 2.0981, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.43859649122807015, |
|
"grad_norm": 0.26988154649734497, |
|
"learning_rate": 1.3385964912280702e-05, |
|
"loss": 2.0544, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.2789638042449951, |
|
"learning_rate": 1.3263157894736841e-05, |
|
"loss": 1.9431, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6140350877192983, |
|
"grad_norm": 0.32065337896347046, |
|
"learning_rate": 1.3140350877192982e-05, |
|
"loss": 1.9116, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.34021827578544617, |
|
"learning_rate": 1.3017543859649123e-05, |
|
"loss": 1.8307, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7894736842105263, |
|
"grad_norm": 0.3664143681526184, |
|
"learning_rate": 1.2894736842105262e-05, |
|
"loss": 1.6933, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.33720481395721436, |
|
"learning_rate": 1.2771929824561402e-05, |
|
"loss": 1.5989, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9649122807017544, |
|
"grad_norm": 0.3138943910598755, |
|
"learning_rate": 1.2649122807017545e-05, |
|
"loss": 1.5041, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.31080183386802673, |
|
"learning_rate": 1.2526315789473684e-05, |
|
"loss": 1.4221, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.1403508771929824, |
|
"grad_norm": 0.26811471581459045, |
|
"learning_rate": 1.2403508771929823e-05, |
|
"loss": 1.3467, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 0.25024041533470154, |
|
"learning_rate": 1.2280701754385964e-05, |
|
"loss": 1.2758, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.3157894736842106, |
|
"grad_norm": 0.2651083171367645, |
|
"learning_rate": 1.2157894736842105e-05, |
|
"loss": 1.2279, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.25894564390182495, |
|
"learning_rate": 1.2035087719298245e-05, |
|
"loss": 1.173, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.4912280701754386, |
|
"grad_norm": 0.2526499032974243, |
|
"learning_rate": 1.1912280701754386e-05, |
|
"loss": 1.1539, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.22998183965682983, |
|
"learning_rate": 1.1789473684210525e-05, |
|
"loss": 1.0704, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.23367486894130707, |
|
"learning_rate": 1.1666666666666666e-05, |
|
"loss": 1.0527, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.20629191398620605, |
|
"learning_rate": 1.1543859649122807e-05, |
|
"loss": 1.0167, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8421052631578947, |
|
"grad_norm": 0.19482989609241486, |
|
"learning_rate": 1.1421052631578947e-05, |
|
"loss": 1.0097, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.19629421830177307, |
|
"learning_rate": 1.1298245614035088e-05, |
|
"loss": 0.9724, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.017543859649123, |
|
"grad_norm": 0.16687360405921936, |
|
"learning_rate": 1.1175438596491229e-05, |
|
"loss": 0.9819, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.17633700370788574, |
|
"learning_rate": 1.1052631578947368e-05, |
|
"loss": 0.9482, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.192982456140351, |
|
"grad_norm": 0.18230170011520386, |
|
"learning_rate": 1.0929824561403509e-05, |
|
"loss": 0.9359, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 0.1811186671257019, |
|
"learning_rate": 1.0807017543859648e-05, |
|
"loss": 0.9335, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.3684210526315788, |
|
"grad_norm": 0.204745814204216, |
|
"learning_rate": 1.068421052631579e-05, |
|
"loss": 0.9152, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 0.20484690368175507, |
|
"learning_rate": 1.056140350877193e-05, |
|
"loss": 0.9334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.543859649122807, |
|
"grad_norm": 0.1847849041223526, |
|
"learning_rate": 1.043859649122807e-05, |
|
"loss": 0.9078, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.19749857485294342, |
|
"learning_rate": 1.031578947368421e-05, |
|
"loss": 0.8918, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.719298245614035, |
|
"grad_norm": 0.22192838788032532, |
|
"learning_rate": 1.0192982456140352e-05, |
|
"loss": 0.9108, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.22351142764091492, |
|
"learning_rate": 1.0070175438596491e-05, |
|
"loss": 0.9176, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.8947368421052633, |
|
"grad_norm": 0.23690944910049438, |
|
"learning_rate": 9.94736842105263e-06, |
|
"loss": 0.8665, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 0.2534187436103821, |
|
"learning_rate": 9.824561403508772e-06, |
|
"loss": 0.9237, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.0701754385964914, |
|
"grad_norm": 0.30020347237586975, |
|
"learning_rate": 9.701754385964913e-06, |
|
"loss": 0.8558, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.28877881169319153, |
|
"learning_rate": 9.578947368421052e-06, |
|
"loss": 0.8699, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.245614035087719, |
|
"grad_norm": 0.3179704546928406, |
|
"learning_rate": 9.456140350877193e-06, |
|
"loss": 0.8773, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.36217424273490906, |
|
"learning_rate": 9.333333333333333e-06, |
|
"loss": 0.8581, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.4210526315789473, |
|
"grad_norm": 0.45486509799957275, |
|
"learning_rate": 9.210526315789474e-06, |
|
"loss": 0.8315, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 0.6085723042488098, |
|
"learning_rate": 9.087719298245615e-06, |
|
"loss": 0.8414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.5964912280701755, |
|
"grad_norm": 0.7863444685935974, |
|
"learning_rate": 8.964912280701754e-06, |
|
"loss": 0.8202, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 0.32649222016334534, |
|
"learning_rate": 8.842105263157893e-06, |
|
"loss": 0.8005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.7719298245614032, |
|
"grad_norm": 0.215932235121727, |
|
"learning_rate": 8.719298245614036e-06, |
|
"loss": 0.7807, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 0.19240467250347137, |
|
"learning_rate": 8.596491228070176e-06, |
|
"loss": 0.788, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.9473684210526314, |
|
"grad_norm": 0.19347311556339264, |
|
"learning_rate": 8.473684210526315e-06, |
|
"loss": 0.8133, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.035087719298246, |
|
"grad_norm": 0.1866816133260727, |
|
"learning_rate": 8.350877192982456e-06, |
|
"loss": 0.8018, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.12280701754386, |
|
"grad_norm": 0.22412796318531036, |
|
"learning_rate": 8.228070175438597e-06, |
|
"loss": 0.7928, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 0.19428610801696777, |
|
"learning_rate": 8.105263157894736e-06, |
|
"loss": 0.8002, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.298245614035087, |
|
"grad_norm": 0.18602155148983002, |
|
"learning_rate": 7.982456140350877e-06, |
|
"loss": 0.7937, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 0.18411681056022644, |
|
"learning_rate": 7.859649122807017e-06, |
|
"loss": 0.7788, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.473684210526316, |
|
"grad_norm": 0.19007687270641327, |
|
"learning_rate": 7.736842105263158e-06, |
|
"loss": 0.7814, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 0.19529704749584198, |
|
"learning_rate": 7.614035087719299e-06, |
|
"loss": 0.766, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.649122807017544, |
|
"grad_norm": 0.2058393806219101, |
|
"learning_rate": 7.491228070175438e-06, |
|
"loss": 0.7654, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 0.2282346487045288, |
|
"learning_rate": 7.3684210526315784e-06, |
|
"loss": 0.7751, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.824561403508772, |
|
"grad_norm": 0.20693573355674744, |
|
"learning_rate": 7.24561403508772e-06, |
|
"loss": 0.773, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 0.2019735425710678, |
|
"learning_rate": 7.12280701754386e-06, |
|
"loss": 0.7645, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.27219313383102417, |
|
"learning_rate": 7e-06, |
|
"loss": 0.7646, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 5.087719298245614, |
|
"grad_norm": 0.19285519421100616, |
|
"learning_rate": 6.87719298245614e-06, |
|
"loss": 0.7764, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.175438596491228, |
|
"grad_norm": 0.22160640358924866, |
|
"learning_rate": 6.75438596491228e-06, |
|
"loss": 0.7828, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 0.2303527593612671, |
|
"learning_rate": 6.6315789473684205e-06, |
|
"loss": 0.7415, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.350877192982456, |
|
"grad_norm": 0.19970931112766266, |
|
"learning_rate": 6.5087719298245616e-06, |
|
"loss": 0.7689, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 5.43859649122807, |
|
"grad_norm": 0.21704891324043274, |
|
"learning_rate": 6.385964912280701e-06, |
|
"loss": 0.7641, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.526315789473684, |
|
"grad_norm": 0.2142358273267746, |
|
"learning_rate": 6.263157894736842e-06, |
|
"loss": 0.7555, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 0.21201816201210022, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.7397, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.701754385964913, |
|
"grad_norm": 0.2286573052406311, |
|
"learning_rate": 6.017543859649122e-06, |
|
"loss": 0.7548, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 5.7894736842105265, |
|
"grad_norm": 0.2286461740732193, |
|
"learning_rate": 5.894736842105263e-06, |
|
"loss": 0.7421, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.87719298245614, |
|
"grad_norm": 0.22108690440654755, |
|
"learning_rate": 5.771929824561404e-06, |
|
"loss": 0.7728, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 0.21524551510810852, |
|
"learning_rate": 5.649122807017544e-06, |
|
"loss": 0.7384, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.052631578947368, |
|
"grad_norm": 0.2278275340795517, |
|
"learning_rate": 5.526315789473684e-06, |
|
"loss": 0.7536, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 0.2368898093700409, |
|
"learning_rate": 5.403508771929824e-06, |
|
"loss": 0.7463, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.228070175438597, |
|
"grad_norm": 0.23636922240257263, |
|
"learning_rate": 5.280701754385965e-06, |
|
"loss": 0.7329, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 0.21842700242996216, |
|
"learning_rate": 5.157894736842105e-06, |
|
"loss": 0.7334, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.4035087719298245, |
|
"grad_norm": 0.24194689095020294, |
|
"learning_rate": 5.035087719298246e-06, |
|
"loss": 0.7589, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 6.491228070175438, |
|
"grad_norm": 0.2541049122810364, |
|
"learning_rate": 4.912280701754386e-06, |
|
"loss": 0.7506, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.578947368421053, |
|
"grad_norm": 0.24515019357204437, |
|
"learning_rate": 4.789473684210526e-06, |
|
"loss": 0.7553, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.24450013041496277, |
|
"learning_rate": 4.666666666666666e-06, |
|
"loss": 0.7518, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.754385964912281, |
|
"grad_norm": 0.2295515537261963, |
|
"learning_rate": 4.543859649122807e-06, |
|
"loss": 0.7316, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 6.842105263157895, |
|
"grad_norm": 0.23279713094234467, |
|
"learning_rate": 4.421052631578947e-06, |
|
"loss": 0.7294, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.9298245614035086, |
|
"grad_norm": 0.24888701736927032, |
|
"learning_rate": 4.298245614035088e-06, |
|
"loss": 0.731, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 0.21917015314102173, |
|
"learning_rate": 4.175438596491228e-06, |
|
"loss": 0.7233, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.105263157894737, |
|
"grad_norm": 0.2383662313222885, |
|
"learning_rate": 4.052631578947368e-06, |
|
"loss": 0.7471, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 7.192982456140351, |
|
"grad_norm": 0.2388984113931656, |
|
"learning_rate": 3.929824561403508e-06, |
|
"loss": 0.7314, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.280701754385965, |
|
"grad_norm": 0.23913495242595673, |
|
"learning_rate": 3.8070175438596494e-06, |
|
"loss": 0.7343, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 0.27001267671585083, |
|
"learning_rate": 3.6842105263157892e-06, |
|
"loss": 0.7352, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.456140350877193, |
|
"grad_norm": 0.25270870327949524, |
|
"learning_rate": 3.56140350877193e-06, |
|
"loss": 0.7189, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 7.543859649122807, |
|
"grad_norm": 0.2609616816043854, |
|
"learning_rate": 3.43859649122807e-06, |
|
"loss": 0.708, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.631578947368421, |
|
"grad_norm": 0.25190067291259766, |
|
"learning_rate": 3.3157894736842103e-06, |
|
"loss": 0.7236, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 0.26472926139831543, |
|
"learning_rate": 3.1929824561403505e-06, |
|
"loss": 0.742, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.807017543859649, |
|
"grad_norm": 0.2787232995033264, |
|
"learning_rate": 3.070175438596491e-06, |
|
"loss": 0.7349, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 0.24300773441791534, |
|
"learning_rate": 2.9473684210526313e-06, |
|
"loss": 0.7234, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.982456140350877, |
|
"grad_norm": 0.27598220109939575, |
|
"learning_rate": 2.824561403508772e-06, |
|
"loss": 0.7296, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 0.2716473937034607, |
|
"learning_rate": 2.701754385964912e-06, |
|
"loss": 0.7042, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.157894736842104, |
|
"grad_norm": 0.26843708753585815, |
|
"learning_rate": 2.5789473684210523e-06, |
|
"loss": 0.7237, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 8.24561403508772, |
|
"grad_norm": 0.271550714969635, |
|
"learning_rate": 2.456140350877193e-06, |
|
"loss": 0.7153, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 0.24845653772354126, |
|
"learning_rate": 2.333333333333333e-06, |
|
"loss": 0.7385, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 0.30234792828559875, |
|
"learning_rate": 2.2105263157894734e-06, |
|
"loss": 0.7487, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.508771929824562, |
|
"grad_norm": 0.27301862835884094, |
|
"learning_rate": 2.087719298245614e-06, |
|
"loss": 0.7316, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 8.596491228070175, |
|
"grad_norm": 0.2653884291648865, |
|
"learning_rate": 1.964912280701754e-06, |
|
"loss": 0.719, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.68421052631579, |
|
"grad_norm": 0.2817953824996948, |
|
"learning_rate": 1.8421052631578946e-06, |
|
"loss": 0.7163, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 0.2780102491378784, |
|
"learning_rate": 1.719298245614035e-06, |
|
"loss": 0.7183, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.859649122807017, |
|
"grad_norm": 0.25589415431022644, |
|
"learning_rate": 1.5964912280701752e-06, |
|
"loss": 0.7127, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 8.947368421052632, |
|
"grad_norm": 0.25434452295303345, |
|
"learning_rate": 1.4736842105263156e-06, |
|
"loss": 0.6984, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.035087719298245, |
|
"grad_norm": 0.26028621196746826, |
|
"learning_rate": 1.350877192982456e-06, |
|
"loss": 0.723, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 0.2656114101409912, |
|
"learning_rate": 1.2280701754385965e-06, |
|
"loss": 0.7102, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.210526315789474, |
|
"grad_norm": 0.25901249051094055, |
|
"learning_rate": 1.1052631578947367e-06, |
|
"loss": 0.7161, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 9.298245614035087, |
|
"grad_norm": 0.27115657925605774, |
|
"learning_rate": 9.82456140350877e-07, |
|
"loss": 0.7114, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.385964912280702, |
|
"grad_norm": 0.28459489345550537, |
|
"learning_rate": 8.596491228070175e-07, |
|
"loss": 0.7148, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 0.2780962884426117, |
|
"learning_rate": 7.368421052631578e-07, |
|
"loss": 0.7157, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.56140350877193, |
|
"grad_norm": 0.29147860407829285, |
|
"learning_rate": 6.140350877192982e-07, |
|
"loss": 0.7133, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 0.2604060769081116, |
|
"learning_rate": 4.912280701754385e-07, |
|
"loss": 0.7097, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.736842105263158, |
|
"grad_norm": 0.279658704996109, |
|
"learning_rate": 3.684210526315789e-07, |
|
"loss": 0.7337, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 0.2638838589191437, |
|
"learning_rate": 2.456140350877193e-07, |
|
"loss": 0.7149, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.912280701754385, |
|
"grad_norm": 0.2900523245334625, |
|
"learning_rate": 1.2280701754385964e-07, |
|
"loss": 0.7002, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.3178514838218689, |
|
"learning_rate": 0.0, |
|
"loss": 0.7067, |
|
"step": 570 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 570, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1446214105760768e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|