|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9942446043165467, |
|
"eval_steps": 500, |
|
"global_step": 1388, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.999994877043978e-05, |
|
"loss": 1.5749, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.997951166621575e-05, |
|
"loss": 1.2447, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.9918088642045126e-05, |
|
"loss": 1.1636, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.981585677303025e-05, |
|
"loss": 1.1379, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.967302551523671e-05, |
|
"loss": 1.1114, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.948988750611294e-05, |
|
"loss": 1.1086, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.9266817964924905e-05, |
|
"loss": 1.0915, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.900427392399429e-05, |
|
"loss": 1.075, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.870279329231546e-05, |
|
"loss": 1.0875, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.836299375346956e-05, |
|
"loss": 1.0696, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.798557150009373e-05, |
|
"loss": 1.0614, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.757129980749847e-05, |
|
"loss": 1.0638, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.712102744935529e-05, |
|
"loss": 1.0545, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.6635676958700946e-05, |
|
"loss": 1.0508, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.611624273782092e-05, |
|
"loss": 1.0566, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.556378902088484e-05, |
|
"loss": 1.0577, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4979447693508e-05, |
|
"loss": 1.0428, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.436441597370635e-05, |
|
"loss": 1.0456, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.035969614982605, |
|
"eval_runtime": 20.059, |
|
"eval_samples_per_second": 13.909, |
|
"eval_steps_per_second": 3.49, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.371995395899618e-05, |
|
"loss": 1.0082, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.304738204466437e-05, |
|
"loss": 0.9889, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.234807821849838e-05, |
|
"loss": 0.9786, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.162347523751894e-05, |
|
"loss": 0.9881, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.0875057692499566e-05, |
|
"loss": 0.9747, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.0104358966287503e-05, |
|
"loss": 0.9842, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9312958092157724e-05, |
|
"loss": 0.9846, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.850247651863686e-05, |
|
"loss": 0.9801, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.767457478742533e-05, |
|
"loss": 0.9834, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6830949131224118e-05, |
|
"loss": 0.9831, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5973327998436527e-05, |
|
"loss": 0.9787, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.5103468511865456e-05, |
|
"loss": 0.981, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.4223152868661535e-05, |
|
"loss": 0.9845, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.3334184688898107e-05, |
|
"loss": 0.9754, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.2438385320254234e-05, |
|
"loss": 0.9779, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.1537590106376758e-05, |
|
"loss": 0.9737, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.0633644626567007e-05, |
|
"loss": 0.9714, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.0180176496505737, |
|
"eval_runtime": 20.0699, |
|
"eval_samples_per_second": 13.901, |
|
"eval_steps_per_second": 3.488, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.9728400914496288e-05, |
|
"loss": 0.9669, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.882371366369749e-05, |
|
"loss": 0.9478, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.79214364276071e-05, |
|
"loss": 0.9458, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.702341782194301e-05, |
|
"loss": 0.9307, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.6131497737198942e-05, |
|
"loss": 0.9435, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.5247503569015413e-05, |
|
"loss": 0.945, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.437324647415053e-05, |
|
"loss": 0.9416, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.3510517659721583e-05, |
|
"loss": 0.9476, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.2661084713320093e-05, |
|
"loss": 0.946, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.182668798151939e-05, |
|
"loss": 0.9414, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.1009037004194424e-05, |
|
"loss": 0.9439, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.020980701195946e-05, |
|
"loss": 0.9486, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.430635493899609e-06, |
|
"loss": 0.949, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.673118842628595e-06, |
|
"loss": 0.9376, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 7.938809083546264e-06, |
|
"loss": 0.9432, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.229210694997113e-06, |
|
"loss": 0.9457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.545777525844883e-06, |
|
"loss": 0.9357, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.889909816778458e-06, |
|
"loss": 0.9335, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.0176299810409546, |
|
"eval_runtime": 20.0069, |
|
"eval_samples_per_second": 13.945, |
|
"eval_steps_per_second": 3.499, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 5.262951331452011e-06, |
|
"loss": 0.937, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 4.6661866033371506e-06, |
|
"loss": 0.9351, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.100838303927914e-06, |
|
"loss": 0.9415, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.5680647376905666e-06, |
|
"loss": 0.9293, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.0689574688907607e-06, |
|
"loss": 0.9304, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.604539085160218e-06, |
|
"loss": 0.9254, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 2.1757611023850876e-06, |
|
"loss": 0.9293, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.7835020152084116e-06, |
|
"loss": 0.9391, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.4285654971409902e-06, |
|
"loss": 0.9363, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.1116787539682571e-06, |
|
"loss": 0.9506, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 8.334910338268054e-07, |
|
"loss": 0.9226, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 5.945722970031332e-07, |
|
"loss": 0.9305, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.9541204817997283e-07, |
|
"loss": 0.9306, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.3641833352276768e-07, |
|
"loss": 0.9344, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.1791690466107286e-07, |
|
"loss": 0.93, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.0150551277724494e-08, |
|
"loss": 0.9344, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.2786036732557203e-09, |
|
"loss": 0.9348, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 1.0186352729797363, |
|
"eval_runtime": 19.987, |
|
"eval_samples_per_second": 13.959, |
|
"eval_steps_per_second": 3.502, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"step": 1388, |
|
"total_flos": 1.7642090681398723e+18, |
|
"train_loss": 0.9866050820185747, |
|
"train_runtime": 37340.4665, |
|
"train_samples_per_second": 3.722, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 1388, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 80, |
|
"total_flos": 1.7642090681398723e+18, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|