{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9634684865515857,
  "eval_steps": 500,
  "global_step": 2400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 2.666666666666667e-06, "loss": 42.4441, "step": 10},
    {"epoch": 0.01, "learning_rate": 5.333333333333334e-06, "loss": 37.1075, "step": 20},
    {"epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 27.8987, "step": 30},
    {"epoch": 0.02, "learning_rate": 1.0666666666666667e-05, "loss": 24.6867, "step": 40},
    {"epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 22.6003, "step": 50},
    {"epoch": 0.02, "learning_rate": 1.6000000000000003e-05, "loss": 21.1455, "step": 60},
    {"epoch": 0.03, "learning_rate": 1.866666666666667e-05, "loss": 21.6205, "step": 70},
    {"epoch": 0.03, "learning_rate": 1.9999788644103418e-05, "loss": 20.1194, "step": 80},
    {"epoch": 0.04, "learning_rate": 1.999809785053594e-05, "loss": 20.1889, "step": 90},
    {"epoch": 0.04, "learning_rate": 1.9994716549285312e-05, "loss": 19.7878, "step": 100},
    {"epoch": 0.04, "learning_rate": 1.9989645312071867e-05, "loss": 19.5043, "step": 110},
    {"epoch": 0.05, "learning_rate": 1.9982884996355248e-05, "loss": 19.2639, "step": 120},
    {"epoch": 0.05, "learning_rate": 1.9974436745189444e-05, "loss": 19.0332, "step": 130},
    {"epoch": 0.06, "learning_rate": 1.9964301987029523e-05, "loss": 19.3774, "step": 140},
    {"epoch": 0.06, "learning_rate": 1.9952482435490094e-05, "loss": 18.8272, "step": 150},
    {"epoch": 0.06, "learning_rate": 1.9938980089055565e-05, "loss": 18.9226, "step": 160},
    {"epoch": 0.07, "learning_rate": 1.992379723074224e-05, "loss": 19.2136, "step": 170},
    {"epoch": 0.07, "learning_rate": 1.9906936427712295e-05, "loss": 18.6037, "step": 180},
    {"epoch": 0.08, "learning_rate": 1.9888400530839713e-05, "loss": 18.6965, "step": 190},
    {"epoch": 0.08, "learning_rate": 1.986819267422826e-05, "loss": 18.4695, "step": 200},
    {"epoch": 0.08, "learning_rate": 1.9846316274681547e-05, "loss": 18.5388, "step": 210},
    {"epoch": 0.09, "learning_rate": 1.982277503112531e-05, "loss": 18.6583, "step": 220},
    {"epoch": 0.09, "learning_rate": 1.979757292398201e-05, "loss": 18.1656, "step": 230},
    {"epoch": 0.1, "learning_rate": 1.977071421449776e-05, "loss": 18.5414, "step": 240},
    {"epoch": 0.1, "learning_rate": 1.9742203444021878e-05, "loss": 18.3158, "step": 250},
    {"epoch": 0.1, "learning_rate": 1.9712045433238972e-05, "loss": 18.0978, "step": 260},
    {"epoch": 0.11, "learning_rate": 1.9680245281353894e-05, "loss": 18.0812, "step": 270},
    {"epoch": 0.11, "learning_rate": 1.9646808365229506e-05, "loss": 17.8044, "step": 280},
    {"epoch": 0.12, "learning_rate": 1.961174033847757e-05, "loss": 17.9748, "step": 290},
    {"epoch": 0.12, "learning_rate": 1.9575047130502813e-05, "loss": 17.7447, "step": 300},
    {"epoch": 0.12, "learning_rate": 1.953673494550037e-05, "loss": 18.2907, "step": 310},
    {"epoch": 0.13, "learning_rate": 1.949681026140674e-05, "loss": 18.1314, "step": 320},
    {"epoch": 0.13, "learning_rate": 1.9455279828804526e-05, "loss": 17.9227, "step": 330},
    {"epoch": 0.14, "learning_rate": 1.9412150669780952e-05, "loss": 18.4178, "step": 340},
    {"epoch": 0.14, "learning_rate": 1.936743007674063e-05, "loss": 17.8701, "step": 350},
    {"epoch": 0.14, "learning_rate": 1.9321125611172468e-05, "loss": 18.1603, "step": 360},
    {"epoch": 0.15, "learning_rate": 1.92732451023712e-05, "loss": 17.8314, "step": 370},
    {"epoch": 0.15, "learning_rate": 1.9223796646113567e-05, "loss": 17.5233, "step": 380},
    {"epoch": 0.16, "learning_rate": 1.9172788603289453e-05, "loss": 18.2041, "step": 390},
    {"epoch": 0.16, "learning_rate": 1.9120229598488218e-05, "loss": 18.1653, "step": 400},
    {"epoch": 0.16, "learning_rate": 1.9066128518540408e-05, "loss": 17.6848, "step": 410},
    {"epoch": 0.17, "learning_rate": 1.9010494511015164e-05, "loss": 18.1238, "step": 420},
    {"epoch": 0.17, "learning_rate": 1.8953336982673506e-05, "loss": 17.956, "step": 430},
    {"epoch": 0.18, "learning_rate": 1.8894665597877824e-05, "loss": 17.699, "step": 440},
    {"epoch": 0.18, "learning_rate": 1.8834490276957788e-05, "loss": 17.6947, "step": 450},
    {"epoch": 0.18, "learning_rate": 1.8772821194533e-05, "loss": 17.6464, "step": 460},
    {"epoch": 0.19, "learning_rate": 1.8709668777792633e-05, "loss": 17.89, "step": 470},
    {"epoch": 0.19, "learning_rate": 1.8645043704732367e-05, "loss": 17.6851, "step": 480},
    {"epoch": 0.2, "learning_rate": 1.8578956902348945e-05, "loss": 17.3849, "step": 490},
    {"epoch": 0.2, "learning_rate": 1.851141954479256e-05, "loss": 17.8666, "step": 500},
    {"epoch": 0.2, "learning_rate": 1.844244305147755e-05, "loss": 17.4788, "step": 510},
    {"epoch": 0.21, "learning_rate": 1.8372039085151537e-05, "loss": 17.4202, "step": 520},
    {"epoch": 0.21, "learning_rate": 1.830021954992345e-05, "loss": 17.2379, "step": 530},
    {"epoch": 0.22, "learning_rate": 1.8226996589250775e-05, "loss": 17.5357, "step": 540},
    {"epoch": 0.22, "learning_rate": 1.8152382583886272e-05, "loss": 17.9992, "step": 550},
    {"epoch": 0.22, "learning_rate": 1.8076390149784622e-05, "loss": 17.7149, "step": 560},
    {"epoch": 0.23, "learning_rate": 1.7999032135969265e-05, "loss": 17.5005, "step": 570},
    {"epoch": 0.23, "learning_rate": 1.7920321622359876e-05, "loss": 17.4384, "step": 580},
    {"epoch": 0.24, "learning_rate": 1.784027191756075e-05, "loss": 17.0999, "step": 590},
    {"epoch": 0.24, "learning_rate": 1.7758896556610547e-05, "loss": 17.203, "step": 600},
    {"epoch": 0.24, "learning_rate": 1.7676209298693765e-05, "loss": 17.3599, "step": 610},
    {"epoch": 0.25, "learning_rate": 1.759222412481428e-05, "loss": 17.7387, "step": 620},
    {"epoch": 0.25, "learning_rate": 1.75069552354314e-05, "loss": 17.4499, "step": 630},
    {"epoch": 0.26, "learning_rate": 1.7420417048058816e-05, "loss": 17.396, "step": 640},
    {"epoch": 0.26, "learning_rate": 1.7332624194826847e-05, "loss": 17.441, "step": 650},
    {"epoch": 0.26, "learning_rate": 1.7243591520008384e-05, "loss": 17.0808, "step": 660},
    {"epoch": 0.27, "learning_rate": 1.7153334077508983e-05, "loss": 17.1419, "step": 670},
    {"epoch": 0.27, "learning_rate": 1.7061867128321524e-05, "loss": 17.3086, "step": 680},
    {"epoch": 0.28, "learning_rate": 1.6969206137945797e-05, "loss": 17.3577, "step": 690},
    {"epoch": 0.28, "learning_rate": 1.6875366773773604e-05, "loss": 17.6731, "step": 700},
    {"epoch": 0.29, "learning_rate": 1.678036490243962e-05, "loss": 17.3216, "step": 710},
    {"epoch": 0.29, "learning_rate": 1.6684216587138647e-05, "loss": 17.3844, "step": 720},
    {"epoch": 0.29, "learning_rate": 1.658693808490959e-05, "loss": 17.4103, "step": 730},
    {"epoch": 0.3, "learning_rate": 1.6488545843886677e-05, "loss": 17.0814, "step": 740},
    {"epoch": 0.3, "learning_rate": 1.6389056500518343e-05, "loss": 17.3081, "step": 750},
    {"epoch": 0.31, "learning_rate": 1.6288486876754314e-05, "loss": 17.148, "step": 760},
    {"epoch": 0.31, "learning_rate": 1.618685397720128e-05, "loss": 16.8685, "step": 770},
    {"epoch": 0.31, "learning_rate": 1.6084174986247738e-05, "loss": 17.4187, "step": 780},
    {"epoch": 0.32, "learning_rate": 1.598046726515836e-05, "loss": 17.4885, "step": 790},
    {"epoch": 0.32, "learning_rate": 1.5875748349138533e-05, "loss": 17.2386, "step": 800},
    {"epoch": 0.33, "learning_rate": 1.5770035944369456e-05, "loss": 16.9695, "step": 810},
    {"epoch": 0.33, "learning_rate": 1.5663347925014302e-05, "loss": 17.3497, "step": 820},
    {"epoch": 0.33, "learning_rate": 1.5555702330196024e-05, "loss": 16.9592, "step": 830},
    {"epoch": 0.34, "learning_rate": 1.5447117360947244e-05, "loss": 17.5106, "step": 840},
    {"epoch": 0.34, "learning_rate": 1.5337611377132757e-05, "loss": 17.2786, "step": 850},
    {"epoch": 0.35, "learning_rate": 1.522720289434521e-05, "loss": 17.4009, "step": 860},
    {"epoch": 0.35, "learning_rate": 1.511591058077441e-05, "loss": 17.2044, "step": 870},
    {"epoch": 0.35, "learning_rate": 1.500375325405087e-05, "loss": 17.1285, "step": 880},
    {"epoch": 0.36, "learning_rate": 1.489074987806406e-05, "loss": 17.3726, "step": 890},
    {"epoch": 0.36, "learning_rate": 1.477691955975594e-05, "loss": 17.1684, "step": 900},
    {"epoch": 0.37, "learning_rate": 1.46622815458903e-05, "loss": 17.2715, "step": 910},
    {"epoch": 0.37, "learning_rate": 1.454685521979846e-05, "loss": 17.3633, "step": 920},
    {"epoch": 0.37, "learning_rate": 1.443066009810188e-05, "loss": 17.1012, "step": 930},
    {"epoch": 0.38, "learning_rate": 1.4313715827412243e-05, "loss": 16.7467, "step": 940},
    {"epoch": 0.38, "learning_rate": 1.4196042181009525e-05, "loss": 16.741, "step": 950},
    {"epoch": 0.39, "learning_rate": 1.4077659055498695e-05, "loss": 17.0569, "step": 960},
    {"epoch": 0.39, "learning_rate": 1.3958586467445532e-05, "loss": 17.0277, "step": 970},
    {"epoch": 0.39, "learning_rate": 1.383884454999215e-05, "loss": 16.8944, "step": 980},
    {"epoch": 0.4, "learning_rate": 1.3718453549452843e-05, "loss": 17.1323, "step": 990},
    {"epoch": 0.4, "learning_rate": 1.3597433821890787e-05, "loss": 16.9379, "step": 1000},
    {"epoch": 0.41, "learning_rate": 1.3475805829676149e-05, "loss": 16.9814, "step": 1010},
    {"epoch": 0.41, "learning_rate": 1.3353590138026273e-05, "loss": 17.1369, "step": 1020},
    {"epoch": 0.41, "learning_rate": 1.323080741152845e-05, "loss": 16.8629, "step": 1030},
    {"epoch": 0.42, "learning_rate": 1.3107478410645875e-05, "loss": 16.8607, "step": 1040},
    {"epoch": 0.42, "learning_rate": 1.2983623988207432e-05, "loss": 16.6911, "step": 1050},
    {"epoch": 0.43, "learning_rate": 1.28592650858818e-05, "loss": 16.9764, "step": 1060},
    {"epoch": 0.43, "learning_rate": 1.2734422730636617e-05, "loss": 16.8255, "step": 1070},
    {"epoch": 0.43, "learning_rate": 1.2609118031183144e-05, "loss": 16.8564, "step": 1080},
    {"epoch": 0.44, "learning_rate": 1.2483372174407155e-05, "loss": 16.9249, "step": 1090},
    {"epoch": 0.44, "learning_rate": 1.2357206421786611e-05, "loss": 16.8046, "step": 1100},
    {"epoch": 0.45, "learning_rate": 1.2230642105796674e-05, "loss": 17.0138, "step": 1110},
    {"epoch": 0.45, "learning_rate": 1.2103700626302784e-05, "loss": 16.8029, "step": 1120},
    {"epoch": 0.45, "learning_rate": 1.197640344694228e-05, "loss": 16.7554, "step": 1130},
    {"epoch": 0.46, "learning_rate": 1.1848772091495287e-05, "loss": 16.62, "step": 1140},
    {"epoch": 0.46, "learning_rate": 1.1720828140245393e-05, "loss": 16.8458, "step": 1150},
    {"epoch": 0.47, "learning_rate": 1.1592593226330802e-05, "loss": 17.1714, "step": 1160},
    {"epoch": 0.47, "learning_rate": 1.1464089032086547e-05, "loss": 17.0085, "step": 1170},
    {"epoch": 0.47, "learning_rate": 1.1335337285378359e-05, "loss": 16.8423, "step": 1180},
    {"epoch": 0.48, "learning_rate": 1.1206359755928865e-05, "loss": 16.8152, "step": 1190},
    {"epoch": 0.48, "learning_rate": 1.1077178251636702e-05, "loss": 16.619, "step": 1200},
    {"epoch": 0.49, "learning_rate": 1.0947814614889174e-05, "loss": 16.7087, "step": 1210},
    {"epoch": 0.49, "learning_rate": 1.0818290718869068e-05, "loss": 16.7935, "step": 1220},
    {"epoch": 0.49, "learning_rate": 1.0688628463856287e-05, "loss": 17.4941, "step": 1230},
    {"epoch": 0.5, "learning_rate": 1.055884977352487e-05, "loss": 17.0523, "step": 1240},
    {"epoch": 0.5, "learning_rate": 1.0428976591236082e-05, "loss": 16.7451, "step": 1250},
    {"epoch": 0.51, "learning_rate": 1.029903087632817e-05, "loss": 16.8536, "step": 1260},
    {"epoch": 0.51, "learning_rate": 1.0169034600403404e-05, "loss": 16.6058, "step": 1270},
    {"epoch": 0.51, "learning_rate": 1.003900974361306e-05, "loss": 16.6489, "step": 1280},
    {"epoch": 0.52, "learning_rate": 9.908978290940948e-06, "loss": 16.989, "step": 1290},
    {"epoch": 0.52, "learning_rate": 9.778962228486138e-06, "loss": 16.885, "step": 1300},
    {"epoch": 0.53, "learning_rate": 9.648983539745468e-06, "loss": 16.9012, "step": 1310},
    {"epoch": 0.53, "learning_rate": 9.51906420189652e-06, "loss": 16.8678, "step": 1320},
    {"epoch": 0.53, "learning_rate": 9.38922618208166e-06, "loss": 16.7718, "step": 1330},
    {"epoch": 0.54, "learning_rate": 9.259491433693751e-06, "loss": 16.7869, "step": 1340},
    {"epoch": 0.54, "learning_rate": 9.129881892664232e-06, "loss": 16.9432, "step": 1350},
    {"epoch": 0.55, "learning_rate": 9.00041947375411e-06, "loss": 16.8835, "step": 1360},
    {"epoch": 0.55, "learning_rate": 8.871126066848552e-06, "loss": 16.8081, "step": 1370},
    {"epoch": 0.55, "learning_rate": 8.742023533255677e-06, "loss": 16.6375, "step": 1380},
    {"epoch": 0.56, "learning_rate": 8.613133702010196e-06, "loss": 16.7379, "step": 1390},
    {"epoch": 0.56, "learning_rate": 8.484478366182472e-06, "loss": 16.5849, "step": 1400},
    {"epoch": 0.57, "learning_rate": 8.356079279193703e-06, "loss": 16.6192, "step": 1410},
    {"epoch": 0.57, "learning_rate": 8.227958151137773e-06, "loss": 16.4591, "step": 1420},
    {"epoch": 0.57, "learning_rate": 8.10013664511047e-06, "loss": 16.5343, "step": 1430},
    {"epoch": 0.58, "learning_rate": 7.9726363735466e-06, "loss": 16.9542, "step": 1440},
    {"epoch": 0.58, "learning_rate": 7.84547889456571e-06, "loss": 16.8205, "step": 1450},
    {"epoch": 0.59, "learning_rate": 7.718685708326965e-06, "loss": 16.5807, "step": 1460},
    {"epoch": 0.59, "learning_rate": 7.592278253393859e-06, "loss": 16.5174, "step": 1470},
    {"epoch": 0.59, "learning_rate": 7.466277903109291e-06, "loss": 16.6076, "step": 1480},
    {"epoch": 0.6, "learning_rate": 7.340705961981722e-06, "loss": 16.7884, "step": 1490},
    {"epoch": 0.6, "learning_rate": 7.215583662082939e-06, "loss": 16.8359, "step": 1500},
    {"epoch": 0.61, "learning_rate": 7.090932159458067e-06, "loss": 16.5588, "step": 1510},
    {"epoch": 0.61, "learning_rate": 6.966772530548448e-06, "loss": 16.6256, "step": 1520},
    {"epoch": 0.61, "learning_rate": 6.843125768627983e-06, "loss": 16.7764, "step": 1530},
    {"epoch": 0.62, "learning_rate": 6.720012780253509e-06, "loss": 16.9245, "step": 1540},
    {"epoch": 0.62, "learning_rate": 6.597454381729873e-06, "loss": 16.5384, "step": 1550},
    {"epoch": 0.63, "learning_rate": 6.475471295590248e-06, "loss": 16.6298, "step": 1560},
    {"epoch": 0.63, "learning_rate": 6.354084147092296e-06, "loss": 16.752, "step": 1570},
    {"epoch": 0.63, "learning_rate": 6.2333134607308e-06, "loss": 16.6684, "step": 1580},
    {"epoch": 0.64, "learning_rate": 6.113179656767319e-06, "loss": 16.4827, "step": 1590},
    {"epoch": 0.64, "learning_rate": 5.993703047777468e-06, "loss": 16.5697, "step": 1600},
    {"epoch": 0.65, "learning_rate": 5.874903835216417e-06, "loss": 16.4795, "step": 1610},
    {"epoch": 0.65, "learning_rate": 5.756802106003148e-06, "loss": 16.8255, "step": 1620},
    {"epoch": 0.65, "learning_rate": 5.639417829124132e-06, "loss": 16.6714, "step": 1630},
    {"epoch": 0.66, "learning_rate": 5.52277085225688e-06, "loss": 16.7242, "step": 1640},
    {"epoch": 0.66, "learning_rate": 5.406880898414069e-06, "loss": 16.4696, "step": 1650},
    {"epoch": 0.67, "learning_rate": 5.291767562608705e-06, "loss": 16.5084, "step": 1660},
    {"epoch": 0.67, "learning_rate": 5.177450308540928e-06, "loss": 16.5646, "step": 1670},
    {"epoch": 0.67, "learning_rate": 5.0639484653070535e-06, "loss": 16.4026, "step": 1680},
    {"epoch": 0.68, "learning_rate": 4.95128122413135e-06, "loss": 16.3032, "step": 1690},
    {"epoch": 0.68, "learning_rate": 4.83946763512111e-06, "loss": 16.5088, "step": 1700},
    {"epoch": 0.69, "learning_rate": 4.7285266040456255e-06, "loss": 16.3905, "step": 1710},
    {"epoch": 0.69, "learning_rate": 4.618476889139538e-06, "loss": 16.4207, "step": 1720},
    {"epoch": 0.69, "learning_rate": 4.50933709793113e-06, "loss": 16.5308, "step": 1730},
    {"epoch": 0.7, "learning_rate": 4.401125684096124e-06, "loss": 16.521, "step": 1740},
    {"epoch": 0.7, "learning_rate": 4.293860944337482e-06, "loss": 16.3123, "step": 1750},
    {"epoch": 0.71, "learning_rate": 4.1875610152917225e-06, "loss": 16.6079, "step": 1760},
    {"epoch": 0.71, "learning_rate": 4.082243870462362e-06, "loss": 16.5056, "step": 1770},
    {"epoch": 0.71, "learning_rate": 3.977927317180864e-06, "loss": 16.4315, "step": 1780},
    {"epoch": 0.72, "learning_rate": 3.87462899359575e-06, "loss": 16.1708, "step": 1790},
    {"epoch": 0.72, "learning_rate": 3.772366365690283e-06, "loss": 16.3817, "step": 1800},
    {"epoch": 0.73, "learning_rate": 3.6711567243292547e-06, "loss": 16.5455, "step": 1810},
    {"epoch": 0.73, "learning_rate": 3.5710171823354145e-06, "loss": 16.3721, "step": 1820},
    {"epoch": 0.73, "learning_rate": 3.4719646715959777e-06, "loss": 16.4659, "step": 1830},
    {"epoch": 0.74, "learning_rate": 3.3740159401997173e-06, "loss": 16.5392, "step": 1840},
    {"epoch": 0.74, "learning_rate": 3.2771875496051743e-06, "loss": 16.6646, "step": 1850},
    {"epoch": 0.75, "learning_rate": 3.181495871840379e-06, "loss": 16.3682, "step": 1860},
    {"epoch": 0.75, "learning_rate": 3.0869570867346167e-06, "loss": 16.5935, "step": 1870},
    {"epoch": 0.75, "learning_rate": 2.9935871791827166e-06, "loss": 16.539, "step": 1880},
    {"epoch": 0.76, "learning_rate": 2.9014019364422606e-06, "loss": 16.1143, "step": 1890},
    {"epoch": 0.76, "learning_rate": 2.8104169454642293e-06, "loss": 16.5375, "step": 1900},
    {"epoch": 0.77, "learning_rate": 2.7206475902575225e-06, "loss": 16.3927, "step": 1910},
    {"epoch": 0.77, "learning_rate": 2.6321090492877823e-06, "loss": 16.3639, "step": 1920},
    {"epoch": 0.77, "learning_rate": 2.544816292910962e-06, "loss": 16.3929, "step": 1930},
    {"epoch": 0.78, "learning_rate": 2.458784080842127e-06, "loss": 16.3984, "step": 1940},
    {"epoch": 0.78, "learning_rate": 2.3740269596597943e-06, "loss": 16.3938, "step": 1950},
    {"epoch": 0.79, "learning_rate": 2.2905592603463888e-06, "loss": 16.3884, "step": 1960},
    {"epoch": 0.79, "learning_rate": 2.2083950958651103e-06, "loss": 16.3289, "step": 1970},
    {"epoch": 0.79, "learning_rate": 2.1275483587736577e-06, "loss": 16.3735, "step": 1980},
    {"epoch": 0.8, "learning_rate": 2.048032718875255e-06, "loss": 16.4073, "step": 1990},
    {"epoch": 0.8, "learning_rate": 1.969861620907314e-06, "loss": 16.4259, "step": 2000},
    {"epoch": 0.81, "learning_rate": 1.8930482822681473e-06, "loss": 16.3312, "step": 2010},
    {"epoch": 0.81, "learning_rate": 1.8176056907821482e-06, "loss": 16.2809, "step": 2020},
    {"epoch": 0.81, "learning_rate": 1.7435466025037684e-06, "loss": 16.2617, "step": 2030},
    {"epoch": 0.82, "learning_rate": 1.6708835395606704e-06, "loss": 16.6299, "step": 2040},
    {"epoch": 0.82, "learning_rate": 1.5996287880364736e-06, "loss": 16.2559, "step": 2050},
    {"epoch": 0.83, "learning_rate": 1.5297943958933748e-06, "loss": 16.4145, "step": 2060},
    {"epoch": 0.83, "learning_rate": 1.4613921709350342e-06, "loss": 16.3823, "step": 2070},
    {"epoch": 0.84, "learning_rate": 1.3944336788100976e-06, "loss": 16.3725, "step": 2080},
    {"epoch": 0.84, "learning_rate": 1.3289302410566318e-06, "loss": 16.2138, "step": 2090},
    {"epoch": 0.84, "learning_rate": 1.2648929331878423e-06, "loss": 16.2873, "step": 2100},
    {"epoch": 0.85, "learning_rate": 1.202332582819402e-06, "loss": 16.4605, "step": 2110},
    {"epoch": 0.85, "learning_rate": 1.141259767838675e-06, "loss": 16.1904, "step": 2120},
    {"epoch": 0.86, "learning_rate": 1.0816848146161895e-06, "loss": 16.4713, "step": 2130},
    {"epoch": 0.86, "learning_rate": 1.0236177962596173e-06, "loss": 16.143, "step": 2140},
    {"epoch": 0.86, "learning_rate": 9.670685309105786e-07, "loss": 16.2049, "step": 2150},
    {"epoch": 0.87, "learning_rate": 9.120465800845723e-07, "loss": 16.3906, "step": 2160},
    {"epoch": 0.87, "learning_rate": 8.585612470542859e-07, "loss": 16.1819, "step": 2170},
    {"epoch": 0.88, "learning_rate": 8.06621575276556e-07, "loss": 16.2495, "step": 2180},
    {"epoch": 0.88, "learning_rate": 7.562363468632949e-07, "loss": 16.3325, "step": 2190},
    {"epoch": 0.88, "learning_rate": 7.074140810965724e-07, "loss": 16.3265, "step": 2200},
    {"epoch": 0.89, "learning_rate": 6.601630329881525e-07, "loss": 16.3694, "step": 2210},
    {"epoch": 0.89, "learning_rate": 6.14491191883716e-07, "loss": 16.2883, "step": 2220},
    {"epoch": 0.9, "learning_rate": 5.704062801119947e-07, "loss": 16.2978, "step": 2230},
    {"epoch": 0.9, "learning_rate": 5.279157516790545e-07, "loss": 16.2589, "step": 2240},
    {"epoch": 0.9, "learning_rate": 4.870267910079618e-07, "loss": 16.6149, "step": 2250},
    {"epoch": 0.91, "learning_rate": 4.4774631172400663e-07, "loss": 16.2879, "step": 2260},
    {"epoch": 0.91, "learning_rate": 4.100809554857343e-07, "loss": 16.2822, "step": 2270},
    {"epoch": 0.92, "learning_rate": 3.740370908619528e-07, "loss": 16.3097, "step": 2280},
    {"epoch": 0.92, "learning_rate": 3.396208122549194e-07, "loss": 16.3474, "step": 2290},
    {"epoch": 0.92, "learning_rate": 3.0683793886986943e-07, "loss": 16.2617, "step": 2300},
    {"epoch": 0.93, "learning_rate": 2.7569401373110595e-07, "loss": 16.1619, "step": 2310},
    {"epoch": 0.93, "learning_rate": 2.461943027447555e-07, "loss": 16.2584, "step": 2320},
    {"epoch": 0.94, "learning_rate": 2.1834379380839655e-07, "loss": 16.1723, "step": 2330},
    {"epoch": 0.94, "learning_rate": 1.921471959676957e-07, "loss": 16.2365, "step": 2340},
    {"epoch": 0.94, "learning_rate": 1.676089386201818e-07, "loss": 16.2976, "step": 2350},
    {"epoch": 0.95, "learning_rate": 1.4473317076631355e-07, "loss": 16.3046, "step": 2360},
    {"epoch": 0.95, "learning_rate": 1.2352376030795753e-07, "loss": 16.2992, "step": 2370},
    {"epoch": 0.96, "learning_rate": 1.0398429339438353e-07, "loss": 16.2474, "step": 2380},
    {"epoch": 0.96, "learning_rate": 8.61180738159173e-08, "loss": 16.2852, "step": 2390},
    {"epoch": 0.96, "learning_rate": 6.992812244532188e-08, "loss": 16.3501, "step": 2400}
  ],
  "logging_steps": 10,
  "max_steps": 2491,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}