|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.99809427010545, |
|
"eval_steps": 500, |
|
"global_step": 9835, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999987245581571e-05, |
|
"loss": 1.7374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9999489824564244e-05, |
|
"loss": 1.5943, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9998852110149786e-05, |
|
"loss": 1.4529, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.999795931907928e-05, |
|
"loss": 1.4299, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.999681146046236e-05, |
|
"loss": 1.2767, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9995408546011235e-05, |
|
"loss": 1.2893, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9993750590040575e-05, |
|
"loss": 1.297, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9991837609467425e-05, |
|
"loss": 1.1932, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.998966962381092e-05, |
|
"loss": 1.1411, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.998724665519219e-05, |
|
"loss": 1.1558, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9984568728334075e-05, |
|
"loss": 1.1515, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.998163587056089e-05, |
|
"loss": 1.0365, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.997844811179817e-05, |
|
"loss": 1.0264, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9975005484572305e-05, |
|
"loss": 1.0283, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.997130802401027e-05, |
|
"loss": 1.0356, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9967355767839225e-05, |
|
"loss": 1.0245, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.996314875638616e-05, |
|
"loss": 1.0557, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.995868703257745e-05, |
|
"loss": 1.0417, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.995397064193846e-05, |
|
"loss": 1.0008, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9948999632593055e-05, |
|
"loss": 1.035, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.994377405526308e-05, |
|
"loss": 1.0543, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9938293963267914e-05, |
|
"loss": 1.0384, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.993255941252385e-05, |
|
"loss": 1.0359, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9926570461543586e-05, |
|
"loss": 1.0484, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.992032717143559e-05, |
|
"loss": 0.9157, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9913829605903486e-05, |
|
"loss": 1.0029, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.990707783124541e-05, |
|
"loss": 1.0332, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.990007191635334e-05, |
|
"loss": 0.9525, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.989281193271236e-05, |
|
"loss": 0.9969, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9885297954399964e-05, |
|
"loss": 0.9313, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.98775300580853e-05, |
|
"loss": 0.9715, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.986950832302836e-05, |
|
"loss": 0.9459, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9861232831079194e-05, |
|
"loss": 0.9614, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.985270366667708e-05, |
|
"loss": 0.9995, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9843920916849645e-05, |
|
"loss": 0.9779, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9834884671211976e-05, |
|
"loss": 0.926, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.982559502196572e-05, |
|
"loss": 1.0427, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.981605206389814e-05, |
|
"loss": 0.9495, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9806255894381135e-05, |
|
"loss": 0.9446, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.979620661337026e-05, |
|
"loss": 0.9732, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.978590432340371e-05, |
|
"loss": 0.8346, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9775349129601243e-05, |
|
"loss": 0.9642, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9764541139663176e-05, |
|
"loss": 0.8718, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.975348046386917e-05, |
|
"loss": 0.9406, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.974216721507725e-05, |
|
"loss": 0.8534, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.973060150872253e-05, |
|
"loss": 0.9735, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.971878346281609e-05, |
|
"loss": 0.9225, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.970671319794378e-05, |
|
"loss": 0.8771, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.969439083726496e-05, |
|
"loss": 0.9068, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.968181650651127e-05, |
|
"loss": 0.9524, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.966899033398533e-05, |
|
"loss": 0.8811, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.965591245055944e-05, |
|
"loss": 0.9009, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.964258298967423e-05, |
|
"loss": 0.8791, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.962900208733734e-05, |
|
"loss": 0.9129, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9615169882121945e-05, |
|
"loss": 0.9973, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.960108651516545e-05, |
|
"loss": 1.0256, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.958675213016798e-05, |
|
"loss": 0.8638, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9572166873390925e-05, |
|
"loss": 0.8928, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.955733089365546e-05, |
|
"loss": 0.8579, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9542244342341026e-05, |
|
"loss": 0.9767, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9526907373383766e-05, |
|
"loss": 0.8605, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.951132014327498e-05, |
|
"loss": 0.883, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.949548281105951e-05, |
|
"loss": 0.9282, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.947939553833412e-05, |
|
"loss": 0.8909, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.9463058489245874e-05, |
|
"loss": 0.8618, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.9446471830490396e-05, |
|
"loss": 0.8674, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.942963573131025e-05, |
|
"loss": 0.9487, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.941255036349316e-05, |
|
"loss": 0.959, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.9395215901370265e-05, |
|
"loss": 0.9217, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.937763252181434e-05, |
|
"loss": 0.9214, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.935980040423803e-05, |
|
"loss": 0.9023, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.934171973059196e-05, |
|
"loss": 0.863, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.9323390685362915e-05, |
|
"loss": 0.9249, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.930481345557193e-05, |
|
"loss": 0.9269, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.928598823077243e-05, |
|
"loss": 0.8938, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.926691520304824e-05, |
|
"loss": 0.9187, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.924759456701167e-05, |
|
"loss": 0.8909, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.922802651980149e-05, |
|
"loss": 0.8665, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.920821126108096e-05, |
|
"loss": 0.9484, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.9188148993035754e-05, |
|
"loss": 0.9234, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.916783992037193e-05, |
|
"loss": 0.9564, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.914728425031379e-05, |
|
"loss": 0.8569, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.912648219260188e-05, |
|
"loss": 0.972, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.910543395949067e-05, |
|
"loss": 0.7543, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.908413976574655e-05, |
|
"loss": 0.8996, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9062599828645574e-05, |
|
"loss": 0.7431, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.9040814367971236e-05, |
|
"loss": 0.8302, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.901878360601223e-05, |
|
"loss": 0.9348, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.899650776756023e-05, |
|
"loss": 0.8658, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.897398707990749e-05, |
|
"loss": 0.8688, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.895122177284465e-05, |
|
"loss": 0.8237, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8928212078658315e-05, |
|
"loss": 0.8714, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.8904958232128687e-05, |
|
"loss": 0.8695, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.888146047052721e-05, |
|
"loss": 0.8811, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.88577190336141e-05, |
|
"loss": 0.954, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.883373416363593e-05, |
|
"loss": 0.9335, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.8809506105323164e-05, |
|
"loss": 0.864, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.878503510588765e-05, |
|
"loss": 0.9355, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.876032141502004e-05, |
|
"loss": 0.885, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.8735365284887374e-05, |
|
"loss": 0.8106, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.8710166970130376e-05, |
|
"loss": 0.7904, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.8684726727860944e-05, |
|
"loss": 0.8767, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.865904481765945e-05, |
|
"loss": 0.9206, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.863312150157216e-05, |
|
"loss": 0.905, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.8606957044108556e-05, |
|
"loss": 0.8438, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.858055171223856e-05, |
|
"loss": 0.8928, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.855390577538991e-05, |
|
"loss": 0.8943, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.8527019505445346e-05, |
|
"loss": 0.8802, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.849989317673984e-05, |
|
"loss": 0.8525, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.847252706605786e-05, |
|
"loss": 0.8939, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.844492145263044e-05, |
|
"loss": 0.8652, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.8417076618132426e-05, |
|
"loss": 0.8308, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.838899284667956e-05, |
|
"loss": 0.8173, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.836067042482557e-05, |
|
"loss": 0.9149, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.833210964155928e-05, |
|
"loss": 0.7346, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.8303310788301624e-05, |
|
"loss": 1.0617, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.827427415890271e-05, |
|
"loss": 0.8963, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.82450000496388e-05, |
|
"loss": 0.9368, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.821548875920927e-05, |
|
"loss": 0.9157, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.818574058873361e-05, |
|
"loss": 0.7684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.8155755841748296e-05, |
|
"loss": 0.8846, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.8125534824203754e-05, |
|
"loss": 0.9657, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8095077844461176e-05, |
|
"loss": 0.6925, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.8064385213289414e-05, |
|
"loss": 0.8198, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.8033457243861804e-05, |
|
"loss": 0.8938, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.800229425175294e-05, |
|
"loss": 0.7988, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.7970896554935506e-05, |
|
"loss": 0.933, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.7939264473776995e-05, |
|
"loss": 0.8274, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.790739833103644e-05, |
|
"loss": 0.946, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.787529845186114e-05, |
|
"loss": 0.8698, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.784296516378333e-05, |
|
"loss": 0.9597, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.7810398796716825e-05, |
|
"loss": 0.7929, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.777759968295369e-05, |
|
"loss": 0.9, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.774456815716083e-05, |
|
"loss": 0.7947, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7711304556376555e-05, |
|
"loss": 0.948, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.767780922000718e-05, |
|
"loss": 0.8153, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.7644082489823525e-05, |
|
"loss": 0.9613, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.761012470995746e-05, |
|
"loss": 0.861, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.7575936226898366e-05, |
|
"loss": 0.8061, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.754151738948962e-05, |
|
"loss": 0.8082, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.750686854892503e-05, |
|
"loss": 0.8568, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.747199005874524e-05, |
|
"loss": 0.8792, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.7436882274834135e-05, |
|
"loss": 0.9441, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.7401545555415204e-05, |
|
"loss": 0.8254, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.73659802610479e-05, |
|
"loss": 0.9127, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.733018675462394e-05, |
|
"loss": 0.8423, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.729416540136361e-05, |
|
"loss": 0.8832, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.725791656881203e-05, |
|
"loss": 0.8718, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.722144062683543e-05, |
|
"loss": 0.7659, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.7184737947617354e-05, |
|
"loss": 0.793, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.714780890565485e-05, |
|
"loss": 0.8891, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.71106538777547e-05, |
|
"loss": 1.041, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.707327324302951e-05, |
|
"loss": 0.8006, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.703566738289389e-05, |
|
"loss": 0.9304, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.699783668106054e-05, |
|
"loss": 0.9484, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.695978152353634e-05, |
|
"loss": 0.8737, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.69215022986184e-05, |
|
"loss": 0.8265, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.688299939689015e-05, |
|
"loss": 0.9525, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.684427321121726e-05, |
|
"loss": 0.8764, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.6805324136743714e-05, |
|
"loss": 0.9115, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.676615257088776e-05, |
|
"loss": 0.8334, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.672675891333782e-05, |
|
"loss": 0.8515, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.668714356604845e-05, |
|
"loss": 0.9023, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.664730693323622e-05, |
|
"loss": 0.8983, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.660724942137561e-05, |
|
"loss": 0.8063, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.656697143919482e-05, |
|
"loss": 0.91, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.6526473397671644e-05, |
|
"loss": 0.8909, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.6485755710029256e-05, |
|
"loss": 0.8485, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.644481879173199e-05, |
|
"loss": 0.793, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.640366306048113e-05, |
|
"loss": 0.9396, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.63622889362106e-05, |
|
"loss": 0.8538, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.63206968410827e-05, |
|
"loss": 0.8986, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.627888719948385e-05, |
|
"loss": 0.9277, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.623686043802016e-05, |
|
"loss": 0.8013, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.619461698551315e-05, |
|
"loss": 0.8565, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.6152157272995355e-05, |
|
"loss": 0.7764, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.610948173370594e-05, |
|
"loss": 0.8674, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.606659080308624e-05, |
|
"loss": 0.7947, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.6023484918775364e-05, |
|
"loss": 0.8766, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.598016452060569e-05, |
|
"loss": 0.8197, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.593663005059841e-05, |
|
"loss": 0.9353, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.589288195295901e-05, |
|
"loss": 0.8794, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.584892067407272e-05, |
|
"loss": 0.844, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.580474666249997e-05, |
|
"loss": 0.8291, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.576036036897182e-05, |
|
"loss": 0.7954, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.571576224638536e-05, |
|
"loss": 0.9331, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.56709527497991e-05, |
|
"loss": 0.8131, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.562593233642828e-05, |
|
"loss": 0.8134, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5580701465640254e-05, |
|
"loss": 0.8629, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.553526059894978e-05, |
|
"loss": 0.8637, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.548961020001432e-05, |
|
"loss": 0.8737, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.544375073462932e-05, |
|
"loss": 0.8404, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.539768267072341e-05, |
|
"loss": 0.9458, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.535140647835369e-05, |
|
"loss": 0.8777, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.5304922629700896e-05, |
|
"loss": 0.8794, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.525823159906459e-05, |
|
"loss": 0.8848, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.521133386285833e-05, |
|
"loss": 0.8398, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.5164229899604796e-05, |
|
"loss": 0.8368, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.51169201899309e-05, |
|
"loss": 0.7742, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.506940521656293e-05, |
|
"loss": 0.875, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.502168546432155e-05, |
|
"loss": 0.8689, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.497376142011693e-05, |
|
"loss": 0.8329, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.492563357294369e-05, |
|
"loss": 0.804, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.487730241387602e-05, |
|
"loss": 0.7985, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.482876843606257e-05, |
|
"loss": 0.8019, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.478003213472146e-05, |
|
"loss": 0.7925, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.473109400713525e-05, |
|
"loss": 0.8566, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.468195455264581e-05, |
|
"loss": 0.7679, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.463261427264928e-05, |
|
"loss": 0.7556, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.458307367059092e-05, |
|
"loss": 0.7138, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.4533333251959985e-05, |
|
"loss": 0.893, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.448339352428456e-05, |
|
"loss": 0.9398, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.4433254997126394e-05, |
|
"loss": 0.9158, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.438291818207569e-05, |
|
"loss": 0.8109, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.4332383592745894e-05, |
|
"loss": 0.9455, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.4281651744768436e-05, |
|
"loss": 0.8938, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.42307231557875e-05, |
|
"loss": 0.7618, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4179598345454704e-05, |
|
"loss": 0.8194, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4128277835423825e-05, |
|
"loss": 0.8848, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.407676214934548e-05, |
|
"loss": 0.7657, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.402505181286175e-05, |
|
"loss": 0.8897, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.3973147353600866e-05, |
|
"loss": 0.8431, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.392104930117177e-05, |
|
"loss": 0.8565, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.386875818715874e-05, |
|
"loss": 0.8426, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.3816274545116e-05, |
|
"loss": 0.8096, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.37635989105622e-05, |
|
"loss": 0.8651, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.3710731820975e-05, |
|
"loss": 0.8658, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.36576738157856e-05, |
|
"loss": 0.8642, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.3604425436373194e-05, |
|
"loss": 0.8334, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.355098722605946e-05, |
|
"loss": 0.929, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.349735973010305e-05, |
|
"loss": 0.8318, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.344354349569398e-05, |
|
"loss": 0.7576, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.3389539071948065e-05, |
|
"loss": 0.7787, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.3335347009901314e-05, |
|
"loss": 0.7632, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.328096786250432e-05, |
|
"loss": 0.7624, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.32264021846166e-05, |
|
"loss": 0.8955, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.317165053300095e-05, |
|
"loss": 0.9177, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.311671346631774e-05, |
|
"loss": 0.9347, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.306159154511925e-05, |
|
"loss": 0.8386, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.300628533184391e-05, |
|
"loss": 0.7803, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.295079539081058e-05, |
|
"loss": 0.8833, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.289512228821282e-05, |
|
"loss": 0.896, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.283926659211306e-05, |
|
"loss": 0.8046, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.278322887243683e-05, |
|
"loss": 0.8341, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.272700970096696e-05, |
|
"loss": 0.8365, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.26706096513377e-05, |
|
"loss": 0.8236, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.2614029299028944e-05, |
|
"loss": 0.9149, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.2557269221360265e-05, |
|
"loss": 0.8772, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.250032999748508e-05, |
|
"loss": 0.8428, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.2443212208384755e-05, |
|
"loss": 0.7791, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.238591643686263e-05, |
|
"loss": 0.894, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.23284432675381e-05, |
|
"loss": 0.8412, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.2270793286840636e-05, |
|
"loss": 0.7827, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.2212967083003835e-05, |
|
"loss": 0.8868, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.215496524605936e-05, |
|
"loss": 0.8999, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.209678836783098e-05, |
|
"loss": 0.8319, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.2038437041928505e-05, |
|
"loss": 0.8147, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.1979911863741686e-05, |
|
"loss": 0.8202, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.192121343043424e-05, |
|
"loss": 0.8346, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.1862342340937655e-05, |
|
"loss": 0.8413, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.1803299195945145e-05, |
|
"loss": 0.8867, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.174408459790549e-05, |
|
"loss": 0.789, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.1684699151016896e-05, |
|
"loss": 0.7649, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.162514346122083e-05, |
|
"loss": 0.8685, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.156541813619585e-05, |
|
"loss": 0.7793, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.150552378535137e-05, |
|
"loss": 0.871, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.144546101982151e-05, |
|
"loss": 0.8534, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.1385230452458756e-05, |
|
"loss": 0.8658, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.132483269782781e-05, |
|
"loss": 0.7654, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.126426837219925e-05, |
|
"loss": 0.788, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.120353809354328e-05, |
|
"loss": 0.7899, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.114264248152342e-05, |
|
"loss": 0.8984, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.108158215749014e-05, |
|
"loss": 0.9037, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.10203577444746e-05, |
|
"loss": 0.8417, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.095896986718221e-05, |
|
"loss": 0.8844, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.089741915198632e-05, |
|
"loss": 0.8565, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.0835706226921776e-05, |
|
"loss": 0.8313, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.077383172167857e-05, |
|
"loss": 0.8312, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.0711796267595355e-05, |
|
"loss": 0.8551, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.064960049765304e-05, |
|
"loss": 0.8134, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.058724504646834e-05, |
|
"loss": 0.8246, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.052473055028726e-05, |
|
"loss": 0.7552, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.046205764697862e-05, |
|
"loss": 0.8374, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.0399226976027583e-05, |
|
"loss": 0.8721, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.0336239178529075e-05, |
|
"loss": 0.756, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.0273094897181285e-05, |
|
"loss": 0.7646, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.020979477627907e-05, |
|
"loss": 0.8254, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.014633946170742e-05, |
|
"loss": 0.843, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.0082729600934844e-05, |
|
"loss": 0.8923, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.001896584300675e-05, |
|
"loss": 0.8476, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.995504883853888e-05, |
|
"loss": 0.8202, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.98909792397106e-05, |
|
"loss": 0.8049, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.9826757700258284e-05, |
|
"loss": 0.7977, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.976238487546864e-05, |
|
"loss": 0.8494, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.9697861422172034e-05, |
|
"loss": 0.871, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.963318799873575e-05, |
|
"loss": 0.9323, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.956836526505733e-05, |
|
"loss": 0.912, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.9503393882557766e-05, |
|
"loss": 0.851, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.943827451417483e-05, |
|
"loss": 0.78, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.937300782435625e-05, |
|
"loss": 0.7798, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.930759447905298e-05, |
|
"loss": 0.8496, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.9242035145712344e-05, |
|
"loss": 0.8427, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.9176330493271285e-05, |
|
"loss": 0.8619, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.9110481192149504e-05, |
|
"loss": 0.7663, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.9044487914242646e-05, |
|
"loss": 0.7478, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.897835133291539e-05, |
|
"loss": 0.8048, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.891207212299467e-05, |
|
"loss": 0.8875, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.884565096076269e-05, |
|
"loss": 0.8649, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.877908852395008e-05, |
|
"loss": 0.8313, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.8712385491729e-05, |
|
"loss": 0.8779, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.864554254470613e-05, |
|
"loss": 0.7845, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.857856036491582e-05, |
|
"loss": 0.8581, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.851143963581306e-05, |
|
"loss": 0.8512, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.844418104226656e-05, |
|
"loss": 0.7689, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.837678527055168e-05, |
|
"loss": 0.8677, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.830925300834356e-05, |
|
"loss": 0.7601, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.824158494470996e-05, |
|
"loss": 0.8637, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.817378177010431e-05, |
|
"loss": 0.7152, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.8105844176358674e-05, |
|
"loss": 0.9339, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.803777285667665e-05, |
|
"loss": 0.8261, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.7969568505626305e-05, |
|
"loss": 0.896, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.7901231819133105e-05, |
|
"loss": 0.9026, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.783276349447281e-05, |
|
"loss": 0.8242, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.7764164230264357e-05, |
|
"loss": 0.8292, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.7695434726462704e-05, |
|
"loss": 0.9249, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.762657568435174e-05, |
|
"loss": 0.9214, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.7557587806537094e-05, |
|
"loss": 0.8414, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.748847179693897e-05, |
|
"loss": 0.7875, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.741922836078499e-05, |
|
"loss": 0.7981, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.734985820460293e-05, |
|
"loss": 0.8205, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.728036203621361e-05, |
|
"loss": 0.8429, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.72107405647236e-05, |
|
"loss": 0.8432, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.7140994500517995e-05, |
|
"loss": 0.86, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.707112455525318e-05, |
|
"loss": 0.7461, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.7001131441849586e-05, |
|
"loss": 0.8739, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.693101587448436e-05, |
|
"loss": 0.8064, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.6860778568584145e-05, |
|
"loss": 0.8171, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.6790420240817715e-05, |
|
"loss": 0.7549, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.671994160908872e-05, |
|
"loss": 0.8102, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.6649343392528335e-05, |
|
"loss": 0.8086, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.657862631148791e-05, |
|
"loss": 0.9243, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.650779108753163e-05, |
|
"loss": 0.8874, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.6436838443429175e-05, |
|
"loss": 0.7962, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.636576910314831e-05, |
|
"loss": 0.7621, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.6294583791847514e-05, |
|
"loss": 0.8126, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.622328323586859e-05, |
|
"loss": 0.8358, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.615186816272925e-05, |
|
"loss": 0.8677, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.608033930111564e-05, |
|
"loss": 0.8286, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.600869738087501e-05, |
|
"loss": 0.8292, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.5936943133008183e-05, |
|
"loss": 0.8448, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.5865077289662114e-05, |
|
"loss": 0.7162, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.5793100584122426e-05, |
|
"loss": 0.7949, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.572101375080594e-05, |
|
"loss": 0.8263, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.564881752525317e-05, |
|
"loss": 0.9174, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.5576512644120804e-05, |
|
"loss": 0.8188, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.550409984517421e-05, |
|
"loss": 0.81, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.5431579867279905e-05, |
|
"loss": 0.8592, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.5358953450397995e-05, |
|
"loss": 0.8419, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.528622133557465e-05, |
|
"loss": 0.7349, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.521338426493453e-05, |
|
"loss": 0.8005, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.514044298167322e-05, |
|
"loss": 0.7567, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.506739823004963e-05, |
|
"loss": 0.7951, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.4994250755378434e-05, |
|
"loss": 0.8423, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.492100130402242e-05, |
|
"loss": 0.844, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.4847650623384914e-05, |
|
"loss": 0.8515, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.477419946190213e-05, |
|
"loss": 0.906, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.470064856903555e-05, |
|
"loss": 0.8309, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.462699869526427e-05, |
|
"loss": 0.8666, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.455325059207732e-05, |
|
"loss": 0.7585, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.4479405011966056e-05, |
|
"loss": 0.8133, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.440546270841639e-05, |
|
"loss": 0.9355, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.4331424435901214e-05, |
|
"loss": 0.7332, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.4257290949872614e-05, |
|
"loss": 0.8603, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.418306300675416e-05, |
|
"loss": 0.8269, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.410874136393327e-05, |
|
"loss": 0.799, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.403432677975341e-05, |
|
"loss": 0.8898, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.395982001350637e-05, |
|
"loss": 0.7441, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.3885221825424537e-05, |
|
"loss": 0.8466, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.381053297667309e-05, |
|
"loss": 0.8273, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.3735754229342326e-05, |
|
"loss": 0.8397, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.3660886346439765e-05, |
|
"loss": 0.8455, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.358593009188247e-05, |
|
"loss": 0.8254, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.351088623048918e-05, |
|
"loss": 0.8374, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.3435755527972536e-05, |
|
"loss": 0.781, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.336053875093128e-05, |
|
"loss": 0.8414, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.32852366668424e-05, |
|
"loss": 0.7875, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.320985004405334e-05, |
|
"loss": 0.7889, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.3134379651774114e-05, |
|
"loss": 0.894, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.30588262600695e-05, |
|
"loss": 0.8475, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.298319063985116e-05, |
|
"loss": 0.8024, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.2907473562869754e-05, |
|
"loss": 0.8467, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.283167580170712e-05, |
|
"loss": 0.7829, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.275579812976835e-05, |
|
"loss": 0.8466, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.2679841321273895e-05, |
|
"loss": 0.7958, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.260380615125171e-05, |
|
"loss": 0.7956, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.252769339552927e-05, |
|
"loss": 0.8578, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.245150383072573e-05, |
|
"loss": 0.8806, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.2375238234243965e-05, |
|
"loss": 0.8477, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.229889738426264e-05, |
|
"loss": 0.7173, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.222248205972827e-05, |
|
"loss": 0.8259, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2145993040347264e-05, |
|
"loss": 0.7454, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2069431106577995e-05, |
|
"loss": 0.8054, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.199279703962282e-05, |
|
"loss": 0.7146, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.1916091621420104e-05, |
|
"loss": 0.8322, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.183931563463624e-05, |
|
"loss": 0.7718, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.176246986265767e-05, |
|
"loss": 0.9118, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.1685555089582906e-05, |
|
"loss": 0.8052, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.1608572100214526e-05, |
|
"loss": 0.8209, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.15315216800511e-05, |
|
"loss": 0.7682, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.145440461527929e-05, |
|
"loss": 0.8159, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.137722169276574e-05, |
|
"loss": 0.8396, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.129997370004909e-05, |
|
"loss": 0.7799, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.122266142533191e-05, |
|
"loss": 0.8488, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.114528565747268e-05, |
|
"loss": 0.7617, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.1067847185977735e-05, |
|
"loss": 0.8345, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.099034680099321e-05, |
|
"loss": 0.7212, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.091278529329698e-05, |
|
"loss": 0.8031, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.0835163454290574e-05, |
|
"loss": 0.8333, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.075748207599114e-05, |
|
"loss": 0.7761, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.06797419510233e-05, |
|
"loss": 0.7531, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.060194387261114e-05, |
|
"loss": 0.8292, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.0524088634570035e-05, |
|
"loss": 0.82, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.0446177031298627e-05, |
|
"loss": 0.8561, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.036820985777067e-05, |
|
"loss": 0.9112, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.0290187909526914e-05, |
|
"loss": 0.8364, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.0212111982667024e-05, |
|
"loss": 0.7643, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.013398287384144e-05, |
|
"loss": 0.8117, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.0055801380243224e-05, |
|
"loss": 0.8721, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.9977568299599973e-05, |
|
"loss": 0.76, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.989928443016564e-05, |
|
"loss": 0.7813, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.9820950570712414e-05, |
|
"loss": 0.8918, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.9742567520522534e-05, |
|
"loss": 0.8043, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.966413607938019e-05, |
|
"loss": 0.8443, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.9585657047563315e-05, |
|
"loss": 0.7935, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.9507131225835432e-05, |
|
"loss": 0.7864, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.9428559415437496e-05, |
|
"loss": 0.8375, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.93499424180797e-05, |
|
"loss": 0.8113, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.9271281035933313e-05, |
|
"loss": 0.7886, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.9192576071622473e-05, |
|
"loss": 0.9166, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.9113828328216027e-05, |
|
"loss": 0.8631, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.9035038609219306e-05, |
|
"loss": 0.861, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.8956207718565942e-05, |
|
"loss": 0.8465, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.8877336460609673e-05, |
|
"loss": 0.7999, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.879842564011612e-05, |
|
"loss": 0.8585, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.871947606225458e-05, |
|
"loss": 0.885, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.8640488532589803e-05, |
|
"loss": 0.736, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.8561463857073804e-05, |
|
"loss": 0.7454, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.8482402842037614e-05, |
|
"loss": 0.8043, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.8403306294183026e-05, |
|
"loss": 0.74, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.8324175020574424e-05, |
|
"loss": 0.8533, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.8245009828630502e-05, |
|
"loss": 0.735, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.816581152611606e-05, |
|
"loss": 0.867, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.808658092113372e-05, |
|
"loss": 0.8848, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.8007318822115713e-05, |
|
"loss": 0.7563, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.792802603781562e-05, |
|
"loss": 0.762, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.7848703377300118e-05, |
|
"loss": 0.7755, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.776935164994074e-05, |
|
"loss": 0.8489, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.7689971665405578e-05, |
|
"loss": 0.7536, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.761056423365107e-05, |
|
"loss": 0.7741, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.7531130164913703e-05, |
|
"loss": 0.7624, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.7451670269701767e-05, |
|
"loss": 0.8003, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.737218535878705e-05, |
|
"loss": 0.8823, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.7292676243196608e-05, |
|
"loss": 0.8301, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.7213143734204462e-05, |
|
"loss": 0.8486, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.7133588643323334e-05, |
|
"loss": 0.7807, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.7054011782296356e-05, |
|
"loss": 0.8104, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.6974413963088797e-05, |
|
"loss": 0.847, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.6894795997879762e-05, |
|
"loss": 0.8375, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.6815158699053932e-05, |
|
"loss": 0.834, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.6735502879193264e-05, |
|
"loss": 0.7997, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.665582935106866e-05, |
|
"loss": 0.7941, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.6576138927631742e-05, |
|
"loss": 0.8244, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.6496432422006522e-05, |
|
"loss": 0.8158, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.641671064748109e-05, |
|
"loss": 0.8289, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.633697441749935e-05, |
|
"loss": 0.8029, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.6257224545652688e-05, |
|
"loss": 0.8135, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.6177461845671685e-05, |
|
"loss": 0.8097, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.6097687131417843e-05, |
|
"loss": 0.8128, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.6017901216875217e-05, |
|
"loss": 0.8145, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.5938104916142155e-05, |
|
"loss": 0.7725, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.585829904342299e-05, |
|
"loss": 0.8902, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.577848441301971e-05, |
|
"loss": 0.8069, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.569866183932368e-05, |
|
"loss": 0.781, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.5618832136807297e-05, |
|
"loss": 0.7496, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.553899612001571e-05, |
|
"loss": 0.8554, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.5459154603558483e-05, |
|
"loss": 0.8187, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.5379308402101303e-05, |
|
"loss": 0.7848, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.529945833035767e-05, |
|
"loss": 0.7408, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.521960520308056e-05, |
|
"loss": 0.7655, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.5139749835054123e-05, |
|
"loss": 0.7614, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.5059893041085392e-05, |
|
"loss": 0.7382, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.4980035635995943e-05, |
|
"loss": 0.7321, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.4900178434613566e-05, |
|
"loss": 0.7464, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.4820322251764e-05, |
|
"loss": 0.7925, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.4740467902262583e-05, |
|
"loss": 0.8016, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.466061620090594e-05, |
|
"loss": 0.8147, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.4580767962463687e-05, |
|
"loss": 0.8129, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.4500924001670088e-05, |
|
"loss": 0.8099, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.4421085133215787e-05, |
|
"loss": 0.8304, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.4341252171739436e-05, |
|
"loss": 0.9241, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.4261425931819437e-05, |
|
"loss": 0.7888, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.4181607227965604e-05, |
|
"loss": 0.8431, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.4101796874610855e-05, |
|
"loss": 0.7654, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.40219956861029e-05, |
|
"loss": 0.8724, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.3942204476695943e-05, |
|
"loss": 0.9028, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.3862424060542357e-05, |
|
"loss": 0.7866, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.3782655251684394e-05, |
|
"loss": 0.8155, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.3702898864045876e-05, |
|
"loss": 0.766, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.362315571142385e-05, |
|
"loss": 0.7741, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.3543426607480364e-05, |
|
"loss": 0.8394, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.346371236573409e-05, |
|
"loss": 0.8572, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.3384013799552072e-05, |
|
"loss": 0.8239, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.3304331722141393e-05, |
|
"loss": 0.7008, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.32246669465409e-05, |
|
"loss": 0.7752, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.3145020285612894e-05, |
|
"loss": 0.7641, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.3065392552034857e-05, |
|
"loss": 0.8388, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.298578455829114e-05, |
|
"loss": 0.8176, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.2906197116664653e-05, |
|
"loss": 0.7676, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.282663103922863e-05, |
|
"loss": 0.7121, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.2747087137838307e-05, |
|
"loss": 0.7567, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.2667566224122648e-05, |
|
"loss": 0.8355, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.2588069109476057e-05, |
|
"loss": 0.7708, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2508596605050107e-05, |
|
"loss": 0.8587, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2429149521745254e-05, |
|
"loss": 0.7971, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.2349728670202582e-05, |
|
"loss": 0.7568, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.2270334860795497e-05, |
|
"loss": 0.7911, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.2190968903621498e-05, |
|
"loss": 0.8176, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.2111631608493885e-05, |
|
"loss": 0.8239, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.2032323784933505e-05, |
|
"loss": 0.8844, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.1953046242160493e-05, |
|
"loss": 0.7868, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.187379978908601e-05, |
|
"loss": 0.8765, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.1794585234303993e-05, |
|
"loss": 0.7886, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.1715403386082907e-05, |
|
"loss": 0.861, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.1636255052357497e-05, |
|
"loss": 0.8678, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.1557141040720515e-05, |
|
"loss": 0.8169, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.147806215841454e-05, |
|
"loss": 0.7597, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.1399019212323697e-05, |
|
"loss": 0.8513, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.1320013008965432e-05, |
|
"loss": 0.7976, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.124104435448228e-05, |
|
"loss": 0.7306, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.1162114054633663e-05, |
|
"loss": 0.8193, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.1083222914787623e-05, |
|
"loss": 0.8915, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.1004371739912654e-05, |
|
"loss": 0.7684, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.0925561334569464e-05, |
|
"loss": 0.7708, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.0846792502902753e-05, |
|
"loss": 0.7513, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.0768066048633033e-05, |
|
"loss": 0.7225, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.0689382775048418e-05, |
|
"loss": 0.7696, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.061074348499642e-05, |
|
"loss": 0.751, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.0532148980875768e-05, |
|
"loss": 0.7263, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.045360006462822e-05, |
|
"loss": 0.8106, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.037509753773037e-05, |
|
"loss": 0.7924, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0296642201185473e-05, |
|
"loss": 0.8711, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.02182348555153e-05, |
|
"loss": 0.8576, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0139876300751904e-05, |
|
"loss": 0.8587, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.0061567336429527e-05, |
|
"loss": 0.8752, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.9983308761576407e-05, |
|
"loss": 0.8727, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.990510137470664e-05, |
|
"loss": 0.7785, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.9826945973812005e-05, |
|
"loss": 0.7669, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9748843356353856e-05, |
|
"loss": 0.8083, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9670794319254963e-05, |
|
"loss": 0.813, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.9592799658891385e-05, |
|
"loss": 0.815, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.951486017108436e-05, |
|
"loss": 0.9141, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.9436976651092144e-05, |
|
"loss": 0.7419, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.9359149893601944e-05, |
|
"loss": 0.8557, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.9281380692721786e-05, |
|
"loss": 0.871, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.9203669841972416e-05, |
|
"loss": 0.8396, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9126018134279193e-05, |
|
"loss": 0.8437, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.904842636196402e-05, |
|
"loss": 0.7932, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8970895316737238e-05, |
|
"loss": 0.6945, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8893425789689575e-05, |
|
"loss": 0.738, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8816018571284017e-05, |
|
"loss": 0.7109, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8738674451347818e-05, |
|
"loss": 0.8739, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.866139421906439e-05, |
|
"loss": 0.8342, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.858417866296528e-05, |
|
"loss": 0.7657, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.850702857092208e-05, |
|
"loss": 0.8464, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.8429944730138448e-05, |
|
"loss": 0.7853, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.8352927927142026e-05, |
|
"loss": 0.7642, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.8275978947776436e-05, |
|
"loss": 0.8316, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.819909857719328e-05, |
|
"loss": 0.8784, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.8122287599844066e-05, |
|
"loss": 0.7567, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.8045546799472286e-05, |
|
"loss": 0.7579, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.796887695910535e-05, |
|
"loss": 0.7957, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.7892278861046648e-05, |
|
"loss": 0.7458, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.7815753286867533e-05, |
|
"loss": 0.7479, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.7739301017399355e-05, |
|
"loss": 0.7617, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.7662922832725514e-05, |
|
"loss": 0.8818, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.7586619512173458e-05, |
|
"loss": 0.8075, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.751039183430678e-05, |
|
"loss": 0.786, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.7434240576917226e-05, |
|
"loss": 0.8369, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.735816651701681e-05, |
|
"loss": 0.8883, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7282170430829837e-05, |
|
"loss": 0.6625, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.7206253093785012e-05, |
|
"loss": 0.838, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.713041528050753e-05, |
|
"loss": 0.6941, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.705465776481114e-05, |
|
"loss": 0.7716, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.6978981319690298e-05, |
|
"loss": 0.8099, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.6903386717312236e-05, |
|
"loss": 0.7046, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.682787472900912e-05, |
|
"loss": 0.8008, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.6752446125270117e-05, |
|
"loss": 0.8587, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.6677101675733625e-05, |
|
"loss": 0.8083, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.6601842149179347e-05, |
|
"loss": 0.7906, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.6526668313520478e-05, |
|
"loss": 0.7089, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.6451580935795863e-05, |
|
"loss": 0.8412, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.637658078216217e-05, |
|
"loss": 0.7232, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.6301668617886072e-05, |
|
"loss": 0.8772, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.622684520733644e-05, |
|
"loss": 0.8135, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.615211131397654e-05, |
|
"loss": 0.7315, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.6077467700356256e-05, |
|
"loss": 0.8048, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.6002915128104284e-05, |
|
"loss": 0.684, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.592845435792039e-05, |
|
"loss": 0.7887, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.585408614956763e-05, |
|
"loss": 0.7469, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.5779811261864604e-05, |
|
"loss": 0.7575, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.5705630452677707e-05, |
|
"loss": 0.7354, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.56315444789134e-05, |
|
"loss": 0.7734, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.555755409651049e-05, |
|
"loss": 0.804, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.5483660060432432e-05, |
|
"loss": 0.7408, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.5409863124659562e-05, |
|
"loss": 0.819, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.5336164042181494e-05, |
|
"loss": 0.7686, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.5262563564989374e-05, |
|
"loss": 0.8118, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.5189062444068225e-05, |
|
"loss": 0.7973, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.5115661429389294e-05, |
|
"loss": 0.7622, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.5042361269902383e-05, |
|
"loss": 0.875, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.4969162713528212e-05, |
|
"loss": 0.7767, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.4896066507150804e-05, |
|
"loss": 0.7193, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.482307339660983e-05, |
|
"loss": 0.7978, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.4750184126693028e-05, |
|
"loss": 0.7754, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.4677399441128603e-05, |
|
"loss": 0.8816, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.4604720082577622e-05, |
|
"loss": 0.8264, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.4532146792626449e-05, |
|
"loss": 0.7528, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.4459680311779159e-05, |
|
"loss": 0.7898, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.438732137945001e-05, |
|
"loss": 0.8136, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.4315070733955888e-05, |
|
"loss": 0.7874, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.4242929112508769e-05, |
|
"loss": 0.7637, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.417089725120817e-05, |
|
"loss": 0.8007, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.4098975885033713e-05, |
|
"loss": 0.7594, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.4027165747837548e-05, |
|
"loss": 0.7395, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.3955467572336905e-05, |
|
"loss": 0.7882, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.3883882090106611e-05, |
|
"loss": 0.7563, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.381241003157162e-05, |
|
"loss": 0.7223, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.3741052125999564e-05, |
|
"loss": 0.7144, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.3669809101493314e-05, |
|
"loss": 0.7936, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.3605789203581502e-05, |
|
"loss": 0.7189, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.3534766454819494e-05, |
|
"loss": 0.7533, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.3463860691966308e-05, |
|
"loss": 0.7467, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.3393072638511351e-05, |
|
"loss": 0.7655, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.332240301674299e-05, |
|
"loss": 0.7911, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.3251852547741161e-05, |
|
"loss": 0.8208, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.3181421951370035e-05, |
|
"loss": 0.8012, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.311111194627064e-05, |
|
"loss": 0.8289, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.304092324985356e-05, |
|
"loss": 0.7399, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.2970856578291598e-05, |
|
"loss": 0.7232, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.290091264651247e-05, |
|
"loss": 0.8301, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.2831092168191517e-05, |
|
"loss": 0.7985, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.2761395855744408e-05, |
|
"loss": 0.7452, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.2691824420319895e-05, |
|
"loss": 0.8126, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.2622378571792535e-05, |
|
"loss": 0.8346, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.2553059018755454e-05, |
|
"loss": 0.7807, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.2483866468513125e-05, |
|
"loss": 0.8407, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.2414801627074144e-05, |
|
"loss": 0.8276, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.2345865199144e-05, |
|
"loss": 0.7426, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.2277057888117944e-05, |
|
"loss": 0.7461, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.220838039607376e-05, |
|
"loss": 0.7221, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2139833423764626e-05, |
|
"loss": 0.8026, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.2071417670611959e-05, |
|
"loss": 0.7952, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.2003133834698268e-05, |
|
"loss": 0.7638, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.1934982612760049e-05, |
|
"loss": 0.7576, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.186696470018066e-05, |
|
"loss": 0.7676, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.1799080790983246e-05, |
|
"loss": 0.7482, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.1731331577823617e-05, |
|
"loss": 0.8294, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.1663717751983228e-05, |
|
"loss": 0.8052, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.15962400033621e-05, |
|
"loss": 0.844, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.152889902047179e-05, |
|
"loss": 0.7958, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.1461695490428352e-05, |
|
"loss": 0.7427, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.1394630098945342e-05, |
|
"loss": 0.7311, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.1327703530326811e-05, |
|
"loss": 0.8388, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.1260916467460336e-05, |
|
"loss": 0.768, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.1194269591810018e-05, |
|
"loss": 0.7228, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.1127763583409576e-05, |
|
"loss": 0.8205, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.1061399120855375e-05, |
|
"loss": 0.6815, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.0995176881299515e-05, |
|
"loss": 0.7285, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.092909754044292e-05, |
|
"loss": 0.7713, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.086316177252844e-05, |
|
"loss": 0.8213, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.0797370250333975e-05, |
|
"loss": 0.8498, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.0731723645165603e-05, |
|
"loss": 0.6787, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.0666222626850752e-05, |
|
"loss": 0.8175, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.0600867863731321e-05, |
|
"loss": 0.8446, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.0535660022656915e-05, |
|
"loss": 0.7999, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.0470599768978004e-05, |
|
"loss": 0.7724, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.040568776653915e-05, |
|
"loss": 0.8615, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.0340924677672223e-05, |
|
"loss": 0.8512, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.0276311163189647e-05, |
|
"loss": 0.8195, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.021184788237767e-05, |
|
"loss": 0.7919, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0147535492989613e-05, |
|
"loss": 0.736, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0083374651239172e-05, |
|
"loss": 0.8283, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0019366011793732e-05, |
|
"loss": 0.814, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 9.955510227767665e-06, |
|
"loss": 0.8149, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 9.891807950715682e-06, |
|
"loss": 0.7876, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.82825983062618e-06, |
|
"loss": 0.6661, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.764866515914611e-06, |
|
"loss": 0.7429, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 9.701628653416867e-06, |
|
"loss": 0.7564, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 9.638546888382672e-06, |
|
"loss": 0.8258, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 9.575621864469006e-06, |
|
"loss": 0.848, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 9.512854223733547e-06, |
|
"loss": 0.7468, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 9.450244606628082e-06, |
|
"loss": 0.7268, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 9.38779365199202e-06, |
|
"loss": 0.896, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.325501997045847e-06, |
|
"loss": 0.8309, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.263370277384631e-06, |
|
"loss": 0.7935, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.20139912697153e-06, |
|
"loss": 0.7681, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 9.139589178131333e-06, |
|
"loss": 0.7819, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.077941061543996e-06, |
|
"loss": 0.7642, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.016455406238222e-06, |
|
"loss": 0.6481, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 8.955132839585037e-06, |
|
"loss": 0.7378, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 8.893973987291369e-06, |
|
"loss": 0.8364, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 8.832979473393693e-06, |
|
"loss": 0.7551, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 8.772149920251654e-06, |
|
"loss": 0.8637, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 8.711485948541715e-06, |
|
"loss": 0.7805, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 8.650988177250812e-06, |
|
"loss": 0.8183, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 8.590657223670059e-06, |
|
"loss": 0.8564, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 8.53049370338844e-06, |
|
"loss": 0.8067, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 8.470498230286523e-06, |
|
"loss": 0.7626, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 8.410671416530205e-06, |
|
"loss": 0.8154, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.351013872564447e-06, |
|
"loss": 0.7763, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 8.291526207107084e-06, |
|
"loss": 0.7609, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.232209027142571e-06, |
|
"loss": 0.7759, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.173062937915812e-06, |
|
"loss": 0.8389, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.114088542925993e-06, |
|
"loss": 0.7771, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.055286443920395e-06, |
|
"loss": 0.7084, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 7.99665724088828e-06, |
|
"loss": 0.8226, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 7.938201532054768e-06, |
|
"loss": 0.7129, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 7.879919913874695e-06, |
|
"loss": 0.7918, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 7.821812981026588e-06, |
|
"loss": 0.8319, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.763881326406552e-06, |
|
"loss": 0.7772, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.706125541122244e-06, |
|
"loss": 0.8371, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 7.64854621448682e-06, |
|
"loss": 0.7229, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 7.591143934012942e-06, |
|
"loss": 0.8227, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 7.5339192854067736e-06, |
|
"loss": 0.8326, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.476872852562003e-06, |
|
"loss": 0.7516, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.420005217553891e-06, |
|
"loss": 0.7943, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.3633169606333265e-06, |
|
"loss": 0.8194, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 7.306808660220909e-06, |
|
"loss": 0.8268, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.250480892901046e-06, |
|
"loss": 0.7529, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.194334233416069e-06, |
|
"loss": 0.7817, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.138369254660365e-06, |
|
"loss": 0.8199, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 7.082586527674542e-06, |
|
"loss": 0.8484, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 7.0269866216395915e-06, |
|
"loss": 0.7921, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 6.971570103871089e-06, |
|
"loss": 0.8236, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 6.9163375398134024e-06, |
|
"loss": 0.8259, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 6.8612894930339065e-06, |
|
"loss": 0.8038, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 6.806426525217266e-06, |
|
"loss": 0.8309, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 6.751749196159679e-06, |
|
"loss": 0.8441, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.69725806376317e-06, |
|
"loss": 0.7264, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.6429536840299035e-06, |
|
"loss": 0.8848, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 6.588836611056507e-06, |
|
"loss": 0.8101, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 6.534907397028409e-06, |
|
"loss": 0.724, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.481166592214225e-06, |
|
"loss": 0.8091, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.427614744960126e-06, |
|
"loss": 0.7131, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.374252401684233e-06, |
|
"loss": 0.7833, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.32108010687108e-06, |
|
"loss": 0.8134, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.268098403066022e-06, |
|
"loss": 0.7773, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.2153078308697125e-06, |
|
"loss": 0.7609, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.162708928932592e-06, |
|
"loss": 0.8762, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 6.110302233949383e-06, |
|
"loss": 0.7745, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.05808828065362e-06, |
|
"loss": 0.7402, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.006067601812187e-06, |
|
"loss": 0.7638, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 5.954240728219898e-06, |
|
"loss": 0.7565, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 5.902608188694039e-06, |
|
"loss": 0.7291, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 5.8511705100690314e-06, |
|
"loss": 0.7956, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 5.79992821719102e-06, |
|
"loss": 0.7756, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 5.7488818329125114e-06, |
|
"loss": 0.8543, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 5.698031878087071e-06, |
|
"loss": 0.6868, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 5.647378871563971e-06, |
|
"loss": 0.7887, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 5.59692333018293e-06, |
|
"loss": 0.7896, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.546665768768814e-06, |
|
"loss": 0.8122, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.496606700126397e-06, |
|
"loss": 0.8105, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.4467466350351245e-06, |
|
"loss": 0.8576, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 5.3970860822439045e-06, |
|
"loss": 0.8091, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.347625548465915e-06, |
|
"loss": 0.7318, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5.298365538373426e-06, |
|
"loss": 0.9173, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.2493065545926644e-06, |
|
"loss": 0.8607, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 5.200449097698676e-06, |
|
"loss": 0.8263, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.151793666210217e-06, |
|
"loss": 0.8193, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 5.103340756584685e-06, |
|
"loss": 0.7089, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.055090863213008e-06, |
|
"loss": 0.7857, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 5.007044478414658e-06, |
|
"loss": 0.7785, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.9592020924325936e-06, |
|
"loss": 0.8344, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.911564193428259e-06, |
|
"loss": 0.8568, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.864131267476615e-06, |
|
"loss": 0.8104, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.816903798561168e-06, |
|
"loss": 0.7698, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.769882268569037e-06, |
|
"loss": 0.7878, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.723067157286038e-06, |
|
"loss": 0.8072, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.6764589423917895e-06, |
|
"loss": 0.7909, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.630058099454823e-06, |
|
"loss": 0.8484, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.583865101927756e-06, |
|
"loss": 0.8064, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.537880421142443e-06, |
|
"loss": 0.7695, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.492104526305174e-06, |
|
"loss": 0.7681, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.44653788449188e-06, |
|
"loss": 0.7337, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.401180960643375e-06, |
|
"loss": 0.7758, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.3560342175606064e-06, |
|
"loss": 0.7611, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.311098115899936e-06, |
|
"loss": 0.762, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.266373114168445e-06, |
|
"loss": 0.7921, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.22185966871923e-06, |
|
"loss": 0.7286, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.177558233746787e-06, |
|
"loss": 0.7583, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.133469261282341e-06, |
|
"loss": 0.8479, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.089593201189259e-06, |
|
"loss": 0.8339, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.045930501158443e-06, |
|
"loss": 0.7481, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 4.00248160670377e-06, |
|
"loss": 0.8175, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.959246961157545e-06, |
|
"loss": 0.7694, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.916227005665976e-06, |
|
"loss": 0.7131, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.873422179184677e-06, |
|
"loss": 0.7284, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.83083291847417e-06, |
|
"loss": 0.8848, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.7884596580954668e-06, |
|
"loss": 0.7927, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.7463028304055987e-06, |
|
"loss": 0.884, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 3.704362865553221e-06, |
|
"loss": 0.7089, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.662640191474223e-06, |
|
"loss": 0.8192, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.621135233887363e-06, |
|
"loss": 0.8491, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.5798484162899105e-06, |
|
"loss": 0.8009, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.5387801599533475e-06, |
|
"loss": 0.8679, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.4979308839190565e-06, |
|
"loss": 0.7229, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.4573010049940403e-06, |
|
"loss": 0.8634, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.4168909377466836e-06, |
|
"loss": 0.8256, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 3.3767010945025075e-06, |
|
"loss": 0.7816, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.3367318853399775e-06, |
|
"loss": 0.7579, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 3.296983718086308e-06, |
|
"loss": 0.7629, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.257456998313302e-06, |
|
"loss": 0.8061, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.2181521293332213e-06, |
|
"loss": 0.8339, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.1790695121946627e-06, |
|
"loss": 0.677, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.140209545678463e-06, |
|
"loss": 0.7408, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.101572626293642e-06, |
|
"loss": 0.8345, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.063159148273351e-06, |
|
"loss": 0.7823, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3.024969503570843e-06, |
|
"loss": 0.8105, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.9870040818554934e-06, |
|
"loss": 0.7489, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.9492632705087926e-06, |
|
"loss": 0.7394, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 2.9117474546204283e-06, |
|
"loss": 0.8176, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.874457016984325e-06, |
|
"loss": 0.7422, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.8373923380947657e-06, |
|
"loss": 0.7723, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.80055379614248e-06, |
|
"loss": 0.7783, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.7639417670108165e-06, |
|
"loss": 0.8144, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.7275566242718846e-06, |
|
"loss": 0.6772, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.6913987391827545e-06, |
|
"loss": 0.7854, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.655468480681658e-06, |
|
"loss": 0.8284, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.6197662153842424e-06, |
|
"loss": 0.8052, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.584292307579808e-06, |
|
"loss": 0.7989, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.549047119227607e-06, |
|
"loss": 0.6619, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.5140310099531494e-06, |
|
"loss": 0.7794, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.479244337044509e-06, |
|
"loss": 0.6983, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.4446874554487216e-06, |
|
"loss": 0.8443, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.4103607177681233e-06, |
|
"loss": 0.7472, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.3762644742567786e-06, |
|
"loss": 0.7612, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.342399072816895e-06, |
|
"loss": 0.7782, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.308764858995266e-06, |
|
"loss": 0.7171, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.275362175979767e-06, |
|
"loss": 0.777, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.2421913645958304e-06, |
|
"loss": 0.7401, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.209252763302988e-06, |
|
"loss": 0.6972, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.176546708191396e-06, |
|
"loss": 0.7424, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.1440735329784273e-06, |
|
"loss": 0.7971, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.1118335690052533e-06, |
|
"loss": 0.8577, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.079827145233465e-06, |
|
"loss": 0.8388, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.048054588241721e-06, |
|
"loss": 0.7897, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.0165162222224087e-06, |
|
"loss": 0.7454, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.985212368978345e-06, |
|
"loss": 0.6974, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.954143347919482e-06, |
|
"loss": 0.8017, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.923309476059654e-06, |
|
"loss": 0.6967, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.8927110680133448e-06, |
|
"loss": 0.7471, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.8623484359924753e-06, |
|
"loss": 0.7466, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.832221889803215e-06, |
|
"loss": 0.7727, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.8023317368428272e-06, |
|
"loss": 0.6746, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.772678282096521e-06, |
|
"loss": 0.7834, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.7432618281343571e-06, |
|
"loss": 0.854, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.7140826751081417e-06, |
|
"loss": 0.7994, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.685141120748379e-06, |
|
"loss": 0.8229, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.6564374603612293e-06, |
|
"loss": 0.7353, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.6279719868254772e-06, |
|
"loss": 0.7952, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.5997449905895773e-06, |
|
"loss": 0.866, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.5717567596686661e-06, |
|
"loss": 0.8125, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.5440075796416292e-06, |
|
"loss": 0.8302, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.5164977336481896e-06, |
|
"loss": 0.7714, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.4892275023860176e-06, |
|
"loss": 0.841, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.4621971641078646e-06, |
|
"loss": 0.8062, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.4354069946187292e-06, |
|
"loss": 0.839, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.408857267273031e-06, |
|
"loss": 0.7927, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.3825482529718382e-06, |
|
"loss": 0.7557, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.3564802201600919e-06, |
|
"loss": 0.7451, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.3306534348238697e-06, |
|
"loss": 0.7812, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.305068160487674e-06, |
|
"loss": 0.7787, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.2797246582117422e-06, |
|
"loss": 0.712, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.2546231865893794e-06, |
|
"loss": 0.8208, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2297640017443213e-06, |
|
"loss": 0.7492, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.2051473573281292e-06, |
|
"loss": 0.8182, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.180773504517585e-06, |
|
"loss": 0.7812, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.1566426920121415e-06, |
|
"loss": 0.7459, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.132755166031385e-06, |
|
"loss": 0.8221, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.1091111703125157e-06, |
|
"loss": 0.6909, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.0857109461078679e-06, |
|
"loss": 0.8285, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.0625547321824385e-06, |
|
"loss": 0.8397, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.0396427648114632e-06, |
|
"loss": 0.765, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.0169752777779984e-06, |
|
"loss": 0.795, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.945525023705327e-07, |
|
"loss": 0.6929, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.723746673806377e-07, |
|
"loss": 0.8618, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.50441999100618e-07, |
|
"loss": 0.7577, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.287547213212206e-07, |
|
"loss": 0.8186, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.07313055329334e-07, |
|
"loss": 0.8268, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.861172199057466e-07, |
|
"loss": 0.8767, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.651674313228997e-07, |
|
"loss": 0.716, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.444639033426904e-07, |
|
"loss": 0.8054, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.240068472142815e-07, |
|
"loss": 0.7585, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.037964716719609e-07, |
|
"loss": 0.8554, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 7.838329829329943e-07, |
|
"loss": 0.7113, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 7.641165846955345e-07, |
|
"loss": 0.8091, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.446474781365314e-07, |
|
"loss": 0.7636, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.254258619096982e-07, |
|
"loss": 0.7905, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.083381736919054e-07, |
|
"loss": 0.796, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 6.895873273412512e-07, |
|
"loss": 0.7762, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.710845331309279e-07, |
|
"loss": 0.7687, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 6.528299798548327e-07, |
|
"loss": 0.769, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.34823853773936e-07, |
|
"loss": 0.8484, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.170663386143721e-07, |
|
"loss": 0.7223, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 5.995576155655657e-07, |
|
"loss": 0.8462, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.822978632783748e-07, |
|
"loss": 0.7518, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.652872578632867e-07, |
|
"loss": 0.7555, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.485259728886055e-07, |
|
"loss": 0.7159, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.320141793786815e-07, |
|
"loss": 0.8041, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.157520458121734e-07, |
|
"loss": 0.7807, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 4.997397381203278e-07, |
|
"loss": 0.7471, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.839774196852831e-07, |
|
"loss": 0.6955, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.6846525133840135e-07, |
|
"loss": 0.8771, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.532033913586281e-07, |
|
"loss": 0.7871, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.3819199547089073e-07, |
|
"loss": 0.794, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.234312168444804e-07, |
|
"loss": 0.7949, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.0892120609151706e-07, |
|
"loss": 0.8159, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.946621112654009e-07, |
|
"loss": 0.7887, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 3.806540778593021e-07, |
|
"loss": 0.7609, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.668972488046762e-07, |
|
"loss": 0.7927, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.5339176446980424e-07, |
|
"loss": 0.7027, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.4013776265836293e-07, |
|
"loss": 0.7591, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.271353786080261e-07, |
|
"loss": 0.8672, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.143847449890658e-07, |
|
"loss": 0.7764, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.018859919030198e-07, |
|
"loss": 0.7795, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.896392468813458e-07, |
|
"loss": 0.8484, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.7764463488413327e-07, |
|
"loss": 0.7502, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.659022782988241e-07, |
|
"loss": 0.7886, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.5441229693895786e-07, |
|
"loss": 0.7578, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.431748080429619e-07, |
|
"loss": 0.7947, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.32189926272941e-07, |
|
"loss": 0.7695, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.2145776371352288e-07, |
|
"loss": 0.8766, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.109784298707007e-07, |
|
"loss": 0.9572, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.0075203167071733e-07, |
|
"loss": 0.8072, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.9077867345898282e-07, |
|
"loss": 0.7861, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.8105845699900592e-07, |
|
"loss": 0.813, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.7159148147135596e-07, |
|
"loss": 0.714, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.623778434726414e-07, |
|
"loss": 0.8831, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.5341763701453848e-07, |
|
"loss": 0.7104, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.4471095352282804e-07, |
|
"loss": 0.7877, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.362578818364546e-07, |
|
"loss": 0.7484, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.280585082066299e-07, |
|
"loss": 0.6747, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.2011291629594746e-07, |
|
"loss": 0.7271, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.1242118717753047e-07, |
|
"loss": 0.8372, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.0498339933420476e-07, |
|
"loss": 0.8522, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.779962865769654e-08, |
|
"loss": 0.8283, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.086994844786089e-08, |
|
"loss": 0.7474, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 8.419442941192679e-08, |
|
"loss": 0.7889, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 7.77731396637893e-08, |
|
"loss": 0.7646, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 7.160614472329907e-08, |
|
"loss": 0.8362, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 6.569350751560177e-08, |
|
"loss": 0.7019, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 6.003528837049966e-08, |
|
"loss": 0.7858, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.46315450218271e-08, |
|
"loss": 0.7506, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.9482332606867746e-08, |
|
"loss": 0.7883, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.458770366578824e-08, |
|
"loss": 0.7803, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.994770814110538e-08, |
|
"loss": 0.772, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.5562393377172595e-08, |
|
"loss": 0.8655, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.1431804119705366e-08, |
|
"loss": 0.7657, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.7555982515312107e-08, |
|
"loss": 0.8572, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.3934968111075095e-08, |
|
"loss": 0.8006, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.0568797854139678e-08, |
|
"loss": 0.749, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.745750609133956e-08, |
|
"loss": 0.7909, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.4601124568849878e-08, |
|
"loss": 0.8854, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.1999682431859672e-08, |
|
"loss": 0.6958, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 9.653206224272126e-09, |
|
"loss": 0.7082, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.561719888440899e-09, |
|
"loss": 0.8519, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 5.725244764917537e-09, |
|
"loss": 0.7919, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 4.143799592240538e-09, |
|
"loss": 0.7442, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.8174005067410637e-09, |
|
"loss": 0.7768, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 1.7460610423764011e-09, |
|
"loss": 0.803, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 9.297921305967405e-10, |
|
"loss": 0.738, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 3.686021002313744e-10, |
|
"loss": 0.7374, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 6.249667740265696e-11, |
|
"loss": 0.7616, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 9835, |
|
"total_flos": 3.008290083981312e+18, |
|
"train_loss": 0.8266432806266161, |
|
"train_runtime": 62063.7747, |
|
"train_samples_per_second": 2.536, |
|
"train_steps_per_second": 0.158 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9835, |
|
"num_train_epochs": 5, |
|
"save_steps": 1000, |
|
"total_flos": 3.008290083981312e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|