|
{ |
|
"best_metric": 0.00564388744533062, |
|
"best_model_checkpoint": "output_dir\\checkpoint-9188", |
|
"epoch": 10.0, |
|
"global_step": 22970, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.978232477144103e-06, |
|
"loss": 0.5997, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.956464954288203e-06, |
|
"loss": 0.5077, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.934697431432305e-06, |
|
"loss": 0.4455, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.912929908576405e-06, |
|
"loss": 0.3683, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.891162385720507e-06, |
|
"loss": 0.2864, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.869394862864606e-06, |
|
"loss": 0.2272, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.847627340008708e-06, |
|
"loss": 0.1793, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.825859817152808e-06, |
|
"loss": 0.1467, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.80409229429691e-06, |
|
"loss": 0.1229, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.782324771441012e-06, |
|
"loss": 0.0996, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.760557248585112e-06, |
|
"loss": 0.092, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.738789725729214e-06, |
|
"loss": 0.0771, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.717022202873314e-06, |
|
"loss": 0.0597, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.695254680017416e-06, |
|
"loss": 0.0536, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.673487157161516e-06, |
|
"loss": 0.0514, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.651719634305616e-06, |
|
"loss": 0.0447, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.629952111449717e-06, |
|
"loss": 0.0444, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.608184588593819e-06, |
|
"loss": 0.0363, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.58641706573792e-06, |
|
"loss": 0.0395, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.56464954288202e-06, |
|
"loss": 0.0444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.542882020026123e-06, |
|
"loss": 0.0328, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.521114497170223e-06, |
|
"loss": 0.0337, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.499346974314325e-06, |
|
"loss": 0.0347, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.477579451458425e-06, |
|
"loss": 0.0262, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.455811928602527e-06, |
|
"loss": 0.0239, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.434044405746625e-06, |
|
"loss": 0.0259, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.412276882890727e-06, |
|
"loss": 0.021, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.390509360034828e-06, |
|
"loss": 0.0186, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.36874183717893e-06, |
|
"loss": 0.0207, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.346974314323032e-06, |
|
"loss": 0.017, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.325206791467132e-06, |
|
"loss": 0.0259, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.303439268611234e-06, |
|
"loss": 0.0299, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.281671745755334e-06, |
|
"loss": 0.0124, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.259904222899436e-06, |
|
"loss": 0.0155, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.238136700043536e-06, |
|
"loss": 0.0264, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.216369177187638e-06, |
|
"loss": 0.018, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.194601654331737e-06, |
|
"loss": 0.0163, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.172834131475838e-06, |
|
"loss": 0.0219, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.15106660861994e-06, |
|
"loss": 0.0164, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.12929908576404e-06, |
|
"loss": 0.026, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.107531562908143e-06, |
|
"loss": 0.0179, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.085764040052243e-06, |
|
"loss": 0.0177, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.063996517196345e-06, |
|
"loss": 0.0138, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.042228994340445e-06, |
|
"loss": 0.0134, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.020461471484547e-06, |
|
"loss": 0.0175, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.998025618014196, |
|
"eval_loss": 0.015513530932366848, |
|
"eval_runtime": 36.185, |
|
"eval_samples_per_second": 1015.282, |
|
"eval_steps_per_second": 15.891, |
|
"step": 2297 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.998693948628647e-06, |
|
"loss": 0.0105, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.976926425772747e-06, |
|
"loss": 0.0151, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.95515890291685e-06, |
|
"loss": 0.0155, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.93339138006095e-06, |
|
"loss": 0.016, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.911623857205051e-06, |
|
"loss": 0.0142, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.889856334349152e-06, |
|
"loss": 0.0211, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.868088811493254e-06, |
|
"loss": 0.0166, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.846321288637354e-06, |
|
"loss": 0.0195, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.824553765781456e-06, |
|
"loss": 0.0181, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.802786242925556e-06, |
|
"loss": 0.0137, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.781018720069658e-06, |
|
"loss": 0.0092, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.759251197213758e-06, |
|
"loss": 0.0153, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.737483674357858e-06, |
|
"loss": 0.0098, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.71571615150196e-06, |
|
"loss": 0.0116, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.69394862864606e-06, |
|
"loss": 0.0125, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.672181105790162e-06, |
|
"loss": 0.0169, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.650413582934263e-06, |
|
"loss": 0.0191, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.628646060078365e-06, |
|
"loss": 0.0135, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.606878537222465e-06, |
|
"loss": 0.0104, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.585111014366567e-06, |
|
"loss": 0.0074, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.563343491510667e-06, |
|
"loss": 0.0165, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.541575968654767e-06, |
|
"loss": 0.01, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.519808445798869e-06, |
|
"loss": 0.0092, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.49804092294297e-06, |
|
"loss": 0.0127, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.476273400087071e-06, |
|
"loss": 0.0123, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 8.454505877231172e-06, |
|
"loss": 0.0096, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.432738354375273e-06, |
|
"loss": 0.0095, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 8.410970831519374e-06, |
|
"loss": 0.0109, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.389203308663476e-06, |
|
"loss": 0.0086, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 8.367435785807576e-06, |
|
"loss": 0.0108, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.345668262951678e-06, |
|
"loss": 0.0077, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.323900740095778e-06, |
|
"loss": 0.0091, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.302133217239878e-06, |
|
"loss": 0.013, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.28036569438398e-06, |
|
"loss": 0.0147, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.25859817152808e-06, |
|
"loss": 0.0153, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.236830648672182e-06, |
|
"loss": 0.0103, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.215063125816283e-06, |
|
"loss": 0.0101, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.193295602960384e-06, |
|
"loss": 0.0049, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.171528080104485e-06, |
|
"loss": 0.008, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.149760557248587e-06, |
|
"loss": 0.0126, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 8.127993034392687e-06, |
|
"loss": 0.0124, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 8.106225511536789e-06, |
|
"loss": 0.0111, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.084457988680889e-06, |
|
"loss": 0.0137, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.06269046582499e-06, |
|
"loss": 0.0148, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.040922942969091e-06, |
|
"loss": 0.0154, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.019155420113191e-06, |
|
"loss": 0.0104, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9987417272652994, |
|
"eval_loss": 0.008711813017725945, |
|
"eval_runtime": 35.665, |
|
"eval_samples_per_second": 1030.086, |
|
"eval_steps_per_second": 16.122, |
|
"step": 4594 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.997387897257293e-06, |
|
"loss": 0.0077, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.975620374401394e-06, |
|
"loss": 0.0081, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.953852851545496e-06, |
|
"loss": 0.0071, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 7.932085328689596e-06, |
|
"loss": 0.0164, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 7.910317805833698e-06, |
|
"loss": 0.0096, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 7.888550282977798e-06, |
|
"loss": 0.0139, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.866782760121898e-06, |
|
"loss": 0.0068, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 7.845015237266e-06, |
|
"loss": 0.0079, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 7.8232477144101e-06, |
|
"loss": 0.0084, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 7.801480191554202e-06, |
|
"loss": 0.0008, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.779712668698302e-06, |
|
"loss": 0.01, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.757945145842404e-06, |
|
"loss": 0.0094, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.736177622986505e-06, |
|
"loss": 0.0094, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.714410100130607e-06, |
|
"loss": 0.0116, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.692642577274707e-06, |
|
"loss": 0.0055, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.670875054418809e-06, |
|
"loss": 0.0047, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.649107531562909e-06, |
|
"loss": 0.0102, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.627340008707009e-06, |
|
"loss": 0.0095, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.60557248585111e-06, |
|
"loss": 0.0136, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.583804962995211e-06, |
|
"loss": 0.0086, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 7.562037440139312e-06, |
|
"loss": 0.0024, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.5402699172834135e-06, |
|
"loss": 0.0064, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.5185023944275146e-06, |
|
"loss": 0.0091, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 7.496734871571616e-06, |
|
"loss": 0.0045, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.474967348715717e-06, |
|
"loss": 0.0031, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.453199825859818e-06, |
|
"loss": 0.0098, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.431432303003919e-06, |
|
"loss": 0.0055, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.409664780148019e-06, |
|
"loss": 0.0064, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.38789725729212e-06, |
|
"loss": 0.0085, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.366129734436221e-06, |
|
"loss": 0.0096, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.344362211580322e-06, |
|
"loss": 0.0102, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.3225946887244234e-06, |
|
"loss": 0.0069, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 7.3008271658685245e-06, |
|
"loss": 0.0069, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 7.279059643012626e-06, |
|
"loss": 0.0071, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 7.257292120156727e-06, |
|
"loss": 0.0114, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.235524597300828e-06, |
|
"loss": 0.0083, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.213757074444929e-06, |
|
"loss": 0.0101, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 7.191989551589029e-06, |
|
"loss": 0.0109, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 7.17022202873313e-06, |
|
"loss": 0.0064, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 7.148454505877231e-06, |
|
"loss": 0.0047, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 7.126686983021332e-06, |
|
"loss": 0.0076, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 7.104919460165433e-06, |
|
"loss": 0.0133, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.0831519373095345e-06, |
|
"loss": 0.0054, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 7.0613844144536355e-06, |
|
"loss": 0.0108, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.039616891597737e-06, |
|
"loss": 0.0047, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.017849368741838e-06, |
|
"loss": 0.0088, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9989708741035986, |
|
"eval_loss": 0.007210045587271452, |
|
"eval_runtime": 34.5277, |
|
"eval_samples_per_second": 1064.015, |
|
"eval_steps_per_second": 16.653, |
|
"step": 6891 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 6.996081845885939e-06, |
|
"loss": 0.0043, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 6.974314323030041e-06, |
|
"loss": 0.0019, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 6.95254680017414e-06, |
|
"loss": 0.0081, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 6.930779277318241e-06, |
|
"loss": 0.0078, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 6.909011754462342e-06, |
|
"loss": 0.0062, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 6.887244231606443e-06, |
|
"loss": 0.007, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 6.865476708750544e-06, |
|
"loss": 0.0047, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 6.8437091858946455e-06, |
|
"loss": 0.005, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 6.8219416630387466e-06, |
|
"loss": 0.0056, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 6.800174140182848e-06, |
|
"loss": 0.0024, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 6.77840661732695e-06, |
|
"loss": 0.005, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 6.756639094471051e-06, |
|
"loss": 0.0042, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 6.73487157161515e-06, |
|
"loss": 0.0057, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 6.713104048759251e-06, |
|
"loss": 0.0015, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.691336525903352e-06, |
|
"loss": 0.0017, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 6.669569003047453e-06, |
|
"loss": 0.0057, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 6.647801480191554e-06, |
|
"loss": 0.0025, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.6260339573356554e-06, |
|
"loss": 0.0075, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.6042664344797565e-06, |
|
"loss": 0.0059, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.582498911623858e-06, |
|
"loss": 0.006, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.5607313887679595e-06, |
|
"loss": 0.0053, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 6.538963865912061e-06, |
|
"loss": 0.0116, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.51719634305616e-06, |
|
"loss": 0.0038, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.495428820200261e-06, |
|
"loss": 0.0078, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 6.473661297344362e-06, |
|
"loss": 0.0071, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 6.451893774488463e-06, |
|
"loss": 0.0105, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 6.430126251632564e-06, |
|
"loss": 0.0116, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 6.408358728776665e-06, |
|
"loss": 0.0076, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 6.3865912059207665e-06, |
|
"loss": 0.0029, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 6.364823683064868e-06, |
|
"loss": 0.0098, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 6.3430561602089695e-06, |
|
"loss": 0.0084, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 6.3212886373530706e-06, |
|
"loss": 0.0068, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 6.29952111449717e-06, |
|
"loss": 0.0071, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 6.277753591641271e-06, |
|
"loss": 0.0056, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.255986068785372e-06, |
|
"loss": 0.0085, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.234218545929473e-06, |
|
"loss": 0.0014, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 6.212451023073574e-06, |
|
"loss": 0.0068, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.190683500217675e-06, |
|
"loss": 0.0091, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.168915977361776e-06, |
|
"loss": 0.0043, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.147148454505878e-06, |
|
"loss": 0.0068, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.1253809316499794e-06, |
|
"loss": 0.0046, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.1036134087940805e-06, |
|
"loss": 0.008, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 6.081845885938182e-06, |
|
"loss": 0.0076, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 6.060078363082281e-06, |
|
"loss": 0.0049, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 6.038310840226382e-06, |
|
"loss": 0.0073, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 6.016543317370483e-06, |
|
"loss": 0.0073, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9991011162501838, |
|
"eval_loss": 0.00564388744533062, |
|
"eval_runtime": 36.1615, |
|
"eval_samples_per_second": 1015.942, |
|
"eval_steps_per_second": 15.901, |
|
"step": 9188 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 5.994775794514584e-06, |
|
"loss": 0.004, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 5.973008271658685e-06, |
|
"loss": 0.0011, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.951240748802787e-06, |
|
"loss": 0.0039, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 5.929473225946888e-06, |
|
"loss": 0.0087, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 5.907705703090989e-06, |
|
"loss": 0.0047, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 5.8859381802350905e-06, |
|
"loss": 0.0026, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 5.8641706573791915e-06, |
|
"loss": 0.0054, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 5.842403134523291e-06, |
|
"loss": 0.0049, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.820635611667392e-06, |
|
"loss": 0.003, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 5.798868088811493e-06, |
|
"loss": 0.0053, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 5.777100565955594e-06, |
|
"loss": 0.0002, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 5.755333043099695e-06, |
|
"loss": 0.003, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 5.733565520243797e-06, |
|
"loss": 0.0095, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 5.711797997387898e-06, |
|
"loss": 0.0023, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 5.690030474531999e-06, |
|
"loss": 0.0033, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.6682629516761e-06, |
|
"loss": 0.0036, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5.6464954288202015e-06, |
|
"loss": 0.0078, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 5.624727905964301e-06, |
|
"loss": 0.0092, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5.602960383108402e-06, |
|
"loss": 0.0052, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 5.581192860252503e-06, |
|
"loss": 0.0018, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5.559425337396604e-06, |
|
"loss": 0.0033, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 5.537657814540706e-06, |
|
"loss": 0.0054, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 5.515890291684807e-06, |
|
"loss": 0.0069, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 5.494122768828908e-06, |
|
"loss": 0.0062, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 5.472355245973009e-06, |
|
"loss": 0.0014, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 5.45058772311711e-06, |
|
"loss": 0.0048, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 5.4288202002612114e-06, |
|
"loss": 0.0037, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 5.4070526774053125e-06, |
|
"loss": 0.0037, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 5.385285154549412e-06, |
|
"loss": 0.0042, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 5.363517631693513e-06, |
|
"loss": 0.0011, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.341750108837614e-06, |
|
"loss": 0.0035, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.319982585981716e-06, |
|
"loss": 0.0036, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 5.298215063125817e-06, |
|
"loss": 0.0063, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.276447540269918e-06, |
|
"loss": 0.0048, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5.254680017414019e-06, |
|
"loss": 0.004, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 5.23291249455812e-06, |
|
"loss": 0.0055, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 5.211144971702221e-06, |
|
"loss": 0.006, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 5.1893774488463225e-06, |
|
"loss": 0.0068, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 5.167609925990422e-06, |
|
"loss": 0.0043, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 5.145842403134523e-06, |
|
"loss": 0.0036, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 5.124074880278625e-06, |
|
"loss": 0.0007, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 5.102307357422726e-06, |
|
"loss": 0.0034, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.080539834566827e-06, |
|
"loss": 0.0063, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.058772311710928e-06, |
|
"loss": 0.0003, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 5.037004788855029e-06, |
|
"loss": 0.0042, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.01523726599913e-06, |
|
"loss": 0.0086, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9988390915482595, |
|
"eval_loss": 0.008038449101150036, |
|
"eval_runtime": 35.4168, |
|
"eval_samples_per_second": 1037.303, |
|
"eval_steps_per_second": 16.235, |
|
"step": 11485 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.9934697431432305e-06, |
|
"loss": 0.0026, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.9717022202873316e-06, |
|
"loss": 0.0005, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.949934697431433e-06, |
|
"loss": 0.0009, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 4.928167174575534e-06, |
|
"loss": 0.0026, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 4.906399651719635e-06, |
|
"loss": 0.0026, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 4.884632128863736e-06, |
|
"loss": 0.0095, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 4.862864606007837e-06, |
|
"loss": 0.0036, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.841097083151938e-06, |
|
"loss": 0.0001, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 4.819329560296039e-06, |
|
"loss": 0.0036, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.79756203744014e-06, |
|
"loss": 0.0086, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.7757945145842404e-06, |
|
"loss": 0.0046, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 4.7540269917283415e-06, |
|
"loss": 0.0015, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.732259468872443e-06, |
|
"loss": 0.0018, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 4.710491946016544e-06, |
|
"loss": 0.0041, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 4.688724423160645e-06, |
|
"loss": 0.0036, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 4.666956900304746e-06, |
|
"loss": 0.0063, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.645189377448847e-06, |
|
"loss": 0.0036, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 4.623421854592948e-06, |
|
"loss": 0.002, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.601654331737049e-06, |
|
"loss": 0.0006, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 4.57988680888115e-06, |
|
"loss": 0.0041, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 4.55811928602525e-06, |
|
"loss": 0.002, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 4.5363517631693515e-06, |
|
"loss": 0.0053, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.5145842403134525e-06, |
|
"loss": 0.0027, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 4.492816717457554e-06, |
|
"loss": 0.0056, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.471049194601655e-06, |
|
"loss": 0.002, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 4.449281671745756e-06, |
|
"loss": 0.0007, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.427514148889857e-06, |
|
"loss": 0.0026, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.405746626033958e-06, |
|
"loss": 0.0021, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 4.383979103178059e-06, |
|
"loss": 0.001, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 4.36221158032216e-06, |
|
"loss": 0.0043, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 4.340444057466261e-06, |
|
"loss": 0.0009, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 4.318676534610361e-06, |
|
"loss": 0.0073, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 4.2969090117544625e-06, |
|
"loss": 0.0039, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.2751414888985636e-06, |
|
"loss": 0.004, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 4.253373966042665e-06, |
|
"loss": 0.0024, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.231606443186766e-06, |
|
"loss": 0.0015, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 4.209838920330867e-06, |
|
"loss": 0.0015, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 4.188071397474968e-06, |
|
"loss": 0.0061, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 4.166303874619069e-06, |
|
"loss": 0.0027, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 4.14453635176317e-06, |
|
"loss": 0.0044, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 4.122768828907271e-06, |
|
"loss": 0.004, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 4.101001306051371e-06, |
|
"loss": 0.0018, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 4.0792337831954724e-06, |
|
"loss": 0.003, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 4.0574662603395735e-06, |
|
"loss": 0.0015, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 4.035698737483675e-06, |
|
"loss": 0.0072, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 4.013931214627776e-06, |
|
"loss": 0.001, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9992156862745099, |
|
"eval_loss": 0.006630082614719868, |
|
"eval_runtime": 34.2419, |
|
"eval_samples_per_second": 1072.896, |
|
"eval_steps_per_second": 16.792, |
|
"step": 13782 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 3.992163691771877e-06, |
|
"loss": 0.0017, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 3.970396168915978e-06, |
|
"loss": 0.0016, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 3.948628646060079e-06, |
|
"loss": 0.0068, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 3.92686112320418e-06, |
|
"loss": 0.006, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 3.905093600348281e-06, |
|
"loss": 0.0032, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 3.883326077492381e-06, |
|
"loss": 0.0055, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.861558554636482e-06, |
|
"loss": 0.0002, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 3.8397910317805835e-06, |
|
"loss": 0.0009, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 3.8180235089246845e-06, |
|
"loss": 0.0022, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.796255986068786e-06, |
|
"loss": 0.0039, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 3.7744884632128863e-06, |
|
"loss": 0.0039, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 3.7527209403569874e-06, |
|
"loss": 0.0045, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.7309534175010884e-06, |
|
"loss": 0.0024, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 3.70918589464519e-06, |
|
"loss": 0.0027, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 3.687418371789291e-06, |
|
"loss": 0.0054, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.6656508489333912e-06, |
|
"loss": 0.0038, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.6438833260774923e-06, |
|
"loss": 0.001, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.6221158032215934e-06, |
|
"loss": 0.0039, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.600348280365695e-06, |
|
"loss": 0.0018, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.578580757509796e-06, |
|
"loss": 0.0002, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.556813234653897e-06, |
|
"loss": 0.0009, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 3.5350457117979973e-06, |
|
"loss": 0.0008, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 3.513278188942099e-06, |
|
"loss": 0.0032, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 3.4915106660862e-06, |
|
"loss": 0.0004, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 3.469743143230301e-06, |
|
"loss": 0.0003, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 3.447975620374402e-06, |
|
"loss": 0.0012, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 3.4262080975185023e-06, |
|
"loss": 0.0014, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.4044405746626038e-06, |
|
"loss": 0.0003, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 3.382673051806705e-06, |
|
"loss": 0.0063, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 3.360905528950806e-06, |
|
"loss": 0.0043, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.339138006094907e-06, |
|
"loss": 0.003, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 3.3173704832390073e-06, |
|
"loss": 0.0061, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 3.2956029603831088e-06, |
|
"loss": 0.0005, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.27383543752721e-06, |
|
"loss": 0.003, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 3.252067914671311e-06, |
|
"loss": 0.0057, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.230300391815412e-06, |
|
"loss": 0.0017, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 3.2085328689595122e-06, |
|
"loss": 0.0041, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.1867653461036137e-06, |
|
"loss": 0.0009, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 3.164997823247715e-06, |
|
"loss": 0.0026, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 3.143230300391816e-06, |
|
"loss": 0.0034, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 3.121462777535917e-06, |
|
"loss": 0.0008, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.0996952546800176e-06, |
|
"loss": 0.0012, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 3.0779277318241187e-06, |
|
"loss": 0.0012, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 3.0561602089682198e-06, |
|
"loss": 0.0034, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.034392686112321e-06, |
|
"loss": 0.0039, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.012625163256422e-06, |
|
"loss": 0.0001, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9990852961353762, |
|
"eval_loss": 0.007986071519553661, |
|
"eval_runtime": 33.4792, |
|
"eval_samples_per_second": 1097.337, |
|
"eval_steps_per_second": 17.175, |
|
"step": 16079 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 2.9908576404005226e-06, |
|
"loss": 0.0021, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 2.9690901175446237e-06, |
|
"loss": 0.001, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 2.9473225946887248e-06, |
|
"loss": 0.0024, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 2.925555071832826e-06, |
|
"loss": 0.0024, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 2.903787548976927e-06, |
|
"loss": 0.0028, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 2.882020026121028e-06, |
|
"loss": 0.0004, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.8602525032651287e-06, |
|
"loss": 0.0013, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 2.8384849804092297e-06, |
|
"loss": 0.0007, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 2.816717457553331e-06, |
|
"loss": 0.0001, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 2.794949934697432e-06, |
|
"loss": 0.0056, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 2.773182411841533e-06, |
|
"loss": 0.0041, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 2.7514148889856336e-06, |
|
"loss": 0.0022, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 2.7296473661297347e-06, |
|
"loss": 0.0003, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 2.7078798432738358e-06, |
|
"loss": 0.0012, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 2.686112320417937e-06, |
|
"loss": 0.0006, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 2.664344797562038e-06, |
|
"loss": 0.0019, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 2.6425772747061386e-06, |
|
"loss": 0.0014, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 2.6208097518502397e-06, |
|
"loss": 0.0001, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 2.5990422289943408e-06, |
|
"loss": 0.0048, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 2.577274706138442e-06, |
|
"loss": 0.0032, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 2.555507183282543e-06, |
|
"loss": 0.0001, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 2.5337396604266436e-06, |
|
"loss": 0.0013, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 2.5119721375707447e-06, |
|
"loss": 0.0023, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 2.4902046147148457e-06, |
|
"loss": 0.0003, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 2.468437091858947e-06, |
|
"loss": 0.0029, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 2.4466695690030475e-06, |
|
"loss": 0.0001, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 2.4249020461471485e-06, |
|
"loss": 0.0001, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 2.4031345232912496e-06, |
|
"loss": 0.0009, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 2.3813670004353507e-06, |
|
"loss": 0.0043, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 2.3595994775794518e-06, |
|
"loss": 0.001, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 2.3378319547235524e-06, |
|
"loss": 0.0011, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 2.3160644318676535e-06, |
|
"loss": 0.0005, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 2.2942969090117546e-06, |
|
"loss": 0.0051, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 2.2725293861558557e-06, |
|
"loss": 0.003, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 2.2507618632999568e-06, |
|
"loss": 0.0053, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 2.2289943404440574e-06, |
|
"loss": 0.0024, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 2.2072268175881585e-06, |
|
"loss": 0.0035, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 2.1854592947322596e-06, |
|
"loss": 0.0033, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 2.1636917718763607e-06, |
|
"loss": 0.0017, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 2.1419242490204617e-06, |
|
"loss": 0.0054, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 2.1201567261645624e-06, |
|
"loss": 0.0047, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 2.0983892033086635e-06, |
|
"loss": 0.0027, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 2.0766216804527645e-06, |
|
"loss": 0.0033, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 2.0548541575968656e-06, |
|
"loss": 0.0019, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 2.0330866347409667e-06, |
|
"loss": 0.0008, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 2.0113191118850674e-06, |
|
"loss": 0.0013, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9992646458043959, |
|
"eval_loss": 0.006770425010472536, |
|
"eval_runtime": 35.137, |
|
"eval_samples_per_second": 1045.565, |
|
"eval_steps_per_second": 16.365, |
|
"step": 18376 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.9895515890291684e-06, |
|
"loss": 0.0007, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 1.9677840661732695e-06, |
|
"loss": 0.0041, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 1.9460165433173706e-06, |
|
"loss": 0.001, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 1.9242490204614717e-06, |
|
"loss": 0.0001, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 1.9024814976055728e-06, |
|
"loss": 0.0001, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 1.8807139747496736e-06, |
|
"loss": 0.0002, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 1.8589464518937747e-06, |
|
"loss": 0.0014, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 1.8371789290378756e-06, |
|
"loss": 0.0015, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 1.8154114061819767e-06, |
|
"loss": 0.0052, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 1.7936438833260777e-06, |
|
"loss": 0.0001, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 1.7718763604701786e-06, |
|
"loss": 0.0001, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 1.7501088376142797e-06, |
|
"loss": 0.0013, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 1.7283413147583806e-06, |
|
"loss": 0.0016, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 1.7065737919024816e-06, |
|
"loss": 0.0005, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 1.6848062690465827e-06, |
|
"loss": 0.0016, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 1.6630387461906836e-06, |
|
"loss": 0.0012, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 1.6412712233347847e-06, |
|
"loss": 0.0026, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 1.6195037004788855e-06, |
|
"loss": 0.0015, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 1.5977361776229866e-06, |
|
"loss": 0.0011, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 1.5759686547670877e-06, |
|
"loss": 0.006, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 1.5542011319111886e-06, |
|
"loss": 0.0031, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 1.5324336090552896e-06, |
|
"loss": 0.0018, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.5106660861993907e-06, |
|
"loss": 0.0001, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 1.4888985633434916e-06, |
|
"loss": 0.0001, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 1.4671310404875927e-06, |
|
"loss": 0.0, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 1.4453635176316935e-06, |
|
"loss": 0.0011, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 1.4235959947757946e-06, |
|
"loss": 0.0001, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 1.4018284719198957e-06, |
|
"loss": 0.0019, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 1.3800609490639966e-06, |
|
"loss": 0.0027, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 1.3582934262080976e-06, |
|
"loss": 0.0018, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 1.3365259033521985e-06, |
|
"loss": 0.0013, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 1.3147583804962996e-06, |
|
"loss": 0.0001, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 1.2929908576404007e-06, |
|
"loss": 0.0003, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 1.2712233347845015e-06, |
|
"loss": 0.0005, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 1.2494558119286026e-06, |
|
"loss": 0.0049, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 1.2276882890727037e-06, |
|
"loss": 0.0026, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 1.2059207662168046e-06, |
|
"loss": 0.0014, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 1.1841532433609056e-06, |
|
"loss": 0.0003, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 1.1623857205050067e-06, |
|
"loss": 0.0006, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 1.1406181976491076e-06, |
|
"loss": 0.0, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 1.1188506747932087e-06, |
|
"loss": 0.0025, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 1.0970831519373095e-06, |
|
"loss": 0.0, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 1.0753156290814106e-06, |
|
"loss": 0.0001, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1.0535481062255117e-06, |
|
"loss": 0.0023, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 1.0317805833696126e-06, |
|
"loss": 0.0018, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 1.0100130605137136e-06, |
|
"loss": 0.0055, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9991502434879236, |
|
"eval_loss": 0.007731999270617962, |
|
"eval_runtime": 34.7727, |
|
"eval_samples_per_second": 1056.52, |
|
"eval_steps_per_second": 16.536, |
|
"step": 20673 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 9.882455376578145e-07, |
|
"loss": 0.0019, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 9.664780148019156e-07, |
|
"loss": 0.0024, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"learning_rate": 9.447104919460167e-07, |
|
"loss": 0.0044, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 9.229429690901176e-07, |
|
"loss": 0.0006, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 9.011754462342186e-07, |
|
"loss": 0.0001, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 8.794079233783197e-07, |
|
"loss": 0.0004, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 8.576404005224207e-07, |
|
"loss": 0.0013, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 8.358728776665216e-07, |
|
"loss": 0.0004, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.141053548106226e-07, |
|
"loss": 0.001, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 7.923378319547236e-07, |
|
"loss": 0.0005, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 7.705703090988247e-07, |
|
"loss": 0.0008, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 7.488027862429256e-07, |
|
"loss": 0.0004, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 7.270352633870266e-07, |
|
"loss": 0.0019, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 7.052677405311276e-07, |
|
"loss": 0.0003, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 6.835002176752287e-07, |
|
"loss": 0.0003, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.617326948193296e-07, |
|
"loss": 0.0001, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 6.399651719634306e-07, |
|
"loss": 0.0006, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 6.181976491075317e-07, |
|
"loss": 0.0017, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.964301262516327e-07, |
|
"loss": 0.0, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 5.746626033957335e-07, |
|
"loss": 0.0, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 5.528950805398346e-07, |
|
"loss": 0.0, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 5.311275576839356e-07, |
|
"loss": 0.0006, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 5.093600348280367e-07, |
|
"loss": 0.0, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 4.875925119721376e-07, |
|
"loss": 0.0032, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 4.658249891162386e-07, |
|
"loss": 0.0016, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 4.440574662603396e-07, |
|
"loss": 0.0024, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 4.2228994340444067e-07, |
|
"loss": 0.0001, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.005224205485416e-07, |
|
"loss": 0.0001, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 3.7875489769264256e-07, |
|
"loss": 0.002, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 3.5698737483674364e-07, |
|
"loss": 0.0, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3.3521985198084456e-07, |
|
"loss": 0.0001, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 3.1345232912494564e-07, |
|
"loss": 0.0038, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 2.916848062690466e-07, |
|
"loss": 0.0027, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.699172834131476e-07, |
|
"loss": 0.003, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 2.481497605572486e-07, |
|
"loss": 0.0001, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.263822377013496e-07, |
|
"loss": 0.0001, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 2.0461471484545061e-07, |
|
"loss": 0.0054, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 1.8284719198955161e-07, |
|
"loss": 0.0015, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.6107966913365261e-07, |
|
"loss": 0.0016, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 1.393121462777536e-07, |
|
"loss": 0.0, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 1.175446234218546e-07, |
|
"loss": 0.0012, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 9.57771005659556e-08, |
|
"loss": 0.0, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 7.40095777100566e-08, |
|
"loss": 0.0001, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 5.22420548541576e-08, |
|
"loss": 0.0053, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 3.04745319982586e-08, |
|
"loss": 0.0015, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 8.7070091423596e-09, |
|
"loss": 0.0001, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9991828997254544, |
|
"eval_loss": 0.008167621679604053, |
|
"eval_runtime": 35.0331, |
|
"eval_samples_per_second": 1048.665, |
|
"eval_steps_per_second": 16.413, |
|
"step": 22970 |
|
} |
|
], |
|
"max_steps": 22970, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.161637529772032e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|