|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 22840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9956217162872154e-05, |
|
"loss": 13.2845, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.991243432574431e-05, |
|
"loss": 5.8932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9868651488616462e-05, |
|
"loss": 1.2995, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9824868651488618e-05, |
|
"loss": 0.6063, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.978108581436077e-05, |
|
"loss": 0.5446, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9737302977232926e-05, |
|
"loss": 0.469, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.969352014010508e-05, |
|
"loss": 0.4239, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9649737302977235e-05, |
|
"loss": 0.3874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9605954465849387e-05, |
|
"loss": 0.3522, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9562171628721543e-05, |
|
"loss": 0.3294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9518388791593695e-05, |
|
"loss": 0.3258, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.947460595446585e-05, |
|
"loss": 0.3032, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9430823117338004e-05, |
|
"loss": 0.3083, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.938704028021016e-05, |
|
"loss": 0.298, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9343257443082312e-05, |
|
"loss": 0.2915, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9299474605954468e-05, |
|
"loss": 0.2927, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.925569176882662e-05, |
|
"loss": 0.2854, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9211908931698776e-05, |
|
"loss": 0.2821, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.916812609457093e-05, |
|
"loss": 0.2876, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9124343257443084e-05, |
|
"loss": 0.2695, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9080560420315237e-05, |
|
"loss": 0.2801, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9036777583187393e-05, |
|
"loss": 0.2772, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8992994746059545e-05, |
|
"loss": 0.2702, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.89492119089317e-05, |
|
"loss": 0.277, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8905429071803853e-05, |
|
"loss": 0.2749, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.886164623467601e-05, |
|
"loss": 0.2751, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.881786339754816e-05, |
|
"loss": 0.2747, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8774080560420317e-05, |
|
"loss": 0.2768, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.873029772329247e-05, |
|
"loss": 0.269, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.8686514886164622e-05, |
|
"loss": 0.2711, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8642732049036778e-05, |
|
"loss": 0.2669, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8598949211908934e-05, |
|
"loss": 0.2617, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.855516637478109e-05, |
|
"loss": 0.269, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8511383537653242e-05, |
|
"loss": 0.2629, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8467600700525398e-05, |
|
"loss": 0.2606, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.842381786339755e-05, |
|
"loss": 0.2623, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8380035026269706e-05, |
|
"loss": 0.2638, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.833625218914186e-05, |
|
"loss": 0.2617, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.829246935201401e-05, |
|
"loss": 0.2547, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8248686514886167e-05, |
|
"loss": 0.2656, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.820490367775832e-05, |
|
"loss": 0.2635, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8161120840630475e-05, |
|
"loss": 0.2598, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8117338003502628e-05, |
|
"loss": 0.2927, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8073555166374784e-05, |
|
"loss": 0.2635, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8029772329246936e-05, |
|
"loss": 0.2532, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.7985989492119092e-05, |
|
"loss": 0.2665, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7942206654991244e-05, |
|
"loss": 0.2518, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.78984238178634e-05, |
|
"loss": 0.2622, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7854640980735553e-05, |
|
"loss": 0.2599, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.781085814360771e-05, |
|
"loss": 0.2571, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.776707530647986e-05, |
|
"loss": 0.2593, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7723292469352017e-05, |
|
"loss": 0.2659, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.767950963222417e-05, |
|
"loss": 0.2544, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7635726795096325e-05, |
|
"loss": 0.2523, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7591943957968477e-05, |
|
"loss": 0.2618, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7548161120840633e-05, |
|
"loss": 0.2539, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7504378283712786e-05, |
|
"loss": 0.2617, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.746059544658494e-05, |
|
"loss": 0.2503, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7416812609457094e-05, |
|
"loss": 0.2548, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.737302977232925e-05, |
|
"loss": 0.2547, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7329246935201402e-05, |
|
"loss": 0.2516, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7285464098073558e-05, |
|
"loss": 0.2525, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.724168126094571e-05, |
|
"loss": 0.2549, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7197898423817866e-05, |
|
"loss": 0.2495, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.715411558669002e-05, |
|
"loss": 0.262, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7110332749562174e-05, |
|
"loss": 0.2557, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7066549912434327e-05, |
|
"loss": 0.2493, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7022767075306483e-05, |
|
"loss": 0.2561, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.6978984238178635e-05, |
|
"loss": 0.2473, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6935201401050788e-05, |
|
"loss": 0.2507, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6891418563922943e-05, |
|
"loss": 0.253, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6847635726795096e-05, |
|
"loss": 0.2442, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6803852889667252e-05, |
|
"loss": 0.2447, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6760070052539404e-05, |
|
"loss": 0.2544, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.671628721541156e-05, |
|
"loss": 0.2554, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6672504378283712e-05, |
|
"loss": 0.2571, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.6628721541155868e-05, |
|
"loss": 0.2503, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.658493870402802e-05, |
|
"loss": 0.2483, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6541155866900177e-05, |
|
"loss": 0.2543, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.649737302977233e-05, |
|
"loss": 0.2366, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6453590192644485e-05, |
|
"loss": 0.2476, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6409807355516637e-05, |
|
"loss": 0.248, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6366024518388793e-05, |
|
"loss": 0.2474, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6322241681260946e-05, |
|
"loss": 0.2493, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.62784588441331e-05, |
|
"loss": 0.2383, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.6234676007005254e-05, |
|
"loss": 0.2493, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.619089316987741e-05, |
|
"loss": 0.251, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6147110332749562e-05, |
|
"loss": 0.2558, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.6103327495621718e-05, |
|
"loss": 0.2448, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.605954465849387e-05, |
|
"loss": 0.2502, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.6015761821366026e-05, |
|
"loss": 0.2507, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.597197898423818e-05, |
|
"loss": 0.2468, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5928196147110334e-05, |
|
"loss": 0.24, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5884413309982487e-05, |
|
"loss": 0.2485, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.5840630472854643e-05, |
|
"loss": 0.2521, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.57968476357268e-05, |
|
"loss": 0.2504, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.575306479859895e-05, |
|
"loss": 0.2481, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.5709281961471107e-05, |
|
"loss": 0.2469, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.566549912434326e-05, |
|
"loss": 0.2446, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.5621716287215415e-05, |
|
"loss": 0.2494, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.5577933450087568e-05, |
|
"loss": 0.2442, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.5534150612959723e-05, |
|
"loss": 0.2458, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.5490367775831876e-05, |
|
"loss": 0.2579, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.544658493870403e-05, |
|
"loss": 0.2456, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.5402802101576184e-05, |
|
"loss": 0.2478, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.535901926444834e-05, |
|
"loss": 0.2491, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5315236427320492e-05, |
|
"loss": 0.2434, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5271453590192645e-05, |
|
"loss": 0.2438, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.52276707530648e-05, |
|
"loss": 0.2435, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5183887915936955e-05, |
|
"loss": 0.2524, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5140105078809109e-05, |
|
"loss": 0.2511, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5096322241681263e-05, |
|
"loss": 0.2534, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5052539404553417e-05, |
|
"loss": 0.2477, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5008756567425571e-05, |
|
"loss": 0.255, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4964973730297725e-05, |
|
"loss": 0.2446, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.492119089316988e-05, |
|
"loss": 0.2492, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4877408056042034e-05, |
|
"loss": 0.25, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4833625218914188e-05, |
|
"loss": 0.241, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4789842381786342e-05, |
|
"loss": 0.2517, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4746059544658496e-05, |
|
"loss": 0.2453, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.470227670753065e-05, |
|
"loss": 0.2485, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4658493870402803e-05, |
|
"loss": 0.2494, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4614711033274957e-05, |
|
"loss": 0.2471, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4570928196147111e-05, |
|
"loss": 0.2508, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4527145359019265e-05, |
|
"loss": 0.2423, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.448336252189142e-05, |
|
"loss": 0.2433, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4439579684763573e-05, |
|
"loss": 0.2405, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4395796847635727e-05, |
|
"loss": 0.2496, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4352014010507882e-05, |
|
"loss": 0.2433, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.4308231173380036e-05, |
|
"loss": 0.2429, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.426444833625219e-05, |
|
"loss": 0.246, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4220665499124344e-05, |
|
"loss": 0.2455, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4176882661996498e-05, |
|
"loss": 0.2437, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4133099824868652e-05, |
|
"loss": 0.2455, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4089316987740806e-05, |
|
"loss": 0.2457, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.404553415061296e-05, |
|
"loss": 0.2304, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4001751313485115e-05, |
|
"loss": 0.241, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.3957968476357269e-05, |
|
"loss": 0.2398, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3914185639229423e-05, |
|
"loss": 0.2459, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3870402802101577e-05, |
|
"loss": 0.2452, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3826619964973731e-05, |
|
"loss": 0.2416, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3782837127845885e-05, |
|
"loss": 0.2365, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.373905429071804e-05, |
|
"loss": 0.245, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3695271453590194e-05, |
|
"loss": 0.2472, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3651488616462348e-05, |
|
"loss": 0.2462, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3607705779334502e-05, |
|
"loss": 0.2426, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3563922942206656e-05, |
|
"loss": 0.2463, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.352014010507881e-05, |
|
"loss": 0.2416, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.3476357267950964e-05, |
|
"loss": 0.2476, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3432574430823118e-05, |
|
"loss": 0.2424, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3388791593695273e-05, |
|
"loss": 0.2354, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3345008756567425e-05, |
|
"loss": 0.243, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3301225919439579e-05, |
|
"loss": 0.2362, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3257443082311733e-05, |
|
"loss": 0.2374, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3213660245183887e-05, |
|
"loss": 0.2379, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3169877408056041e-05, |
|
"loss": 0.2348, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3126094570928196e-05, |
|
"loss": 0.2432, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.308231173380035e-05, |
|
"loss": 0.2381, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.3038528896672507e-05, |
|
"loss": 0.2472, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2994746059544661e-05, |
|
"loss": 0.2409, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2950963222416814e-05, |
|
"loss": 0.2448, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2907180385288968e-05, |
|
"loss": 0.2421, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2863397548161122e-05, |
|
"loss": 0.2392, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2819614711033276e-05, |
|
"loss": 0.2432, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.277583187390543e-05, |
|
"loss": 0.2336, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2732049036777585e-05, |
|
"loss": 0.2416, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2688266199649739e-05, |
|
"loss": 0.236, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2644483362521893e-05, |
|
"loss": 0.2387, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2600700525394047e-05, |
|
"loss": 0.2398, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2556917688266201e-05, |
|
"loss": 0.2413, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2513134851138355e-05, |
|
"loss": 0.2445, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.246935201401051e-05, |
|
"loss": 0.2405, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2425569176882663e-05, |
|
"loss": 0.2389, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2381786339754818e-05, |
|
"loss": 0.2348, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2338003502626972e-05, |
|
"loss": 0.2397, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2294220665499126e-05, |
|
"loss": 0.233, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.225043782837128e-05, |
|
"loss": 0.2395, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2206654991243434e-05, |
|
"loss": 0.2366, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2162872154115588e-05, |
|
"loss": 0.234, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2119089316987742e-05, |
|
"loss": 0.2391, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2075306479859897e-05, |
|
"loss": 0.2382, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.203152364273205e-05, |
|
"loss": 0.231, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1987740805604205e-05, |
|
"loss": 0.2383, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1943957968476359e-05, |
|
"loss": 0.2443, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1900175131348513e-05, |
|
"loss": 0.2446, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1856392294220667e-05, |
|
"loss": 0.2404, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.1812609457092821e-05, |
|
"loss": 0.2371, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.1768826619964975e-05, |
|
"loss": 0.2427, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.172504378283713e-05, |
|
"loss": 0.2354, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1681260945709284e-05, |
|
"loss": 0.2332, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1637478108581436e-05, |
|
"loss": 0.2394, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.159369527145359e-05, |
|
"loss": 0.238, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1549912434325744e-05, |
|
"loss": 0.2365, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1506129597197899e-05, |
|
"loss": 0.235, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1462346760070053e-05, |
|
"loss": 0.2374, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1418563922942207e-05, |
|
"loss": 0.2291, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1374781085814361e-05, |
|
"loss": 0.2404, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.1330998248686515e-05, |
|
"loss": 0.2373, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.128721541155867e-05, |
|
"loss": 0.2426, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1243432574430823e-05, |
|
"loss": 0.2476, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1199649737302978e-05, |
|
"loss": 0.2404, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1155866900175132e-05, |
|
"loss": 0.2401, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1112084063047286e-05, |
|
"loss": 0.2443, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.106830122591944e-05, |
|
"loss": 0.2349, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1024518388791594e-05, |
|
"loss": 0.2408, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0980735551663748e-05, |
|
"loss": 0.2393, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0936952714535902e-05, |
|
"loss": 0.2358, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0893169877408056e-05, |
|
"loss": 0.2412, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.084938704028021e-05, |
|
"loss": 0.2536, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0805604203152365e-05, |
|
"loss": 0.2344, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0761821366024519e-05, |
|
"loss": 0.2413, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0718038528896673e-05, |
|
"loss": 0.2487, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0674255691768827e-05, |
|
"loss": 0.2374, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0630472854640981e-05, |
|
"loss": 0.2374, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0586690017513135e-05, |
|
"loss": 0.2396, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.054290718038529e-05, |
|
"loss": 0.236, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0499124343257444e-05, |
|
"loss": 0.2393, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0455341506129598e-05, |
|
"loss": 0.238, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0411558669001752e-05, |
|
"loss": 0.2385, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0367775831873904e-05, |
|
"loss": 0.2376, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0323992994746059e-05, |
|
"loss": 0.2347, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0280210157618213e-05, |
|
"loss": 0.2337, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.023642732049037e-05, |
|
"loss": 0.2423, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0192644483362524e-05, |
|
"loss": 0.2347, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0148861646234678e-05, |
|
"loss": 0.2387, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0105078809106833e-05, |
|
"loss": 0.2427, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0061295971978987e-05, |
|
"loss": 0.2243, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0017513134851141e-05, |
|
"loss": 0.2386, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.2167460024356842, |
|
"eval_runtime": 72.1234, |
|
"eval_samples_per_second": 63.336, |
|
"eval_steps_per_second": 15.834, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.973730297723293e-06, |
|
"loss": 0.2285, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.929947460595447e-06, |
|
"loss": 0.2315, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.886164623467602e-06, |
|
"loss": 0.2317, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.842381786339756e-06, |
|
"loss": 0.2314, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.79859894921191e-06, |
|
"loss": 0.2392, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.754816112084064e-06, |
|
"loss": 0.2311, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.711033274956218e-06, |
|
"loss": 0.223, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.667250437828372e-06, |
|
"loss": 0.2176, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.623467600700526e-06, |
|
"loss": 0.2311, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.57968476357268e-06, |
|
"loss": 0.2242, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.535901926444835e-06, |
|
"loss": 0.2203, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.492119089316989e-06, |
|
"loss": 0.2301, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.448336252189143e-06, |
|
"loss": 0.2356, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.404553415061297e-06, |
|
"loss": 0.2261, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.360770577933451e-06, |
|
"loss": 0.2269, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.316987740805605e-06, |
|
"loss": 0.2224, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.27320490367776e-06, |
|
"loss": 0.2214, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.229422066549914e-06, |
|
"loss": 0.2327, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.185639229422068e-06, |
|
"loss": 0.2245, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.141856392294222e-06, |
|
"loss": 0.2304, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.098073555166376e-06, |
|
"loss": 0.2387, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.05429071803853e-06, |
|
"loss": 0.2285, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.010507880910684e-06, |
|
"loss": 0.2274, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.966725043782838e-06, |
|
"loss": 0.2278, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.922942206654993e-06, |
|
"loss": 0.2277, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.879159369527147e-06, |
|
"loss": 0.2261, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.8353765323993e-06, |
|
"loss": 0.2274, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.791593695271455e-06, |
|
"loss": 0.227, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.747810858143609e-06, |
|
"loss": 0.2363, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.704028021015763e-06, |
|
"loss": 0.2244, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.660245183887916e-06, |
|
"loss": 0.2272, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.61646234676007e-06, |
|
"loss": 0.2277, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.572679509632224e-06, |
|
"loss": 0.232, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.528896672504378e-06, |
|
"loss": 0.2309, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.485113835376532e-06, |
|
"loss": 0.2292, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.441330998248686e-06, |
|
"loss": 0.2308, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.39754816112084e-06, |
|
"loss": 0.2291, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.353765323992995e-06, |
|
"loss": 0.2326, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.309982486865149e-06, |
|
"loss": 0.2215, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.266199649737303e-06, |
|
"loss": 0.2312, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.222416812609457e-06, |
|
"loss": 0.2216, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.178633975481613e-06, |
|
"loss": 0.2269, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.134851138353767e-06, |
|
"loss": 0.229, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.091068301225921e-06, |
|
"loss": 0.2319, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.047285464098075e-06, |
|
"loss": 0.226, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.00350262697023e-06, |
|
"loss": 0.2294, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.959719789842383e-06, |
|
"loss": 0.2347, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.915936952714538e-06, |
|
"loss": 0.2315, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.872154115586692e-06, |
|
"loss": 0.2209, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.828371278458846e-06, |
|
"loss": 0.2248, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.784588441330998e-06, |
|
"loss": 0.2305, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.740805604203152e-06, |
|
"loss": 0.2288, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.697022767075307e-06, |
|
"loss": 0.2286, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.65323992994746e-06, |
|
"loss": 0.2274, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.609457092819616e-06, |
|
"loss": 0.2235, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.56567425569177e-06, |
|
"loss": 0.2328, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.521891418563924e-06, |
|
"loss": 0.2267, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.478108581436077e-06, |
|
"loss": 0.2231, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.434325744308231e-06, |
|
"loss": 0.2256, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.3905429071803855e-06, |
|
"loss": 0.2276, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.34676007005254e-06, |
|
"loss": 0.2312, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.302977232924694e-06, |
|
"loss": 0.2307, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.259194395796848e-06, |
|
"loss": 0.2265, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.215411558669002e-06, |
|
"loss": 0.2351, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.171628721541156e-06, |
|
"loss": 0.2264, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.12784588441331e-06, |
|
"loss": 0.2309, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.0840630472854645e-06, |
|
"loss": 0.2325, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.040280210157619e-06, |
|
"loss": 0.2329, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.996497373029773e-06, |
|
"loss": 0.2301, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.952714535901927e-06, |
|
"loss": 0.2308, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.908931698774081e-06, |
|
"loss": 0.2273, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.865148861646235e-06, |
|
"loss": 0.2347, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.8213660245183884e-06, |
|
"loss": 0.2303, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.777583187390544e-06, |
|
"loss": 0.2293, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.733800350262698e-06, |
|
"loss": 0.2299, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.6900175131348525e-06, |
|
"loss": 0.2271, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.646234676007006e-06, |
|
"loss": 0.2246, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.60245183887916e-06, |
|
"loss": 0.2262, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.558669001751314e-06, |
|
"loss": 0.2176, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.514886164623468e-06, |
|
"loss": 0.2277, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.471103327495622e-06, |
|
"loss": 0.2184, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.4273204903677765e-06, |
|
"loss": 0.2365, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.383537653239931e-06, |
|
"loss": 0.2288, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.339754816112085e-06, |
|
"loss": 0.2252, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.295971978984239e-06, |
|
"loss": 0.2293, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.252189141856393e-06, |
|
"loss": 0.2252, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.208406304728547e-06, |
|
"loss": 0.2382, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.164623467600701e-06, |
|
"loss": 0.2293, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.120840630472855e-06, |
|
"loss": 0.2274, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.0770577933450096e-06, |
|
"loss": 0.2227, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.033274956217164e-06, |
|
"loss": 0.2311, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.989492119089317e-06, |
|
"loss": 0.2227, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.945709281961471e-06, |
|
"loss": 0.2242, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.901926444833625e-06, |
|
"loss": 0.2341, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.858143607705779e-06, |
|
"loss": 0.2341, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.8143607705779335e-06, |
|
"loss": 0.2306, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.770577933450088e-06, |
|
"loss": 0.224, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.726795096322242e-06, |
|
"loss": 0.2283, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.683012259194396e-06, |
|
"loss": 0.2266, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.63922942206655e-06, |
|
"loss": 0.2283, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.595446584938704e-06, |
|
"loss": 0.2296, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.551663747810858e-06, |
|
"loss": 0.2214, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.5078809106830125e-06, |
|
"loss": 0.2229, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.464098073555167e-06, |
|
"loss": 0.2333, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.420315236427321e-06, |
|
"loss": 0.2276, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.376532399299476e-06, |
|
"loss": 0.2261, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.33274956217163e-06, |
|
"loss": 0.2305, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.288966725043784e-06, |
|
"loss": 0.2314, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.245183887915938e-06, |
|
"loss": 0.2269, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.201401050788092e-06, |
|
"loss": 0.2313, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.157618213660246e-06, |
|
"loss": 0.2341, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.1138353765324e-06, |
|
"loss": 0.225, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.070052539404554e-06, |
|
"loss": 0.2189, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.026269702276708e-06, |
|
"loss": 0.231, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.982486865148862e-06, |
|
"loss": 0.229, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.938704028021016e-06, |
|
"loss": 0.2324, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.89492119089317e-06, |
|
"loss": 0.2303, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.8511383537653245e-06, |
|
"loss": 0.2274, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.807355516637479e-06, |
|
"loss": 0.2244, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.763572679509633e-06, |
|
"loss": 0.2273, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.719789842381787e-06, |
|
"loss": 0.2265, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.676007005253941e-06, |
|
"loss": 0.2253, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.632224168126095e-06, |
|
"loss": 0.2349, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.588441330998249e-06, |
|
"loss": 0.2202, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.544658493870403e-06, |
|
"loss": 0.2304, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.500875656742557e-06, |
|
"loss": 0.2321, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.457092819614711e-06, |
|
"loss": 0.2194, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.413309982486865e-06, |
|
"loss": 0.2278, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.36952714535902e-06, |
|
"loss": 0.2237, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.325744308231174e-06, |
|
"loss": 0.2241, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.281961471103328e-06, |
|
"loss": 0.2244, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.238178633975482e-06, |
|
"loss": 0.2247, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.1943957968476365e-06, |
|
"loss": 0.2307, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.150612959719791e-06, |
|
"loss": 0.2212, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.106830122591945e-06, |
|
"loss": 0.224, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.063047285464098e-06, |
|
"loss": 0.221, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.019264448336252e-06, |
|
"loss": 0.2237, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.975481611208406e-06, |
|
"loss": 0.2294, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.9316987740805604e-06, |
|
"loss": 0.222, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.8879159369527146e-06, |
|
"loss": 0.2235, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.844133099824869e-06, |
|
"loss": 0.2299, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.800350262697023e-06, |
|
"loss": 0.2308, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.756567425569177e-06, |
|
"loss": 0.2279, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.712784588441331e-06, |
|
"loss": 0.2293, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.6690017513134857e-06, |
|
"loss": 0.2279, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.62521891418564e-06, |
|
"loss": 0.2292, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.581436077057794e-06, |
|
"loss": 0.2276, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.537653239929948e-06, |
|
"loss": 0.2276, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.4938704028021018e-06, |
|
"loss": 0.2209, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.450087565674256e-06, |
|
"loss": 0.2251, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.40630472854641e-06, |
|
"loss": 0.2266, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.362521891418564e-06, |
|
"loss": 0.228, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.3187390542907183e-06, |
|
"loss": 0.2274, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.2749562171628725e-06, |
|
"loss": 0.2256, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.2311733800350266e-06, |
|
"loss": 0.2279, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.1873905429071807e-06, |
|
"loss": 0.2254, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.1436077057793344e-06, |
|
"loss": 0.2273, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0998248686514886e-06, |
|
"loss": 0.2277, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0560420315236427e-06, |
|
"loss": 0.2291, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.012259194395797e-06, |
|
"loss": 0.2258, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.9684763572679514e-06, |
|
"loss": 0.2247, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.9246935201401055e-06, |
|
"loss": 0.2246, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.8809106830122597e-06, |
|
"loss": 0.2228, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.837127845884414e-06, |
|
"loss": 0.2281, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.793345008756568e-06, |
|
"loss": 0.2396, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.749562171628722e-06, |
|
"loss": 0.2214, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7057793345008758e-06, |
|
"loss": 0.2286, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.66199649737303e-06, |
|
"loss": 0.2289, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.618213660245184e-06, |
|
"loss": 0.2282, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.574430823117338e-06, |
|
"loss": 0.2299, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5306479859894923e-06, |
|
"loss": 0.2276, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4868651488616464e-06, |
|
"loss": 0.2257, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.4430823117338006e-06, |
|
"loss": 0.228, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3992994746059547e-06, |
|
"loss": 0.2245, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.355516637478109e-06, |
|
"loss": 0.2199, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.311733800350263e-06, |
|
"loss": 0.2264, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.267950963222417e-06, |
|
"loss": 0.228, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2241681260945713e-06, |
|
"loss": 0.2311, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.180385288966725e-06, |
|
"loss": 0.2252, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.136602451838879e-06, |
|
"loss": 0.2259, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0928196147110337e-06, |
|
"loss": 0.2254, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0490367775831878e-06, |
|
"loss": 0.2273, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.005253940455342e-06, |
|
"loss": 0.2188, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9614711033274956e-06, |
|
"loss": 0.2295, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9176882661996498e-06, |
|
"loss": 0.2257, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.873905429071804e-06, |
|
"loss": 0.2296, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.830122591943958e-06, |
|
"loss": 0.2304, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7863397548161122e-06, |
|
"loss": 0.2316, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.7425569176882665e-06, |
|
"loss": 0.2311, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6987740805604204e-06, |
|
"loss": 0.2254, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6549912434325746e-06, |
|
"loss": 0.2234, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6112084063047287e-06, |
|
"loss": 0.2193, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.5674255691768828e-06, |
|
"loss": 0.236, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.5236427320490368e-06, |
|
"loss": 0.2282, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.479859894921191e-06, |
|
"loss": 0.23, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.436077057793345e-06, |
|
"loss": 0.232, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3922942206654994e-06, |
|
"loss": 0.2283, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3485113835376535e-06, |
|
"loss": 0.2334, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3047285464098074e-06, |
|
"loss": 0.2297, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2609457092819616e-06, |
|
"loss": 0.223, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2171628721541157e-06, |
|
"loss": 0.223, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.1733800350262698e-06, |
|
"loss": 0.2252, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.129597197898424e-06, |
|
"loss": 0.2256, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.085814360770578e-06, |
|
"loss": 0.2228, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.042031523642732e-06, |
|
"loss": 0.2288, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.982486865148862e-07, |
|
"loss": 0.2342, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.544658493870403e-07, |
|
"loss": 0.2267, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.106830122591944e-07, |
|
"loss": 0.2234, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.669001751313486e-07, |
|
"loss": 0.2191, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.231173380035026e-07, |
|
"loss": 0.2219, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.793345008756568e-07, |
|
"loss": 0.2239, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.35551663747811e-07, |
|
"loss": 0.2262, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.91768826619965e-07, |
|
"loss": 0.2185, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.47985989492119e-07, |
|
"loss": 0.2239, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.042031523642733e-07, |
|
"loss": 0.2286, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.604203152364274e-07, |
|
"loss": 0.2253, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.166374781085814e-07, |
|
"loss": 0.2252, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.728546409807356e-07, |
|
"loss": 0.2245, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.290718038528897e-07, |
|
"loss": 0.2285, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.8528896672504383e-07, |
|
"loss": 0.2256, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.415061295971979e-07, |
|
"loss": 0.2334, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.9772329246935205e-07, |
|
"loss": 0.2313, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5394045534150613e-07, |
|
"loss": 0.2202, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.1015761821366026e-07, |
|
"loss": 0.2235, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6637478108581437e-07, |
|
"loss": 0.2311, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.2259194395796848e-07, |
|
"loss": 0.2213, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.88091068301226e-08, |
|
"loss": 0.2246, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.502626970227671e-08, |
|
"loss": 0.2186, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.20982445776462555, |
|
"eval_runtime": 72.0375, |
|
"eval_samples_per_second": 63.411, |
|
"eval_steps_per_second": 15.853, |
|
"step": 22840 |
|
} |
|
], |
|
"max_steps": 22840, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.0789666943297126e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|