kfkas's picture
Upload 11 files
89657f7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 22840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9956217162872154e-05,
"loss": 13.2845,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 1.991243432574431e-05,
"loss": 5.8932,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.9868651488616462e-05,
"loss": 1.2995,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 1.9824868651488618e-05,
"loss": 0.6063,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.978108581436077e-05,
"loss": 0.5446,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 1.9737302977232926e-05,
"loss": 0.469,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.969352014010508e-05,
"loss": 0.4239,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 1.9649737302977235e-05,
"loss": 0.3874,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.9605954465849387e-05,
"loss": 0.3522,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 1.9562171628721543e-05,
"loss": 0.3294,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 1.9518388791593695e-05,
"loss": 0.3258,
"step": 550
},
{
"epoch": 0.05,
"learning_rate": 1.947460595446585e-05,
"loss": 0.3032,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 1.9430823117338004e-05,
"loss": 0.3083,
"step": 650
},
{
"epoch": 0.06,
"learning_rate": 1.938704028021016e-05,
"loss": 0.298,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 1.9343257443082312e-05,
"loss": 0.2915,
"step": 750
},
{
"epoch": 0.07,
"learning_rate": 1.9299474605954468e-05,
"loss": 0.2927,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 1.925569176882662e-05,
"loss": 0.2854,
"step": 850
},
{
"epoch": 0.08,
"learning_rate": 1.9211908931698776e-05,
"loss": 0.2821,
"step": 900
},
{
"epoch": 0.08,
"learning_rate": 1.916812609457093e-05,
"loss": 0.2876,
"step": 950
},
{
"epoch": 0.09,
"learning_rate": 1.9124343257443084e-05,
"loss": 0.2695,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 1.9080560420315237e-05,
"loss": 0.2801,
"step": 1050
},
{
"epoch": 0.1,
"learning_rate": 1.9036777583187393e-05,
"loss": 0.2772,
"step": 1100
},
{
"epoch": 0.1,
"learning_rate": 1.8992994746059545e-05,
"loss": 0.2702,
"step": 1150
},
{
"epoch": 0.11,
"learning_rate": 1.89492119089317e-05,
"loss": 0.277,
"step": 1200
},
{
"epoch": 0.11,
"learning_rate": 1.8905429071803853e-05,
"loss": 0.2749,
"step": 1250
},
{
"epoch": 0.11,
"learning_rate": 1.886164623467601e-05,
"loss": 0.2751,
"step": 1300
},
{
"epoch": 0.12,
"learning_rate": 1.881786339754816e-05,
"loss": 0.2747,
"step": 1350
},
{
"epoch": 0.12,
"learning_rate": 1.8774080560420317e-05,
"loss": 0.2768,
"step": 1400
},
{
"epoch": 0.13,
"learning_rate": 1.873029772329247e-05,
"loss": 0.269,
"step": 1450
},
{
"epoch": 0.13,
"learning_rate": 1.8686514886164622e-05,
"loss": 0.2711,
"step": 1500
},
{
"epoch": 0.14,
"learning_rate": 1.8642732049036778e-05,
"loss": 0.2669,
"step": 1550
},
{
"epoch": 0.14,
"learning_rate": 1.8598949211908934e-05,
"loss": 0.2617,
"step": 1600
},
{
"epoch": 0.14,
"learning_rate": 1.855516637478109e-05,
"loss": 0.269,
"step": 1650
},
{
"epoch": 0.15,
"learning_rate": 1.8511383537653242e-05,
"loss": 0.2629,
"step": 1700
},
{
"epoch": 0.15,
"learning_rate": 1.8467600700525398e-05,
"loss": 0.2606,
"step": 1750
},
{
"epoch": 0.16,
"learning_rate": 1.842381786339755e-05,
"loss": 0.2623,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 1.8380035026269706e-05,
"loss": 0.2638,
"step": 1850
},
{
"epoch": 0.17,
"learning_rate": 1.833625218914186e-05,
"loss": 0.2617,
"step": 1900
},
{
"epoch": 0.17,
"learning_rate": 1.829246935201401e-05,
"loss": 0.2547,
"step": 1950
},
{
"epoch": 0.18,
"learning_rate": 1.8248686514886167e-05,
"loss": 0.2656,
"step": 2000
},
{
"epoch": 0.18,
"learning_rate": 1.820490367775832e-05,
"loss": 0.2635,
"step": 2050
},
{
"epoch": 0.18,
"learning_rate": 1.8161120840630475e-05,
"loss": 0.2598,
"step": 2100
},
{
"epoch": 0.19,
"learning_rate": 1.8117338003502628e-05,
"loss": 0.2927,
"step": 2150
},
{
"epoch": 0.19,
"learning_rate": 1.8073555166374784e-05,
"loss": 0.2635,
"step": 2200
},
{
"epoch": 0.2,
"learning_rate": 1.8029772329246936e-05,
"loss": 0.2532,
"step": 2250
},
{
"epoch": 0.2,
"learning_rate": 1.7985989492119092e-05,
"loss": 0.2665,
"step": 2300
},
{
"epoch": 0.21,
"learning_rate": 1.7942206654991244e-05,
"loss": 0.2518,
"step": 2350
},
{
"epoch": 0.21,
"learning_rate": 1.78984238178634e-05,
"loss": 0.2622,
"step": 2400
},
{
"epoch": 0.21,
"learning_rate": 1.7854640980735553e-05,
"loss": 0.2599,
"step": 2450
},
{
"epoch": 0.22,
"learning_rate": 1.781085814360771e-05,
"loss": 0.2571,
"step": 2500
},
{
"epoch": 0.22,
"learning_rate": 1.776707530647986e-05,
"loss": 0.2593,
"step": 2550
},
{
"epoch": 0.23,
"learning_rate": 1.7723292469352017e-05,
"loss": 0.2659,
"step": 2600
},
{
"epoch": 0.23,
"learning_rate": 1.767950963222417e-05,
"loss": 0.2544,
"step": 2650
},
{
"epoch": 0.24,
"learning_rate": 1.7635726795096325e-05,
"loss": 0.2523,
"step": 2700
},
{
"epoch": 0.24,
"learning_rate": 1.7591943957968477e-05,
"loss": 0.2618,
"step": 2750
},
{
"epoch": 0.25,
"learning_rate": 1.7548161120840633e-05,
"loss": 0.2539,
"step": 2800
},
{
"epoch": 0.25,
"learning_rate": 1.7504378283712786e-05,
"loss": 0.2617,
"step": 2850
},
{
"epoch": 0.25,
"learning_rate": 1.746059544658494e-05,
"loss": 0.2503,
"step": 2900
},
{
"epoch": 0.26,
"learning_rate": 1.7416812609457094e-05,
"loss": 0.2548,
"step": 2950
},
{
"epoch": 0.26,
"learning_rate": 1.737302977232925e-05,
"loss": 0.2547,
"step": 3000
},
{
"epoch": 0.27,
"learning_rate": 1.7329246935201402e-05,
"loss": 0.2516,
"step": 3050
},
{
"epoch": 0.27,
"learning_rate": 1.7285464098073558e-05,
"loss": 0.2525,
"step": 3100
},
{
"epoch": 0.28,
"learning_rate": 1.724168126094571e-05,
"loss": 0.2549,
"step": 3150
},
{
"epoch": 0.28,
"learning_rate": 1.7197898423817866e-05,
"loss": 0.2495,
"step": 3200
},
{
"epoch": 0.28,
"learning_rate": 1.715411558669002e-05,
"loss": 0.262,
"step": 3250
},
{
"epoch": 0.29,
"learning_rate": 1.7110332749562174e-05,
"loss": 0.2557,
"step": 3300
},
{
"epoch": 0.29,
"learning_rate": 1.7066549912434327e-05,
"loss": 0.2493,
"step": 3350
},
{
"epoch": 0.3,
"learning_rate": 1.7022767075306483e-05,
"loss": 0.2561,
"step": 3400
},
{
"epoch": 0.3,
"learning_rate": 1.6978984238178635e-05,
"loss": 0.2473,
"step": 3450
},
{
"epoch": 0.31,
"learning_rate": 1.6935201401050788e-05,
"loss": 0.2507,
"step": 3500
},
{
"epoch": 0.31,
"learning_rate": 1.6891418563922943e-05,
"loss": 0.253,
"step": 3550
},
{
"epoch": 0.32,
"learning_rate": 1.6847635726795096e-05,
"loss": 0.2442,
"step": 3600
},
{
"epoch": 0.32,
"learning_rate": 1.6803852889667252e-05,
"loss": 0.2447,
"step": 3650
},
{
"epoch": 0.32,
"learning_rate": 1.6760070052539404e-05,
"loss": 0.2544,
"step": 3700
},
{
"epoch": 0.33,
"learning_rate": 1.671628721541156e-05,
"loss": 0.2554,
"step": 3750
},
{
"epoch": 0.33,
"learning_rate": 1.6672504378283712e-05,
"loss": 0.2571,
"step": 3800
},
{
"epoch": 0.34,
"learning_rate": 1.6628721541155868e-05,
"loss": 0.2503,
"step": 3850
},
{
"epoch": 0.34,
"learning_rate": 1.658493870402802e-05,
"loss": 0.2483,
"step": 3900
},
{
"epoch": 0.35,
"learning_rate": 1.6541155866900177e-05,
"loss": 0.2543,
"step": 3950
},
{
"epoch": 0.35,
"learning_rate": 1.649737302977233e-05,
"loss": 0.2366,
"step": 4000
},
{
"epoch": 0.35,
"learning_rate": 1.6453590192644485e-05,
"loss": 0.2476,
"step": 4050
},
{
"epoch": 0.36,
"learning_rate": 1.6409807355516637e-05,
"loss": 0.248,
"step": 4100
},
{
"epoch": 0.36,
"learning_rate": 1.6366024518388793e-05,
"loss": 0.2474,
"step": 4150
},
{
"epoch": 0.37,
"learning_rate": 1.6322241681260946e-05,
"loss": 0.2493,
"step": 4200
},
{
"epoch": 0.37,
"learning_rate": 1.62784588441331e-05,
"loss": 0.2383,
"step": 4250
},
{
"epoch": 0.38,
"learning_rate": 1.6234676007005254e-05,
"loss": 0.2493,
"step": 4300
},
{
"epoch": 0.38,
"learning_rate": 1.619089316987741e-05,
"loss": 0.251,
"step": 4350
},
{
"epoch": 0.39,
"learning_rate": 1.6147110332749562e-05,
"loss": 0.2558,
"step": 4400
},
{
"epoch": 0.39,
"learning_rate": 1.6103327495621718e-05,
"loss": 0.2448,
"step": 4450
},
{
"epoch": 0.39,
"learning_rate": 1.605954465849387e-05,
"loss": 0.2502,
"step": 4500
},
{
"epoch": 0.4,
"learning_rate": 1.6015761821366026e-05,
"loss": 0.2507,
"step": 4550
},
{
"epoch": 0.4,
"learning_rate": 1.597197898423818e-05,
"loss": 0.2468,
"step": 4600
},
{
"epoch": 0.41,
"learning_rate": 1.5928196147110334e-05,
"loss": 0.24,
"step": 4650
},
{
"epoch": 0.41,
"learning_rate": 1.5884413309982487e-05,
"loss": 0.2485,
"step": 4700
},
{
"epoch": 0.42,
"learning_rate": 1.5840630472854643e-05,
"loss": 0.2521,
"step": 4750
},
{
"epoch": 0.42,
"learning_rate": 1.57968476357268e-05,
"loss": 0.2504,
"step": 4800
},
{
"epoch": 0.42,
"learning_rate": 1.575306479859895e-05,
"loss": 0.2481,
"step": 4850
},
{
"epoch": 0.43,
"learning_rate": 1.5709281961471107e-05,
"loss": 0.2469,
"step": 4900
},
{
"epoch": 0.43,
"learning_rate": 1.566549912434326e-05,
"loss": 0.2446,
"step": 4950
},
{
"epoch": 0.44,
"learning_rate": 1.5621716287215415e-05,
"loss": 0.2494,
"step": 5000
},
{
"epoch": 0.44,
"learning_rate": 1.5577933450087568e-05,
"loss": 0.2442,
"step": 5050
},
{
"epoch": 0.45,
"learning_rate": 1.5534150612959723e-05,
"loss": 0.2458,
"step": 5100
},
{
"epoch": 0.45,
"learning_rate": 1.5490367775831876e-05,
"loss": 0.2579,
"step": 5150
},
{
"epoch": 0.46,
"learning_rate": 1.544658493870403e-05,
"loss": 0.2456,
"step": 5200
},
{
"epoch": 0.46,
"learning_rate": 1.5402802101576184e-05,
"loss": 0.2478,
"step": 5250
},
{
"epoch": 0.46,
"learning_rate": 1.535901926444834e-05,
"loss": 0.2491,
"step": 5300
},
{
"epoch": 0.47,
"learning_rate": 1.5315236427320492e-05,
"loss": 0.2434,
"step": 5350
},
{
"epoch": 0.47,
"learning_rate": 1.5271453590192645e-05,
"loss": 0.2438,
"step": 5400
},
{
"epoch": 0.48,
"learning_rate": 1.52276707530648e-05,
"loss": 0.2435,
"step": 5450
},
{
"epoch": 0.48,
"learning_rate": 1.5183887915936955e-05,
"loss": 0.2524,
"step": 5500
},
{
"epoch": 0.49,
"learning_rate": 1.5140105078809109e-05,
"loss": 0.2511,
"step": 5550
},
{
"epoch": 0.49,
"learning_rate": 1.5096322241681263e-05,
"loss": 0.2534,
"step": 5600
},
{
"epoch": 0.49,
"learning_rate": 1.5052539404553417e-05,
"loss": 0.2477,
"step": 5650
},
{
"epoch": 0.5,
"learning_rate": 1.5008756567425571e-05,
"loss": 0.255,
"step": 5700
},
{
"epoch": 0.5,
"learning_rate": 1.4964973730297725e-05,
"loss": 0.2446,
"step": 5750
},
{
"epoch": 0.51,
"learning_rate": 1.492119089316988e-05,
"loss": 0.2492,
"step": 5800
},
{
"epoch": 0.51,
"learning_rate": 1.4877408056042034e-05,
"loss": 0.25,
"step": 5850
},
{
"epoch": 0.52,
"learning_rate": 1.4833625218914188e-05,
"loss": 0.241,
"step": 5900
},
{
"epoch": 0.52,
"learning_rate": 1.4789842381786342e-05,
"loss": 0.2517,
"step": 5950
},
{
"epoch": 0.53,
"learning_rate": 1.4746059544658496e-05,
"loss": 0.2453,
"step": 6000
},
{
"epoch": 0.53,
"learning_rate": 1.470227670753065e-05,
"loss": 0.2485,
"step": 6050
},
{
"epoch": 0.53,
"learning_rate": 1.4658493870402803e-05,
"loss": 0.2494,
"step": 6100
},
{
"epoch": 0.54,
"learning_rate": 1.4614711033274957e-05,
"loss": 0.2471,
"step": 6150
},
{
"epoch": 0.54,
"learning_rate": 1.4570928196147111e-05,
"loss": 0.2508,
"step": 6200
},
{
"epoch": 0.55,
"learning_rate": 1.4527145359019265e-05,
"loss": 0.2423,
"step": 6250
},
{
"epoch": 0.55,
"learning_rate": 1.448336252189142e-05,
"loss": 0.2433,
"step": 6300
},
{
"epoch": 0.56,
"learning_rate": 1.4439579684763573e-05,
"loss": 0.2405,
"step": 6350
},
{
"epoch": 0.56,
"learning_rate": 1.4395796847635727e-05,
"loss": 0.2496,
"step": 6400
},
{
"epoch": 0.56,
"learning_rate": 1.4352014010507882e-05,
"loss": 0.2433,
"step": 6450
},
{
"epoch": 0.57,
"learning_rate": 1.4308231173380036e-05,
"loss": 0.2429,
"step": 6500
},
{
"epoch": 0.57,
"learning_rate": 1.426444833625219e-05,
"loss": 0.246,
"step": 6550
},
{
"epoch": 0.58,
"learning_rate": 1.4220665499124344e-05,
"loss": 0.2455,
"step": 6600
},
{
"epoch": 0.58,
"learning_rate": 1.4176882661996498e-05,
"loss": 0.2437,
"step": 6650
},
{
"epoch": 0.59,
"learning_rate": 1.4133099824868652e-05,
"loss": 0.2455,
"step": 6700
},
{
"epoch": 0.59,
"learning_rate": 1.4089316987740806e-05,
"loss": 0.2457,
"step": 6750
},
{
"epoch": 0.6,
"learning_rate": 1.404553415061296e-05,
"loss": 0.2304,
"step": 6800
},
{
"epoch": 0.6,
"learning_rate": 1.4001751313485115e-05,
"loss": 0.241,
"step": 6850
},
{
"epoch": 0.6,
"learning_rate": 1.3957968476357269e-05,
"loss": 0.2398,
"step": 6900
},
{
"epoch": 0.61,
"learning_rate": 1.3914185639229423e-05,
"loss": 0.2459,
"step": 6950
},
{
"epoch": 0.61,
"learning_rate": 1.3870402802101577e-05,
"loss": 0.2452,
"step": 7000
},
{
"epoch": 0.62,
"learning_rate": 1.3826619964973731e-05,
"loss": 0.2416,
"step": 7050
},
{
"epoch": 0.62,
"learning_rate": 1.3782837127845885e-05,
"loss": 0.2365,
"step": 7100
},
{
"epoch": 0.63,
"learning_rate": 1.373905429071804e-05,
"loss": 0.245,
"step": 7150
},
{
"epoch": 0.63,
"learning_rate": 1.3695271453590194e-05,
"loss": 0.2472,
"step": 7200
},
{
"epoch": 0.63,
"learning_rate": 1.3651488616462348e-05,
"loss": 0.2462,
"step": 7250
},
{
"epoch": 0.64,
"learning_rate": 1.3607705779334502e-05,
"loss": 0.2426,
"step": 7300
},
{
"epoch": 0.64,
"learning_rate": 1.3563922942206656e-05,
"loss": 0.2463,
"step": 7350
},
{
"epoch": 0.65,
"learning_rate": 1.352014010507881e-05,
"loss": 0.2416,
"step": 7400
},
{
"epoch": 0.65,
"learning_rate": 1.3476357267950964e-05,
"loss": 0.2476,
"step": 7450
},
{
"epoch": 0.66,
"learning_rate": 1.3432574430823118e-05,
"loss": 0.2424,
"step": 7500
},
{
"epoch": 0.66,
"learning_rate": 1.3388791593695273e-05,
"loss": 0.2354,
"step": 7550
},
{
"epoch": 0.67,
"learning_rate": 1.3345008756567425e-05,
"loss": 0.243,
"step": 7600
},
{
"epoch": 0.67,
"learning_rate": 1.3301225919439579e-05,
"loss": 0.2362,
"step": 7650
},
{
"epoch": 0.67,
"learning_rate": 1.3257443082311733e-05,
"loss": 0.2374,
"step": 7700
},
{
"epoch": 0.68,
"learning_rate": 1.3213660245183887e-05,
"loss": 0.2379,
"step": 7750
},
{
"epoch": 0.68,
"learning_rate": 1.3169877408056041e-05,
"loss": 0.2348,
"step": 7800
},
{
"epoch": 0.69,
"learning_rate": 1.3126094570928196e-05,
"loss": 0.2432,
"step": 7850
},
{
"epoch": 0.69,
"learning_rate": 1.308231173380035e-05,
"loss": 0.2381,
"step": 7900
},
{
"epoch": 0.7,
"learning_rate": 1.3038528896672507e-05,
"loss": 0.2472,
"step": 7950
},
{
"epoch": 0.7,
"learning_rate": 1.2994746059544661e-05,
"loss": 0.2409,
"step": 8000
},
{
"epoch": 0.7,
"learning_rate": 1.2950963222416814e-05,
"loss": 0.2448,
"step": 8050
},
{
"epoch": 0.71,
"learning_rate": 1.2907180385288968e-05,
"loss": 0.2421,
"step": 8100
},
{
"epoch": 0.71,
"learning_rate": 1.2863397548161122e-05,
"loss": 0.2392,
"step": 8150
},
{
"epoch": 0.72,
"learning_rate": 1.2819614711033276e-05,
"loss": 0.2432,
"step": 8200
},
{
"epoch": 0.72,
"learning_rate": 1.277583187390543e-05,
"loss": 0.2336,
"step": 8250
},
{
"epoch": 0.73,
"learning_rate": 1.2732049036777585e-05,
"loss": 0.2416,
"step": 8300
},
{
"epoch": 0.73,
"learning_rate": 1.2688266199649739e-05,
"loss": 0.236,
"step": 8350
},
{
"epoch": 0.74,
"learning_rate": 1.2644483362521893e-05,
"loss": 0.2387,
"step": 8400
},
{
"epoch": 0.74,
"learning_rate": 1.2600700525394047e-05,
"loss": 0.2398,
"step": 8450
},
{
"epoch": 0.74,
"learning_rate": 1.2556917688266201e-05,
"loss": 0.2413,
"step": 8500
},
{
"epoch": 0.75,
"learning_rate": 1.2513134851138355e-05,
"loss": 0.2445,
"step": 8550
},
{
"epoch": 0.75,
"learning_rate": 1.246935201401051e-05,
"loss": 0.2405,
"step": 8600
},
{
"epoch": 0.76,
"learning_rate": 1.2425569176882663e-05,
"loss": 0.2389,
"step": 8650
},
{
"epoch": 0.76,
"learning_rate": 1.2381786339754818e-05,
"loss": 0.2348,
"step": 8700
},
{
"epoch": 0.77,
"learning_rate": 1.2338003502626972e-05,
"loss": 0.2397,
"step": 8750
},
{
"epoch": 0.77,
"learning_rate": 1.2294220665499126e-05,
"loss": 0.233,
"step": 8800
},
{
"epoch": 0.77,
"learning_rate": 1.225043782837128e-05,
"loss": 0.2395,
"step": 8850
},
{
"epoch": 0.78,
"learning_rate": 1.2206654991243434e-05,
"loss": 0.2366,
"step": 8900
},
{
"epoch": 0.78,
"learning_rate": 1.2162872154115588e-05,
"loss": 0.234,
"step": 8950
},
{
"epoch": 0.79,
"learning_rate": 1.2119089316987742e-05,
"loss": 0.2391,
"step": 9000
},
{
"epoch": 0.79,
"learning_rate": 1.2075306479859897e-05,
"loss": 0.2382,
"step": 9050
},
{
"epoch": 0.8,
"learning_rate": 1.203152364273205e-05,
"loss": 0.231,
"step": 9100
},
{
"epoch": 0.8,
"learning_rate": 1.1987740805604205e-05,
"loss": 0.2383,
"step": 9150
},
{
"epoch": 0.81,
"learning_rate": 1.1943957968476359e-05,
"loss": 0.2443,
"step": 9200
},
{
"epoch": 0.81,
"learning_rate": 1.1900175131348513e-05,
"loss": 0.2446,
"step": 9250
},
{
"epoch": 0.81,
"learning_rate": 1.1856392294220667e-05,
"loss": 0.2404,
"step": 9300
},
{
"epoch": 0.82,
"learning_rate": 1.1812609457092821e-05,
"loss": 0.2371,
"step": 9350
},
{
"epoch": 0.82,
"learning_rate": 1.1768826619964975e-05,
"loss": 0.2427,
"step": 9400
},
{
"epoch": 0.83,
"learning_rate": 1.172504378283713e-05,
"loss": 0.2354,
"step": 9450
},
{
"epoch": 0.83,
"learning_rate": 1.1681260945709284e-05,
"loss": 0.2332,
"step": 9500
},
{
"epoch": 0.84,
"learning_rate": 1.1637478108581436e-05,
"loss": 0.2394,
"step": 9550
},
{
"epoch": 0.84,
"learning_rate": 1.159369527145359e-05,
"loss": 0.238,
"step": 9600
},
{
"epoch": 0.85,
"learning_rate": 1.1549912434325744e-05,
"loss": 0.2365,
"step": 9650
},
{
"epoch": 0.85,
"learning_rate": 1.1506129597197899e-05,
"loss": 0.235,
"step": 9700
},
{
"epoch": 0.85,
"learning_rate": 1.1462346760070053e-05,
"loss": 0.2374,
"step": 9750
},
{
"epoch": 0.86,
"learning_rate": 1.1418563922942207e-05,
"loss": 0.2291,
"step": 9800
},
{
"epoch": 0.86,
"learning_rate": 1.1374781085814361e-05,
"loss": 0.2404,
"step": 9850
},
{
"epoch": 0.87,
"learning_rate": 1.1330998248686515e-05,
"loss": 0.2373,
"step": 9900
},
{
"epoch": 0.87,
"learning_rate": 1.128721541155867e-05,
"loss": 0.2426,
"step": 9950
},
{
"epoch": 0.88,
"learning_rate": 1.1243432574430823e-05,
"loss": 0.2476,
"step": 10000
},
{
"epoch": 0.88,
"learning_rate": 1.1199649737302978e-05,
"loss": 0.2404,
"step": 10050
},
{
"epoch": 0.88,
"learning_rate": 1.1155866900175132e-05,
"loss": 0.2401,
"step": 10100
},
{
"epoch": 0.89,
"learning_rate": 1.1112084063047286e-05,
"loss": 0.2443,
"step": 10150
},
{
"epoch": 0.89,
"learning_rate": 1.106830122591944e-05,
"loss": 0.2349,
"step": 10200
},
{
"epoch": 0.9,
"learning_rate": 1.1024518388791594e-05,
"loss": 0.2408,
"step": 10250
},
{
"epoch": 0.9,
"learning_rate": 1.0980735551663748e-05,
"loss": 0.2393,
"step": 10300
},
{
"epoch": 0.91,
"learning_rate": 1.0936952714535902e-05,
"loss": 0.2358,
"step": 10350
},
{
"epoch": 0.91,
"learning_rate": 1.0893169877408056e-05,
"loss": 0.2412,
"step": 10400
},
{
"epoch": 0.92,
"learning_rate": 1.084938704028021e-05,
"loss": 0.2536,
"step": 10450
},
{
"epoch": 0.92,
"learning_rate": 1.0805604203152365e-05,
"loss": 0.2344,
"step": 10500
},
{
"epoch": 0.92,
"learning_rate": 1.0761821366024519e-05,
"loss": 0.2413,
"step": 10550
},
{
"epoch": 0.93,
"learning_rate": 1.0718038528896673e-05,
"loss": 0.2487,
"step": 10600
},
{
"epoch": 0.93,
"learning_rate": 1.0674255691768827e-05,
"loss": 0.2374,
"step": 10650
},
{
"epoch": 0.94,
"learning_rate": 1.0630472854640981e-05,
"loss": 0.2374,
"step": 10700
},
{
"epoch": 0.94,
"learning_rate": 1.0586690017513135e-05,
"loss": 0.2396,
"step": 10750
},
{
"epoch": 0.95,
"learning_rate": 1.054290718038529e-05,
"loss": 0.236,
"step": 10800
},
{
"epoch": 0.95,
"learning_rate": 1.0499124343257444e-05,
"loss": 0.2393,
"step": 10850
},
{
"epoch": 0.95,
"learning_rate": 1.0455341506129598e-05,
"loss": 0.238,
"step": 10900
},
{
"epoch": 0.96,
"learning_rate": 1.0411558669001752e-05,
"loss": 0.2385,
"step": 10950
},
{
"epoch": 0.96,
"learning_rate": 1.0367775831873904e-05,
"loss": 0.2376,
"step": 11000
},
{
"epoch": 0.97,
"learning_rate": 1.0323992994746059e-05,
"loss": 0.2347,
"step": 11050
},
{
"epoch": 0.97,
"learning_rate": 1.0280210157618213e-05,
"loss": 0.2337,
"step": 11100
},
{
"epoch": 0.98,
"learning_rate": 1.023642732049037e-05,
"loss": 0.2423,
"step": 11150
},
{
"epoch": 0.98,
"learning_rate": 1.0192644483362524e-05,
"loss": 0.2347,
"step": 11200
},
{
"epoch": 0.99,
"learning_rate": 1.0148861646234678e-05,
"loss": 0.2387,
"step": 11250
},
{
"epoch": 0.99,
"learning_rate": 1.0105078809106833e-05,
"loss": 0.2427,
"step": 11300
},
{
"epoch": 0.99,
"learning_rate": 1.0061295971978987e-05,
"loss": 0.2243,
"step": 11350
},
{
"epoch": 1.0,
"learning_rate": 1.0017513134851141e-05,
"loss": 0.2386,
"step": 11400
},
{
"epoch": 1.0,
"eval_loss": 0.2167460024356842,
"eval_runtime": 72.1234,
"eval_samples_per_second": 63.336,
"eval_steps_per_second": 15.834,
"step": 11420
},
{
"epoch": 1.0,
"learning_rate": 9.973730297723293e-06,
"loss": 0.2285,
"step": 11450
},
{
"epoch": 1.01,
"learning_rate": 9.929947460595447e-06,
"loss": 0.2315,
"step": 11500
},
{
"epoch": 1.01,
"learning_rate": 9.886164623467602e-06,
"loss": 0.2317,
"step": 11550
},
{
"epoch": 1.02,
"learning_rate": 9.842381786339756e-06,
"loss": 0.2314,
"step": 11600
},
{
"epoch": 1.02,
"learning_rate": 9.79859894921191e-06,
"loss": 0.2392,
"step": 11650
},
{
"epoch": 1.02,
"learning_rate": 9.754816112084064e-06,
"loss": 0.2311,
"step": 11700
},
{
"epoch": 1.03,
"learning_rate": 9.711033274956218e-06,
"loss": 0.223,
"step": 11750
},
{
"epoch": 1.03,
"learning_rate": 9.667250437828372e-06,
"loss": 0.2176,
"step": 11800
},
{
"epoch": 1.04,
"learning_rate": 9.623467600700526e-06,
"loss": 0.2311,
"step": 11850
},
{
"epoch": 1.04,
"learning_rate": 9.57968476357268e-06,
"loss": 0.2242,
"step": 11900
},
{
"epoch": 1.05,
"learning_rate": 9.535901926444835e-06,
"loss": 0.2203,
"step": 11950
},
{
"epoch": 1.05,
"learning_rate": 9.492119089316989e-06,
"loss": 0.2301,
"step": 12000
},
{
"epoch": 1.06,
"learning_rate": 9.448336252189143e-06,
"loss": 0.2356,
"step": 12050
},
{
"epoch": 1.06,
"learning_rate": 9.404553415061297e-06,
"loss": 0.2261,
"step": 12100
},
{
"epoch": 1.06,
"learning_rate": 9.360770577933451e-06,
"loss": 0.2269,
"step": 12150
},
{
"epoch": 1.07,
"learning_rate": 9.316987740805605e-06,
"loss": 0.2224,
"step": 12200
},
{
"epoch": 1.07,
"learning_rate": 9.27320490367776e-06,
"loss": 0.2214,
"step": 12250
},
{
"epoch": 1.08,
"learning_rate": 9.229422066549914e-06,
"loss": 0.2327,
"step": 12300
},
{
"epoch": 1.08,
"learning_rate": 9.185639229422068e-06,
"loss": 0.2245,
"step": 12350
},
{
"epoch": 1.09,
"learning_rate": 9.141856392294222e-06,
"loss": 0.2304,
"step": 12400
},
{
"epoch": 1.09,
"learning_rate": 9.098073555166376e-06,
"loss": 0.2387,
"step": 12450
},
{
"epoch": 1.09,
"learning_rate": 9.05429071803853e-06,
"loss": 0.2285,
"step": 12500
},
{
"epoch": 1.1,
"learning_rate": 9.010507880910684e-06,
"loss": 0.2274,
"step": 12550
},
{
"epoch": 1.1,
"learning_rate": 8.966725043782838e-06,
"loss": 0.2278,
"step": 12600
},
{
"epoch": 1.11,
"learning_rate": 8.922942206654993e-06,
"loss": 0.2277,
"step": 12650
},
{
"epoch": 1.11,
"learning_rate": 8.879159369527147e-06,
"loss": 0.2261,
"step": 12700
},
{
"epoch": 1.12,
"learning_rate": 8.8353765323993e-06,
"loss": 0.2274,
"step": 12750
},
{
"epoch": 1.12,
"learning_rate": 8.791593695271455e-06,
"loss": 0.227,
"step": 12800
},
{
"epoch": 1.13,
"learning_rate": 8.747810858143609e-06,
"loss": 0.2363,
"step": 12850
},
{
"epoch": 1.13,
"learning_rate": 8.704028021015763e-06,
"loss": 0.2244,
"step": 12900
},
{
"epoch": 1.13,
"learning_rate": 8.660245183887916e-06,
"loss": 0.2272,
"step": 12950
},
{
"epoch": 1.14,
"learning_rate": 8.61646234676007e-06,
"loss": 0.2277,
"step": 13000
},
{
"epoch": 1.14,
"learning_rate": 8.572679509632224e-06,
"loss": 0.232,
"step": 13050
},
{
"epoch": 1.15,
"learning_rate": 8.528896672504378e-06,
"loss": 0.2309,
"step": 13100
},
{
"epoch": 1.15,
"learning_rate": 8.485113835376532e-06,
"loss": 0.2292,
"step": 13150
},
{
"epoch": 1.16,
"learning_rate": 8.441330998248686e-06,
"loss": 0.2308,
"step": 13200
},
{
"epoch": 1.16,
"learning_rate": 8.39754816112084e-06,
"loss": 0.2291,
"step": 13250
},
{
"epoch": 1.16,
"learning_rate": 8.353765323992995e-06,
"loss": 0.2326,
"step": 13300
},
{
"epoch": 1.17,
"learning_rate": 8.309982486865149e-06,
"loss": 0.2215,
"step": 13350
},
{
"epoch": 1.17,
"learning_rate": 8.266199649737303e-06,
"loss": 0.2312,
"step": 13400
},
{
"epoch": 1.18,
"learning_rate": 8.222416812609457e-06,
"loss": 0.2216,
"step": 13450
},
{
"epoch": 1.18,
"learning_rate": 8.178633975481613e-06,
"loss": 0.2269,
"step": 13500
},
{
"epoch": 1.19,
"learning_rate": 8.134851138353767e-06,
"loss": 0.229,
"step": 13550
},
{
"epoch": 1.19,
"learning_rate": 8.091068301225921e-06,
"loss": 0.2319,
"step": 13600
},
{
"epoch": 1.2,
"learning_rate": 8.047285464098075e-06,
"loss": 0.226,
"step": 13650
},
{
"epoch": 1.2,
"learning_rate": 8.00350262697023e-06,
"loss": 0.2294,
"step": 13700
},
{
"epoch": 1.2,
"learning_rate": 7.959719789842383e-06,
"loss": 0.2347,
"step": 13750
},
{
"epoch": 1.21,
"learning_rate": 7.915936952714538e-06,
"loss": 0.2315,
"step": 13800
},
{
"epoch": 1.21,
"learning_rate": 7.872154115586692e-06,
"loss": 0.2209,
"step": 13850
},
{
"epoch": 1.22,
"learning_rate": 7.828371278458846e-06,
"loss": 0.2248,
"step": 13900
},
{
"epoch": 1.22,
"learning_rate": 7.784588441330998e-06,
"loss": 0.2305,
"step": 13950
},
{
"epoch": 1.23,
"learning_rate": 7.740805604203152e-06,
"loss": 0.2288,
"step": 14000
},
{
"epoch": 1.23,
"learning_rate": 7.697022767075307e-06,
"loss": 0.2286,
"step": 14050
},
{
"epoch": 1.23,
"learning_rate": 7.65323992994746e-06,
"loss": 0.2274,
"step": 14100
},
{
"epoch": 1.24,
"learning_rate": 7.609457092819616e-06,
"loss": 0.2235,
"step": 14150
},
{
"epoch": 1.24,
"learning_rate": 7.56567425569177e-06,
"loss": 0.2328,
"step": 14200
},
{
"epoch": 1.25,
"learning_rate": 7.521891418563924e-06,
"loss": 0.2267,
"step": 14250
},
{
"epoch": 1.25,
"learning_rate": 7.478108581436077e-06,
"loss": 0.2231,
"step": 14300
},
{
"epoch": 1.26,
"learning_rate": 7.434325744308231e-06,
"loss": 0.2256,
"step": 14350
},
{
"epoch": 1.26,
"learning_rate": 7.3905429071803855e-06,
"loss": 0.2276,
"step": 14400
},
{
"epoch": 1.27,
"learning_rate": 7.34676007005254e-06,
"loss": 0.2312,
"step": 14450
},
{
"epoch": 1.27,
"learning_rate": 7.302977232924694e-06,
"loss": 0.2307,
"step": 14500
},
{
"epoch": 1.27,
"learning_rate": 7.259194395796848e-06,
"loss": 0.2265,
"step": 14550
},
{
"epoch": 1.28,
"learning_rate": 7.215411558669002e-06,
"loss": 0.2351,
"step": 14600
},
{
"epoch": 1.28,
"learning_rate": 7.171628721541156e-06,
"loss": 0.2264,
"step": 14650
},
{
"epoch": 1.29,
"learning_rate": 7.12784588441331e-06,
"loss": 0.2309,
"step": 14700
},
{
"epoch": 1.29,
"learning_rate": 7.0840630472854645e-06,
"loss": 0.2325,
"step": 14750
},
{
"epoch": 1.3,
"learning_rate": 7.040280210157619e-06,
"loss": 0.2329,
"step": 14800
},
{
"epoch": 1.3,
"learning_rate": 6.996497373029773e-06,
"loss": 0.2301,
"step": 14850
},
{
"epoch": 1.3,
"learning_rate": 6.952714535901927e-06,
"loss": 0.2308,
"step": 14900
},
{
"epoch": 1.31,
"learning_rate": 6.908931698774081e-06,
"loss": 0.2273,
"step": 14950
},
{
"epoch": 1.31,
"learning_rate": 6.865148861646235e-06,
"loss": 0.2347,
"step": 15000
},
{
"epoch": 1.32,
"learning_rate": 6.8213660245183884e-06,
"loss": 0.2303,
"step": 15050
},
{
"epoch": 1.32,
"learning_rate": 6.777583187390544e-06,
"loss": 0.2293,
"step": 15100
},
{
"epoch": 1.33,
"learning_rate": 6.733800350262698e-06,
"loss": 0.2299,
"step": 15150
},
{
"epoch": 1.33,
"learning_rate": 6.6900175131348525e-06,
"loss": 0.2271,
"step": 15200
},
{
"epoch": 1.34,
"learning_rate": 6.646234676007006e-06,
"loss": 0.2246,
"step": 15250
},
{
"epoch": 1.34,
"learning_rate": 6.60245183887916e-06,
"loss": 0.2262,
"step": 15300
},
{
"epoch": 1.34,
"learning_rate": 6.558669001751314e-06,
"loss": 0.2176,
"step": 15350
},
{
"epoch": 1.35,
"learning_rate": 6.514886164623468e-06,
"loss": 0.2277,
"step": 15400
},
{
"epoch": 1.35,
"learning_rate": 6.471103327495622e-06,
"loss": 0.2184,
"step": 15450
},
{
"epoch": 1.36,
"learning_rate": 6.4273204903677765e-06,
"loss": 0.2365,
"step": 15500
},
{
"epoch": 1.36,
"learning_rate": 6.383537653239931e-06,
"loss": 0.2288,
"step": 15550
},
{
"epoch": 1.37,
"learning_rate": 6.339754816112085e-06,
"loss": 0.2252,
"step": 15600
},
{
"epoch": 1.37,
"learning_rate": 6.295971978984239e-06,
"loss": 0.2293,
"step": 15650
},
{
"epoch": 1.37,
"learning_rate": 6.252189141856393e-06,
"loss": 0.2252,
"step": 15700
},
{
"epoch": 1.38,
"learning_rate": 6.208406304728547e-06,
"loss": 0.2382,
"step": 15750
},
{
"epoch": 1.38,
"learning_rate": 6.164623467600701e-06,
"loss": 0.2293,
"step": 15800
},
{
"epoch": 1.39,
"learning_rate": 6.120840630472855e-06,
"loss": 0.2274,
"step": 15850
},
{
"epoch": 1.39,
"learning_rate": 6.0770577933450096e-06,
"loss": 0.2227,
"step": 15900
},
{
"epoch": 1.4,
"learning_rate": 6.033274956217164e-06,
"loss": 0.2311,
"step": 15950
},
{
"epoch": 1.4,
"learning_rate": 5.989492119089317e-06,
"loss": 0.2227,
"step": 16000
},
{
"epoch": 1.41,
"learning_rate": 5.945709281961471e-06,
"loss": 0.2242,
"step": 16050
},
{
"epoch": 1.41,
"learning_rate": 5.901926444833625e-06,
"loss": 0.2341,
"step": 16100
},
{
"epoch": 1.41,
"learning_rate": 5.858143607705779e-06,
"loss": 0.2341,
"step": 16150
},
{
"epoch": 1.42,
"learning_rate": 5.8143607705779335e-06,
"loss": 0.2306,
"step": 16200
},
{
"epoch": 1.42,
"learning_rate": 5.770577933450088e-06,
"loss": 0.224,
"step": 16250
},
{
"epoch": 1.43,
"learning_rate": 5.726795096322242e-06,
"loss": 0.2283,
"step": 16300
},
{
"epoch": 1.43,
"learning_rate": 5.683012259194396e-06,
"loss": 0.2266,
"step": 16350
},
{
"epoch": 1.44,
"learning_rate": 5.63922942206655e-06,
"loss": 0.2283,
"step": 16400
},
{
"epoch": 1.44,
"learning_rate": 5.595446584938704e-06,
"loss": 0.2296,
"step": 16450
},
{
"epoch": 1.44,
"learning_rate": 5.551663747810858e-06,
"loss": 0.2214,
"step": 16500
},
{
"epoch": 1.45,
"learning_rate": 5.5078809106830125e-06,
"loss": 0.2229,
"step": 16550
},
{
"epoch": 1.45,
"learning_rate": 5.464098073555167e-06,
"loss": 0.2333,
"step": 16600
},
{
"epoch": 1.46,
"learning_rate": 5.420315236427321e-06,
"loss": 0.2276,
"step": 16650
},
{
"epoch": 1.46,
"learning_rate": 5.376532399299476e-06,
"loss": 0.2261,
"step": 16700
},
{
"epoch": 1.47,
"learning_rate": 5.33274956217163e-06,
"loss": 0.2305,
"step": 16750
},
{
"epoch": 1.47,
"learning_rate": 5.288966725043784e-06,
"loss": 0.2314,
"step": 16800
},
{
"epoch": 1.48,
"learning_rate": 5.245183887915938e-06,
"loss": 0.2269,
"step": 16850
},
{
"epoch": 1.48,
"learning_rate": 5.201401050788092e-06,
"loss": 0.2313,
"step": 16900
},
{
"epoch": 1.48,
"learning_rate": 5.157618213660246e-06,
"loss": 0.2341,
"step": 16950
},
{
"epoch": 1.49,
"learning_rate": 5.1138353765324e-06,
"loss": 0.225,
"step": 17000
},
{
"epoch": 1.49,
"learning_rate": 5.070052539404554e-06,
"loss": 0.2189,
"step": 17050
},
{
"epoch": 1.5,
"learning_rate": 5.026269702276708e-06,
"loss": 0.231,
"step": 17100
},
{
"epoch": 1.5,
"learning_rate": 4.982486865148862e-06,
"loss": 0.229,
"step": 17150
},
{
"epoch": 1.51,
"learning_rate": 4.938704028021016e-06,
"loss": 0.2324,
"step": 17200
},
{
"epoch": 1.51,
"learning_rate": 4.89492119089317e-06,
"loss": 0.2303,
"step": 17250
},
{
"epoch": 1.51,
"learning_rate": 4.8511383537653245e-06,
"loss": 0.2274,
"step": 17300
},
{
"epoch": 1.52,
"learning_rate": 4.807355516637479e-06,
"loss": 0.2244,
"step": 17350
},
{
"epoch": 1.52,
"learning_rate": 4.763572679509633e-06,
"loss": 0.2273,
"step": 17400
},
{
"epoch": 1.53,
"learning_rate": 4.719789842381787e-06,
"loss": 0.2265,
"step": 17450
},
{
"epoch": 1.53,
"learning_rate": 4.676007005253941e-06,
"loss": 0.2253,
"step": 17500
},
{
"epoch": 1.54,
"learning_rate": 4.632224168126095e-06,
"loss": 0.2349,
"step": 17550
},
{
"epoch": 1.54,
"learning_rate": 4.588441330998249e-06,
"loss": 0.2202,
"step": 17600
},
{
"epoch": 1.55,
"learning_rate": 4.544658493870403e-06,
"loss": 0.2304,
"step": 17650
},
{
"epoch": 1.55,
"learning_rate": 4.500875656742557e-06,
"loss": 0.2321,
"step": 17700
},
{
"epoch": 1.55,
"learning_rate": 4.457092819614711e-06,
"loss": 0.2194,
"step": 17750
},
{
"epoch": 1.56,
"learning_rate": 4.413309982486865e-06,
"loss": 0.2278,
"step": 17800
},
{
"epoch": 1.56,
"learning_rate": 4.36952714535902e-06,
"loss": 0.2237,
"step": 17850
},
{
"epoch": 1.57,
"learning_rate": 4.325744308231174e-06,
"loss": 0.2241,
"step": 17900
},
{
"epoch": 1.57,
"learning_rate": 4.281961471103328e-06,
"loss": 0.2244,
"step": 17950
},
{
"epoch": 1.58,
"learning_rate": 4.238178633975482e-06,
"loss": 0.2247,
"step": 18000
},
{
"epoch": 1.58,
"learning_rate": 4.1943957968476365e-06,
"loss": 0.2307,
"step": 18050
},
{
"epoch": 1.58,
"learning_rate": 4.150612959719791e-06,
"loss": 0.2212,
"step": 18100
},
{
"epoch": 1.59,
"learning_rate": 4.106830122591945e-06,
"loss": 0.224,
"step": 18150
},
{
"epoch": 1.59,
"learning_rate": 4.063047285464098e-06,
"loss": 0.221,
"step": 18200
},
{
"epoch": 1.6,
"learning_rate": 4.019264448336252e-06,
"loss": 0.2237,
"step": 18250
},
{
"epoch": 1.6,
"learning_rate": 3.975481611208406e-06,
"loss": 0.2294,
"step": 18300
},
{
"epoch": 1.61,
"learning_rate": 3.9316987740805604e-06,
"loss": 0.222,
"step": 18350
},
{
"epoch": 1.61,
"learning_rate": 3.8879159369527146e-06,
"loss": 0.2235,
"step": 18400
},
{
"epoch": 1.62,
"learning_rate": 3.844133099824869e-06,
"loss": 0.2299,
"step": 18450
},
{
"epoch": 1.62,
"learning_rate": 3.800350262697023e-06,
"loss": 0.2308,
"step": 18500
},
{
"epoch": 1.62,
"learning_rate": 3.756567425569177e-06,
"loss": 0.2279,
"step": 18550
},
{
"epoch": 1.63,
"learning_rate": 3.712784588441331e-06,
"loss": 0.2293,
"step": 18600
},
{
"epoch": 1.63,
"learning_rate": 3.6690017513134857e-06,
"loss": 0.2279,
"step": 18650
},
{
"epoch": 1.64,
"learning_rate": 3.62521891418564e-06,
"loss": 0.2292,
"step": 18700
},
{
"epoch": 1.64,
"learning_rate": 3.581436077057794e-06,
"loss": 0.2276,
"step": 18750
},
{
"epoch": 1.65,
"learning_rate": 3.537653239929948e-06,
"loss": 0.2276,
"step": 18800
},
{
"epoch": 1.65,
"learning_rate": 3.4938704028021018e-06,
"loss": 0.2209,
"step": 18850
},
{
"epoch": 1.65,
"learning_rate": 3.450087565674256e-06,
"loss": 0.2251,
"step": 18900
},
{
"epoch": 1.66,
"learning_rate": 3.40630472854641e-06,
"loss": 0.2266,
"step": 18950
},
{
"epoch": 1.66,
"learning_rate": 3.362521891418564e-06,
"loss": 0.228,
"step": 19000
},
{
"epoch": 1.67,
"learning_rate": 3.3187390542907183e-06,
"loss": 0.2274,
"step": 19050
},
{
"epoch": 1.67,
"learning_rate": 3.2749562171628725e-06,
"loss": 0.2256,
"step": 19100
},
{
"epoch": 1.68,
"learning_rate": 3.2311733800350266e-06,
"loss": 0.2279,
"step": 19150
},
{
"epoch": 1.68,
"learning_rate": 3.1873905429071807e-06,
"loss": 0.2254,
"step": 19200
},
{
"epoch": 1.69,
"learning_rate": 3.1436077057793344e-06,
"loss": 0.2273,
"step": 19250
},
{
"epoch": 1.69,
"learning_rate": 3.0998248686514886e-06,
"loss": 0.2277,
"step": 19300
},
{
"epoch": 1.69,
"learning_rate": 3.0560420315236427e-06,
"loss": 0.2291,
"step": 19350
},
{
"epoch": 1.7,
"learning_rate": 3.012259194395797e-06,
"loss": 0.2258,
"step": 19400
},
{
"epoch": 1.7,
"learning_rate": 2.9684763572679514e-06,
"loss": 0.2247,
"step": 19450
},
{
"epoch": 1.71,
"learning_rate": 2.9246935201401055e-06,
"loss": 0.2246,
"step": 19500
},
{
"epoch": 1.71,
"learning_rate": 2.8809106830122597e-06,
"loss": 0.2228,
"step": 19550
},
{
"epoch": 1.72,
"learning_rate": 2.837127845884414e-06,
"loss": 0.2281,
"step": 19600
},
{
"epoch": 1.72,
"learning_rate": 2.793345008756568e-06,
"loss": 0.2396,
"step": 19650
},
{
"epoch": 1.73,
"learning_rate": 2.749562171628722e-06,
"loss": 0.2214,
"step": 19700
},
{
"epoch": 1.73,
"learning_rate": 2.7057793345008758e-06,
"loss": 0.2286,
"step": 19750
},
{
"epoch": 1.73,
"learning_rate": 2.66199649737303e-06,
"loss": 0.2289,
"step": 19800
},
{
"epoch": 1.74,
"learning_rate": 2.618213660245184e-06,
"loss": 0.2282,
"step": 19850
},
{
"epoch": 1.74,
"learning_rate": 2.574430823117338e-06,
"loss": 0.2299,
"step": 19900
},
{
"epoch": 1.75,
"learning_rate": 2.5306479859894923e-06,
"loss": 0.2276,
"step": 19950
},
{
"epoch": 1.75,
"learning_rate": 2.4868651488616464e-06,
"loss": 0.2257,
"step": 20000
},
{
"epoch": 1.76,
"learning_rate": 2.4430823117338006e-06,
"loss": 0.228,
"step": 20050
},
{
"epoch": 1.76,
"learning_rate": 2.3992994746059547e-06,
"loss": 0.2245,
"step": 20100
},
{
"epoch": 1.76,
"learning_rate": 2.355516637478109e-06,
"loss": 0.2199,
"step": 20150
},
{
"epoch": 1.77,
"learning_rate": 2.311733800350263e-06,
"loss": 0.2264,
"step": 20200
},
{
"epoch": 1.77,
"learning_rate": 2.267950963222417e-06,
"loss": 0.228,
"step": 20250
},
{
"epoch": 1.78,
"learning_rate": 2.2241681260945713e-06,
"loss": 0.2311,
"step": 20300
},
{
"epoch": 1.78,
"learning_rate": 2.180385288966725e-06,
"loss": 0.2252,
"step": 20350
},
{
"epoch": 1.79,
"learning_rate": 2.136602451838879e-06,
"loss": 0.2259,
"step": 20400
},
{
"epoch": 1.79,
"learning_rate": 2.0928196147110337e-06,
"loss": 0.2254,
"step": 20450
},
{
"epoch": 1.8,
"learning_rate": 2.0490367775831878e-06,
"loss": 0.2273,
"step": 20500
},
{
"epoch": 1.8,
"learning_rate": 2.005253940455342e-06,
"loss": 0.2188,
"step": 20550
},
{
"epoch": 1.8,
"learning_rate": 1.9614711033274956e-06,
"loss": 0.2295,
"step": 20600
},
{
"epoch": 1.81,
"learning_rate": 1.9176882661996498e-06,
"loss": 0.2257,
"step": 20650
},
{
"epoch": 1.81,
"learning_rate": 1.873905429071804e-06,
"loss": 0.2296,
"step": 20700
},
{
"epoch": 1.82,
"learning_rate": 1.830122591943958e-06,
"loss": 0.2304,
"step": 20750
},
{
"epoch": 1.82,
"learning_rate": 1.7863397548161122e-06,
"loss": 0.2316,
"step": 20800
},
{
"epoch": 1.83,
"learning_rate": 1.7425569176882665e-06,
"loss": 0.2311,
"step": 20850
},
{
"epoch": 1.83,
"learning_rate": 1.6987740805604204e-06,
"loss": 0.2254,
"step": 20900
},
{
"epoch": 1.83,
"learning_rate": 1.6549912434325746e-06,
"loss": 0.2234,
"step": 20950
},
{
"epoch": 1.84,
"learning_rate": 1.6112084063047287e-06,
"loss": 0.2193,
"step": 21000
},
{
"epoch": 1.84,
"learning_rate": 1.5674255691768828e-06,
"loss": 0.236,
"step": 21050
},
{
"epoch": 1.85,
"learning_rate": 1.5236427320490368e-06,
"loss": 0.2282,
"step": 21100
},
{
"epoch": 1.85,
"learning_rate": 1.479859894921191e-06,
"loss": 0.23,
"step": 21150
},
{
"epoch": 1.86,
"learning_rate": 1.436077057793345e-06,
"loss": 0.232,
"step": 21200
},
{
"epoch": 1.86,
"learning_rate": 1.3922942206654994e-06,
"loss": 0.2283,
"step": 21250
},
{
"epoch": 1.87,
"learning_rate": 1.3485113835376535e-06,
"loss": 0.2334,
"step": 21300
},
{
"epoch": 1.87,
"learning_rate": 1.3047285464098074e-06,
"loss": 0.2297,
"step": 21350
},
{
"epoch": 1.87,
"learning_rate": 1.2609457092819616e-06,
"loss": 0.223,
"step": 21400
},
{
"epoch": 1.88,
"learning_rate": 1.2171628721541157e-06,
"loss": 0.223,
"step": 21450
},
{
"epoch": 1.88,
"learning_rate": 1.1733800350262698e-06,
"loss": 0.2252,
"step": 21500
},
{
"epoch": 1.89,
"learning_rate": 1.129597197898424e-06,
"loss": 0.2256,
"step": 21550
},
{
"epoch": 1.89,
"learning_rate": 1.085814360770578e-06,
"loss": 0.2228,
"step": 21600
},
{
"epoch": 1.9,
"learning_rate": 1.042031523642732e-06,
"loss": 0.2288,
"step": 21650
},
{
"epoch": 1.9,
"learning_rate": 9.982486865148862e-07,
"loss": 0.2342,
"step": 21700
},
{
"epoch": 1.9,
"learning_rate": 9.544658493870403e-07,
"loss": 0.2267,
"step": 21750
},
{
"epoch": 1.91,
"learning_rate": 9.106830122591944e-07,
"loss": 0.2234,
"step": 21800
},
{
"epoch": 1.91,
"learning_rate": 8.669001751313486e-07,
"loss": 0.2191,
"step": 21850
},
{
"epoch": 1.92,
"learning_rate": 8.231173380035026e-07,
"loss": 0.2219,
"step": 21900
},
{
"epoch": 1.92,
"learning_rate": 7.793345008756568e-07,
"loss": 0.2239,
"step": 21950
},
{
"epoch": 1.93,
"learning_rate": 7.35551663747811e-07,
"loss": 0.2262,
"step": 22000
},
{
"epoch": 1.93,
"learning_rate": 6.91768826619965e-07,
"loss": 0.2185,
"step": 22050
},
{
"epoch": 1.94,
"learning_rate": 6.47985989492119e-07,
"loss": 0.2239,
"step": 22100
},
{
"epoch": 1.94,
"learning_rate": 6.042031523642733e-07,
"loss": 0.2286,
"step": 22150
},
{
"epoch": 1.94,
"learning_rate": 5.604203152364274e-07,
"loss": 0.2253,
"step": 22200
},
{
"epoch": 1.95,
"learning_rate": 5.166374781085814e-07,
"loss": 0.2252,
"step": 22250
},
{
"epoch": 1.95,
"learning_rate": 4.728546409807356e-07,
"loss": 0.2245,
"step": 22300
},
{
"epoch": 1.96,
"learning_rate": 4.290718038528897e-07,
"loss": 0.2285,
"step": 22350
},
{
"epoch": 1.96,
"learning_rate": 3.8528896672504383e-07,
"loss": 0.2256,
"step": 22400
},
{
"epoch": 1.97,
"learning_rate": 3.415061295971979e-07,
"loss": 0.2334,
"step": 22450
},
{
"epoch": 1.97,
"learning_rate": 2.9772329246935205e-07,
"loss": 0.2313,
"step": 22500
},
{
"epoch": 1.97,
"learning_rate": 2.5394045534150613e-07,
"loss": 0.2202,
"step": 22550
},
{
"epoch": 1.98,
"learning_rate": 2.1015761821366026e-07,
"loss": 0.2235,
"step": 22600
},
{
"epoch": 1.98,
"learning_rate": 1.6637478108581437e-07,
"loss": 0.2311,
"step": 22650
},
{
"epoch": 1.99,
"learning_rate": 1.2259194395796848e-07,
"loss": 0.2213,
"step": 22700
},
{
"epoch": 1.99,
"learning_rate": 7.88091068301226e-08,
"loss": 0.2246,
"step": 22750
},
{
"epoch": 2.0,
"learning_rate": 3.502626970227671e-08,
"loss": 0.2186,
"step": 22800
},
{
"epoch": 2.0,
"eval_loss": 0.20982445776462555,
"eval_runtime": 72.0375,
"eval_samples_per_second": 63.411,
"eval_steps_per_second": 15.853,
"step": 22840
}
],
"max_steps": 22840,
"num_train_epochs": 2,
"total_flos": 1.0789666943297126e+17,
"trial_name": null,
"trial_params": null
}