{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14583050847457626, "global_step": 2151000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999775367231639e-05, "loss": 0.994, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999549378531074e-05, "loss": 0.735, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999323389830509e-05, "loss": 0.6701, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.999097401129944e-05, "loss": 0.6115, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.998871412429379e-05, "loss": 0.5883, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.9986454237288136e-05, "loss": 0.5659, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.9984194350282485e-05, "loss": 0.5431, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.9981934463276833e-05, "loss": 0.5271, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.997967457627119e-05, "loss": 0.5072, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.997741468926554e-05, "loss": 0.4946, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.9975154802259886e-05, "loss": 0.4913, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.9972894915254235e-05, "loss": 0.48, "step": 6000 }, { "epoch": 0.0, "learning_rate": 4.997063502824859e-05, "loss": 0.464, "step": 6500 }, { "epoch": 0.0, "learning_rate": 4.996837966101695e-05, "loss": 0.4509, "step": 7000 }, { "epoch": 0.0, "learning_rate": 4.99661197740113e-05, "loss": 0.4448, "step": 7500 }, { "epoch": 0.0, "learning_rate": 4.996385988700565e-05, "loss": 0.4416, "step": 8000 }, { "epoch": 0.0, "learning_rate": 4.99616e-05, "loss": 0.4297, "step": 8500 }, { "epoch": 0.0, "learning_rate": 4.9959340112994355e-05, "loss": 0.4246, "step": 9000 }, { "epoch": 0.0, "learning_rate": 4.995708474576272e-05, "loss": 0.4243, "step": 9500 }, { "epoch": 0.0, "learning_rate": 4.9954824858757066e-05, "loss": 0.4138, "step": 10000 }, { "epoch": 0.0, "learning_rate": 4.9952564971751415e-05, "loss": 0.4055, "step": 10500 }, { "epoch": 0.0, "learning_rate": 4.9950305084745764e-05, "loss": 0.4008, "step": 11000 }, { "epoch": 0.0, "learning_rate": 4.9948049717514126e-05, "loss": 0.3897, "step": 11500 }, { "epoch": 0.0, "learning_rate": 4.9945789830508475e-05, "loss": 0.393, "step": 12000 }, { "epoch": 0.0, "learning_rate": 4.9943529943502824e-05, "loss": 0.3842, "step": 12500 }, { "epoch": 0.0, "learning_rate": 4.994127005649718e-05, "loss": 0.3814, "step": 13000 }, { "epoch": 0.0, "learning_rate": 4.993901016949153e-05, "loss": 0.3861, "step": 13500 }, { "epoch": 0.0, "learning_rate": 4.993675480225989e-05, "loss": 0.3787, "step": 14000 }, { "epoch": 0.0, "learning_rate": 4.993449491525424e-05, "loss": 0.3706, "step": 14500 }, { "epoch": 0.0, "learning_rate": 4.9932235028248595e-05, "loss": 0.3697, "step": 15000 }, { "epoch": 0.0, "learning_rate": 4.9929975141242943e-05, "loss": 0.3688, "step": 15500 }, { "epoch": 0.0, "learning_rate": 4.992771525423729e-05, "loss": 0.3559, "step": 16000 }, { "epoch": 0.0, "learning_rate": 4.992545536723164e-05, "loss": 0.3606, "step": 16500 }, { "epoch": 0.0, "learning_rate": 4.992319548022599e-05, "loss": 0.3562, "step": 17000 }, { "epoch": 0.0, "learning_rate": 4.992093559322034e-05, "loss": 0.3523, "step": 17500 }, { "epoch": 0.0, "learning_rate": 4.991867570621469e-05, "loss": 0.3419, "step": 18000 }, { "epoch": 0.01, "learning_rate": 4.9916415819209036e-05, "loss": 0.3459, "step": 18500 }, { "epoch": 0.01, "learning_rate": 4.991415593220339e-05, "loss": 0.3401, "step": 19000 }, { "epoch": 0.01, "learning_rate": 4.991189604519774e-05, "loss": 0.3393, "step": 19500 }, { "epoch": 0.01, "learning_rate": 4.99096406779661e-05, "loss": 0.3352, "step": 20000 }, { "epoch": 0.0, "learning_rate": 4.995369491525424e-05, "loss": 0.3279, "step": 20500 }, { "epoch": 0.0, "learning_rate": 4.9952564971751415e-05, "loss": 0.3253, "step": 21000 }, { "epoch": 0.0, "learning_rate": 4.995143502824859e-05, "loss": 0.3324, "step": 21500 }, { "epoch": 0.0, "learning_rate": 4.9950305084745764e-05, "loss": 0.317, "step": 22000 }, { "epoch": 0.0, "learning_rate": 4.994917514124294e-05, "loss": 0.3204, "step": 22500 }, { "epoch": 0.0, "learning_rate": 4.994804519774012e-05, "loss": 0.3125, "step": 23000 }, { "epoch": 0.0, "learning_rate": 4.9946915254237294e-05, "loss": 0.3184, "step": 23500 }, { "epoch": 0.0, "learning_rate": 4.9945787570621475e-05, "loss": 0.3212, "step": 24000 }, { "epoch": 0.0, "learning_rate": 4.994465762711865e-05, "loss": 0.3209, "step": 24500 }, { "epoch": 0.0, "learning_rate": 4.9943527683615824e-05, "loss": 0.3075, "step": 25000 }, { "epoch": 0.0, "learning_rate": 4.9942397740113e-05, "loss": 0.3162, "step": 25500 }, { "epoch": 0.0, "learning_rate": 4.994126779661017e-05, "loss": 0.3114, "step": 26000 }, { "epoch": 0.0, "learning_rate": 4.994013785310735e-05, "loss": 0.3187, "step": 26500 }, { "epoch": 0.0, "learning_rate": 4.993900790960452e-05, "loss": 0.3143, "step": 27000 }, { "epoch": 0.0, "learning_rate": 4.9937877966101696e-05, "loss": 0.303, "step": 27500 }, { "epoch": 0.0, "learning_rate": 4.993675028248588e-05, "loss": 0.3195, "step": 28000 }, { "epoch": 0.0, "learning_rate": 4.993562033898305e-05, "loss": 0.3093, "step": 28500 }, { "epoch": 0.0, "learning_rate": 4.9934490395480226e-05, "loss": 0.3025, "step": 29000 }, { "epoch": 0.0, "learning_rate": 4.99333604519774e-05, "loss": 0.3078, "step": 29500 }, { "epoch": 0.0, "learning_rate": 4.993223276836159e-05, "loss": 0.3028, "step": 30000 }, { "epoch": 0.0, "learning_rate": 4.993110282485876e-05, "loss": 0.3056, "step": 30500 }, { "epoch": 0.0, "learning_rate": 4.992997288135594e-05, "loss": 0.3103, "step": 31000 }, { "epoch": 0.0, "learning_rate": 4.992884293785311e-05, "loss": 0.2955, "step": 31500 }, { "epoch": 0.0, "learning_rate": 4.992771525423729e-05, "loss": 0.2934, "step": 32000 }, { "epoch": 0.0, "learning_rate": 4.992658757062147e-05, "loss": 0.2981, "step": 32500 }, { "epoch": 0.0, "learning_rate": 4.992545762711865e-05, "loss": 0.3028, "step": 33000 }, { "epoch": 0.0, "learning_rate": 4.992432768361582e-05, "loss": 0.2976, "step": 33500 }, { "epoch": 0.0, "learning_rate": 4.9923197740112997e-05, "loss": 0.2958, "step": 34000 }, { "epoch": 0.0, "learning_rate": 4.992207005649718e-05, "loss": 0.2944, "step": 34500 }, { "epoch": 0.0, "learning_rate": 4.992094011299435e-05, "loss": 0.2936, "step": 35000 }, { "epoch": 0.0, "learning_rate": 4.991981242937853e-05, "loss": 0.2855, "step": 35500 }, { "epoch": 0.0, "learning_rate": 4.991868248587571e-05, "loss": 0.2859, "step": 36000 }, { "epoch": 0.0, "learning_rate": 4.991755254237289e-05, "loss": 0.2896, "step": 36500 }, { "epoch": 0.01, "learning_rate": 4.991642259887006e-05, "loss": 0.2939, "step": 37000 }, { "epoch": 0.01, "learning_rate": 4.991529265536724e-05, "loss": 0.2915, "step": 37500 }, { "epoch": 0.01, "learning_rate": 4.991416271186441e-05, "loss": 0.2811, "step": 38000 }, { "epoch": 0.01, "learning_rate": 4.9913032768361586e-05, "loss": 0.2851, "step": 38500 }, { "epoch": 0.01, "learning_rate": 4.991190282485876e-05, "loss": 0.2877, "step": 39000 }, { "epoch": 0.01, "learning_rate": 4.9910772881355935e-05, "loss": 0.2855, "step": 39500 }, { "epoch": 0.01, "learning_rate": 4.990964293785311e-05, "loss": 0.2802, "step": 40000 }, { "epoch": 0.01, "learning_rate": 4.9908512994350284e-05, "loss": 0.2855, "step": 40500 }, { "epoch": 0.01, "learning_rate": 4.9907385310734465e-05, "loss": 0.2894, "step": 41000 }, { "epoch": 0.01, "learning_rate": 4.990625536723164e-05, "loss": 0.2821, "step": 41500 }, { "epoch": 0.01, "learning_rate": 4.9905125423728814e-05, "loss": 0.2857, "step": 42000 }, { "epoch": 0.0, "learning_rate": 4.99519988700565e-05, "loss": 0.2725, "step": 42500 }, { "epoch": 0.0, "learning_rate": 4.9951433898305086e-05, "loss": 0.2795, "step": 43000 }, { "epoch": 0.0, "learning_rate": 4.9950868926553677e-05, "loss": 0.2712, "step": 43500 }, { "epoch": 0.0, "learning_rate": 4.995030395480226e-05, "loss": 0.2597, "step": 44000 }, { "epoch": 0.0, "learning_rate": 4.9949740112994354e-05, "loss": 0.2736, "step": 44500 }, { "epoch": 0.0, "learning_rate": 4.994917514124294e-05, "loss": 0.2641, "step": 45000 }, { "epoch": 0.0, "learning_rate": 4.994861016949153e-05, "loss": 0.2673, "step": 45500 }, { "epoch": 0.0, "learning_rate": 4.994804519774012e-05, "loss": 0.2584, "step": 46000 }, { "epoch": 0.0, "learning_rate": 4.9947481355932207e-05, "loss": 0.2668, "step": 46500 }, { "epoch": 0.0, "learning_rate": 4.994691638418079e-05, "loss": 0.2677, "step": 47000 }, { "epoch": 0.0, "learning_rate": 4.994635141242938e-05, "loss": 0.2729, "step": 47500 }, { "epoch": 0.0, "learning_rate": 4.9945786440677965e-05, "loss": 0.2587, "step": 48000 }, { "epoch": 0.0, "learning_rate": 4.9945221468926555e-05, "loss": 0.2687, "step": 48500 }, { "epoch": 0.0, "learning_rate": 4.994465762711865e-05, "loss": 0.2743, "step": 49000 }, { "epoch": 0.0, "learning_rate": 4.994409265536723e-05, "loss": 0.2572, "step": 49500 }, { "epoch": 0.0, "learning_rate": 4.9943527683615824e-05, "loss": 0.2667, "step": 50000 }, { "epoch": 0.0, "learning_rate": 4.994296271186441e-05, "loss": 0.2725, "step": 50500 }, { "epoch": 0.0, "learning_rate": 4.9942397740113e-05, "loss": 0.2682, "step": 51000 }, { "epoch": 0.0, "learning_rate": 4.9941833898305085e-05, "loss": 0.2624, "step": 51500 }, { "epoch": 0.0, "learning_rate": 4.9941268926553676e-05, "loss": 0.2639, "step": 52000 }, { "epoch": 0.0, "learning_rate": 4.9940703954802266e-05, "loss": 0.2653, "step": 52500 }, { "epoch": 0.0, "learning_rate": 4.994013898305085e-05, "loss": 0.2737, "step": 53000 }, { "epoch": 0.0, "learning_rate": 4.993957514124294e-05, "loss": 0.2708, "step": 53500 }, { "epoch": 0.0, "learning_rate": 4.993901016949153e-05, "loss": 0.2678, "step": 54000 }, { "epoch": 0.0, "learning_rate": 4.993844519774011e-05, "loss": 0.2519, "step": 54500 }, { "epoch": 0.0, "learning_rate": 4.99378802259887e-05, "loss": 0.2629, "step": 55000 }, { "epoch": 0.0, "learning_rate": 4.9937316384180796e-05, "loss": 0.2693, "step": 55500 }, { "epoch": 0.0, "learning_rate": 4.993675141242938e-05, "loss": 0.2809, "step": 56000 }, { "epoch": 0.0, "learning_rate": 4.993618644067797e-05, "loss": 0.2585, "step": 56500 }, { "epoch": 0.0, "learning_rate": 4.9935621468926555e-05, "loss": 0.2671, "step": 57000 }, { "epoch": 0.0, "learning_rate": 4.9935056497175145e-05, "loss": 0.2691, "step": 57500 }, { "epoch": 0.0, "learning_rate": 4.993449265536723e-05, "loss": 0.2519, "step": 58000 }, { "epoch": 0.0, "learning_rate": 4.993392768361582e-05, "loss": 0.2781, "step": 58500 }, { "epoch": 0.0, "learning_rate": 4.993336271186441e-05, "loss": 0.2463, "step": 59000 }, { "epoch": 0.0, "learning_rate": 4.9932797740113e-05, "loss": 0.2612, "step": 59500 }, { "epoch": 0.0, "learning_rate": 4.9932235028248595e-05, "loss": 0.2638, "step": 60000 }, { "epoch": 0.0, "learning_rate": 4.993167005649718e-05, "loss": 0.2717, "step": 60500 }, { "epoch": 0.0, "learning_rate": 4.993110508474577e-05, "loss": 0.2552, "step": 61000 }, { "epoch": 0.0, "learning_rate": 4.993054011299435e-05, "loss": 0.26, "step": 61500 }, { "epoch": 0.0, "learning_rate": 4.9929975141242943e-05, "loss": 0.2665, "step": 62000 }, { "epoch": 0.0, "learning_rate": 4.992941016949153e-05, "loss": 0.2554, "step": 62500 }, { "epoch": 0.0, "learning_rate": 4.992884519774012e-05, "loss": 0.2508, "step": 63000 }, { "epoch": 0.0, "learning_rate": 4.99282802259887e-05, "loss": 0.2651, "step": 63500 }, { "epoch": 0.0, "learning_rate": 4.992771525423729e-05, "loss": 0.2399, "step": 64000 }, { "epoch": 0.0, "learning_rate": 4.992715028248588e-05, "loss": 0.2515, "step": 64500 }, { "epoch": 0.0, "learning_rate": 4.992658644067797e-05, "loss": 0.2586, "step": 65000 }, { "epoch": 0.0, "learning_rate": 4.9926021468926554e-05, "loss": 0.2581, "step": 65500 }, { "epoch": 0.0, "learning_rate": 4.9925456497175144e-05, "loss": 0.2639, "step": 66000 }, { "epoch": 0.0, "learning_rate": 4.9924891525423735e-05, "loss": 0.2589, "step": 66500 }, { "epoch": 0.0, "learning_rate": 4.992432768361582e-05, "loss": 0.2568, "step": 67000 }, { "epoch": 0.0, "learning_rate": 4.9923762711864406e-05, "loss": 0.2536, "step": 67500 }, { "epoch": 0.0, "learning_rate": 4.9923197740112997e-05, "loss": 0.2539, "step": 68000 }, { "epoch": 0.0, "learning_rate": 4.992263276836158e-05, "loss": 0.2574, "step": 68500 }, { "epoch": 0.0, "learning_rate": 4.9922068926553674e-05, "loss": 0.2473, "step": 69000 }, { "epoch": 0.0, "learning_rate": 4.9921503954802265e-05, "loss": 0.2535, "step": 69500 }, { "epoch": 0.0, "learning_rate": 4.9920938983050856e-05, "loss": 0.2479, "step": 70000 }, { "epoch": 0.0, "learning_rate": 4.992037401129944e-05, "loss": 0.247, "step": 70500 }, { "epoch": 0.0, "learning_rate": 4.9919810169491527e-05, "loss": 0.2443, "step": 71000 }, { "epoch": 0.0, "learning_rate": 4.991924519774012e-05, "loss": 0.2537, "step": 71500 }, { "epoch": 0.0, "learning_rate": 4.99186802259887e-05, "loss": 0.2443, "step": 72000 }, { "epoch": 0.0, "learning_rate": 4.991811525423729e-05, "loss": 0.2435, "step": 72500 }, { "epoch": 0.0, "learning_rate": 4.9917550282485875e-05, "loss": 0.2554, "step": 73000 }, { "epoch": 0.0, "learning_rate": 4.9916985310734466e-05, "loss": 0.2551, "step": 73500 }, { "epoch": 0.01, "learning_rate": 4.9916420338983056e-05, "loss": 0.2447, "step": 74000 }, { "epoch": 0.01, "learning_rate": 4.991585536723164e-05, "loss": 0.2537, "step": 74500 }, { "epoch": 0.01, "learning_rate": 4.991529152542373e-05, "loss": 0.2516, "step": 75000 }, { "epoch": 0.01, "learning_rate": 4.991472655367232e-05, "loss": 0.2398, "step": 75500 }, { "epoch": 0.01, "learning_rate": 4.99141615819209e-05, "loss": 0.2441, "step": 76000 }, { "epoch": 0.01, "learning_rate": 4.991359661016949e-05, "loss": 0.2427, "step": 76500 }, { "epoch": 0.01, "learning_rate": 4.9913032768361586e-05, "loss": 0.2505, "step": 77000 }, { "epoch": 0.01, "learning_rate": 4.991246779661018e-05, "loss": 0.2465, "step": 77500 }, { "epoch": 0.01, "learning_rate": 4.991190282485876e-05, "loss": 0.249, "step": 78000 }, { "epoch": 0.01, "learning_rate": 4.991133785310735e-05, "loss": 0.2444, "step": 78500 }, { "epoch": 0.01, "learning_rate": 4.9910772881355935e-05, "loss": 0.2491, "step": 79000 }, { "epoch": 0.01, "learning_rate": 4.991020903954802e-05, "loss": 0.2389, "step": 79500 }, { "epoch": 0.01, "learning_rate": 4.990964406779661e-05, "loss": 0.2489, "step": 80000 }, { "epoch": 0.01, "learning_rate": 4.9909079096045204e-05, "loss": 0.2481, "step": 80500 }, { "epoch": 0.01, "learning_rate": 4.990851525423729e-05, "loss": 0.251, "step": 81000 }, { "epoch": 0.01, "learning_rate": 4.9907950282485875e-05, "loss": 0.2523, "step": 81500 }, { "epoch": 0.01, "learning_rate": 4.9907385310734465e-05, "loss": 0.2507, "step": 82000 }, { "epoch": 0.01, "learning_rate": 4.990682033898305e-05, "loss": 0.2482, "step": 82500 }, { "epoch": 0.01, "learning_rate": 4.990625536723164e-05, "loss": 0.2387, "step": 83000 }, { "epoch": 0.01, "learning_rate": 4.990569039548022e-05, "loss": 0.2445, "step": 83500 }, { "epoch": 0.01, "learning_rate": 4.9905125423728814e-05, "loss": 0.2507, "step": 84000 }, { "epoch": 0.01, "learning_rate": 4.9904560451977405e-05, "loss": 0.2553, "step": 84500 }, { "epoch": 0.01, "learning_rate": 4.990399548022599e-05, "loss": 0.254, "step": 85000 }, { "epoch": 0.01, "learning_rate": 4.990343163841808e-05, "loss": 0.2534, "step": 85500 }, { "epoch": 0.01, "learning_rate": 4.990286666666667e-05, "loss": 0.2489, "step": 86000 }, { "epoch": 0.01, "learning_rate": 4.990230169491526e-05, "loss": 0.2457, "step": 86500 }, { "epoch": 0.01, "learning_rate": 4.990173672316385e-05, "loss": 0.2449, "step": 87000 }, { "epoch": 0.01, "learning_rate": 4.9901172881355935e-05, "loss": 0.239, "step": 87500 }, { "epoch": 0.01, "learning_rate": 4.9900607909604525e-05, "loss": 0.2482, "step": 88000 }, { "epoch": 0.01, "learning_rate": 4.990004293785311e-05, "loss": 0.2454, "step": 88500 }, { "epoch": 0.01, "learning_rate": 4.98994779661017e-05, "loss": 0.2665, "step": 89000 }, { "epoch": 0.01, "learning_rate": 4.989891299435028e-05, "loss": 0.2511, "step": 89500 }, { "epoch": 0.01, "learning_rate": 4.989834915254237e-05, "loss": 0.2604, "step": 90000 }, { "epoch": 0.01, "learning_rate": 4.989778418079096e-05, "loss": 0.2529, "step": 90500 }, { "epoch": 0.01, "learning_rate": 4.989721920903955e-05, "loss": 0.2519, "step": 91000 }, { "epoch": 0.01, "learning_rate": 4.9896654237288135e-05, "loss": 0.2489, "step": 91500 }, { "epoch": 0.01, "learning_rate": 4.989609039548023e-05, "loss": 0.2569, "step": 92000 }, { "epoch": 0.01, "learning_rate": 4.989552542372882e-05, "loss": 0.2427, "step": 92500 }, { "epoch": 0.01, "learning_rate": 4.9894960451977404e-05, "loss": 0.2485, "step": 93000 }, { "epoch": 0.01, "learning_rate": 4.9894395480225994e-05, "loss": 0.2392, "step": 93500 }, { "epoch": 0.01, "learning_rate": 4.989383163841808e-05, "loss": 0.2446, "step": 94000 }, { "epoch": 0.01, "learning_rate": 4.989326666666667e-05, "loss": 0.2424, "step": 94500 }, { "epoch": 0.01, "learning_rate": 4.9892701694915256e-05, "loss": 0.2472, "step": 95000 }, { "epoch": 0.01, "learning_rate": 4.9892136723163847e-05, "loss": 0.2531, "step": 95500 }, { "epoch": 0.01, "learning_rate": 4.989157175141243e-05, "loss": 0.2609, "step": 96000 }, { "epoch": 0.01, "learning_rate": 4.989100790960452e-05, "loss": 0.2369, "step": 96500 }, { "epoch": 0.01, "learning_rate": 4.989044293785311e-05, "loss": 0.2425, "step": 97000 }, { "epoch": 0.01, "learning_rate": 4.988987796610169e-05, "loss": 0.2486, "step": 97500 }, { "epoch": 0.01, "learning_rate": 4.988931299435028e-05, "loss": 0.2407, "step": 98000 }, { "epoch": 0.01, "learning_rate": 4.988874802259887e-05, "loss": 0.2419, "step": 98500 }, { "epoch": 0.01, "learning_rate": 4.988818418079097e-05, "loss": 0.2393, "step": 99000 }, { "epoch": 0.01, "learning_rate": 4.988761920903955e-05, "loss": 0.246, "step": 99500 }, { "epoch": 0.01, "learning_rate": 4.988705423728814e-05, "loss": 0.2512, "step": 100000 }, { "epoch": 0.01, "learning_rate": 4.9886489265536725e-05, "loss": 0.2512, "step": 100500 }, { "epoch": 0.01, "learning_rate": 4.9885924293785316e-05, "loss": 0.2406, "step": 101000 }, { "epoch": 0.01, "learning_rate": 4.98853604519774e-05, "loss": 0.2385, "step": 101500 }, { "epoch": 0.01, "learning_rate": 4.9884795480225994e-05, "loss": 0.2456, "step": 102000 }, { "epoch": 0.01, "learning_rate": 4.988423050847458e-05, "loss": 0.2394, "step": 102500 }, { "epoch": 0.01, "learning_rate": 4.988366553672317e-05, "loss": 0.24, "step": 103000 }, { "epoch": 0.01, "learning_rate": 4.988310056497175e-05, "loss": 0.2506, "step": 103500 }, { "epoch": 0.01, "learning_rate": 4.988253559322034e-05, "loss": 0.2467, "step": 104000 }, { "epoch": 0.01, "learning_rate": 4.9881970621468926e-05, "loss": 0.2451, "step": 104500 }, { "epoch": 0.01, "learning_rate": 4.988140564971752e-05, "loss": 0.2393, "step": 105000 }, { "epoch": 0.01, "learning_rate": 4.9880841807909604e-05, "loss": 0.2374, "step": 105500 }, { "epoch": 0.01, "learning_rate": 4.9880276836158195e-05, "loss": 0.2488, "step": 106000 }, { "epoch": 0.01, "learning_rate": 4.987971186440678e-05, "loss": 0.2413, "step": 106500 }, { "epoch": 0.01, "learning_rate": 4.987914689265537e-05, "loss": 0.2442, "step": 107000 }, { "epoch": 0.01, "learning_rate": 4.987858305084746e-05, "loss": 0.2399, "step": 107500 }, { "epoch": 0.01, "learning_rate": 4.987801807909605e-05, "loss": 0.2342, "step": 108000 }, { "epoch": 0.01, "learning_rate": 4.987745310734464e-05, "loss": 0.2343, "step": 108500 }, { "epoch": 0.01, "learning_rate": 4.987688813559323e-05, "loss": 0.2306, "step": 109000 }, { "epoch": 0.01, "learning_rate": 4.987632316384181e-05, "loss": 0.2305, "step": 109500 }, { "epoch": 0.01, "learning_rate": 4.98757593220339e-05, "loss": 0.2358, "step": 110000 }, { "epoch": 0.01, "learning_rate": 4.987519435028249e-05, "loss": 0.2524, "step": 110500 }, { "epoch": 0.01, "learning_rate": 4.987462937853107e-05, "loss": 0.2306, "step": 111000 }, { "epoch": 0.01, "learning_rate": 4.9874064406779664e-05, "loss": 0.2395, "step": 111500 }, { "epoch": 0.01, "learning_rate": 4.9873499435028255e-05, "loss": 0.2422, "step": 112000 }, { "epoch": 0.01, "learning_rate": 4.987293559322034e-05, "loss": 0.2463, "step": 112500 }, { "epoch": 0.01, "learning_rate": 4.9872370621468926e-05, "loss": 0.2345, "step": 113000 }, { "epoch": 0.01, "learning_rate": 4.9871805649717516e-05, "loss": 0.2349, "step": 113500 }, { "epoch": 0.01, "learning_rate": 4.98712406779661e-05, "loss": 0.2365, "step": 114000 }, { "epoch": 0.01, "learning_rate": 4.9870676836158194e-05, "loss": 0.24, "step": 114500 }, { "epoch": 0.01, "learning_rate": 4.9870111864406784e-05, "loss": 0.24, "step": 115000 }, { "epoch": 0.01, "learning_rate": 4.9869546892655375e-05, "loss": 0.2332, "step": 115500 }, { "epoch": 0.01, "learning_rate": 4.986898305084746e-05, "loss": 0.246, "step": 116000 }, { "epoch": 0.01, "learning_rate": 4.9868418079096046e-05, "loss": 0.2423, "step": 116500 }, { "epoch": 0.01, "learning_rate": 4.986785310734464e-05, "loss": 0.2419, "step": 117000 }, { "epoch": 0.01, "learning_rate": 4.986728813559322e-05, "loss": 0.2437, "step": 117500 }, { "epoch": 0.01, "learning_rate": 4.986672316384181e-05, "loss": 0.2456, "step": 118000 }, { "epoch": 0.01, "learning_rate": 4.9866158192090395e-05, "loss": 0.2321, "step": 118500 }, { "epoch": 0.01, "learning_rate": 4.9865593220338985e-05, "loss": 0.2369, "step": 119000 }, { "epoch": 0.01, "learning_rate": 4.9865028248587576e-05, "loss": 0.2355, "step": 119500 }, { "epoch": 0.01, "learning_rate": 4.986446327683616e-05, "loss": 0.2312, "step": 120000 }, { "epoch": 0.01, "learning_rate": 4.986389943502825e-05, "loss": 0.2285, "step": 120500 }, { "epoch": 0.01, "learning_rate": 4.986333446327684e-05, "loss": 0.2326, "step": 121000 }, { "epoch": 0.01, "learning_rate": 4.986276949152542e-05, "loss": 0.2343, "step": 121500 }, { "epoch": 0.01, "learning_rate": 4.986220451977401e-05, "loss": 0.2325, "step": 122000 }, { "epoch": 0.01, "learning_rate": 4.98616395480226e-05, "loss": 0.2329, "step": 122500 }, { "epoch": 0.01, "learning_rate": 4.9861075706214697e-05, "loss": 0.2341, "step": 123000 }, { "epoch": 0.01, "learning_rate": 4.986051073446328e-05, "loss": 0.2362, "step": 123500 }, { "epoch": 0.01, "learning_rate": 4.985994576271187e-05, "loss": 0.2265, "step": 124000 }, { "epoch": 0.01, "learning_rate": 4.9859380790960455e-05, "loss": 0.2445, "step": 124500 }, { "epoch": 0.01, "learning_rate": 4.9858815819209045e-05, "loss": 0.2234, "step": 125000 }, { "epoch": 0.01, "learning_rate": 4.985825197740113e-05, "loss": 0.2263, "step": 125500 }, { "epoch": 0.01, "learning_rate": 4.985768700564972e-05, "loss": 0.2249, "step": 126000 }, { "epoch": 0.01, "learning_rate": 4.985712203389831e-05, "loss": 0.225, "step": 126500 }, { "epoch": 0.01, "learning_rate": 4.98565570621469e-05, "loss": 0.2192, "step": 127000 }, { "epoch": 0.01, "learning_rate": 4.9855993220338985e-05, "loss": 0.2351, "step": 127500 }, { "epoch": 0.01, "learning_rate": 4.985542824858757e-05, "loss": 0.2289, "step": 128000 }, { "epoch": 0.01, "learning_rate": 4.985486327683616e-05, "loss": 0.2254, "step": 128500 }, { "epoch": 0.01, "learning_rate": 4.985429830508474e-05, "loss": 0.2199, "step": 129000 }, { "epoch": 0.01, "learning_rate": 4.9853733333333333e-05, "loss": 0.2175, "step": 129500 }, { "epoch": 0.01, "learning_rate": 4.985316949152543e-05, "loss": 0.2288, "step": 130000 }, { "epoch": 0.01, "learning_rate": 4.985260451977402e-05, "loss": 0.2277, "step": 130500 }, { "epoch": 0.01, "learning_rate": 4.98520395480226e-05, "loss": 0.2283, "step": 131000 }, { "epoch": 0.01, "learning_rate": 4.985147457627119e-05, "loss": 0.224, "step": 131500 }, { "epoch": 0.01, "learning_rate": 4.9850909604519776e-05, "loss": 0.2252, "step": 132000 }, { "epoch": 0.01, "learning_rate": 4.9850345762711863e-05, "loss": 0.2283, "step": 132500 }, { "epoch": 0.01, "learning_rate": 4.9849780790960454e-05, "loss": 0.2277, "step": 133000 }, { "epoch": 0.01, "learning_rate": 4.9849215819209045e-05, "loss": 0.2375, "step": 133500 }, { "epoch": 0.01, "learning_rate": 4.984865084745763e-05, "loss": 0.2271, "step": 134000 }, { "epoch": 0.01, "learning_rate": 4.9848087005649716e-05, "loss": 0.2195, "step": 134500 }, { "epoch": 0.01, "learning_rate": 4.9847522033898306e-05, "loss": 0.2234, "step": 135000 }, { "epoch": 0.01, "learning_rate": 4.984695706214689e-05, "loss": 0.23, "step": 135500 }, { "epoch": 0.01, "learning_rate": 4.984639209039548e-05, "loss": 0.2272, "step": 136000 }, { "epoch": 0.01, "learning_rate": 4.984582711864407e-05, "loss": 0.2297, "step": 136500 }, { "epoch": 0.01, "learning_rate": 4.9845263276836165e-05, "loss": 0.2216, "step": 137000 }, { "epoch": 0.01, "learning_rate": 4.984469830508475e-05, "loss": 0.2276, "step": 137500 }, { "epoch": 0.01, "learning_rate": 4.984413333333334e-05, "loss": 0.2197, "step": 138000 }, { "epoch": 0.01, "learning_rate": 4.984356836158192e-05, "loss": 0.2385, "step": 138500 }, { "epoch": 0.01, "learning_rate": 4.984300451977401e-05, "loss": 0.2194, "step": 139000 }, { "epoch": 0.01, "learning_rate": 4.98424395480226e-05, "loss": 0.2254, "step": 139500 }, { "epoch": 0.01, "learning_rate": 4.984187457627119e-05, "loss": 0.2197, "step": 140000 }, { "epoch": 0.01, "learning_rate": 4.9841309604519776e-05, "loss": 0.2209, "step": 140500 }, { "epoch": 0.01, "learning_rate": 4.9840744632768366e-05, "loss": 0.2319, "step": 141000 }, { "epoch": 0.01, "learning_rate": 4.984018192090396e-05, "loss": 0.2211, "step": 141500 }, { "epoch": 0.01, "learning_rate": 4.983961694915255e-05, "loss": 0.2284, "step": 142000 }, { "epoch": 0.01, "learning_rate": 4.983905197740113e-05, "loss": 0.2227, "step": 142500 }, { "epoch": 0.01, "learning_rate": 4.983848700564972e-05, "loss": 0.2294, "step": 143000 }, { "epoch": 0.01, "learning_rate": 4.983792203389831e-05, "loss": 0.2226, "step": 143500 }, { "epoch": 0.01, "learning_rate": 4.9837357062146896e-05, "loss": 0.2215, "step": 144000 }, { "epoch": 0.01, "learning_rate": 4.983679209039549e-05, "loss": 0.231, "step": 144500 }, { "epoch": 0.01, "learning_rate": 4.983622711864407e-05, "loss": 0.2286, "step": 145000 }, { "epoch": 0.01, "learning_rate": 4.983566327683616e-05, "loss": 0.2171, "step": 145500 }, { "epoch": 0.01, "learning_rate": 4.983509830508475e-05, "loss": 0.2293, "step": 146000 }, { "epoch": 0.01, "learning_rate": 4.983453333333333e-05, "loss": 0.2271, "step": 146500 }, { "epoch": 0.01, "learning_rate": 4.983396836158192e-05, "loss": 0.2218, "step": 147000 }, { "epoch": 0.01, "learning_rate": 4.983340338983051e-05, "loss": 0.2184, "step": 147500 }, { "epoch": 0.01, "learning_rate": 4.98328395480226e-05, "loss": 0.2224, "step": 148000 }, { "epoch": 0.01, "learning_rate": 4.9832274576271184e-05, "loss": 0.2212, "step": 148500 }, { "epoch": 0.01, "learning_rate": 4.9831709604519775e-05, "loss": 0.2199, "step": 149000 }, { "epoch": 0.01, "learning_rate": 4.983114463276836e-05, "loss": 0.2293, "step": 149500 }, { "epoch": 0.01, "learning_rate": 4.983058079096045e-05, "loss": 0.2179, "step": 150000 }, { "epoch": 0.01, "learning_rate": 4.983001581920904e-05, "loss": 0.2149, "step": 150500 }, { "epoch": 0.01, "learning_rate": 4.9829450847457634e-05, "loss": 0.2239, "step": 151000 }, { "epoch": 0.01, "learning_rate": 4.982888587570622e-05, "loss": 0.2219, "step": 151500 }, { "epoch": 0.01, "learning_rate": 4.982832090395481e-05, "loss": 0.2198, "step": 152000 }, { "epoch": 0.01, "learning_rate": 4.9827757062146895e-05, "loss": 0.2162, "step": 152500 }, { "epoch": 0.01, "learning_rate": 4.982719209039548e-05, "loss": 0.2279, "step": 153000 }, { "epoch": 0.01, "learning_rate": 4.982662711864407e-05, "loss": 0.2156, "step": 153500 }, { "epoch": 0.01, "learning_rate": 4.982606214689266e-05, "loss": 0.2188, "step": 154000 }, { "epoch": 0.01, "learning_rate": 4.9825497175141244e-05, "loss": 0.2132, "step": 154500 }, { "epoch": 0.01, "learning_rate": 4.982493333333333e-05, "loss": 0.2253, "step": 155000 }, { "epoch": 0.01, "learning_rate": 4.982436836158192e-05, "loss": 0.2166, "step": 155500 }, { "epoch": 0.01, "learning_rate": 4.9823803389830506e-05, "loss": 0.2255, "step": 156000 }, { "epoch": 0.01, "learning_rate": 4.9823238418079096e-05, "loss": 0.2145, "step": 156500 }, { "epoch": 0.01, "learning_rate": 4.982267457627119e-05, "loss": 0.219, "step": 157000 }, { "epoch": 0.01, "learning_rate": 4.982210960451978e-05, "loss": 0.2231, "step": 157500 }, { "epoch": 0.01, "learning_rate": 4.9821544632768365e-05, "loss": 0.2281, "step": 158000 }, { "epoch": 0.01, "learning_rate": 4.9820979661016955e-05, "loss": 0.2163, "step": 158500 }, { "epoch": 0.01, "learning_rate": 4.982041581920904e-05, "loss": 0.2158, "step": 159000 }, { "epoch": 0.01, "learning_rate": 4.9819850847457626e-05, "loss": 0.2215, "step": 159500 }, { "epoch": 0.01, "learning_rate": 4.981928587570622e-05, "loss": 0.2193, "step": 160000 }, { "epoch": 0.01, "learning_rate": 4.98187209039548e-05, "loss": 0.2243, "step": 160500 }, { "epoch": 0.01, "learning_rate": 4.981815593220339e-05, "loss": 0.2158, "step": 161000 }, { "epoch": 0.01, "learning_rate": 4.981759209039548e-05, "loss": 0.2072, "step": 161500 }, { "epoch": 0.01, "learning_rate": 4.981702711864407e-05, "loss": 0.216, "step": 162000 }, { "epoch": 0.01, "learning_rate": 4.981646214689266e-05, "loss": 0.2247, "step": 162500 }, { "epoch": 0.01, "learning_rate": 4.9815897175141243e-05, "loss": 0.2244, "step": 163000 }, { "epoch": 0.01, "learning_rate": 4.981533333333334e-05, "loss": 0.219, "step": 163500 }, { "epoch": 0.01, "learning_rate": 4.981476836158193e-05, "loss": 0.2238, "step": 164000 }, { "epoch": 0.01, "learning_rate": 4.981420338983051e-05, "loss": 0.2241, "step": 164500 }, { "epoch": 0.01, "learning_rate": 4.98136384180791e-05, "loss": 0.212, "step": 165000 }, { "epoch": 0.01, "learning_rate": 4.981307457627119e-05, "loss": 0.2108, "step": 165500 }, { "epoch": 0.01, "learning_rate": 4.981250960451977e-05, "loss": 0.2056, "step": 166000 }, { "epoch": 0.01, "learning_rate": 4.9811944632768364e-05, "loss": 0.2144, "step": 166500 }, { "epoch": 0.01, "learning_rate": 4.981137966101695e-05, "loss": 0.2165, "step": 167000 }, { "epoch": 0.01, "learning_rate": 4.981081581920904e-05, "loss": 0.2084, "step": 167500 }, { "epoch": 0.01, "learning_rate": 4.981025084745763e-05, "loss": 0.222, "step": 168000 }, { "epoch": 0.01, "learning_rate": 4.9809685875706216e-05, "loss": 0.2157, "step": 168500 }, { "epoch": 0.01, "learning_rate": 4.980912090395481e-05, "loss": 0.2174, "step": 169000 }, { "epoch": 0.01, "learning_rate": 4.9808557062146894e-05, "loss": 0.2121, "step": 169500 }, { "epoch": 0.01, "learning_rate": 4.9807992090395484e-05, "loss": 0.2135, "step": 170000 }, { "epoch": 0.01, "learning_rate": 4.980742711864407e-05, "loss": 0.2234, "step": 170500 }, { "epoch": 0.01, "learning_rate": 4.980686214689266e-05, "loss": 0.2147, "step": 171000 }, { "epoch": 0.01, "learning_rate": 4.980629717514125e-05, "loss": 0.2161, "step": 171500 }, { "epoch": 0.01, "learning_rate": 4.980573333333334e-05, "loss": 0.2154, "step": 172000 }, { "epoch": 0.01, "learning_rate": 4.980516836158192e-05, "loss": 0.2136, "step": 172500 }, { "epoch": 0.01, "learning_rate": 4.980460338983051e-05, "loss": 0.2222, "step": 173000 }, { "epoch": 0.01, "learning_rate": 4.9804038418079095e-05, "loss": 0.1996, "step": 173500 }, { "epoch": 0.01, "learning_rate": 4.9803473446327685e-05, "loss": 0.2093, "step": 174000 }, { "epoch": 0.01, "learning_rate": 4.980290960451978e-05, "loss": 0.2238, "step": 174500 }, { "epoch": 0.01, "learning_rate": 4.980234463276836e-05, "loss": 0.2108, "step": 175000 }, { "epoch": 0.01, "learning_rate": 4.9801779661016954e-05, "loss": 0.2035, "step": 175500 }, { "epoch": 0.01, "learning_rate": 4.980121468926554e-05, "loss": 0.2161, "step": 176000 }, { "epoch": 0.01, "learning_rate": 4.980065084745763e-05, "loss": 0.2125, "step": 176500 }, { "epoch": 0.01, "learning_rate": 4.9800085875706215e-05, "loss": 0.2132, "step": 177000 }, { "epoch": 0.01, "learning_rate": 4.9799520903954806e-05, "loss": 0.218, "step": 177500 }, { "epoch": 0.01, "learning_rate": 4.9798955932203397e-05, "loss": 0.2143, "step": 178000 }, { "epoch": 0.01, "learning_rate": 4.979839096045198e-05, "loss": 0.2107, "step": 178500 }, { "epoch": 0.01, "learning_rate": 4.979782598870057e-05, "loss": 0.2084, "step": 179000 }, { "epoch": 0.01, "learning_rate": 4.979726214689266e-05, "loss": 0.2151, "step": 179500 }, { "epoch": 0.01, "learning_rate": 4.979669717514124e-05, "loss": 0.2011, "step": 180000 }, { "epoch": 0.01, "learning_rate": 4.979613220338983e-05, "loss": 0.2127, "step": 180500 }, { "epoch": 0.01, "learning_rate": 4.9795567231638416e-05, "loss": 0.2145, "step": 181000 }, { "epoch": 0.01, "learning_rate": 4.979500225988701e-05, "loss": 0.2178, "step": 181500 }, { "epoch": 0.01, "learning_rate": 4.97944384180791e-05, "loss": 0.202, "step": 182000 }, { "epoch": 0.01, "learning_rate": 4.979387457627119e-05, "loss": 0.2129, "step": 182500 }, { "epoch": 0.01, "learning_rate": 4.979330960451978e-05, "loss": 0.2158, "step": 183000 }, { "epoch": 0.01, "learning_rate": 4.979274463276836e-05, "loss": 0.2142, "step": 183500 }, { "epoch": 0.01, "learning_rate": 4.979217966101695e-05, "loss": 0.203, "step": 184000 }, { "epoch": 0.01, "learning_rate": 4.979161468926554e-05, "loss": 0.2119, "step": 184500 }, { "epoch": 0.01, "learning_rate": 4.979104971751413e-05, "loss": 0.209, "step": 185000 }, { "epoch": 0.01, "learning_rate": 4.979048474576272e-05, "loss": 0.2094, "step": 185500 }, { "epoch": 0.01, "learning_rate": 4.97899197740113e-05, "loss": 0.2008, "step": 186000 }, { "epoch": 0.01, "learning_rate": 4.978935593220339e-05, "loss": 0.2159, "step": 186500 }, { "epoch": 0.01, "learning_rate": 4.978879096045198e-05, "loss": 0.2096, "step": 187000 }, { "epoch": 0.01, "learning_rate": 4.9788225988700563e-05, "loss": 0.2123, "step": 187500 }, { "epoch": 0.01, "learning_rate": 4.9787661016949154e-05, "loss": 0.212, "step": 188000 }, { "epoch": 0.01, "learning_rate": 4.978709604519774e-05, "loss": 0.2066, "step": 188500 }, { "epoch": 0.01, "learning_rate": 4.978653220338983e-05, "loss": 0.2081, "step": 189000 }, { "epoch": 0.01, "learning_rate": 4.978596723163842e-05, "loss": 0.2061, "step": 189500 }, { "epoch": 0.01, "learning_rate": 4.9785402259887006e-05, "loss": 0.2067, "step": 190000 }, { "epoch": 0.01, "learning_rate": 4.97848372881356e-05, "loss": 0.2133, "step": 190500 }, { "epoch": 0.01, "learning_rate": 4.9784273446327684e-05, "loss": 0.2085, "step": 191000 }, { "epoch": 0.01, "learning_rate": 4.9783708474576275e-05, "loss": 0.2088, "step": 191500 }, { "epoch": 0.01, "learning_rate": 4.9783143502824865e-05, "loss": 0.2039, "step": 192000 }, { "epoch": 0.01, "learning_rate": 4.978257853107345e-05, "loss": 0.2116, "step": 192500 }, { "epoch": 0.01, "learning_rate": 4.978201355932204e-05, "loss": 0.2098, "step": 193000 }, { "epoch": 0.01, "learning_rate": 4.978144858757062e-05, "loss": 0.2066, "step": 193500 }, { "epoch": 0.01, "learning_rate": 4.978088474576271e-05, "loss": 0.1969, "step": 194000 }, { "epoch": 0.01, "learning_rate": 4.97803197740113e-05, "loss": 0.2102, "step": 194500 }, { "epoch": 0.01, "learning_rate": 4.9779754802259885e-05, "loss": 0.2084, "step": 195000 }, { "epoch": 0.01, "learning_rate": 4.9779189830508476e-05, "loss": 0.2135, "step": 195500 }, { "epoch": 0.01, "learning_rate": 4.977862598870057e-05, "loss": 0.211, "step": 196000 }, { "epoch": 0.01, "learning_rate": 4.977806101694916e-05, "loss": 0.2096, "step": 196500 }, { "epoch": 0.01, "learning_rate": 4.9777496045197744e-05, "loss": 0.2061, "step": 197000 }, { "epoch": 0.01, "learning_rate": 4.9776931073446334e-05, "loss": 0.2109, "step": 197500 }, { "epoch": 0.01, "learning_rate": 4.977636610169492e-05, "loss": 0.2003, "step": 198000 }, { "epoch": 0.01, "learning_rate": 4.9775802259887005e-05, "loss": 0.2047, "step": 198500 }, { "epoch": 0.01, "learning_rate": 4.9775237288135596e-05, "loss": 0.2035, "step": 199000 }, { "epoch": 0.01, "learning_rate": 4.977467231638419e-05, "loss": 0.2191, "step": 199500 }, { "epoch": 0.01, "learning_rate": 4.977410734463277e-05, "loss": 0.2075, "step": 200000 }, { "epoch": 0.01, "learning_rate": 4.977354237288136e-05, "loss": 0.2098, "step": 200500 }, { "epoch": 0.01, "learning_rate": 4.977297853107345e-05, "loss": 0.2058, "step": 201000 }, { "epoch": 0.01, "learning_rate": 4.977241355932203e-05, "loss": 0.203, "step": 201500 }, { "epoch": 0.01, "learning_rate": 4.977184858757062e-05, "loss": 0.2074, "step": 202000 }, { "epoch": 0.01, "learning_rate": 4.977128361581921e-05, "loss": 0.2019, "step": 202500 }, { "epoch": 0.01, "learning_rate": 4.97707186440678e-05, "loss": 0.2046, "step": 203000 }, { "epoch": 0.01, "learning_rate": 4.977015480225989e-05, "loss": 0.207, "step": 203500 }, { "epoch": 0.01, "learning_rate": 4.976958983050848e-05, "loss": 0.2085, "step": 204000 }, { "epoch": 0.01, "learning_rate": 4.9769024858757065e-05, "loss": 0.2112, "step": 204500 }, { "epoch": 0.01, "learning_rate": 4.9768459887005656e-05, "loss": 0.2111, "step": 205000 }, { "epoch": 0.01, "learning_rate": 4.976789491525424e-05, "loss": 0.2064, "step": 205500 }, { "epoch": 0.01, "learning_rate": 4.9767331073446334e-05, "loss": 0.213, "step": 206000 }, { "epoch": 0.01, "learning_rate": 4.976676610169492e-05, "loss": 0.21, "step": 206500 }, { "epoch": 0.01, "learning_rate": 4.976620112994351e-05, "loss": 0.2064, "step": 207000 }, { "epoch": 0.01, "learning_rate": 4.976563615819209e-05, "loss": 0.2016, "step": 207500 }, { "epoch": 0.01, "learning_rate": 4.976507231638418e-05, "loss": 0.1975, "step": 208000 }, { "epoch": 0.01, "learning_rate": 4.976450734463277e-05, "loss": 0.1976, "step": 208500 }, { "epoch": 0.01, "learning_rate": 4.9763942372881354e-05, "loss": 0.2073, "step": 209000 }, { "epoch": 0.01, "learning_rate": 4.9763377401129944e-05, "loss": 0.2081, "step": 209500 }, { "epoch": 0.01, "learning_rate": 4.9762812429378535e-05, "loss": 0.2157, "step": 210000 }, { "epoch": 0.01, "learning_rate": 4.976224745762712e-05, "loss": 0.2011, "step": 210500 }, { "epoch": 0.01, "learning_rate": 4.976168248587571e-05, "loss": 0.2008, "step": 211000 }, { "epoch": 0.01, "learning_rate": 4.976111751412429e-05, "loss": 0.1979, "step": 211500 }, { "epoch": 0.01, "learning_rate": 4.976055367231639e-05, "loss": 0.1947, "step": 212000 }, { "epoch": 0.01, "learning_rate": 4.975998870056498e-05, "loss": 0.1923, "step": 212500 }, { "epoch": 0.01, "learning_rate": 4.975942372881356e-05, "loss": 0.2057, "step": 213000 }, { "epoch": 0.01, "learning_rate": 4.975885875706215e-05, "loss": 0.2052, "step": 213500 }, { "epoch": 0.01, "learning_rate": 4.9758293785310736e-05, "loss": 0.2111, "step": 214000 }, { "epoch": 0.01, "learning_rate": 4.975772994350283e-05, "loss": 0.1997, "step": 214500 }, { "epoch": 0.01, "learning_rate": 4.9757164971751413e-05, "loss": 0.2013, "step": 215000 }, { "epoch": 0.01, "learning_rate": 4.9756600000000004e-05, "loss": 0.2007, "step": 215500 }, { "epoch": 0.01, "learning_rate": 4.975603502824859e-05, "loss": 0.2034, "step": 216000 }, { "epoch": 0.01, "learning_rate": 4.975547005649718e-05, "loss": 0.2054, "step": 216500 }, { "epoch": 0.01, "learning_rate": 4.975490508474577e-05, "loss": 0.2007, "step": 217000 }, { "epoch": 0.01, "learning_rate": 4.9754341242937856e-05, "loss": 0.1929, "step": 217500 }, { "epoch": 0.01, "learning_rate": 4.975377627118644e-05, "loss": 0.2041, "step": 218000 }, { "epoch": 0.01, "learning_rate": 4.975321129943503e-05, "loss": 0.1977, "step": 218500 }, { "epoch": 0.01, "learning_rate": 4.9752646327683614e-05, "loss": 0.1938, "step": 219000 }, { "epoch": 0.01, "learning_rate": 4.9752081355932205e-05, "loss": 0.2061, "step": 219500 }, { "epoch": 0.01, "learning_rate": 4.97515175141243e-05, "loss": 0.2037, "step": 220000 }, { "epoch": 0.01, "learning_rate": 4.975095254237289e-05, "loss": 0.2086, "step": 220500 }, { "epoch": 0.01, "learning_rate": 4.975038757062147e-05, "loss": 0.1962, "step": 221000 }, { "epoch": 0.02, "learning_rate": 4.9749822598870064e-05, "loss": 0.2073, "step": 221500 }, { "epoch": 0.02, "learning_rate": 4.974925875706215e-05, "loss": 0.1907, "step": 222000 }, { "epoch": 0.02, "learning_rate": 4.9748693785310735e-05, "loss": 0.1939, "step": 222500 }, { "epoch": 0.02, "learning_rate": 4.9748128813559326e-05, "loss": 0.198, "step": 223000 }, { "epoch": 0.02, "learning_rate": 4.974756384180791e-05, "loss": 0.2071, "step": 223500 }, { "epoch": 0.02, "learning_rate": 4.97469988700565e-05, "loss": 0.2023, "step": 224000 }, { "epoch": 0.02, "learning_rate": 4.974643502824859e-05, "loss": 0.1931, "step": 224500 }, { "epoch": 0.02, "learning_rate": 4.974587005649718e-05, "loss": 0.1969, "step": 225000 }, { "epoch": 0.02, "learning_rate": 4.974530508474576e-05, "loss": 0.1919, "step": 225500 }, { "epoch": 0.02, "learning_rate": 4.974474011299435e-05, "loss": 0.1974, "step": 226000 }, { "epoch": 0.02, "learning_rate": 4.9744176271186446e-05, "loss": 0.1898, "step": 226500 }, { "epoch": 0.02, "learning_rate": 4.974361129943504e-05, "loss": 0.2037, "step": 227000 }, { "epoch": 0.02, "learning_rate": 4.974304632768362e-05, "loss": 0.199, "step": 227500 }, { "epoch": 0.02, "learning_rate": 4.974248135593221e-05, "loss": 0.1962, "step": 228000 }, { "epoch": 0.02, "learning_rate": 4.97419175141243e-05, "loss": 0.1923, "step": 228500 }, { "epoch": 0.02, "learning_rate": 4.974135254237288e-05, "loss": 0.1849, "step": 229000 }, { "epoch": 0.02, "learning_rate": 4.974078757062147e-05, "loss": 0.1937, "step": 229500 }, { "epoch": 0.02, "learning_rate": 4.9740222598870056e-05, "loss": 0.212, "step": 230000 }, { "epoch": 0.02, "learning_rate": 4.973965762711865e-05, "loss": 0.2, "step": 230500 }, { "epoch": 0.02, "learning_rate": 4.973909265536724e-05, "loss": 0.1996, "step": 231000 }, { "epoch": 0.02, "learning_rate": 4.973852768361582e-05, "loss": 0.1985, "step": 231500 }, { "epoch": 0.02, "learning_rate": 4.973796271186441e-05, "loss": 0.2012, "step": 232000 }, { "epoch": 0.02, "learning_rate": 4.97373988700565e-05, "loss": 0.1883, "step": 232500 }, { "epoch": 0.02, "learning_rate": 4.973683389830508e-05, "loss": 0.1851, "step": 233000 }, { "epoch": 0.02, "learning_rate": 4.9736268926553674e-05, "loss": 0.1963, "step": 233500 }, { "epoch": 0.02, "learning_rate": 4.973570395480226e-05, "loss": 0.2054, "step": 234000 }, { "epoch": 0.02, "learning_rate": 4.973513898305085e-05, "loss": 0.1984, "step": 234500 }, { "epoch": 0.02, "learning_rate": 4.973457514124294e-05, "loss": 0.1985, "step": 235000 }, { "epoch": 0.02, "learning_rate": 4.973401016949153e-05, "loss": 0.1926, "step": 235500 }, { "epoch": 0.02, "learning_rate": 4.9733445197740116e-05, "loss": 0.203, "step": 236000 }, { "epoch": 0.02, "learning_rate": 4.973288022598871e-05, "loss": 0.1988, "step": 236500 }, { "epoch": 0.02, "learning_rate": 4.9732316384180794e-05, "loss": 0.1957, "step": 237000 }, { "epoch": 0.02, "learning_rate": 4.973175141242938e-05, "loss": 0.1924, "step": 237500 }, { "epoch": 0.02, "learning_rate": 4.973118644067797e-05, "loss": 0.1918, "step": 238000 }, { "epoch": 0.02, "learning_rate": 4.973062146892656e-05, "loss": 0.1969, "step": 238500 }, { "epoch": 0.02, "learning_rate": 4.973005649717514e-05, "loss": 0.2039, "step": 239000 }, { "epoch": 0.02, "learning_rate": 4.972949265536723e-05, "loss": 0.1937, "step": 239500 }, { "epoch": 0.02, "learning_rate": 4.972892768361582e-05, "loss": 0.1996, "step": 240000 }, { "epoch": 0.02, "learning_rate": 4.9728362711864404e-05, "loss": 0.1993, "step": 240500 }, { "epoch": 0.02, "learning_rate": 4.9727797740112995e-05, "loss": 0.1901, "step": 241000 }, { "epoch": 0.02, "learning_rate": 4.972723389830509e-05, "loss": 0.2075, "step": 241500 }, { "epoch": 0.02, "learning_rate": 4.972666892655368e-05, "loss": 0.1993, "step": 242000 }, { "epoch": 0.02, "learning_rate": 4.9726103954802263e-05, "loss": 0.1892, "step": 242500 }, { "epoch": 0.02, "learning_rate": 4.9725538983050854e-05, "loss": 0.2005, "step": 243000 }, { "epoch": 0.02, "learning_rate": 4.972497401129944e-05, "loss": 0.1944, "step": 243500 }, { "epoch": 0.02, "learning_rate": 4.9724410169491525e-05, "loss": 0.201, "step": 244000 }, { "epoch": 0.02, "learning_rate": 4.9723845197740116e-05, "loss": 0.204, "step": 244500 }, { "epoch": 0.02, "learning_rate": 4.9723280225988706e-05, "loss": 0.2029, "step": 245000 }, { "epoch": 0.02, "learning_rate": 4.972271525423729e-05, "loss": 0.2045, "step": 245500 }, { "epoch": 0.02, "learning_rate": 4.972215141242938e-05, "loss": 0.1943, "step": 246000 }, { "epoch": 0.02, "learning_rate": 4.972158644067797e-05, "loss": 0.1921, "step": 246500 }, { "epoch": 0.02, "learning_rate": 4.972102146892655e-05, "loss": 0.1958, "step": 247000 }, { "epoch": 0.02, "learning_rate": 4.972045649717514e-05, "loss": 0.1893, "step": 247500 }, { "epoch": 0.02, "learning_rate": 4.9719891525423726e-05, "loss": 0.198, "step": 248000 }, { "epoch": 0.02, "learning_rate": 4.971932768361583e-05, "loss": 0.1972, "step": 248500 }, { "epoch": 0.02, "learning_rate": 4.971876271186441e-05, "loss": 0.1959, "step": 249000 }, { "epoch": 0.02, "learning_rate": 4.9718197740113e-05, "loss": 0.1942, "step": 249500 }, { "epoch": 0.02, "learning_rate": 4.9717632768361585e-05, "loss": 0.2012, "step": 250000 }, { "epoch": 0.02, "learning_rate": 4.9717067796610175e-05, "loss": 0.1999, "step": 250500 }, { "epoch": 0.02, "learning_rate": 4.971650395480226e-05, "loss": 0.201, "step": 251000 }, { "epoch": 0.02, "learning_rate": 4.9715938983050847e-05, "loss": 0.191, "step": 251500 }, { "epoch": 0.02, "learning_rate": 4.971537401129944e-05, "loss": 0.1892, "step": 252000 }, { "epoch": 0.02, "learning_rate": 4.971480903954803e-05, "loss": 0.1975, "step": 252500 }, { "epoch": 0.02, "learning_rate": 4.9714245197740115e-05, "loss": 0.1934, "step": 253000 }, { "epoch": 0.02, "learning_rate": 4.97136802259887e-05, "loss": 0.1846, "step": 253500 }, { "epoch": 0.02, "learning_rate": 4.971311525423729e-05, "loss": 0.1863, "step": 254000 }, { "epoch": 0.02, "learning_rate": 4.971255028248587e-05, "loss": 0.18, "step": 254500 }, { "epoch": 0.02, "learning_rate": 4.9711985310734464e-05, "loss": 0.1864, "step": 255000 }, { "epoch": 0.02, "learning_rate": 4.971142146892656e-05, "loss": 0.1797, "step": 255500 }, { "epoch": 0.02, "learning_rate": 4.971085649717515e-05, "loss": 0.197, "step": 256000 }, { "epoch": 0.02, "learning_rate": 4.971029152542373e-05, "loss": 0.1857, "step": 256500 }, { "epoch": 0.02, "learning_rate": 4.970972655367232e-05, "loss": 0.1929, "step": 257000 }, { "epoch": 0.02, "learning_rate": 4.9709161581920906e-05, "loss": 0.1903, "step": 257500 }, { "epoch": 0.02, "learning_rate": 4.9708597740112994e-05, "loss": 0.1879, "step": 258000 }, { "epoch": 0.02, "learning_rate": 4.9708032768361584e-05, "loss": 0.198, "step": 258500 }, { "epoch": 0.02, "learning_rate": 4.9707467796610175e-05, "loss": 0.1957, "step": 259000 }, { "epoch": 0.02, "learning_rate": 4.970690282485876e-05, "loss": 0.186, "step": 259500 }, { "epoch": 0.02, "learning_rate": 4.9706338983050846e-05, "loss": 0.1859, "step": 260000 }, { "epoch": 0.02, "learning_rate": 4.9705774011299436e-05, "loss": 0.1833, "step": 260500 }, { "epoch": 0.02, "learning_rate": 4.970520903954802e-05, "loss": 0.1869, "step": 261000 }, { "epoch": 0.02, "learning_rate": 4.970464406779661e-05, "loss": 0.1895, "step": 261500 }, { "epoch": 0.02, "learning_rate": 4.9704079096045195e-05, "loss": 0.1896, "step": 262000 }, { "epoch": 0.02, "learning_rate": 4.9703514124293785e-05, "loss": 0.1864, "step": 262500 }, { "epoch": 0.02, "learning_rate": 4.970295028248588e-05, "loss": 0.1915, "step": 263000 }, { "epoch": 0.02, "learning_rate": 4.970238531073447e-05, "loss": 0.188, "step": 263500 }, { "epoch": 0.02, "learning_rate": 4.9701820338983054e-05, "loss": 0.1895, "step": 264000 }, { "epoch": 0.02, "learning_rate": 4.9701255367231644e-05, "loss": 0.1918, "step": 264500 }, { "epoch": 0.02, "learning_rate": 4.970069039548023e-05, "loss": 0.1955, "step": 265000 }, { "epoch": 0.02, "learning_rate": 4.970012655367232e-05, "loss": 0.1879, "step": 265500 }, { "epoch": 0.02, "learning_rate": 4.9699561581920906e-05, "loss": 0.1913, "step": 266000 }, { "epoch": 0.02, "learning_rate": 4.9698996610169496e-05, "loss": 0.1873, "step": 266500 }, { "epoch": 0.02, "learning_rate": 4.969843163841808e-05, "loss": 0.1939, "step": 267000 }, { "epoch": 0.02, "learning_rate": 4.969786666666667e-05, "loss": 0.1865, "step": 267500 }, { "epoch": 0.02, "learning_rate": 4.9697301694915254e-05, "loss": 0.1934, "step": 268000 }, { "epoch": 0.02, "learning_rate": 4.969673785310734e-05, "loss": 0.1905, "step": 268500 }, { "epoch": 0.02, "learning_rate": 4.969617288135593e-05, "loss": 0.1829, "step": 269000 }, { "epoch": 0.02, "learning_rate": 4.969560790960452e-05, "loss": 0.1903, "step": 269500 }, { "epoch": 0.02, "learning_rate": 4.969504293785311e-05, "loss": 0.1909, "step": 270000 }, { "epoch": 0.02, "learning_rate": 4.96944779661017e-05, "loss": 0.1989, "step": 270500 }, { "epoch": 0.02, "learning_rate": 4.969391412429379e-05, "loss": 0.192, "step": 271000 }, { "epoch": 0.02, "learning_rate": 4.9693349152542375e-05, "loss": 0.1898, "step": 271500 }, { "epoch": 0.02, "learning_rate": 4.9692784180790966e-05, "loss": 0.189, "step": 272000 }, { "epoch": 0.02, "learning_rate": 4.969221920903955e-05, "loss": 0.1874, "step": 272500 }, { "epoch": 0.02, "learning_rate": 4.969165536723164e-05, "loss": 0.1868, "step": 273000 }, { "epoch": 0.02, "learning_rate": 4.969109152542373e-05, "loss": 0.1936, "step": 273500 }, { "epoch": 0.02, "learning_rate": 4.9690526553672314e-05, "loss": 0.2004, "step": 274000 }, { "epoch": 0.02, "learning_rate": 4.9689961581920905e-05, "loss": 0.1902, "step": 274500 }, { "epoch": 0.02, "learning_rate": 4.968939661016949e-05, "loss": 0.1762, "step": 275000 }, { "epoch": 0.02, "learning_rate": 4.968883163841808e-05, "loss": 0.1911, "step": 275500 }, { "epoch": 0.02, "learning_rate": 4.968826666666666e-05, "loss": 0.1937, "step": 276000 }, { "epoch": 0.02, "learning_rate": 4.9687701694915254e-05, "loss": 0.1808, "step": 276500 }, { "epoch": 0.02, "learning_rate": 4.9687136723163844e-05, "loss": 0.1935, "step": 277000 }, { "epoch": 0.02, "learning_rate": 4.968657288135594e-05, "loss": 0.1995, "step": 277500 }, { "epoch": 0.02, "learning_rate": 4.968600790960452e-05, "loss": 0.1892, "step": 278000 }, { "epoch": 0.02, "learning_rate": 4.968544293785311e-05, "loss": 0.1894, "step": 278500 }, { "epoch": 0.02, "learning_rate": 4.9684877966101696e-05, "loss": 0.1892, "step": 279000 }, { "epoch": 0.02, "learning_rate": 4.968431299435029e-05, "loss": 0.1936, "step": 279500 }, { "epoch": 0.02, "learning_rate": 4.9683749152542374e-05, "loss": 0.1881, "step": 280000 }, { "epoch": 0.02, "learning_rate": 4.9683184180790965e-05, "loss": 0.1814, "step": 280500 }, { "epoch": 0.02, "learning_rate": 4.968261920903955e-05, "loss": 0.1935, "step": 281000 }, { "epoch": 0.02, "learning_rate": 4.968205423728814e-05, "loss": 0.1932, "step": 281500 }, { "epoch": 0.02, "learning_rate": 4.9681490395480226e-05, "loss": 0.1786, "step": 282000 }, { "epoch": 0.02, "learning_rate": 4.968092542372881e-05, "loss": 0.1935, "step": 282500 }, { "epoch": 0.02, "learning_rate": 4.96803604519774e-05, "loss": 0.1805, "step": 283000 }, { "epoch": 0.02, "learning_rate": 4.967979548022599e-05, "loss": 0.1854, "step": 283500 }, { "epoch": 0.02, "learning_rate": 4.9679230508474575e-05, "loss": 0.1924, "step": 284000 }, { "epoch": 0.02, "learning_rate": 4.9678665536723166e-05, "loss": 0.1838, "step": 284500 }, { "epoch": 0.02, "learning_rate": 4.967810169491526e-05, "loss": 0.1806, "step": 285000 }, { "epoch": 0.02, "learning_rate": 4.9677536723163844e-05, "loss": 0.1826, "step": 285500 }, { "epoch": 0.02, "learning_rate": 4.9676971751412434e-05, "loss": 0.1945, "step": 286000 }, { "epoch": 0.02, "learning_rate": 4.967640677966102e-05, "loss": 0.1886, "step": 286500 }, { "epoch": 0.02, "learning_rate": 4.967584180790961e-05, "loss": 0.1838, "step": 287000 }, { "epoch": 0.02, "learning_rate": 4.96752768361582e-05, "loss": 0.1843, "step": 287500 }, { "epoch": 0.02, "learning_rate": 4.9674712994350286e-05, "loss": 0.1885, "step": 288000 }, { "epoch": 0.02, "learning_rate": 4.967414802259887e-05, "loss": 0.1826, "step": 288500 }, { "epoch": 0.02, "learning_rate": 4.967358305084746e-05, "loss": 0.1889, "step": 289000 }, { "epoch": 0.02, "learning_rate": 4.9673018079096045e-05, "loss": 0.1836, "step": 289500 }, { "epoch": 0.02, "learning_rate": 4.967245423728814e-05, "loss": 0.1816, "step": 290000 }, { "epoch": 0.02, "learning_rate": 4.967188926553672e-05, "loss": 0.1826, "step": 290500 }, { "epoch": 0.02, "learning_rate": 4.967132429378531e-05, "loss": 0.185, "step": 291000 }, { "epoch": 0.02, "learning_rate": 4.96707593220339e-05, "loss": 0.1829, "step": 291500 }, { "epoch": 0.02, "learning_rate": 4.967019548022599e-05, "loss": 0.1879, "step": 292000 }, { "epoch": 0.02, "learning_rate": 4.966963050847458e-05, "loss": 0.1877, "step": 292500 }, { "epoch": 0.02, "learning_rate": 4.9669065536723165e-05, "loss": 0.1871, "step": 293000 }, { "epoch": 0.02, "learning_rate": 4.9668500564971756e-05, "loss": 0.1782, "step": 293500 }, { "epoch": 0.02, "learning_rate": 4.9667935593220346e-05, "loss": 0.1809, "step": 294000 }, { "epoch": 0.02, "learning_rate": 4.9667371751412433e-05, "loss": 0.184, "step": 294500 }, { "epoch": 0.02, "learning_rate": 4.966680677966102e-05, "loss": 0.1762, "step": 295000 }, { "epoch": 0.02, "learning_rate": 4.966624180790961e-05, "loss": 0.1848, "step": 295500 }, { "epoch": 0.02, "learning_rate": 4.966567683615819e-05, "loss": 0.1961, "step": 296000 }, { "epoch": 0.02, "learning_rate": 4.966511186440678e-05, "loss": 0.1808, "step": 296500 }, { "epoch": 0.02, "learning_rate": 4.966454802259887e-05, "loss": 0.1753, "step": 297000 }, { "epoch": 0.02, "learning_rate": 4.966398305084746e-05, "loss": 0.1811, "step": 297500 }, { "epoch": 0.02, "learning_rate": 4.9663418079096044e-05, "loss": 0.1784, "step": 298000 }, { "epoch": 0.02, "learning_rate": 4.9662853107344634e-05, "loss": 0.1861, "step": 298500 }, { "epoch": 0.02, "learning_rate": 4.966228926553673e-05, "loss": 0.1775, "step": 299000 }, { "epoch": 0.02, "learning_rate": 4.966172429378531e-05, "loss": 0.1912, "step": 299500 }, { "epoch": 0.02, "learning_rate": 4.96611593220339e-05, "loss": 0.1923, "step": 300000 }, { "epoch": 0.02, "learning_rate": 4.9660594350282487e-05, "loss": 0.1857, "step": 300500 }, { "epoch": 0.02, "learning_rate": 4.966002937853108e-05, "loss": 0.1795, "step": 301000 }, { "epoch": 0.02, "learning_rate": 4.965946440677967e-05, "loss": 0.1864, "step": 301500 }, { "epoch": 0.02, "learning_rate": 4.9658900564971755e-05, "loss": 0.1891, "step": 302000 }, { "epoch": 0.02, "learning_rate": 4.965833559322034e-05, "loss": 0.1806, "step": 302500 }, { "epoch": 0.02, "learning_rate": 4.965777062146893e-05, "loss": 0.1745, "step": 303000 }, { "epoch": 0.02, "learning_rate": 4.965720564971751e-05, "loss": 0.1869, "step": 303500 }, { "epoch": 0.02, "learning_rate": 4.9656640677966104e-05, "loss": 0.1863, "step": 304000 }, { "epoch": 0.02, "learning_rate": 4.965607683615819e-05, "loss": 0.1871, "step": 304500 }, { "epoch": 0.02, "learning_rate": 4.965551186440678e-05, "loss": 0.1811, "step": 305000 }, { "epoch": 0.02, "learning_rate": 4.9654946892655365e-05, "loss": 0.1871, "step": 305500 }, { "epoch": 0.02, "learning_rate": 4.9654381920903956e-05, "loss": 0.1828, "step": 306000 }, { "epoch": 0.02, "learning_rate": 4.965381807909605e-05, "loss": 0.1833, "step": 306500 }, { "epoch": 0.02, "learning_rate": 4.9653253107344634e-05, "loss": 0.1709, "step": 307000 }, { "epoch": 0.02, "learning_rate": 4.9652688135593224e-05, "loss": 0.1862, "step": 307500 }, { "epoch": 0.02, "learning_rate": 4.9652123163841815e-05, "loss": 0.1799, "step": 308000 }, { "epoch": 0.02, "learning_rate": 4.96515581920904e-05, "loss": 0.1784, "step": 308500 }, { "epoch": 0.02, "learning_rate": 4.965099322033899e-05, "loss": 0.1896, "step": 309000 }, { "epoch": 0.02, "learning_rate": 4.965042824858757e-05, "loss": 0.1867, "step": 309500 }, { "epoch": 0.02, "learning_rate": 4.9649863276836164e-05, "loss": 0.1747, "step": 310000 }, { "epoch": 0.02, "learning_rate": 4.964929943502825e-05, "loss": 0.1837, "step": 310500 }, { "epoch": 0.02, "learning_rate": 4.9648734463276835e-05, "loss": 0.1832, "step": 311000 }, { "epoch": 0.02, "learning_rate": 4.9648169491525425e-05, "loss": 0.1856, "step": 311500 }, { "epoch": 0.02, "learning_rate": 4.9647604519774016e-05, "loss": 0.1819, "step": 312000 }, { "epoch": 0.02, "learning_rate": 4.96470395480226e-05, "loss": 0.1741, "step": 312500 }, { "epoch": 0.02, "learning_rate": 4.964647570621469e-05, "loss": 0.1853, "step": 313000 }, { "epoch": 0.02, "learning_rate": 4.964591073446328e-05, "loss": 0.1817, "step": 313500 }, { "epoch": 0.02, "learning_rate": 4.964534689265537e-05, "loss": 0.182, "step": 314000 }, { "epoch": 0.02, "learning_rate": 4.9644781920903955e-05, "loss": 0.18, "step": 314500 }, { "epoch": 0.02, "learning_rate": 4.9644216949152546e-05, "loss": 0.1769, "step": 315000 }, { "epoch": 0.02, "learning_rate": 4.9643651977401136e-05, "loss": 0.1796, "step": 315500 }, { "epoch": 0.02, "learning_rate": 4.964308700564972e-05, "loss": 0.176, "step": 316000 }, { "epoch": 0.02, "learning_rate": 4.964252203389831e-05, "loss": 0.1815, "step": 316500 }, { "epoch": 0.02, "learning_rate": 4.9641957062146895e-05, "loss": 0.1761, "step": 317000 }, { "epoch": 0.02, "learning_rate": 4.9641392090395485e-05, "loss": 0.1798, "step": 317500 }, { "epoch": 0.02, "learning_rate": 4.964082711864407e-05, "loss": 0.1819, "step": 318000 }, { "epoch": 0.02, "learning_rate": 4.964026214689266e-05, "loss": 0.1744, "step": 318500 }, { "epoch": 0.02, "learning_rate": 4.963969717514125e-05, "loss": 0.1824, "step": 319000 }, { "epoch": 0.02, "learning_rate": 4.963913333333334e-05, "loss": 0.1898, "step": 319500 }, { "epoch": 0.02, "learning_rate": 4.963856836158192e-05, "loss": 0.18, "step": 320000 }, { "epoch": 0.02, "learning_rate": 4.963800338983051e-05, "loss": 0.1716, "step": 320500 }, { "epoch": 0.02, "learning_rate": 4.9637438418079095e-05, "loss": 0.1864, "step": 321000 }, { "epoch": 0.02, "learning_rate": 4.9636873446327686e-05, "loss": 0.1743, "step": 321500 }, { "epoch": 0.02, "learning_rate": 4.963630960451977e-05, "loss": 0.1876, "step": 322000 }, { "epoch": 0.02, "learning_rate": 4.9635744632768364e-05, "loss": 0.1753, "step": 322500 }, { "epoch": 0.02, "learning_rate": 4.963517966101695e-05, "loss": 0.1828, "step": 323000 }, { "epoch": 0.02, "learning_rate": 4.963461468926554e-05, "loss": 0.1863, "step": 323500 }, { "epoch": 0.02, "learning_rate": 4.963404971751412e-05, "loss": 0.1797, "step": 324000 }, { "epoch": 0.02, "learning_rate": 4.9633485875706216e-05, "loss": 0.1839, "step": 324500 }, { "epoch": 0.02, "learning_rate": 4.9632920903954807e-05, "loss": 0.1852, "step": 325000 }, { "epoch": 0.02, "learning_rate": 4.96323559322034e-05, "loss": 0.1821, "step": 325500 }, { "epoch": 0.02, "learning_rate": 4.963179096045198e-05, "loss": 0.1861, "step": 326000 }, { "epoch": 0.02, "learning_rate": 4.963122598870057e-05, "loss": 0.1787, "step": 326500 }, { "epoch": 0.02, "learning_rate": 4.9630661016949155e-05, "loss": 0.1687, "step": 327000 }, { "epoch": 0.02, "learning_rate": 4.963009717514124e-05, "loss": 0.1755, "step": 327500 }, { "epoch": 0.02, "learning_rate": 4.962953220338983e-05, "loss": 0.1779, "step": 328000 }, { "epoch": 0.02, "learning_rate": 4.962896723163842e-05, "loss": 0.1727, "step": 328500 }, { "epoch": 0.02, "learning_rate": 4.962840225988701e-05, "loss": 0.1863, "step": 329000 }, { "epoch": 0.02, "learning_rate": 4.96278372881356e-05, "loss": 0.183, "step": 329500 }, { "epoch": 0.02, "learning_rate": 4.9627273446327685e-05, "loss": 0.1881, "step": 330000 }, { "epoch": 0.02, "learning_rate": 4.962670847457627e-05, "loss": 0.179, "step": 330500 }, { "epoch": 0.02, "learning_rate": 4.962614350282486e-05, "loss": 0.1911, "step": 331000 }, { "epoch": 0.02, "learning_rate": 4.9625578531073444e-05, "loss": 0.1796, "step": 331500 }, { "epoch": 0.02, "learning_rate": 4.9625013559322034e-05, "loss": 0.1782, "step": 332000 }, { "epoch": 0.02, "learning_rate": 4.962444971751413e-05, "loss": 0.1721, "step": 332500 }, { "epoch": 0.02, "learning_rate": 4.962388474576272e-05, "loss": 0.1731, "step": 333000 }, { "epoch": 0.02, "learning_rate": 4.96233197740113e-05, "loss": 0.1832, "step": 333500 }, { "epoch": 0.02, "learning_rate": 4.962275480225989e-05, "loss": 0.1782, "step": 334000 }, { "epoch": 0.02, "learning_rate": 4.962218983050848e-05, "loss": 0.1764, "step": 334500 }, { "epoch": 0.02, "learning_rate": 4.9621625988700564e-05, "loss": 0.1806, "step": 335000 }, { "epoch": 0.02, "learning_rate": 4.9621061016949155e-05, "loss": 0.1832, "step": 335500 }, { "epoch": 0.02, "learning_rate": 4.9620496045197745e-05, "loss": 0.1894, "step": 336000 }, { "epoch": 0.02, "learning_rate": 4.961993107344633e-05, "loss": 0.1849, "step": 336500 }, { "epoch": 0.02, "learning_rate": 4.9619367231638416e-05, "loss": 0.1889, "step": 337000 }, { "epoch": 0.02, "learning_rate": 4.961880225988701e-05, "loss": 0.1813, "step": 337500 }, { "epoch": 0.02, "learning_rate": 4.961823728813559e-05, "loss": 0.1798, "step": 338000 }, { "epoch": 0.02, "learning_rate": 4.961767231638418e-05, "loss": 0.1696, "step": 338500 }, { "epoch": 0.02, "learning_rate": 4.9617107344632765e-05, "loss": 0.1722, "step": 339000 }, { "epoch": 0.02, "learning_rate": 4.9616543502824866e-05, "loss": 0.1783, "step": 339500 }, { "epoch": 0.02, "learning_rate": 4.961597853107345e-05, "loss": 0.1755, "step": 340000 }, { "epoch": 0.02, "learning_rate": 4.961541355932204e-05, "loss": 0.171, "step": 340500 }, { "epoch": 0.02, "learning_rate": 4.9614848587570624e-05, "loss": 0.1797, "step": 341000 }, { "epoch": 0.02, "learning_rate": 4.961428474576271e-05, "loss": 0.1726, "step": 341500 }, { "epoch": 0.02, "learning_rate": 4.96137197740113e-05, "loss": 0.1728, "step": 342000 }, { "epoch": 0.02, "learning_rate": 4.9613154802259886e-05, "loss": 0.1756, "step": 342500 }, { "epoch": 0.02, "learning_rate": 4.9612589830508476e-05, "loss": 0.1759, "step": 343000 }, { "epoch": 0.02, "learning_rate": 4.961202485875707e-05, "loss": 0.1885, "step": 343500 }, { "epoch": 0.02, "learning_rate": 4.9611461016949154e-05, "loss": 0.1714, "step": 344000 }, { "epoch": 0.02, "learning_rate": 4.9610896045197745e-05, "loss": 0.1807, "step": 344500 }, { "epoch": 0.02, "learning_rate": 4.961033107344633e-05, "loss": 0.1757, "step": 345000 }, { "epoch": 0.02, "learning_rate": 4.960976610169492e-05, "loss": 0.1731, "step": 345500 }, { "epoch": 0.02, "learning_rate": 4.96092011299435e-05, "loss": 0.1755, "step": 346000 }, { "epoch": 0.02, "learning_rate": 4.960863615819209e-05, "loss": 0.1764, "step": 346500 }, { "epoch": 0.02, "learning_rate": 4.960807118644068e-05, "loss": 0.178, "step": 347000 }, { "epoch": 0.02, "learning_rate": 4.960750621468927e-05, "loss": 0.1787, "step": 347500 }, { "epoch": 0.02, "learning_rate": 4.960694237288136e-05, "loss": 0.1693, "step": 348000 }, { "epoch": 0.02, "learning_rate": 4.9606377401129945e-05, "loss": 0.1718, "step": 348500 }, { "epoch": 0.02, "learning_rate": 4.9605812429378536e-05, "loss": 0.1739, "step": 349000 }, { "epoch": 0.02, "learning_rate": 4.960524745762712e-05, "loss": 0.1741, "step": 349500 }, { "epoch": 0.02, "learning_rate": 4.9604683615819214e-05, "loss": 0.1699, "step": 350000 }, { "epoch": 0.02, "learning_rate": 4.96041186440678e-05, "loss": 0.1769, "step": 350500 }, { "epoch": 0.02, "learning_rate": 4.960355367231639e-05, "loss": 0.1732, "step": 351000 }, { "epoch": 0.02, "learning_rate": 4.960298870056497e-05, "loss": 0.1819, "step": 351500 }, { "epoch": 0.02, "learning_rate": 4.960242372881356e-05, "loss": 0.1791, "step": 352000 }, { "epoch": 0.02, "learning_rate": 4.9601858757062146e-05, "loss": 0.1741, "step": 352500 }, { "epoch": 0.02, "learning_rate": 4.960129491525424e-05, "loss": 0.173, "step": 353000 }, { "epoch": 0.02, "learning_rate": 4.9600729943502824e-05, "loss": 0.1654, "step": 353500 }, { "epoch": 0.02, "learning_rate": 4.9600164971751415e-05, "loss": 0.1755, "step": 354000 }, { "epoch": 0.02, "learning_rate": 4.95996e-05, "loss": 0.1792, "step": 354500 }, { "epoch": 0.02, "learning_rate": 4.959903615819209e-05, "loss": 0.1741, "step": 355000 }, { "epoch": 0.02, "learning_rate": 4.959847118644068e-05, "loss": 0.1681, "step": 355500 }, { "epoch": 0.02, "learning_rate": 4.959790621468927e-05, "loss": 0.1808, "step": 356000 }, { "epoch": 0.02, "learning_rate": 4.959734124293786e-05, "loss": 0.1841, "step": 356500 }, { "epoch": 0.02, "learning_rate": 4.959677627118644e-05, "loss": 0.1722, "step": 357000 }, { "epoch": 0.02, "learning_rate": 4.9596212429378535e-05, "loss": 0.1843, "step": 357500 }, { "epoch": 0.02, "learning_rate": 4.959564745762712e-05, "loss": 0.1689, "step": 358000 }, { "epoch": 0.02, "learning_rate": 4.959508361581921e-05, "loss": 0.1802, "step": 358500 }, { "epoch": 0.02, "learning_rate": 4.95945186440678e-05, "loss": 0.1759, "step": 359000 }, { "epoch": 0.02, "learning_rate": 4.959395367231639e-05, "loss": 0.1785, "step": 359500 }, { "epoch": 0.02, "learning_rate": 4.959338870056497e-05, "loss": 0.1786, "step": 360000 }, { "epoch": 0.02, "learning_rate": 4.959282372881356e-05, "loss": 0.1731, "step": 360500 }, { "epoch": 0.02, "learning_rate": 4.9592258757062146e-05, "loss": 0.168, "step": 361000 }, { "epoch": 0.02, "learning_rate": 4.9591693785310736e-05, "loss": 0.1799, "step": 361500 }, { "epoch": 0.02, "learning_rate": 4.959112881355932e-05, "loss": 0.1777, "step": 362000 }, { "epoch": 0.02, "learning_rate": 4.9590564971751414e-05, "loss": 0.1781, "step": 362500 }, { "epoch": 0.02, "learning_rate": 4.9590000000000005e-05, "loss": 0.1733, "step": 363000 }, { "epoch": 0.02, "learning_rate": 4.958943502824859e-05, "loss": 0.1875, "step": 363500 }, { "epoch": 0.02, "learning_rate": 4.958887005649718e-05, "loss": 0.18, "step": 364000 }, { "epoch": 0.02, "learning_rate": 4.958830508474577e-05, "loss": 0.1785, "step": 364500 }, { "epoch": 0.02, "learning_rate": 4.958774124293786e-05, "loss": 0.1777, "step": 365000 }, { "epoch": 0.02, "learning_rate": 4.958717627118644e-05, "loss": 0.1696, "step": 365500 }, { "epoch": 0.02, "learning_rate": 4.958661129943503e-05, "loss": 0.1793, "step": 366000 }, { "epoch": 0.02, "learning_rate": 4.9586046327683615e-05, "loss": 0.1745, "step": 366500 }, { "epoch": 0.02, "learning_rate": 4.9585481355932206e-05, "loss": 0.1746, "step": 367000 }, { "epoch": 0.02, "learning_rate": 4.958491751412429e-05, "loss": 0.1778, "step": 367500 }, { "epoch": 0.02, "learning_rate": 4.958435254237288e-05, "loss": 0.1714, "step": 368000 }, { "epoch": 0.02, "learning_rate": 4.958378870056498e-05, "loss": 0.1733, "step": 368500 }, { "epoch": 0.03, "learning_rate": 4.958322372881356e-05, "loss": 0.1699, "step": 369000 }, { "epoch": 0.03, "learning_rate": 4.958265875706215e-05, "loss": 0.1784, "step": 369500 }, { "epoch": 0.03, "learning_rate": 4.9582093785310736e-05, "loss": 0.1752, "step": 370000 }, { "epoch": 0.03, "learning_rate": 4.9581528813559326e-05, "loss": 0.1783, "step": 370500 }, { "epoch": 0.03, "learning_rate": 4.958096384180791e-05, "loss": 0.1651, "step": 371000 }, { "epoch": 0.03, "learning_rate": 4.95803988700565e-05, "loss": 0.1801, "step": 371500 }, { "epoch": 0.03, "learning_rate": 4.957983389830509e-05, "loss": 0.1723, "step": 372000 }, { "epoch": 0.03, "learning_rate": 4.9579268926553675e-05, "loss": 0.1758, "step": 372500 }, { "epoch": 0.03, "learning_rate": 4.9578703954802265e-05, "loss": 0.1828, "step": 373000 }, { "epoch": 0.03, "learning_rate": 4.957814011299435e-05, "loss": 0.1853, "step": 373500 }, { "epoch": 0.03, "learning_rate": 4.9577575141242936e-05, "loss": 0.1748, "step": 374000 }, { "epoch": 0.03, "learning_rate": 4.957701016949153e-05, "loss": 0.1732, "step": 374500 }, { "epoch": 0.03, "learning_rate": 4.957644519774012e-05, "loss": 0.1783, "step": 375000 }, { "epoch": 0.03, "learning_rate": 4.95758802259887e-05, "loss": 0.1749, "step": 375500 }, { "epoch": 0.03, "learning_rate": 4.9575316384180795e-05, "loss": 0.1691, "step": 376000 }, { "epoch": 0.03, "learning_rate": 4.957475141242938e-05, "loss": 0.1739, "step": 376500 }, { "epoch": 0.03, "learning_rate": 4.957418644067797e-05, "loss": 0.1823, "step": 377000 }, { "epoch": 0.03, "learning_rate": 4.9573621468926554e-05, "loss": 0.1747, "step": 377500 }, { "epoch": 0.03, "learning_rate": 4.9573056497175144e-05, "loss": 0.1728, "step": 378000 }, { "epoch": 0.03, "learning_rate": 4.957249152542373e-05, "loss": 0.1823, "step": 378500 }, { "epoch": 0.03, "learning_rate": 4.957192655367232e-05, "loss": 0.1726, "step": 379000 }, { "epoch": 0.03, "learning_rate": 4.95713615819209e-05, "loss": 0.1722, "step": 379500 }, { "epoch": 0.03, "learning_rate": 4.9570797740112996e-05, "loss": 0.1762, "step": 380000 }, { "epoch": 0.03, "learning_rate": 4.9570233898305084e-05, "loss": 0.1759, "step": 380500 }, { "epoch": 0.03, "learning_rate": 4.9569668926553674e-05, "loss": 0.1653, "step": 381000 }, { "epoch": 0.03, "learning_rate": 4.956910395480226e-05, "loss": 0.172, "step": 381500 }, { "epoch": 0.03, "learning_rate": 4.956853898305085e-05, "loss": 0.1662, "step": 382000 }, { "epoch": 0.03, "learning_rate": 4.956797401129944e-05, "loss": 0.1815, "step": 382500 }, { "epoch": 0.03, "learning_rate": 4.956740903954802e-05, "loss": 0.1768, "step": 383000 }, { "epoch": 0.03, "learning_rate": 4.956684519774012e-05, "loss": 0.1759, "step": 383500 }, { "epoch": 0.03, "learning_rate": 4.95662802259887e-05, "loss": 0.1712, "step": 384000 }, { "epoch": 0.03, "learning_rate": 4.956571525423729e-05, "loss": 0.1769, "step": 384500 }, { "epoch": 0.03, "learning_rate": 4.9565150282485875e-05, "loss": 0.1754, "step": 385000 }, { "epoch": 0.03, "learning_rate": 4.9564585310734466e-05, "loss": 0.1699, "step": 385500 }, { "epoch": 0.03, "learning_rate": 4.956402146892656e-05, "loss": 0.1597, "step": 386000 }, { "epoch": 0.03, "learning_rate": 4.9563456497175143e-05, "loss": 0.1779, "step": 386500 }, { "epoch": 0.03, "learning_rate": 4.9562891525423734e-05, "loss": 0.1696, "step": 387000 }, { "epoch": 0.03, "learning_rate": 4.956232655367232e-05, "loss": 0.1729, "step": 387500 }, { "epoch": 0.03, "learning_rate": 4.956176158192091e-05, "loss": 0.1629, "step": 388000 }, { "epoch": 0.03, "learning_rate": 4.956119661016949e-05, "loss": 0.1704, "step": 388500 }, { "epoch": 0.03, "learning_rate": 4.956063163841808e-05, "loss": 0.1795, "step": 389000 }, { "epoch": 0.03, "learning_rate": 4.956006779661017e-05, "loss": 0.1746, "step": 389500 }, { "epoch": 0.03, "learning_rate": 4.955950282485876e-05, "loss": 0.1706, "step": 390000 }, { "epoch": 0.03, "learning_rate": 4.9558937853107344e-05, "loss": 0.1735, "step": 390500 }, { "epoch": 0.03, "learning_rate": 4.9558372881355935e-05, "loss": 0.1687, "step": 391000 }, { "epoch": 0.03, "learning_rate": 4.955780903954802e-05, "loss": 0.1796, "step": 391500 }, { "epoch": 0.03, "learning_rate": 4.955724406779661e-05, "loss": 0.1778, "step": 392000 }, { "epoch": 0.03, "learning_rate": 4.9556679096045197e-05, "loss": 0.1648, "step": 392500 }, { "epoch": 0.03, "learning_rate": 4.955611412429379e-05, "loss": 0.1708, "step": 393000 }, { "epoch": 0.03, "learning_rate": 4.955554915254237e-05, "loss": 0.1715, "step": 393500 }, { "epoch": 0.03, "learning_rate": 4.9554985310734465e-05, "loss": 0.1677, "step": 394000 }, { "epoch": 0.03, "learning_rate": 4.9554420338983056e-05, "loss": 0.1765, "step": 394500 }, { "epoch": 0.03, "learning_rate": 4.955385536723164e-05, "loss": 0.1785, "step": 395000 }, { "epoch": 0.03, "learning_rate": 4.955329039548023e-05, "loss": 0.1723, "step": 395500 }, { "epoch": 0.03, "learning_rate": 4.955272542372882e-05, "loss": 0.1656, "step": 396000 }, { "epoch": 0.03, "learning_rate": 4.955216271186441e-05, "loss": 0.1691, "step": 396500 }, { "epoch": 0.03, "learning_rate": 4.9551597740112995e-05, "loss": 0.1799, "step": 397000 }, { "epoch": 0.03, "learning_rate": 4.9551032768361586e-05, "loss": 0.173, "step": 397500 }, { "epoch": 0.03, "learning_rate": 4.955046779661017e-05, "loss": 0.1717, "step": 398000 }, { "epoch": 0.03, "learning_rate": 4.954990282485876e-05, "loss": 0.1696, "step": 398500 }, { "epoch": 0.03, "learning_rate": 4.954933785310735e-05, "loss": 0.169, "step": 399000 }, { "epoch": 0.03, "learning_rate": 4.9548772881355934e-05, "loss": 0.1679, "step": 399500 }, { "epoch": 0.03, "learning_rate": 4.9548207909604525e-05, "loss": 0.1721, "step": 400000 }, { "epoch": 0.03, "learning_rate": 4.954764293785311e-05, "loss": 0.1592, "step": 400500 }, { "epoch": 0.03, "learning_rate": 4.95470779661017e-05, "loss": 0.1607, "step": 401000 }, { "epoch": 0.03, "learning_rate": 4.9546514124293786e-05, "loss": 0.1806, "step": 401500 }, { "epoch": 0.03, "learning_rate": 4.954594915254238e-05, "loss": 0.1692, "step": 402000 }, { "epoch": 0.03, "learning_rate": 4.954538418079096e-05, "loss": 0.1788, "step": 402500 }, { "epoch": 0.03, "learning_rate": 4.954481920903955e-05, "loss": 0.1739, "step": 403000 }, { "epoch": 0.03, "learning_rate": 4.954425423728814e-05, "loss": 0.1714, "step": 403500 }, { "epoch": 0.03, "learning_rate": 4.954369039548023e-05, "loss": 0.1613, "step": 404000 }, { "epoch": 0.03, "learning_rate": 4.954312542372881e-05, "loss": 0.1848, "step": 404500 }, { "epoch": 0.03, "learning_rate": 4.9542560451977404e-05, "loss": 0.1716, "step": 405000 }, { "epoch": 0.03, "learning_rate": 4.954199548022599e-05, "loss": 0.1618, "step": 405500 }, { "epoch": 0.03, "learning_rate": 4.954143050847458e-05, "loss": 0.1799, "step": 406000 }, { "epoch": 0.03, "learning_rate": 4.954086553672317e-05, "loss": 0.1802, "step": 406500 }, { "epoch": 0.03, "learning_rate": 4.9540301694915256e-05, "loss": 0.1717, "step": 407000 }, { "epoch": 0.03, "learning_rate": 4.9539736723163846e-05, "loss": 0.175, "step": 407500 }, { "epoch": 0.03, "learning_rate": 4.953917175141243e-05, "loss": 0.1715, "step": 408000 }, { "epoch": 0.03, "learning_rate": 4.953860677966102e-05, "loss": 0.1702, "step": 408500 }, { "epoch": 0.03, "learning_rate": 4.9538041807909605e-05, "loss": 0.1715, "step": 409000 }, { "epoch": 0.03, "learning_rate": 4.95374779661017e-05, "loss": 0.1694, "step": 409500 }, { "epoch": 0.03, "learning_rate": 4.953691299435029e-05, "loss": 0.1682, "step": 410000 }, { "epoch": 0.03, "learning_rate": 4.953634802259887e-05, "loss": 0.1653, "step": 410500 }, { "epoch": 0.03, "learning_rate": 4.9535783050847464e-05, "loss": 0.1702, "step": 411000 }, { "epoch": 0.03, "learning_rate": 4.953521920903955e-05, "loss": 0.1617, "step": 411500 }, { "epoch": 0.03, "learning_rate": 4.9534654237288135e-05, "loss": 0.1677, "step": 412000 }, { "epoch": 0.03, "learning_rate": 4.9534089265536725e-05, "loss": 0.1729, "step": 412500 }, { "epoch": 0.03, "learning_rate": 4.953352542372882e-05, "loss": 0.177, "step": 413000 }, { "epoch": 0.03, "learning_rate": 4.95329604519774e-05, "loss": 0.1721, "step": 413500 }, { "epoch": 0.03, "learning_rate": 4.9532395480225993e-05, "loss": 0.1692, "step": 414000 }, { "epoch": 0.03, "learning_rate": 4.953183050847458e-05, "loss": 0.1643, "step": 414500 }, { "epoch": 0.03, "learning_rate": 4.953126553672317e-05, "loss": 0.1751, "step": 415000 }, { "epoch": 0.03, "learning_rate": 4.953070056497175e-05, "loss": 0.1722, "step": 415500 }, { "epoch": 0.03, "learning_rate": 4.953013559322034e-05, "loss": 0.1631, "step": 416000 }, { "epoch": 0.03, "learning_rate": 4.9529570621468926e-05, "loss": 0.1704, "step": 416500 }, { "epoch": 0.03, "learning_rate": 4.952900564971752e-05, "loss": 0.166, "step": 417000 }, { "epoch": 0.03, "learning_rate": 4.952844180790961e-05, "loss": 0.1673, "step": 417500 }, { "epoch": 0.03, "learning_rate": 4.9527876836158194e-05, "loss": 0.1726, "step": 418000 }, { "epoch": 0.03, "learning_rate": 4.9527311864406785e-05, "loss": 0.1604, "step": 418500 }, { "epoch": 0.03, "learning_rate": 4.952674689265537e-05, "loss": 0.1693, "step": 419000 }, { "epoch": 0.03, "learning_rate": 4.952618192090396e-05, "loss": 0.1667, "step": 419500 }, { "epoch": 0.03, "learning_rate": 4.9525618079096047e-05, "loss": 0.163, "step": 420000 }, { "epoch": 0.03, "learning_rate": 4.952505310734464e-05, "loss": 0.1719, "step": 420500 }, { "epoch": 0.03, "learning_rate": 4.952448813559322e-05, "loss": 0.159, "step": 421000 }, { "epoch": 0.03, "learning_rate": 4.952392316384181e-05, "loss": 0.1639, "step": 421500 }, { "epoch": 0.03, "learning_rate": 4.9523358192090395e-05, "loss": 0.1639, "step": 422000 }, { "epoch": 0.03, "learning_rate": 4.952279435028249e-05, "loss": 0.1702, "step": 422500 }, { "epoch": 0.03, "learning_rate": 4.952222937853107e-05, "loss": 0.1708, "step": 423000 }, { "epoch": 0.03, "learning_rate": 4.9521664406779664e-05, "loss": 0.1827, "step": 423500 }, { "epoch": 0.03, "learning_rate": 4.952109943502825e-05, "loss": 0.1801, "step": 424000 }, { "epoch": 0.03, "learning_rate": 4.952053446327684e-05, "loss": 0.1659, "step": 424500 }, { "epoch": 0.03, "learning_rate": 4.951997062146893e-05, "loss": 0.1677, "step": 425000 }, { "epoch": 0.03, "learning_rate": 4.9519405649717516e-05, "loss": 0.1669, "step": 425500 }, { "epoch": 0.03, "learning_rate": 4.9518840677966106e-05, "loss": 0.1745, "step": 426000 }, { "epoch": 0.03, "learning_rate": 4.951827570621469e-05, "loss": 0.1786, "step": 426500 }, { "epoch": 0.03, "learning_rate": 4.951771073446328e-05, "loss": 0.1691, "step": 427000 }, { "epoch": 0.03, "learning_rate": 4.9517145762711865e-05, "loss": 0.1605, "step": 427500 }, { "epoch": 0.03, "learning_rate": 4.951658192090396e-05, "loss": 0.172, "step": 428000 }, { "epoch": 0.03, "learning_rate": 4.951601694915254e-05, "loss": 0.1678, "step": 428500 }, { "epoch": 0.03, "learning_rate": 4.951545197740113e-05, "loss": 0.1711, "step": 429000 }, { "epoch": 0.03, "learning_rate": 4.951488700564972e-05, "loss": 0.1692, "step": 429500 }, { "epoch": 0.03, "learning_rate": 4.951432203389831e-05, "loss": 0.1617, "step": 430000 }, { "epoch": 0.03, "learning_rate": 4.95137581920904e-05, "loss": 0.1692, "step": 430500 }, { "epoch": 0.03, "learning_rate": 4.9513193220338985e-05, "loss": 0.166, "step": 431000 }, { "epoch": 0.03, "learning_rate": 4.9512628248587576e-05, "loss": 0.1687, "step": 431500 }, { "epoch": 0.03, "learning_rate": 4.951206327683616e-05, "loss": 0.1655, "step": 432000 }, { "epoch": 0.03, "learning_rate": 4.951149830508475e-05, "loss": 0.1653, "step": 432500 }, { "epoch": 0.03, "learning_rate": 4.951093446327684e-05, "loss": 0.1754, "step": 433000 }, { "epoch": 0.03, "learning_rate": 4.951036949152543e-05, "loss": 0.1731, "step": 433500 }, { "epoch": 0.03, "learning_rate": 4.950980451977401e-05, "loss": 0.1664, "step": 434000 }, { "epoch": 0.03, "learning_rate": 4.95092395480226e-05, "loss": 0.1665, "step": 434500 }, { "epoch": 0.03, "learning_rate": 4.950867570621469e-05, "loss": 0.171, "step": 435000 }, { "epoch": 0.03, "learning_rate": 4.950811073446328e-05, "loss": 0.1658, "step": 435500 }, { "epoch": 0.03, "learning_rate": 4.9507545762711864e-05, "loss": 0.1634, "step": 436000 }, { "epoch": 0.03, "learning_rate": 4.9506980790960455e-05, "loss": 0.1579, "step": 436500 }, { "epoch": 0.03, "learning_rate": 4.950641581920904e-05, "loss": 0.1661, "step": 437000 }, { "epoch": 0.03, "learning_rate": 4.950585084745763e-05, "loss": 0.1605, "step": 437500 }, { "epoch": 0.03, "learning_rate": 4.950528587570621e-05, "loss": 0.1647, "step": 438000 }, { "epoch": 0.03, "learning_rate": 4.95047209039548e-05, "loss": 0.1699, "step": 438500 }, { "epoch": 0.03, "learning_rate": 4.9504155932203394e-05, "loss": 0.1626, "step": 439000 }, { "epoch": 0.03, "learning_rate": 4.950359096045198e-05, "loss": 0.1703, "step": 439500 }, { "epoch": 0.03, "learning_rate": 4.950302711864407e-05, "loss": 0.1688, "step": 440000 }, { "epoch": 0.03, "learning_rate": 4.9502462146892655e-05, "loss": 0.1621, "step": 440500 }, { "epoch": 0.03, "learning_rate": 4.9501897175141246e-05, "loss": 0.1649, "step": 441000 }, { "epoch": 0.03, "learning_rate": 4.950133220338983e-05, "loss": 0.1657, "step": 441500 }, { "epoch": 0.03, "learning_rate": 4.950076723163842e-05, "loss": 0.1642, "step": 442000 }, { "epoch": 0.03, "learning_rate": 4.9500203389830514e-05, "loss": 0.1698, "step": 442500 }, { "epoch": 0.03, "learning_rate": 4.94996384180791e-05, "loss": 0.1686, "step": 443000 }, { "epoch": 0.03, "learning_rate": 4.949907344632769e-05, "loss": 0.1714, "step": 443500 }, { "epoch": 0.03, "learning_rate": 4.949850847457627e-05, "loss": 0.166, "step": 444000 }, { "epoch": 0.03, "learning_rate": 4.949794350282486e-05, "loss": 0.1661, "step": 444500 }, { "epoch": 0.03, "learning_rate": 4.949737966101695e-05, "loss": 0.1619, "step": 445000 }, { "epoch": 0.03, "learning_rate": 4.949681468926554e-05, "loss": 0.1682, "step": 445500 }, { "epoch": 0.03, "learning_rate": 4.9496249717514125e-05, "loss": 0.1612, "step": 446000 }, { "epoch": 0.03, "learning_rate": 4.9495684745762715e-05, "loss": 0.1682, "step": 446500 }, { "epoch": 0.03, "learning_rate": 4.94951209039548e-05, "loss": 0.1706, "step": 447000 }, { "epoch": 0.03, "learning_rate": 4.949455593220339e-05, "loss": 0.1655, "step": 447500 }, { "epoch": 0.03, "learning_rate": 4.949399096045198e-05, "loss": 0.1705, "step": 448000 }, { "epoch": 0.03, "learning_rate": 4.949342598870057e-05, "loss": 0.1648, "step": 448500 }, { "epoch": 0.03, "learning_rate": 4.949286214689266e-05, "loss": 0.1621, "step": 449000 }, { "epoch": 0.03, "learning_rate": 4.9492297175141245e-05, "loss": 0.1601, "step": 449500 }, { "epoch": 0.03, "learning_rate": 4.9491732203389836e-05, "loss": 0.1632, "step": 450000 }, { "epoch": 0.03, "learning_rate": 4.949116723163842e-05, "loss": 0.1677, "step": 450500 }, { "epoch": 0.03, "learning_rate": 4.949060225988701e-05, "loss": 0.1691, "step": 451000 }, { "epoch": 0.03, "learning_rate": 4.94900384180791e-05, "loss": 0.1662, "step": 451500 }, { "epoch": 0.03, "learning_rate": 4.948947344632768e-05, "loss": 0.1736, "step": 452000 }, { "epoch": 0.03, "learning_rate": 4.948890847457627e-05, "loss": 0.1676, "step": 452500 }, { "epoch": 0.03, "learning_rate": 4.948834350282486e-05, "loss": 0.1662, "step": 453000 }, { "epoch": 0.03, "learning_rate": 4.948777966101695e-05, "loss": 0.1644, "step": 453500 }, { "epoch": 0.03, "learning_rate": 4.948721468926554e-05, "loss": 0.1595, "step": 454000 }, { "epoch": 0.03, "learning_rate": 4.948664971751413e-05, "loss": 0.1632, "step": 454500 }, { "epoch": 0.03, "learning_rate": 4.9486084745762715e-05, "loss": 0.1674, "step": 455000 }, { "epoch": 0.03, "learning_rate": 4.948552090395481e-05, "loss": 0.1615, "step": 455500 }, { "epoch": 0.03, "learning_rate": 4.948495593220339e-05, "loss": 0.1728, "step": 456000 }, { "epoch": 0.03, "learning_rate": 4.948439096045198e-05, "loss": 0.1626, "step": 456500 }, { "epoch": 0.03, "learning_rate": 4.948382598870057e-05, "loss": 0.1677, "step": 457000 }, { "epoch": 0.03, "learning_rate": 4.948326101694916e-05, "loss": 0.1563, "step": 457500 }, { "epoch": 0.03, "learning_rate": 4.9482697175141245e-05, "loss": 0.1708, "step": 458000 }, { "epoch": 0.03, "learning_rate": 4.948213220338983e-05, "loss": 0.1671, "step": 458500 }, { "epoch": 0.03, "learning_rate": 4.948156723163842e-05, "loss": 0.1583, "step": 459000 }, { "epoch": 0.03, "learning_rate": 4.948100225988701e-05, "loss": 0.164, "step": 459500 }, { "epoch": 0.03, "learning_rate": 4.9480437288135593e-05, "loss": 0.164, "step": 460000 }, { "epoch": 0.03, "learning_rate": 4.947987344632769e-05, "loss": 0.167, "step": 460500 }, { "epoch": 0.03, "learning_rate": 4.947930847457628e-05, "loss": 0.1654, "step": 461000 }, { "epoch": 0.03, "learning_rate": 4.947874350282486e-05, "loss": 0.1601, "step": 461500 }, { "epoch": 0.03, "learning_rate": 4.947817853107345e-05, "loss": 0.1681, "step": 462000 }, { "epoch": 0.03, "learning_rate": 4.947761468926554e-05, "loss": 0.1662, "step": 462500 }, { "epoch": 0.03, "learning_rate": 4.947704971751413e-05, "loss": 0.1632, "step": 463000 }, { "epoch": 0.03, "learning_rate": 4.9476484745762714e-05, "loss": 0.1603, "step": 463500 }, { "epoch": 0.03, "learning_rate": 4.9475919774011305e-05, "loss": 0.162, "step": 464000 }, { "epoch": 0.03, "learning_rate": 4.947535593220339e-05, "loss": 0.1673, "step": 464500 }, { "epoch": 0.03, "learning_rate": 4.9474790960451976e-05, "loss": 0.1698, "step": 465000 }, { "epoch": 0.03, "learning_rate": 4.9474225988700566e-05, "loss": 0.1657, "step": 465500 }, { "epoch": 0.03, "learning_rate": 4.947366101694916e-05, "loss": 0.1569, "step": 466000 }, { "epoch": 0.03, "learning_rate": 4.947309604519774e-05, "loss": 0.1671, "step": 466500 }, { "epoch": 0.03, "learning_rate": 4.9472532203389834e-05, "loss": 0.1626, "step": 467000 }, { "epoch": 0.03, "learning_rate": 4.9471967231638425e-05, "loss": 0.1531, "step": 467500 }, { "epoch": 0.03, "learning_rate": 4.947140225988701e-05, "loss": 0.1668, "step": 468000 }, { "epoch": 0.03, "learning_rate": 4.94708372881356e-05, "loss": 0.1635, "step": 468500 }, { "epoch": 0.03, "learning_rate": 4.947027231638418e-05, "loss": 0.1773, "step": 469000 }, { "epoch": 0.03, "learning_rate": 4.946970847457628e-05, "loss": 0.1604, "step": 469500 }, { "epoch": 0.03, "learning_rate": 4.946914350282486e-05, "loss": 0.1688, "step": 470000 }, { "epoch": 0.03, "learning_rate": 4.946857853107345e-05, "loss": 0.1617, "step": 470500 }, { "epoch": 0.03, "learning_rate": 4.9468013559322035e-05, "loss": 0.1655, "step": 471000 }, { "epoch": 0.03, "learning_rate": 4.9467448587570626e-05, "loss": 0.1646, "step": 471500 }, { "epoch": 0.03, "learning_rate": 4.946688474576271e-05, "loss": 0.1627, "step": 472000 }, { "epoch": 0.03, "learning_rate": 4.94663197740113e-05, "loss": 0.1693, "step": 472500 }, { "epoch": 0.03, "learning_rate": 4.946575480225989e-05, "loss": 0.1541, "step": 473000 }, { "epoch": 0.03, "learning_rate": 4.946518983050848e-05, "loss": 0.167, "step": 473500 }, { "epoch": 0.03, "learning_rate": 4.946462485875706e-05, "loss": 0.171, "step": 474000 }, { "epoch": 0.03, "learning_rate": 4.946405988700565e-05, "loss": 0.1643, "step": 474500 }, { "epoch": 0.03, "learning_rate": 4.9463496045197747e-05, "loss": 0.1659, "step": 475000 }, { "epoch": 0.03, "learning_rate": 4.946293107344633e-05, "loss": 0.1646, "step": 475500 }, { "epoch": 0.03, "learning_rate": 4.946236610169492e-05, "loss": 0.1541, "step": 476000 }, { "epoch": 0.03, "learning_rate": 4.9461801129943505e-05, "loss": 0.1633, "step": 476500 }, { "epoch": 0.03, "learning_rate": 4.94612372881356e-05, "loss": 0.1669, "step": 477000 }, { "epoch": 0.03, "learning_rate": 4.946067231638418e-05, "loss": 0.1649, "step": 477500 }, { "epoch": 0.03, "learning_rate": 4.946010734463277e-05, "loss": 0.1617, "step": 478000 }, { "epoch": 0.03, "learning_rate": 4.945954237288136e-05, "loss": 0.1615, "step": 478500 }, { "epoch": 0.03, "learning_rate": 4.945897740112995e-05, "loss": 0.1594, "step": 479000 }, { "epoch": 0.03, "learning_rate": 4.945841242937853e-05, "loss": 0.1648, "step": 479500 }, { "epoch": 0.03, "learning_rate": 4.945784745762712e-05, "loss": 0.1653, "step": 480000 }, { "epoch": 0.03, "learning_rate": 4.945728248587571e-05, "loss": 0.1612, "step": 480500 }, { "epoch": 0.03, "learning_rate": 4.94567186440678e-05, "loss": 0.1495, "step": 481000 }, { "epoch": 0.03, "learning_rate": 4.9456153672316383e-05, "loss": 0.1623, "step": 481500 }, { "epoch": 0.03, "learning_rate": 4.9455588700564974e-05, "loss": 0.1642, "step": 482000 }, { "epoch": 0.03, "learning_rate": 4.945502372881356e-05, "loss": 0.1597, "step": 482500 }, { "epoch": 0.03, "learning_rate": 4.945445875706215e-05, "loss": 0.1617, "step": 483000 }, { "epoch": 0.03, "learning_rate": 4.945389491525424e-05, "loss": 0.1613, "step": 483500 }, { "epoch": 0.03, "learning_rate": 4.945332994350283e-05, "loss": 0.1624, "step": 484000 }, { "epoch": 0.03, "learning_rate": 4.945276497175142e-05, "loss": 0.1713, "step": 484500 }, { "epoch": 0.03, "learning_rate": 4.945220000000001e-05, "loss": 0.1572, "step": 485000 }, { "epoch": 0.03, "learning_rate": 4.945163502824859e-05, "loss": 0.1505, "step": 485500 }, { "epoch": 0.03, "learning_rate": 4.945107005649718e-05, "loss": 0.1636, "step": 486000 }, { "epoch": 0.03, "learning_rate": 4.9450505084745766e-05, "loss": 0.1633, "step": 486500 }, { "epoch": 0.03, "learning_rate": 4.944994124293785e-05, "loss": 0.174, "step": 487000 }, { "epoch": 0.03, "learning_rate": 4.944937740112995e-05, "loss": 0.1682, "step": 487500 }, { "epoch": 0.03, "learning_rate": 4.944881242937853e-05, "loss": 0.1672, "step": 488000 }, { "epoch": 0.03, "learning_rate": 4.944824745762712e-05, "loss": 0.1599, "step": 488500 }, { "epoch": 0.03, "learning_rate": 4.9447682485875705e-05, "loss": 0.1625, "step": 489000 }, { "epoch": 0.03, "learning_rate": 4.9447117514124296e-05, "loss": 0.1698, "step": 489500 }, { "epoch": 0.03, "learning_rate": 4.944655254237288e-05, "loss": 0.1543, "step": 490000 }, { "epoch": 0.03, "learning_rate": 4.944598757062147e-05, "loss": 0.158, "step": 490500 }, { "epoch": 0.03, "learning_rate": 4.944542259887006e-05, "loss": 0.1617, "step": 491000 }, { "epoch": 0.03, "learning_rate": 4.9444857627118644e-05, "loss": 0.1651, "step": 491500 }, { "epoch": 0.03, "learning_rate": 4.9444292655367235e-05, "loss": 0.1639, "step": 492000 }, { "epoch": 0.03, "learning_rate": 4.944372881355933e-05, "loss": 0.1603, "step": 492500 }, { "epoch": 0.03, "learning_rate": 4.944316384180791e-05, "loss": 0.1585, "step": 493000 }, { "epoch": 0.03, "learning_rate": 4.94425988700565e-05, "loss": 0.1567, "step": 493500 }, { "epoch": 0.03, "learning_rate": 4.944203389830509e-05, "loss": 0.1616, "step": 494000 }, { "epoch": 0.03, "learning_rate": 4.944146892655368e-05, "loss": 0.1585, "step": 494500 }, { "epoch": 0.03, "learning_rate": 4.9440905084745765e-05, "loss": 0.1613, "step": 495000 }, { "epoch": 0.03, "learning_rate": 4.9440340112994355e-05, "loss": 0.1557, "step": 495500 }, { "epoch": 0.03, "learning_rate": 4.943977514124294e-05, "loss": 0.1672, "step": 496000 }, { "epoch": 0.03, "learning_rate": 4.943921016949153e-05, "loss": 0.1574, "step": 496500 }, { "epoch": 0.03, "learning_rate": 4.943864632768362e-05, "loss": 0.1702, "step": 497000 }, { "epoch": 0.03, "learning_rate": 4.94380813559322e-05, "loss": 0.1547, "step": 497500 }, { "epoch": 0.03, "learning_rate": 4.943751638418079e-05, "loss": 0.1611, "step": 498000 }, { "epoch": 0.03, "learning_rate": 4.943695141242938e-05, "loss": 0.1581, "step": 498500 }, { "epoch": 0.03, "learning_rate": 4.9436387570621476e-05, "loss": 0.1739, "step": 499000 }, { "epoch": 0.03, "learning_rate": 4.943582372881356e-05, "loss": 0.1574, "step": 499500 }, { "epoch": 0.03, "learning_rate": 4.943525875706215e-05, "loss": 0.1657, "step": 500000 }, { "epoch": 0.03, "learning_rate": 4.943469378531074e-05, "loss": 0.1745, "step": 500500 }, { "epoch": 0.03, "learning_rate": 4.943412881355932e-05, "loss": 0.1596, "step": 501000 }, { "epoch": 0.03, "learning_rate": 4.943356384180791e-05, "loss": 0.1644, "step": 501500 }, { "epoch": 0.03, "learning_rate": 4.94329988700565e-05, "loss": 0.1593, "step": 502000 }, { "epoch": 0.03, "learning_rate": 4.9432433898305086e-05, "loss": 0.1656, "step": 502500 }, { "epoch": 0.03, "learning_rate": 4.943186892655368e-05, "loss": 0.1553, "step": 503000 }, { "epoch": 0.03, "learning_rate": 4.9431305084745764e-05, "loss": 0.1531, "step": 503500 }, { "epoch": 0.03, "learning_rate": 4.943074011299435e-05, "loss": 0.149, "step": 504000 }, { "epoch": 0.03, "learning_rate": 4.943017514124294e-05, "loss": 0.1622, "step": 504500 }, { "epoch": 0.03, "learning_rate": 4.942961016949153e-05, "loss": 0.1557, "step": 505000 }, { "epoch": 0.03, "learning_rate": 4.942904519774011e-05, "loss": 0.1638, "step": 505500 }, { "epoch": 0.03, "learning_rate": 4.942848135593221e-05, "loss": 0.1568, "step": 506000 }, { "epoch": 0.03, "learning_rate": 4.94279163841808e-05, "loss": 0.1594, "step": 506500 }, { "epoch": 0.03, "learning_rate": 4.942735141242938e-05, "loss": 0.1574, "step": 507000 }, { "epoch": 0.03, "learning_rate": 4.942678644067797e-05, "loss": 0.1674, "step": 507500 }, { "epoch": 0.03, "learning_rate": 4.942622259887006e-05, "loss": 0.1603, "step": 508000 }, { "epoch": 0.03, "learning_rate": 4.942565762711865e-05, "loss": 0.1625, "step": 508500 }, { "epoch": 0.03, "learning_rate": 4.9425092655367233e-05, "loss": 0.1628, "step": 509000 }, { "epoch": 0.03, "learning_rate": 4.9424527683615824e-05, "loss": 0.1571, "step": 509500 }, { "epoch": 0.03, "learning_rate": 4.942396384180791e-05, "loss": 0.153, "step": 510000 }, { "epoch": 0.03, "learning_rate": 4.9423398870056495e-05, "loss": 0.1667, "step": 510500 }, { "epoch": 0.03, "learning_rate": 4.9422833898305086e-05, "loss": 0.1657, "step": 511000 }, { "epoch": 0.03, "learning_rate": 4.942226892655367e-05, "loss": 0.1684, "step": 511500 }, { "epoch": 0.03, "learning_rate": 4.942170395480226e-05, "loss": 0.1611, "step": 512000 }, { "epoch": 0.03, "learning_rate": 4.9421140112994354e-05, "loss": 0.1576, "step": 512500 }, { "epoch": 0.03, "learning_rate": 4.9420575141242945e-05, "loss": 0.1559, "step": 513000 }, { "epoch": 0.03, "learning_rate": 4.942001016949153e-05, "loss": 0.1604, "step": 513500 }, { "epoch": 0.03, "learning_rate": 4.941944519774012e-05, "loss": 0.1548, "step": 514000 }, { "epoch": 0.03, "learning_rate": 4.94188802259887e-05, "loss": 0.1524, "step": 514500 }, { "epoch": 0.03, "learning_rate": 4.941831638418079e-05, "loss": 0.154, "step": 515000 }, { "epoch": 0.03, "learning_rate": 4.941775141242938e-05, "loss": 0.163, "step": 515500 }, { "epoch": 0.03, "learning_rate": 4.941718644067797e-05, "loss": 0.1603, "step": 516000 }, { "epoch": 0.04, "learning_rate": 4.9416621468926555e-05, "loss": 0.1588, "step": 516500 }, { "epoch": 0.04, "learning_rate": 4.941605762711864e-05, "loss": 0.1573, "step": 517000 }, { "epoch": 0.04, "learning_rate": 4.941549265536723e-05, "loss": 0.1558, "step": 517500 }, { "epoch": 0.04, "learning_rate": 4.9414927683615817e-05, "loss": 0.1656, "step": 518000 }, { "epoch": 0.04, "learning_rate": 4.941436271186441e-05, "loss": 0.162, "step": 518500 }, { "epoch": 0.04, "learning_rate": 4.9413797740113e-05, "loss": 0.1683, "step": 519000 }, { "epoch": 0.04, "learning_rate": 4.941323389830509e-05, "loss": 0.1579, "step": 519500 }, { "epoch": 0.04, "learning_rate": 4.9412668926553676e-05, "loss": 0.154, "step": 520000 }, { "epoch": 0.04, "learning_rate": 4.9412103954802266e-05, "loss": 0.1579, "step": 520500 }, { "epoch": 0.04, "learning_rate": 4.941153898305085e-05, "loss": 0.1625, "step": 521000 }, { "epoch": 0.04, "learning_rate": 4.941097514124294e-05, "loss": 0.1664, "step": 521500 }, { "epoch": 0.04, "learning_rate": 4.941041016949153e-05, "loss": 0.1546, "step": 522000 }, { "epoch": 0.04, "learning_rate": 4.940984519774012e-05, "loss": 0.1582, "step": 522500 }, { "epoch": 0.04, "learning_rate": 4.94092802259887e-05, "loss": 0.1656, "step": 523000 }, { "epoch": 0.04, "learning_rate": 4.940871525423729e-05, "loss": 0.1557, "step": 523500 }, { "epoch": 0.04, "learning_rate": 4.940815141242938e-05, "loss": 0.172, "step": 524000 }, { "epoch": 0.04, "learning_rate": 4.9407586440677964e-05, "loss": 0.1508, "step": 524500 }, { "epoch": 0.04, "learning_rate": 4.9407021468926554e-05, "loss": 0.1622, "step": 525000 }, { "epoch": 0.04, "learning_rate": 4.940645649717514e-05, "loss": 0.1594, "step": 525500 }, { "epoch": 0.04, "learning_rate": 4.940589265536724e-05, "loss": 0.164, "step": 526000 }, { "epoch": 0.04, "learning_rate": 4.940532768361582e-05, "loss": 0.1537, "step": 526500 }, { "epoch": 0.04, "learning_rate": 4.940476271186441e-05, "loss": 0.158, "step": 527000 }, { "epoch": 0.04, "learning_rate": 4.9404197740113e-05, "loss": 0.1566, "step": 527500 }, { "epoch": 0.04, "learning_rate": 4.940363276836159e-05, "loss": 0.1589, "step": 528000 }, { "epoch": 0.04, "learning_rate": 4.940306779661017e-05, "loss": 0.1625, "step": 528500 }, { "epoch": 0.04, "learning_rate": 4.940250282485876e-05, "loss": 0.1584, "step": 529000 }, { "epoch": 0.04, "learning_rate": 4.940193898305085e-05, "loss": 0.1623, "step": 529500 }, { "epoch": 0.04, "learning_rate": 4.940137401129944e-05, "loss": 0.162, "step": 530000 }, { "epoch": 0.04, "learning_rate": 4.9400809039548024e-05, "loss": 0.1579, "step": 530500 }, { "epoch": 0.04, "learning_rate": 4.9400244067796614e-05, "loss": 0.1541, "step": 531000 }, { "epoch": 0.04, "learning_rate": 4.93996790960452e-05, "loss": 0.1597, "step": 531500 }, { "epoch": 0.04, "learning_rate": 4.9399115254237285e-05, "loss": 0.1554, "step": 532000 }, { "epoch": 0.04, "learning_rate": 4.9398550282485876e-05, "loss": 0.1578, "step": 532500 }, { "epoch": 0.04, "learning_rate": 4.9397985310734466e-05, "loss": 0.1592, "step": 533000 }, { "epoch": 0.04, "learning_rate": 4.939742033898305e-05, "loss": 0.1592, "step": 533500 }, { "epoch": 0.04, "learning_rate": 4.939685536723164e-05, "loss": 0.1575, "step": 534000 }, { "epoch": 0.04, "learning_rate": 4.9396291525423735e-05, "loss": 0.16, "step": 534500 }, { "epoch": 0.04, "learning_rate": 4.939572655367232e-05, "loss": 0.1597, "step": 535000 }, { "epoch": 0.04, "learning_rate": 4.939516158192091e-05, "loss": 0.1547, "step": 535500 }, { "epoch": 0.04, "learning_rate": 4.939459661016949e-05, "loss": 0.1555, "step": 536000 }, { "epoch": 0.04, "learning_rate": 4.9394031638418083e-05, "loss": 0.1617, "step": 536500 }, { "epoch": 0.04, "learning_rate": 4.9393466666666674e-05, "loss": 0.1616, "step": 537000 }, { "epoch": 0.04, "learning_rate": 4.939290282485876e-05, "loss": 0.1647, "step": 537500 }, { "epoch": 0.04, "learning_rate": 4.9392337853107345e-05, "loss": 0.1619, "step": 538000 }, { "epoch": 0.04, "learning_rate": 4.9391772881355936e-05, "loss": 0.1575, "step": 538500 }, { "epoch": 0.04, "learning_rate": 4.939120790960452e-05, "loss": 0.1471, "step": 539000 }, { "epoch": 0.04, "learning_rate": 4.939064406779661e-05, "loss": 0.1576, "step": 539500 }, { "epoch": 0.04, "learning_rate": 4.93900790960452e-05, "loss": 0.1543, "step": 540000 }, { "epoch": 0.04, "learning_rate": 4.938951412429379e-05, "loss": 0.1531, "step": 540500 }, { "epoch": 0.04, "learning_rate": 4.938894915254237e-05, "loss": 0.1607, "step": 541000 }, { "epoch": 0.04, "learning_rate": 4.938838418079096e-05, "loss": 0.1549, "step": 541500 }, { "epoch": 0.04, "learning_rate": 4.9387820338983056e-05, "loss": 0.1555, "step": 542000 }, { "epoch": 0.04, "learning_rate": 4.938725536723164e-05, "loss": 0.162, "step": 542500 }, { "epoch": 0.04, "learning_rate": 4.938669039548023e-05, "loss": 0.1614, "step": 543000 }, { "epoch": 0.04, "learning_rate": 4.938612542372882e-05, "loss": 0.1529, "step": 543500 }, { "epoch": 0.04, "learning_rate": 4.9385560451977405e-05, "loss": 0.1526, "step": 544000 }, { "epoch": 0.04, "learning_rate": 4.938499661016949e-05, "loss": 0.1539, "step": 544500 }, { "epoch": 0.04, "learning_rate": 4.938443163841808e-05, "loss": 0.1586, "step": 545000 }, { "epoch": 0.04, "learning_rate": 4.9383866666666667e-05, "loss": 0.1558, "step": 545500 }, { "epoch": 0.04, "learning_rate": 4.938330169491526e-05, "loss": 0.1611, "step": 546000 }, { "epoch": 0.04, "learning_rate": 4.9382737853107344e-05, "loss": 0.1486, "step": 546500 }, { "epoch": 0.04, "learning_rate": 4.9382172881355935e-05, "loss": 0.158, "step": 547000 }, { "epoch": 0.04, "learning_rate": 4.938160790960452e-05, "loss": 0.1645, "step": 547500 }, { "epoch": 0.04, "learning_rate": 4.938104293785311e-05, "loss": 0.1645, "step": 548000 }, { "epoch": 0.04, "learning_rate": 4.938047796610169e-05, "loss": 0.1463, "step": 548500 }, { "epoch": 0.04, "learning_rate": 4.937991412429379e-05, "loss": 0.161, "step": 549000 }, { "epoch": 0.04, "learning_rate": 4.937934915254238e-05, "loss": 0.161, "step": 549500 }, { "epoch": 0.04, "learning_rate": 4.937878418079096e-05, "loss": 0.1587, "step": 550000 }, { "epoch": 0.04, "learning_rate": 4.937821920903955e-05, "loss": 0.1542, "step": 550500 }, { "epoch": 0.04, "learning_rate": 4.937765536723164e-05, "loss": 0.1619, "step": 551000 }, { "epoch": 0.04, "learning_rate": 4.937709039548023e-05, "loss": 0.167, "step": 551500 }, { "epoch": 0.04, "learning_rate": 4.9376525423728814e-05, "loss": 0.1523, "step": 552000 }, { "epoch": 0.04, "learning_rate": 4.9375960451977404e-05, "loss": 0.1516, "step": 552500 }, { "epoch": 0.04, "learning_rate": 4.937539548022599e-05, "loss": 0.1528, "step": 553000 }, { "epoch": 0.04, "learning_rate": 4.937483050847458e-05, "loss": 0.164, "step": 553500 }, { "epoch": 0.04, "learning_rate": 4.937426553672317e-05, "loss": 0.1514, "step": 554000 }, { "epoch": 0.04, "learning_rate": 4.937370056497175e-05, "loss": 0.1621, "step": 554500 }, { "epoch": 0.04, "learning_rate": 4.937313672316384e-05, "loss": 0.1608, "step": 555000 }, { "epoch": 0.04, "learning_rate": 4.937257175141243e-05, "loss": 0.1558, "step": 555500 }, { "epoch": 0.04, "learning_rate": 4.9372006779661015e-05, "loss": 0.1616, "step": 556000 }, { "epoch": 0.04, "learning_rate": 4.9371441807909605e-05, "loss": 0.1566, "step": 556500 }, { "epoch": 0.04, "learning_rate": 4.93708779661017e-05, "loss": 0.1644, "step": 557000 }, { "epoch": 0.04, "learning_rate": 4.937031299435029e-05, "loss": 0.1527, "step": 557500 }, { "epoch": 0.04, "learning_rate": 4.9369748022598874e-05, "loss": 0.1585, "step": 558000 }, { "epoch": 0.04, "learning_rate": 4.9369183050847464e-05, "loss": 0.1563, "step": 558500 }, { "epoch": 0.04, "learning_rate": 4.936861807909605e-05, "loss": 0.1479, "step": 559000 }, { "epoch": 0.04, "learning_rate": 4.936805310734464e-05, "loss": 0.1617, "step": 559500 }, { "epoch": 0.04, "learning_rate": 4.9367489265536726e-05, "loss": 0.1505, "step": 560000 }, { "epoch": 0.04, "learning_rate": 4.936692429378531e-05, "loss": 0.1515, "step": 560500 }, { "epoch": 0.04, "learning_rate": 4.93663593220339e-05, "loss": 0.1536, "step": 561000 }, { "epoch": 0.04, "learning_rate": 4.936579435028249e-05, "loss": 0.157, "step": 561500 }, { "epoch": 0.04, "learning_rate": 4.936523050847458e-05, "loss": 0.1602, "step": 562000 }, { "epoch": 0.04, "learning_rate": 4.936466553672316e-05, "loss": 0.1618, "step": 562500 }, { "epoch": 0.04, "learning_rate": 4.936410056497175e-05, "loss": 0.155, "step": 563000 }, { "epoch": 0.04, "learning_rate": 4.9363535593220336e-05, "loss": 0.157, "step": 563500 }, { "epoch": 0.04, "learning_rate": 4.936297062146893e-05, "loss": 0.1538, "step": 564000 }, { "epoch": 0.04, "learning_rate": 4.936240677966102e-05, "loss": 0.1547, "step": 564500 }, { "epoch": 0.04, "learning_rate": 4.936184180790961e-05, "loss": 0.1539, "step": 565000 }, { "epoch": 0.04, "learning_rate": 4.9361276836158195e-05, "loss": 0.1602, "step": 565500 }, { "epoch": 0.04, "learning_rate": 4.9360711864406786e-05, "loss": 0.1546, "step": 566000 }, { "epoch": 0.04, "learning_rate": 4.936014802259887e-05, "loss": 0.1462, "step": 566500 }, { "epoch": 0.04, "learning_rate": 4.935958305084746e-05, "loss": 0.1647, "step": 567000 }, { "epoch": 0.04, "learning_rate": 4.935901807909605e-05, "loss": 0.1535, "step": 567500 }, { "epoch": 0.04, "learning_rate": 4.935845310734464e-05, "loss": 0.1589, "step": 568000 }, { "epoch": 0.04, "learning_rate": 4.9357889265536725e-05, "loss": 0.151, "step": 568500 }, { "epoch": 0.04, "learning_rate": 4.935732429378531e-05, "loss": 0.1495, "step": 569000 }, { "epoch": 0.04, "learning_rate": 4.93567593220339e-05, "loss": 0.161, "step": 569500 }, { "epoch": 0.04, "learning_rate": 4.935619435028248e-05, "loss": 0.1533, "step": 570000 }, { "epoch": 0.04, "learning_rate": 4.9355629378531074e-05, "loss": 0.1528, "step": 570500 }, { "epoch": 0.04, "learning_rate": 4.935506553672317e-05, "loss": 0.1571, "step": 571000 }, { "epoch": 0.04, "learning_rate": 4.935450056497176e-05, "loss": 0.1557, "step": 571500 }, { "epoch": 0.04, "learning_rate": 4.935393559322034e-05, "loss": 0.1474, "step": 572000 }, { "epoch": 0.04, "learning_rate": 4.935337062146893e-05, "loss": 0.16, "step": 572500 }, { "epoch": 0.04, "learning_rate": 4.9352805649717517e-05, "loss": 0.1583, "step": 573000 }, { "epoch": 0.04, "learning_rate": 4.9352241807909604e-05, "loss": 0.1488, "step": 573500 }, { "epoch": 0.04, "learning_rate": 4.9351676836158194e-05, "loss": 0.1503, "step": 574000 }, { "epoch": 0.04, "learning_rate": 4.935111186440678e-05, "loss": 0.1595, "step": 574500 }, { "epoch": 0.04, "learning_rate": 4.935054689265537e-05, "loss": 0.1634, "step": 575000 }, { "epoch": 0.04, "learning_rate": 4.934998192090396e-05, "loss": 0.1552, "step": 575500 }, { "epoch": 0.04, "learning_rate": 4.934941694915254e-05, "loss": 0.1529, "step": 576000 }, { "epoch": 0.04, "learning_rate": 4.9348851977401134e-05, "loss": 0.158, "step": 576500 }, { "epoch": 0.04, "learning_rate": 4.934828700564972e-05, "loss": 0.1481, "step": 577000 }, { "epoch": 0.04, "learning_rate": 4.9347723163841805e-05, "loss": 0.152, "step": 577500 }, { "epoch": 0.04, "learning_rate": 4.9347158192090395e-05, "loss": 0.1527, "step": 578000 }, { "epoch": 0.04, "learning_rate": 4.9346593220338986e-05, "loss": 0.1644, "step": 578500 }, { "epoch": 0.04, "learning_rate": 4.934602824858757e-05, "loss": 0.1587, "step": 579000 }, { "epoch": 0.04, "learning_rate": 4.934546327683616e-05, "loss": 0.1574, "step": 579500 }, { "epoch": 0.04, "learning_rate": 4.9344899435028254e-05, "loss": 0.1557, "step": 580000 }, { "epoch": 0.04, "learning_rate": 4.934433446327684e-05, "loss": 0.1552, "step": 580500 }, { "epoch": 0.04, "learning_rate": 4.9343770621468925e-05, "loss": 0.156, "step": 581000 }, { "epoch": 0.04, "learning_rate": 4.9343205649717516e-05, "loss": 0.1598, "step": 581500 }, { "epoch": 0.04, "learning_rate": 4.9342640677966106e-05, "loss": 0.1552, "step": 582000 }, { "epoch": 0.04, "learning_rate": 4.934207570621469e-05, "loss": 0.1506, "step": 582500 }, { "epoch": 0.04, "learning_rate": 4.934151073446328e-05, "loss": 0.1521, "step": 583000 }, { "epoch": 0.04, "learning_rate": 4.9340945762711865e-05, "loss": 0.1585, "step": 583500 }, { "epoch": 0.04, "learning_rate": 4.9340380790960455e-05, "loss": 0.1547, "step": 584000 }, { "epoch": 0.04, "learning_rate": 4.933981581920904e-05, "loss": 0.1482, "step": 584500 }, { "epoch": 0.04, "learning_rate": 4.933925084745763e-05, "loss": 0.1447, "step": 585000 }, { "epoch": 0.04, "learning_rate": 4.933868700564972e-05, "loss": 0.1521, "step": 585500 }, { "epoch": 0.04, "learning_rate": 4.933812203389831e-05, "loss": 0.1551, "step": 586000 }, { "epoch": 0.04, "learning_rate": 4.933755706214689e-05, "loss": 0.1575, "step": 586500 }, { "epoch": 0.04, "learning_rate": 4.933699209039548e-05, "loss": 0.1597, "step": 587000 }, { "epoch": 0.04, "learning_rate": 4.9336427118644066e-05, "loss": 0.1512, "step": 587500 }, { "epoch": 0.04, "learning_rate": 4.9335862146892656e-05, "loss": 0.1525, "step": 588000 }, { "epoch": 0.04, "learning_rate": 4.933529830508475e-05, "loss": 0.1548, "step": 588500 }, { "epoch": 0.04, "learning_rate": 4.933473333333334e-05, "loss": 0.1558, "step": 589000 }, { "epoch": 0.04, "learning_rate": 4.9334168361581924e-05, "loss": 0.1551, "step": 589500 }, { "epoch": 0.04, "learning_rate": 4.9333603389830515e-05, "loss": 0.1586, "step": 590000 }, { "epoch": 0.04, "learning_rate": 4.93330395480226e-05, "loss": 0.1577, "step": 590500 }, { "epoch": 0.04, "learning_rate": 4.9332474576271186e-05, "loss": 0.154, "step": 591000 }, { "epoch": 0.04, "learning_rate": 4.933190960451978e-05, "loss": 0.1482, "step": 591500 }, { "epoch": 0.04, "learning_rate": 4.933134463276836e-05, "loss": 0.1573, "step": 592000 }, { "epoch": 0.04, "learning_rate": 4.933077966101695e-05, "loss": 0.1517, "step": 592500 }, { "epoch": 0.04, "learning_rate": 4.933021468926554e-05, "loss": 0.1532, "step": 593000 }, { "epoch": 0.04, "learning_rate": 4.9329649717514125e-05, "loss": 0.1504, "step": 593500 }, { "epoch": 0.04, "learning_rate": 4.932908587570621e-05, "loss": 0.1567, "step": 594000 }, { "epoch": 0.04, "learning_rate": 4.93285209039548e-05, "loss": 0.1652, "step": 594500 }, { "epoch": 0.04, "learning_rate": 4.932795593220339e-05, "loss": 0.1467, "step": 595000 }, { "epoch": 0.04, "learning_rate": 4.932739096045198e-05, "loss": 0.1565, "step": 595500 }, { "epoch": 0.04, "learning_rate": 4.932682711864407e-05, "loss": 0.1455, "step": 596000 }, { "epoch": 0.04, "learning_rate": 4.932626214689266e-05, "loss": 0.1591, "step": 596500 }, { "epoch": 0.04, "learning_rate": 4.9325697175141246e-05, "loss": 0.1506, "step": 597000 }, { "epoch": 0.04, "learning_rate": 4.9325132203389837e-05, "loss": 0.1512, "step": 597500 }, { "epoch": 0.04, "learning_rate": 4.9324568361581924e-05, "loss": 0.1514, "step": 598000 }, { "epoch": 0.04, "learning_rate": 4.932400338983051e-05, "loss": 0.1593, "step": 598500 }, { "epoch": 0.04, "learning_rate": 4.93234384180791e-05, "loss": 0.1493, "step": 599000 }, { "epoch": 0.04, "learning_rate": 4.932287344632769e-05, "loss": 0.162, "step": 599500 }, { "epoch": 0.04, "learning_rate": 4.932230847457627e-05, "loss": 0.1485, "step": 600000 }, { "epoch": 0.04, "learning_rate": 4.932174463276836e-05, "loss": 0.1498, "step": 600500 }, { "epoch": 0.04, "learning_rate": 4.932117966101695e-05, "loss": 0.1548, "step": 601000 }, { "epoch": 0.04, "learning_rate": 4.9320614689265534e-05, "loss": 0.1445, "step": 601500 }, { "epoch": 0.04, "learning_rate": 4.9320049717514125e-05, "loss": 0.1486, "step": 602000 }, { "epoch": 0.04, "learning_rate": 4.931948474576271e-05, "loss": 0.1485, "step": 602500 }, { "epoch": 0.04, "learning_rate": 4.931892090395481e-05, "loss": 0.1562, "step": 603000 }, { "epoch": 0.04, "learning_rate": 4.931835593220339e-05, "loss": 0.1577, "step": 603500 }, { "epoch": 0.04, "learning_rate": 4.9317790960451984e-05, "loss": 0.1601, "step": 604000 }, { "epoch": 0.04, "learning_rate": 4.931722598870057e-05, "loss": 0.1503, "step": 604500 }, { "epoch": 0.04, "learning_rate": 4.931666101694916e-05, "loss": 0.1524, "step": 605000 }, { "epoch": 0.04, "learning_rate": 4.931609604519774e-05, "loss": 0.1607, "step": 605500 }, { "epoch": 0.04, "learning_rate": 4.931553220338983e-05, "loss": 0.1566, "step": 606000 }, { "epoch": 0.04, "learning_rate": 4.931496723163842e-05, "loss": 0.1483, "step": 606500 }, { "epoch": 0.04, "learning_rate": 4.931440225988701e-05, "loss": 0.1449, "step": 607000 }, { "epoch": 0.04, "learning_rate": 4.9313837288135594e-05, "loss": 0.1491, "step": 607500 }, { "epoch": 0.04, "learning_rate": 4.9313272316384185e-05, "loss": 0.151, "step": 608000 }, { "epoch": 0.04, "learning_rate": 4.931270847457627e-05, "loss": 0.1607, "step": 608500 }, { "epoch": 0.04, "learning_rate": 4.931214350282486e-05, "loss": 0.1579, "step": 609000 }, { "epoch": 0.04, "learning_rate": 4.9311578531073446e-05, "loss": 0.1567, "step": 609500 }, { "epoch": 0.04, "learning_rate": 4.931101355932204e-05, "loss": 0.1532, "step": 610000 }, { "epoch": 0.04, "learning_rate": 4.931044858757062e-05, "loss": 0.1588, "step": 610500 }, { "epoch": 0.04, "learning_rate": 4.9309884745762715e-05, "loss": 0.151, "step": 611000 }, { "epoch": 0.04, "learning_rate": 4.9309319774011305e-05, "loss": 0.1529, "step": 611500 }, { "epoch": 0.04, "learning_rate": 4.930875480225989e-05, "loss": 0.1521, "step": 612000 }, { "epoch": 0.04, "learning_rate": 4.930818983050848e-05, "loss": 0.1463, "step": 612500 }, { "epoch": 0.04, "learning_rate": 4.930762598870057e-05, "loss": 0.1561, "step": 613000 }, { "epoch": 0.04, "learning_rate": 4.930706101694916e-05, "loss": 0.1464, "step": 613500 }, { "epoch": 0.04, "learning_rate": 4.930649604519774e-05, "loss": 0.1506, "step": 614000 }, { "epoch": 0.04, "learning_rate": 4.930593107344633e-05, "loss": 0.1513, "step": 614500 }, { "epoch": 0.04, "learning_rate": 4.9305366101694916e-05, "loss": 0.1537, "step": 615000 }, { "epoch": 0.04, "learning_rate": 4.930480225988701e-05, "loss": 0.1422, "step": 615500 }, { "epoch": 0.04, "learning_rate": 4.930423728813559e-05, "loss": 0.1483, "step": 616000 }, { "epoch": 0.04, "learning_rate": 4.9303672316384184e-05, "loss": 0.1576, "step": 616500 }, { "epoch": 0.04, "learning_rate": 4.930310734463277e-05, "loss": 0.1459, "step": 617000 }, { "epoch": 0.04, "learning_rate": 4.930254237288136e-05, "loss": 0.1497, "step": 617500 }, { "epoch": 0.04, "learning_rate": 4.930197853107345e-05, "loss": 0.1573, "step": 618000 }, { "epoch": 0.04, "learning_rate": 4.9301413559322036e-05, "loss": 0.1554, "step": 618500 }, { "epoch": 0.04, "learning_rate": 4.930084858757063e-05, "loss": 0.1493, "step": 619000 }, { "epoch": 0.04, "learning_rate": 4.930028361581921e-05, "loss": 0.15, "step": 619500 }, { "epoch": 0.04, "learning_rate": 4.92997197740113e-05, "loss": 0.1565, "step": 620000 }, { "epoch": 0.04, "learning_rate": 4.929915480225989e-05, "loss": 0.1569, "step": 620500 }, { "epoch": 0.04, "learning_rate": 4.929858983050848e-05, "loss": 0.1466, "step": 621000 }, { "epoch": 0.04, "learning_rate": 4.929802485875706e-05, "loss": 0.1594, "step": 621500 }, { "epoch": 0.04, "learning_rate": 4.929745988700565e-05, "loss": 0.1458, "step": 622000 }, { "epoch": 0.04, "learning_rate": 4.929689491525424e-05, "loss": 0.153, "step": 622500 }, { "epoch": 0.04, "learning_rate": 4.929633107344633e-05, "loss": 0.1599, "step": 623000 }, { "epoch": 0.04, "learning_rate": 4.9295766101694915e-05, "loss": 0.1555, "step": 623500 }, { "epoch": 0.04, "learning_rate": 4.9295201129943505e-05, "loss": 0.155, "step": 624000 }, { "epoch": 0.04, "learning_rate": 4.929463615819209e-05, "loss": 0.1514, "step": 624500 }, { "epoch": 0.04, "learning_rate": 4.929407231638418e-05, "loss": 0.1496, "step": 625000 }, { "epoch": 0.04, "learning_rate": 4.9293507344632774e-05, "loss": 0.15, "step": 625500 }, { "epoch": 0.04, "learning_rate": 4.929294237288136e-05, "loss": 0.1419, "step": 626000 }, { "epoch": 0.04, "learning_rate": 4.929237740112995e-05, "loss": 0.1461, "step": 626500 }, { "epoch": 0.04, "learning_rate": 4.929181242937853e-05, "loss": 0.154, "step": 627000 }, { "epoch": 0.04, "learning_rate": 4.9291248587570626e-05, "loss": 0.1535, "step": 627500 }, { "epoch": 0.04, "learning_rate": 4.929068361581921e-05, "loss": 0.148, "step": 628000 }, { "epoch": 0.04, "learning_rate": 4.92901186440678e-05, "loss": 0.1543, "step": 628500 }, { "epoch": 0.04, "learning_rate": 4.9289553672316384e-05, "loss": 0.1564, "step": 629000 }, { "epoch": 0.04, "learning_rate": 4.9288988700564975e-05, "loss": 0.1548, "step": 629500 }, { "epoch": 0.04, "learning_rate": 4.928842372881356e-05, "loss": 0.1472, "step": 630000 }, { "epoch": 0.04, "learning_rate": 4.928785988700565e-05, "loss": 0.1559, "step": 630500 }, { "epoch": 0.04, "learning_rate": 4.9287294915254236e-05, "loss": 0.1435, "step": 631000 }, { "epoch": 0.04, "learning_rate": 4.928672994350283e-05, "loss": 0.1475, "step": 631500 }, { "epoch": 0.04, "learning_rate": 4.928616497175141e-05, "loss": 0.1516, "step": 632000 }, { "epoch": 0.04, "learning_rate": 4.92856e-05, "loss": 0.155, "step": 632500 }, { "epoch": 0.04, "learning_rate": 4.9285036158192095e-05, "loss": 0.1587, "step": 633000 }, { "epoch": 0.04, "learning_rate": 4.928447118644068e-05, "loss": 0.1472, "step": 633500 }, { "epoch": 0.04, "learning_rate": 4.928390621468927e-05, "loss": 0.1521, "step": 634000 }, { "epoch": 0.04, "learning_rate": 4.9283341242937853e-05, "loss": 0.1465, "step": 634500 }, { "epoch": 0.04, "learning_rate": 4.9282776271186444e-05, "loss": 0.1456, "step": 635000 }, { "epoch": 0.04, "learning_rate": 4.928221242937853e-05, "loss": 0.1538, "step": 635500 }, { "epoch": 0.04, "learning_rate": 4.928164745762712e-05, "loss": 0.1561, "step": 636000 }, { "epoch": 0.04, "learning_rate": 4.9281082485875706e-05, "loss": 0.1534, "step": 636500 }, { "epoch": 0.04, "learning_rate": 4.9280517514124296e-05, "loss": 0.1464, "step": 637000 }, { "epoch": 0.04, "learning_rate": 4.927995367231639e-05, "loss": 0.1528, "step": 637500 }, { "epoch": 0.04, "learning_rate": 4.9279388700564974e-05, "loss": 0.1472, "step": 638000 }, { "epoch": 0.04, "learning_rate": 4.9278823728813565e-05, "loss": 0.1521, "step": 638500 }, { "epoch": 0.04, "learning_rate": 4.927825875706215e-05, "loss": 0.1427, "step": 639000 }, { "epoch": 0.04, "learning_rate": 4.927769378531074e-05, "loss": 0.1438, "step": 639500 }, { "epoch": 0.04, "learning_rate": 4.927712881355932e-05, "loss": 0.1546, "step": 640000 }, { "epoch": 0.04, "learning_rate": 4.927656384180791e-05, "loss": 0.1465, "step": 640500 }, { "epoch": 0.04, "learning_rate": 4.9276e-05, "loss": 0.1533, "step": 641000 }, { "epoch": 0.04, "learning_rate": 4.927543502824859e-05, "loss": 0.1511, "step": 641500 }, { "epoch": 0.04, "learning_rate": 4.927487005649718e-05, "loss": 0.1436, "step": 642000 }, { "epoch": 0.04, "learning_rate": 4.9274305084745765e-05, "loss": 0.1456, "step": 642500 }, { "epoch": 0.04, "learning_rate": 4.9273740112994356e-05, "loss": 0.1507, "step": 643000 }, { "epoch": 0.04, "learning_rate": 4.927317627118644e-05, "loss": 0.1461, "step": 643500 }, { "epoch": 0.04, "learning_rate": 4.927261129943503e-05, "loss": 0.1505, "step": 644000 }, { "epoch": 0.04, "learning_rate": 4.927204632768362e-05, "loss": 0.154, "step": 644500 }, { "epoch": 0.04, "learning_rate": 4.92714813559322e-05, "loss": 0.1505, "step": 645000 }, { "epoch": 0.04, "learning_rate": 4.9270917514124295e-05, "loss": 0.1464, "step": 645500 }, { "epoch": 0.04, "learning_rate": 4.9270352542372886e-05, "loss": 0.1538, "step": 646000 }, { "epoch": 0.04, "learning_rate": 4.926978757062147e-05, "loss": 0.1597, "step": 646500 }, { "epoch": 0.04, "learning_rate": 4.926922259887006e-05, "loss": 0.1489, "step": 647000 }, { "epoch": 0.04, "learning_rate": 4.9268657627118644e-05, "loss": 0.1525, "step": 647500 }, { "epoch": 0.04, "learning_rate": 4.926809378531074e-05, "loss": 0.1475, "step": 648000 }, { "epoch": 0.04, "learning_rate": 4.926752881355933e-05, "loss": 0.1506, "step": 648500 }, { "epoch": 0.04, "learning_rate": 4.926696384180791e-05, "loss": 0.1518, "step": 649000 }, { "epoch": 0.04, "learning_rate": 4.92663988700565e-05, "loss": 0.1501, "step": 649500 }, { "epoch": 0.04, "learning_rate": 4.926583389830509e-05, "loss": 0.147, "step": 650000 }, { "epoch": 0.04, "learning_rate": 4.9265270056497174e-05, "loss": 0.148, "step": 650500 }, { "epoch": 0.04, "learning_rate": 4.9264705084745765e-05, "loss": 0.147, "step": 651000 }, { "epoch": 0.04, "learning_rate": 4.926414011299435e-05, "loss": 0.1428, "step": 651500 }, { "epoch": 0.04, "learning_rate": 4.926357514124294e-05, "loss": 0.1503, "step": 652000 }, { "epoch": 0.04, "learning_rate": 4.926301016949153e-05, "loss": 0.1565, "step": 652500 }, { "epoch": 0.04, "learning_rate": 4.926244632768362e-05, "loss": 0.154, "step": 653000 }, { "epoch": 0.04, "learning_rate": 4.926188135593221e-05, "loss": 0.1432, "step": 653500 }, { "epoch": 0.04, "learning_rate": 4.926131638418079e-05, "loss": 0.1519, "step": 654000 }, { "epoch": 0.04, "learning_rate": 4.926075141242938e-05, "loss": 0.1436, "step": 654500 }, { "epoch": 0.04, "learning_rate": 4.9260186440677966e-05, "loss": 0.1498, "step": 655000 }, { "epoch": 0.04, "learning_rate": 4.925962259887006e-05, "loss": 0.1477, "step": 655500 }, { "epoch": 0.04, "learning_rate": 4.925905762711865e-05, "loss": 0.1483, "step": 656000 }, { "epoch": 0.04, "learning_rate": 4.9258492655367234e-05, "loss": 0.1517, "step": 656500 }, { "epoch": 0.04, "learning_rate": 4.9257927683615825e-05, "loss": 0.1495, "step": 657000 }, { "epoch": 0.04, "learning_rate": 4.925736271186441e-05, "loss": 0.1525, "step": 657500 }, { "epoch": 0.04, "learning_rate": 4.9256798870056496e-05, "loss": 0.147, "step": 658000 }, { "epoch": 0.04, "learning_rate": 4.9256233898305086e-05, "loss": 0.1559, "step": 658500 }, { "epoch": 0.04, "learning_rate": 4.925566892655367e-05, "loss": 0.1461, "step": 659000 }, { "epoch": 0.04, "learning_rate": 4.925510395480226e-05, "loss": 0.1534, "step": 659500 }, { "epoch": 0.04, "learning_rate": 4.9254540112994355e-05, "loss": 0.1507, "step": 660000 }, { "epoch": 0.04, "learning_rate": 4.925397514124294e-05, "loss": 0.1498, "step": 660500 }, { "epoch": 0.04, "learning_rate": 4.925341016949153e-05, "loss": 0.1518, "step": 661000 }, { "epoch": 0.04, "learning_rate": 4.925284519774011e-05, "loss": 0.146, "step": 661500 }, { "epoch": 0.04, "learning_rate": 4.9252280225988703e-05, "loss": 0.1516, "step": 662000 }, { "epoch": 0.04, "learning_rate": 4.9251715254237294e-05, "loss": 0.1404, "step": 662500 }, { "epoch": 0.04, "learning_rate": 4.925115141242938e-05, "loss": 0.1567, "step": 663000 }, { "epoch": 0.04, "learning_rate": 4.925058644067797e-05, "loss": 0.1502, "step": 663500 }, { "epoch": 0.05, "learning_rate": 4.9250021468926556e-05, "loss": 0.1557, "step": 664000 }, { "epoch": 0.05, "learning_rate": 4.9249456497175146e-05, "loss": 0.1425, "step": 664500 }, { "epoch": 0.05, "learning_rate": 4.924889152542373e-05, "loss": 0.1472, "step": 665000 }, { "epoch": 0.05, "learning_rate": 4.924832768361582e-05, "loss": 0.1584, "step": 665500 }, { "epoch": 0.05, "learning_rate": 4.924776271186441e-05, "loss": 0.1456, "step": 666000 }, { "epoch": 0.05, "learning_rate": 4.9247197740113e-05, "loss": 0.155, "step": 666500 }, { "epoch": 0.05, "learning_rate": 4.924663276836158e-05, "loss": 0.15, "step": 667000 }, { "epoch": 0.05, "learning_rate": 4.924606779661017e-05, "loss": 0.1406, "step": 667500 }, { "epoch": 0.05, "learning_rate": 4.9245502824858757e-05, "loss": 0.1539, "step": 668000 }, { "epoch": 0.05, "learning_rate": 4.924493898305085e-05, "loss": 0.1458, "step": 668500 }, { "epoch": 0.05, "learning_rate": 4.924437401129944e-05, "loss": 0.1418, "step": 669000 }, { "epoch": 0.05, "learning_rate": 4.9243809039548025e-05, "loss": 0.1525, "step": 669500 }, { "epoch": 0.05, "learning_rate": 4.9243244067796615e-05, "loss": 0.1472, "step": 670000 }, { "epoch": 0.05, "learning_rate": 4.92426790960452e-05, "loss": 0.1474, "step": 670500 }, { "epoch": 0.05, "learning_rate": 4.924211525423729e-05, "loss": 0.1391, "step": 671000 }, { "epoch": 0.05, "learning_rate": 4.924155028248588e-05, "loss": 0.1521, "step": 671500 }, { "epoch": 0.05, "learning_rate": 4.924098531073447e-05, "loss": 0.146, "step": 672000 }, { "epoch": 0.05, "learning_rate": 4.924042033898305e-05, "loss": 0.1516, "step": 672500 }, { "epoch": 0.05, "learning_rate": 4.923985536723164e-05, "loss": 0.147, "step": 673000 }, { "epoch": 0.05, "learning_rate": 4.923929039548023e-05, "loss": 0.1467, "step": 673500 }, { "epoch": 0.05, "learning_rate": 4.923872655367232e-05, "loss": 0.1493, "step": 674000 }, { "epoch": 0.05, "learning_rate": 4.9238161581920904e-05, "loss": 0.1516, "step": 674500 }, { "epoch": 0.05, "learning_rate": 4.9237596610169494e-05, "loss": 0.143, "step": 675000 }, { "epoch": 0.05, "learning_rate": 4.923703163841808e-05, "loss": 0.1418, "step": 675500 }, { "epoch": 0.05, "learning_rate": 4.9236468926553675e-05, "loss": 0.1481, "step": 676000 }, { "epoch": 0.05, "learning_rate": 4.9235903954802266e-05, "loss": 0.1477, "step": 676500 }, { "epoch": 0.05, "learning_rate": 4.923533898305085e-05, "loss": 0.1478, "step": 677000 }, { "epoch": 0.05, "learning_rate": 4.923477401129944e-05, "loss": 0.1477, "step": 677500 }, { "epoch": 0.05, "learning_rate": 4.9234209039548024e-05, "loss": 0.1566, "step": 678000 }, { "epoch": 0.05, "learning_rate": 4.9233644067796615e-05, "loss": 0.1478, "step": 678500 }, { "epoch": 0.05, "learning_rate": 4.92330790960452e-05, "loss": 0.159, "step": 679000 }, { "epoch": 0.05, "learning_rate": 4.923251412429379e-05, "loss": 0.1468, "step": 679500 }, { "epoch": 0.05, "learning_rate": 4.923194915254237e-05, "loss": 0.1526, "step": 680000 }, { "epoch": 0.05, "learning_rate": 4.9231384180790964e-05, "loss": 0.1459, "step": 680500 }, { "epoch": 0.05, "learning_rate": 4.9230819209039554e-05, "loss": 0.1389, "step": 681000 }, { "epoch": 0.05, "learning_rate": 4.923025536723164e-05, "loss": 0.15, "step": 681500 }, { "epoch": 0.05, "learning_rate": 4.9229690395480225e-05, "loss": 0.155, "step": 682000 }, { "epoch": 0.05, "learning_rate": 4.9229125423728816e-05, "loss": 0.1493, "step": 682500 }, { "epoch": 0.05, "learning_rate": 4.92285604519774e-05, "loss": 0.1431, "step": 683000 }, { "epoch": 0.05, "learning_rate": 4.9227996610169493e-05, "loss": 0.1441, "step": 683500 }, { "epoch": 0.05, "learning_rate": 4.9227431638418084e-05, "loss": 0.1496, "step": 684000 }, { "epoch": 0.05, "learning_rate": 4.922686666666667e-05, "loss": 0.1509, "step": 684500 }, { "epoch": 0.05, "learning_rate": 4.922630169491526e-05, "loss": 0.1484, "step": 685000 }, { "epoch": 0.05, "learning_rate": 4.9225737853107346e-05, "loss": 0.1609, "step": 685500 }, { "epoch": 0.05, "learning_rate": 4.9225172881355936e-05, "loss": 0.1489, "step": 686000 }, { "epoch": 0.05, "learning_rate": 4.922460790960452e-05, "loss": 0.1491, "step": 686500 }, { "epoch": 0.05, "learning_rate": 4.922404293785311e-05, "loss": 0.1511, "step": 687000 }, { "epoch": 0.05, "learning_rate": 4.92234779661017e-05, "loss": 0.1501, "step": 687500 }, { "epoch": 0.05, "learning_rate": 4.922291412429379e-05, "loss": 0.1449, "step": 688000 }, { "epoch": 0.05, "learning_rate": 4.922234915254237e-05, "loss": 0.1494, "step": 688500 }, { "epoch": 0.05, "learning_rate": 4.922178418079096e-05, "loss": 0.1471, "step": 689000 }, { "epoch": 0.05, "learning_rate": 4.9221219209039547e-05, "loss": 0.1558, "step": 689500 }, { "epoch": 0.05, "learning_rate": 4.922065536723164e-05, "loss": 0.1499, "step": 690000 }, { "epoch": 0.05, "learning_rate": 4.922009039548023e-05, "loss": 0.1492, "step": 690500 }, { "epoch": 0.05, "learning_rate": 4.921952542372882e-05, "loss": 0.1531, "step": 691000 }, { "epoch": 0.05, "learning_rate": 4.9218960451977406e-05, "loss": 0.1528, "step": 691500 }, { "epoch": 0.05, "learning_rate": 4.921839661016949e-05, "loss": 0.1435, "step": 692000 }, { "epoch": 0.05, "learning_rate": 4.921783163841808e-05, "loss": 0.1509, "step": 692500 }, { "epoch": 0.05, "learning_rate": 4.921726666666667e-05, "loss": 0.1451, "step": 693000 }, { "epoch": 0.05, "learning_rate": 4.921670169491526e-05, "loss": 0.1421, "step": 693500 }, { "epoch": 0.05, "learning_rate": 4.921613672316384e-05, "loss": 0.147, "step": 694000 }, { "epoch": 0.05, "learning_rate": 4.9215572881355936e-05, "loss": 0.1451, "step": 694500 }, { "epoch": 0.05, "learning_rate": 4.921500790960452e-05, "loss": 0.1412, "step": 695000 }, { "epoch": 0.05, "learning_rate": 4.921444293785311e-05, "loss": 0.1416, "step": 695500 }, { "epoch": 0.05, "learning_rate": 4.9213879096045204e-05, "loss": 0.1409, "step": 696000 }, { "epoch": 0.05, "learning_rate": 4.921331412429379e-05, "loss": 0.147, "step": 696500 }, { "epoch": 0.05, "learning_rate": 4.921274915254238e-05, "loss": 0.1478, "step": 697000 }, { "epoch": 0.05, "learning_rate": 4.921218418079096e-05, "loss": 0.1435, "step": 697500 }, { "epoch": 0.05, "learning_rate": 4.921161920903955e-05, "loss": 0.1401, "step": 698000 }, { "epoch": 0.05, "learning_rate": 4.921105423728814e-05, "loss": 0.1562, "step": 698500 }, { "epoch": 0.05, "learning_rate": 4.921048926553673e-05, "loss": 0.1517, "step": 699000 }, { "epoch": 0.05, "learning_rate": 4.920992429378532e-05, "loss": 0.1445, "step": 699500 }, { "epoch": 0.05, "learning_rate": 4.92093593220339e-05, "loss": 0.1498, "step": 700000 }, { "epoch": 0.05, "learning_rate": 4.920879548022599e-05, "loss": 0.1529, "step": 700500 }, { "epoch": 0.05, "learning_rate": 4.920823050847458e-05, "loss": 0.1515, "step": 701000 }, { "epoch": 0.05, "learning_rate": 4.920766553672317e-05, "loss": 0.1465, "step": 701500 }, { "epoch": 0.05, "learning_rate": 4.9207100564971754e-05, "loss": 0.1482, "step": 702000 }, { "epoch": 0.05, "learning_rate": 4.9206535593220344e-05, "loss": 0.1481, "step": 702500 }, { "epoch": 0.05, "learning_rate": 4.920597175141243e-05, "loss": 0.1514, "step": 703000 }, { "epoch": 0.05, "learning_rate": 4.9205406779661015e-05, "loss": 0.1562, "step": 703500 }, { "epoch": 0.05, "learning_rate": 4.9204841807909606e-05, "loss": 0.1556, "step": 704000 }, { "epoch": 0.05, "learning_rate": 4.920427683615819e-05, "loss": 0.1459, "step": 704500 }, { "epoch": 0.05, "learning_rate": 4.920371186440678e-05, "loss": 0.1548, "step": 705000 }, { "epoch": 0.05, "learning_rate": 4.920314689265537e-05, "loss": 0.1464, "step": 705500 }, { "epoch": 0.05, "learning_rate": 4.9202581920903955e-05, "loss": 0.1483, "step": 706000 }, { "epoch": 0.05, "learning_rate": 4.9202016949152545e-05, "loss": 0.1408, "step": 706500 }, { "epoch": 0.05, "learning_rate": 4.920145310734464e-05, "loss": 0.1431, "step": 707000 }, { "epoch": 0.05, "learning_rate": 4.9200889265536726e-05, "loss": 0.1569, "step": 707500 }, { "epoch": 0.05, "learning_rate": 4.9200325423728814e-05, "loss": 0.1403, "step": 708000 }, { "epoch": 0.05, "learning_rate": 4.9199760451977404e-05, "loss": 0.1479, "step": 708500 }, { "epoch": 0.05, "learning_rate": 4.919919548022599e-05, "loss": 0.1452, "step": 709000 }, { "epoch": 0.05, "learning_rate": 4.919863050847458e-05, "loss": 0.1433, "step": 709500 }, { "epoch": 0.05, "learning_rate": 4.919806553672316e-05, "loss": 0.1418, "step": 710000 }, { "epoch": 0.05, "learning_rate": 4.919750056497175e-05, "loss": 0.1433, "step": 710500 }, { "epoch": 0.05, "learning_rate": 4.919693559322034e-05, "loss": 0.1421, "step": 711000 }, { "epoch": 0.05, "learning_rate": 4.919637062146893e-05, "loss": 0.149, "step": 711500 }, { "epoch": 0.05, "learning_rate": 4.919580564971752e-05, "loss": 0.1414, "step": 712000 }, { "epoch": 0.05, "learning_rate": 4.91952406779661e-05, "loss": 0.1469, "step": 712500 }, { "epoch": 0.05, "learning_rate": 4.9194676836158196e-05, "loss": 0.1459, "step": 713000 }, { "epoch": 0.05, "learning_rate": 4.9194111864406786e-05, "loss": 0.1505, "step": 713500 }, { "epoch": 0.05, "learning_rate": 4.919354689265537e-05, "loss": 0.149, "step": 714000 }, { "epoch": 0.05, "learning_rate": 4.919298192090396e-05, "loss": 0.1494, "step": 714500 }, { "epoch": 0.05, "learning_rate": 4.9192416949152544e-05, "loss": 0.1428, "step": 715000 }, { "epoch": 0.05, "learning_rate": 4.9191851977401135e-05, "loss": 0.1529, "step": 715500 }, { "epoch": 0.05, "learning_rate": 4.919128813559322e-05, "loss": 0.1469, "step": 716000 }, { "epoch": 0.05, "learning_rate": 4.919072316384181e-05, "loss": 0.1492, "step": 716500 }, { "epoch": 0.05, "learning_rate": 4.9190158192090397e-05, "loss": 0.1419, "step": 717000 }, { "epoch": 0.05, "learning_rate": 4.918959322033899e-05, "loss": 0.1431, "step": 717500 }, { "epoch": 0.05, "learning_rate": 4.918902824858757e-05, "loss": 0.1471, "step": 718000 }, { "epoch": 0.05, "learning_rate": 4.918846327683616e-05, "loss": 0.1539, "step": 718500 }, { "epoch": 0.05, "learning_rate": 4.918789943502825e-05, "loss": 0.1392, "step": 719000 }, { "epoch": 0.05, "learning_rate": 4.918733446327684e-05, "loss": 0.1431, "step": 719500 }, { "epoch": 0.05, "learning_rate": 4.918676949152542e-05, "loss": 0.1399, "step": 720000 }, { "epoch": 0.05, "learning_rate": 4.9186204519774014e-05, "loss": 0.1367, "step": 720500 }, { "epoch": 0.05, "learning_rate": 4.918564067796611e-05, "loss": 0.1457, "step": 721000 }, { "epoch": 0.05, "learning_rate": 4.9185076836158195e-05, "loss": 0.1482, "step": 721500 }, { "epoch": 0.05, "learning_rate": 4.918451186440678e-05, "loss": 0.1438, "step": 722000 }, { "epoch": 0.05, "learning_rate": 4.918394689265537e-05, "loss": 0.1522, "step": 722500 }, { "epoch": 0.05, "learning_rate": 4.918338192090396e-05, "loss": 0.1434, "step": 723000 }, { "epoch": 0.05, "learning_rate": 4.9182816949152544e-05, "loss": 0.1497, "step": 723500 }, { "epoch": 0.05, "learning_rate": 4.9182251977401134e-05, "loss": 0.1424, "step": 724000 }, { "epoch": 0.05, "learning_rate": 4.918168700564972e-05, "loss": 0.1494, "step": 724500 }, { "epoch": 0.05, "learning_rate": 4.918112203389831e-05, "loss": 0.1474, "step": 725000 }, { "epoch": 0.05, "learning_rate": 4.918055706214689e-05, "loss": 0.1517, "step": 725500 }, { "epoch": 0.05, "learning_rate": 4.9179993220338986e-05, "loss": 0.1396, "step": 726000 }, { "epoch": 0.05, "learning_rate": 4.917942824858757e-05, "loss": 0.1388, "step": 726500 }, { "epoch": 0.05, "learning_rate": 4.917886327683616e-05, "loss": 0.1435, "step": 727000 }, { "epoch": 0.05, "learning_rate": 4.9178298305084745e-05, "loss": 0.1486, "step": 727500 }, { "epoch": 0.05, "learning_rate": 4.917773446327684e-05, "loss": 0.1382, "step": 728000 }, { "epoch": 0.05, "learning_rate": 4.917716949152543e-05, "loss": 0.1455, "step": 728500 }, { "epoch": 0.05, "learning_rate": 4.917660451977401e-05, "loss": 0.1525, "step": 729000 }, { "epoch": 0.05, "learning_rate": 4.9176039548022604e-05, "loss": 0.1478, "step": 729500 }, { "epoch": 0.05, "learning_rate": 4.9175474576271194e-05, "loss": 0.1463, "step": 730000 }, { "epoch": 0.05, "learning_rate": 4.917491073446328e-05, "loss": 0.1441, "step": 730500 }, { "epoch": 0.05, "learning_rate": 4.9174345762711865e-05, "loss": 0.1426, "step": 731000 }, { "epoch": 0.05, "learning_rate": 4.9173780790960456e-05, "loss": 0.1503, "step": 731500 }, { "epoch": 0.05, "learning_rate": 4.917321581920904e-05, "loss": 0.1498, "step": 732000 }, { "epoch": 0.05, "learning_rate": 4.917265084745763e-05, "loss": 0.1431, "step": 732500 }, { "epoch": 0.05, "learning_rate": 4.917208587570622e-05, "loss": 0.1484, "step": 733000 }, { "epoch": 0.05, "learning_rate": 4.917152203389831e-05, "loss": 0.1428, "step": 733500 }, { "epoch": 0.05, "learning_rate": 4.917095706214689e-05, "loss": 0.1377, "step": 734000 }, { "epoch": 0.05, "learning_rate": 4.917039209039548e-05, "loss": 0.1551, "step": 734500 }, { "epoch": 0.05, "learning_rate": 4.9169827118644066e-05, "loss": 0.1527, "step": 735000 }, { "epoch": 0.05, "learning_rate": 4.916926327683616e-05, "loss": 0.1387, "step": 735500 }, { "epoch": 0.05, "learning_rate": 4.916869830508475e-05, "loss": 0.1453, "step": 736000 }, { "epoch": 0.05, "learning_rate": 4.916813333333334e-05, "loss": 0.1531, "step": 736500 }, { "epoch": 0.05, "learning_rate": 4.9167568361581925e-05, "loss": 0.1513, "step": 737000 }, { "epoch": 0.05, "learning_rate": 4.9167003389830516e-05, "loss": 0.1402, "step": 737500 }, { "epoch": 0.05, "learning_rate": 4.91664384180791e-05, "loss": 0.1429, "step": 738000 }, { "epoch": 0.05, "learning_rate": 4.916587457627119e-05, "loss": 0.147, "step": 738500 }, { "epoch": 0.05, "learning_rate": 4.916530960451978e-05, "loss": 0.1582, "step": 739000 }, { "epoch": 0.05, "learning_rate": 4.916474463276836e-05, "loss": 0.1506, "step": 739500 }, { "epoch": 0.05, "learning_rate": 4.916417966101695e-05, "loss": 0.1527, "step": 740000 }, { "epoch": 0.05, "learning_rate": 4.916361468926554e-05, "loss": 0.1427, "step": 740500 }, { "epoch": 0.05, "learning_rate": 4.916305084745763e-05, "loss": 0.1395, "step": 741000 }, { "epoch": 0.05, "learning_rate": 4.916248587570621e-05, "loss": 0.1388, "step": 741500 }, { "epoch": 0.05, "learning_rate": 4.9161920903954804e-05, "loss": 0.1384, "step": 742000 }, { "epoch": 0.05, "learning_rate": 4.916135593220339e-05, "loss": 0.1437, "step": 742500 }, { "epoch": 0.05, "learning_rate": 4.916079209039548e-05, "loss": 0.148, "step": 743000 }, { "epoch": 0.05, "learning_rate": 4.916022711864407e-05, "loss": 0.1512, "step": 743500 }, { "epoch": 0.05, "learning_rate": 4.915966214689266e-05, "loss": 0.1387, "step": 744000 }, { "epoch": 0.05, "learning_rate": 4.9159097175141247e-05, "loss": 0.1457, "step": 744500 }, { "epoch": 0.05, "learning_rate": 4.915853220338984e-05, "loss": 0.1455, "step": 745000 }, { "epoch": 0.05, "learning_rate": 4.915796723163842e-05, "loss": 0.1475, "step": 745500 }, { "epoch": 0.05, "learning_rate": 4.915740225988701e-05, "loss": 0.1462, "step": 746000 }, { "epoch": 0.05, "learning_rate": 4.91568384180791e-05, "loss": 0.146, "step": 746500 }, { "epoch": 0.05, "learning_rate": 4.915627344632769e-05, "loss": 0.1395, "step": 747000 }, { "epoch": 0.05, "learning_rate": 4.915570847457627e-05, "loss": 0.1465, "step": 747500 }, { "epoch": 0.05, "learning_rate": 4.9155143502824864e-05, "loss": 0.1504, "step": 748000 }, { "epoch": 0.05, "learning_rate": 4.915457853107345e-05, "loss": 0.14, "step": 748500 }, { "epoch": 0.05, "learning_rate": 4.9154014689265535e-05, "loss": 0.1459, "step": 749000 }, { "epoch": 0.05, "learning_rate": 4.9153449717514125e-05, "loss": 0.1395, "step": 749500 }, { "epoch": 0.05, "learning_rate": 4.915288474576271e-05, "loss": 0.1454, "step": 750000 }, { "epoch": 0.05, "learning_rate": 4.91523197740113e-05, "loss": 0.1431, "step": 750500 }, { "epoch": 0.05, "learning_rate": 4.915175480225989e-05, "loss": 0.1363, "step": 751000 }, { "epoch": 0.05, "learning_rate": 4.9151189830508474e-05, "loss": 0.1404, "step": 751500 }, { "epoch": 0.05, "learning_rate": 4.915062598870057e-05, "loss": 0.1454, "step": 752000 }, { "epoch": 0.05, "learning_rate": 4.915006101694916e-05, "loss": 0.1373, "step": 752500 }, { "epoch": 0.05, "learning_rate": 4.914949604519774e-05, "loss": 0.1403, "step": 753000 }, { "epoch": 0.05, "learning_rate": 4.914893107344633e-05, "loss": 0.1456, "step": 753500 }, { "epoch": 0.05, "learning_rate": 4.914836723163842e-05, "loss": 0.1432, "step": 754000 }, { "epoch": 0.05, "learning_rate": 4.914780225988701e-05, "loss": 0.1436, "step": 754500 }, { "epoch": 0.05, "learning_rate": 4.9147237288135595e-05, "loss": 0.1473, "step": 755000 }, { "epoch": 0.05, "learning_rate": 4.9146672316384185e-05, "loss": 0.1392, "step": 755500 }, { "epoch": 0.05, "learning_rate": 4.914610847457627e-05, "loss": 0.1446, "step": 756000 }, { "epoch": 0.05, "learning_rate": 4.9145543502824856e-05, "loss": 0.1386, "step": 756500 }, { "epoch": 0.05, "learning_rate": 4.914497853107345e-05, "loss": 0.1407, "step": 757000 }, { "epoch": 0.05, "learning_rate": 4.914441355932204e-05, "loss": 0.1439, "step": 757500 }, { "epoch": 0.05, "learning_rate": 4.914384858757062e-05, "loss": 0.1422, "step": 758000 }, { "epoch": 0.05, "learning_rate": 4.9143284745762715e-05, "loss": 0.1387, "step": 758500 }, { "epoch": 0.05, "learning_rate": 4.9142719774011306e-05, "loss": 0.1392, "step": 759000 }, { "epoch": 0.05, "learning_rate": 4.914215480225989e-05, "loss": 0.1527, "step": 759500 }, { "epoch": 0.05, "learning_rate": 4.914158983050848e-05, "loss": 0.1469, "step": 760000 }, { "epoch": 0.05, "learning_rate": 4.9141024858757064e-05, "loss": 0.1434, "step": 760500 }, { "epoch": 0.05, "learning_rate": 4.914046101694916e-05, "loss": 0.1431, "step": 761000 }, { "epoch": 0.05, "learning_rate": 4.913989604519774e-05, "loss": 0.1474, "step": 761500 }, { "epoch": 0.05, "learning_rate": 4.913933107344633e-05, "loss": 0.1473, "step": 762000 }, { "epoch": 0.05, "learning_rate": 4.9138766101694916e-05, "loss": 0.1443, "step": 762500 }, { "epoch": 0.05, "learning_rate": 4.9138202259887e-05, "loss": 0.1436, "step": 763000 }, { "epoch": 0.05, "learning_rate": 4.9137637288135594e-05, "loss": 0.1459, "step": 763500 }, { "epoch": 0.05, "learning_rate": 4.913707231638418e-05, "loss": 0.1369, "step": 764000 }, { "epoch": 0.05, "learning_rate": 4.913650734463277e-05, "loss": 0.1432, "step": 764500 }, { "epoch": 0.05, "learning_rate": 4.913594237288136e-05, "loss": 0.1436, "step": 765000 }, { "epoch": 0.05, "learning_rate": 4.913537853107345e-05, "loss": 0.1412, "step": 765500 }, { "epoch": 0.05, "learning_rate": 4.913481355932204e-05, "loss": 0.1474, "step": 766000 }, { "epoch": 0.05, "learning_rate": 4.913424858757063e-05, "loss": 0.1421, "step": 766500 }, { "epoch": 0.05, "learning_rate": 4.913368361581921e-05, "loss": 0.1425, "step": 767000 }, { "epoch": 0.05, "learning_rate": 4.91331197740113e-05, "loss": 0.1455, "step": 767500 }, { "epoch": 0.05, "learning_rate": 4.913255480225989e-05, "loss": 0.1468, "step": 768000 }, { "epoch": 0.05, "learning_rate": 4.913198983050848e-05, "loss": 0.1438, "step": 768500 }, { "epoch": 0.05, "learning_rate": 4.913142485875706e-05, "loss": 0.1499, "step": 769000 }, { "epoch": 0.05, "learning_rate": 4.913086101694915e-05, "loss": 0.14, "step": 769500 }, { "epoch": 0.05, "learning_rate": 4.913029604519774e-05, "loss": 0.1436, "step": 770000 }, { "epoch": 0.05, "learning_rate": 4.9129731073446325e-05, "loss": 0.1445, "step": 770500 }, { "epoch": 0.05, "learning_rate": 4.9129166101694915e-05, "loss": 0.1482, "step": 771000 }, { "epoch": 0.05, "learning_rate": 4.9128601129943506e-05, "loss": 0.147, "step": 771500 }, { "epoch": 0.05, "learning_rate": 4.91280372881356e-05, "loss": 0.1396, "step": 772000 }, { "epoch": 0.05, "learning_rate": 4.9127472316384184e-05, "loss": 0.1427, "step": 772500 }, { "epoch": 0.05, "learning_rate": 4.9126907344632774e-05, "loss": 0.146, "step": 773000 }, { "epoch": 0.05, "learning_rate": 4.912634237288136e-05, "loss": 0.145, "step": 773500 }, { "epoch": 0.05, "learning_rate": 4.912577740112995e-05, "loss": 0.1436, "step": 774000 }, { "epoch": 0.05, "learning_rate": 4.9125213559322036e-05, "loss": 0.139, "step": 774500 }, { "epoch": 0.05, "learning_rate": 4.9124648587570627e-05, "loss": 0.1505, "step": 775000 }, { "epoch": 0.05, "learning_rate": 4.912408361581921e-05, "loss": 0.1339, "step": 775500 }, { "epoch": 0.05, "learning_rate": 4.91235186440678e-05, "loss": 0.1407, "step": 776000 }, { "epoch": 0.05, "learning_rate": 4.912295480225989e-05, "loss": 0.1416, "step": 776500 }, { "epoch": 0.05, "learning_rate": 4.912238983050847e-05, "loss": 0.1419, "step": 777000 }, { "epoch": 0.05, "learning_rate": 4.912182485875706e-05, "loss": 0.1428, "step": 777500 }, { "epoch": 0.05, "learning_rate": 4.9121259887005646e-05, "loss": 0.139, "step": 778000 }, { "epoch": 0.05, "learning_rate": 4.912069491525424e-05, "loss": 0.145, "step": 778500 }, { "epoch": 0.05, "learning_rate": 4.912013107344633e-05, "loss": 0.1445, "step": 779000 }, { "epoch": 0.05, "learning_rate": 4.911956610169492e-05, "loss": 0.1364, "step": 779500 }, { "epoch": 0.05, "learning_rate": 4.9119001129943505e-05, "loss": 0.132, "step": 780000 }, { "epoch": 0.05, "learning_rate": 4.9118436158192096e-05, "loss": 0.1489, "step": 780500 }, { "epoch": 0.05, "learning_rate": 4.911787231638418e-05, "loss": 0.141, "step": 781000 }, { "epoch": 0.05, "learning_rate": 4.911730734463277e-05, "loss": 0.1504, "step": 781500 }, { "epoch": 0.05, "learning_rate": 4.911674237288136e-05, "loss": 0.1356, "step": 782000 }, { "epoch": 0.05, "learning_rate": 4.911617740112995e-05, "loss": 0.1504, "step": 782500 }, { "epoch": 0.05, "learning_rate": 4.9115613559322035e-05, "loss": 0.1386, "step": 783000 }, { "epoch": 0.05, "learning_rate": 4.911504858757062e-05, "loss": 0.1549, "step": 783500 }, { "epoch": 0.05, "learning_rate": 4.911448361581921e-05, "loss": 0.1382, "step": 784000 }, { "epoch": 0.05, "learning_rate": 4.9113918644067793e-05, "loss": 0.138, "step": 784500 }, { "epoch": 0.05, "learning_rate": 4.9113353672316384e-05, "loss": 0.1345, "step": 785000 }, { "epoch": 0.05, "learning_rate": 4.9112788700564975e-05, "loss": 0.1407, "step": 785500 }, { "epoch": 0.05, "learning_rate": 4.911222485875707e-05, "loss": 0.1426, "step": 786000 }, { "epoch": 0.05, "learning_rate": 4.911165988700565e-05, "loss": 0.1386, "step": 786500 }, { "epoch": 0.05, "learning_rate": 4.911109491525424e-05, "loss": 0.1397, "step": 787000 }, { "epoch": 0.05, "learning_rate": 4.911052994350283e-05, "loss": 0.1361, "step": 787500 }, { "epoch": 0.05, "learning_rate": 4.9109966101694914e-05, "loss": 0.1429, "step": 788000 }, { "epoch": 0.05, "learning_rate": 4.9109401129943505e-05, "loss": 0.1404, "step": 788500 }, { "epoch": 0.05, "learning_rate": 4.9108836158192095e-05, "loss": 0.1393, "step": 789000 }, { "epoch": 0.05, "learning_rate": 4.910827118644068e-05, "loss": 0.1437, "step": 789500 }, { "epoch": 0.05, "learning_rate": 4.910770621468927e-05, "loss": 0.1336, "step": 790000 }, { "epoch": 0.05, "learning_rate": 4.910714237288136e-05, "loss": 0.1371, "step": 790500 }, { "epoch": 0.05, "learning_rate": 4.910657740112995e-05, "loss": 0.1441, "step": 791000 }, { "epoch": 0.05, "learning_rate": 4.910601242937853e-05, "loss": 0.1432, "step": 791500 }, { "epoch": 0.05, "learning_rate": 4.910544745762712e-05, "loss": 0.139, "step": 792000 }, { "epoch": 0.05, "learning_rate": 4.9104883615819216e-05, "loss": 0.1469, "step": 792500 }, { "epoch": 0.05, "learning_rate": 4.91043186440678e-05, "loss": 0.1414, "step": 793000 }, { "epoch": 0.05, "learning_rate": 4.910375367231639e-05, "loss": 0.1479, "step": 793500 }, { "epoch": 0.05, "learning_rate": 4.9103188700564974e-05, "loss": 0.1462, "step": 794000 }, { "epoch": 0.05, "learning_rate": 4.9102623728813564e-05, "loss": 0.1486, "step": 794500 }, { "epoch": 0.05, "learning_rate": 4.910205988700565e-05, "loss": 0.1412, "step": 795000 }, { "epoch": 0.05, "learning_rate": 4.9101494915254235e-05, "loss": 0.1504, "step": 795500 }, { "epoch": 0.05, "learning_rate": 4.9100929943502826e-05, "loss": 0.138, "step": 796000 }, { "epoch": 0.05, "learning_rate": 4.9100364971751417e-05, "loss": 0.1359, "step": 796500 }, { "epoch": 0.05, "learning_rate": 4.90998e-05, "loss": 0.1532, "step": 797000 }, { "epoch": 0.05, "learning_rate": 4.9099236158192094e-05, "loss": 0.1388, "step": 797500 }, { "epoch": 0.05, "learning_rate": 4.909867118644068e-05, "loss": 0.1422, "step": 798000 }, { "epoch": 0.05, "learning_rate": 4.909810621468927e-05, "loss": 0.1439, "step": 798500 }, { "epoch": 0.05, "learning_rate": 4.909754124293785e-05, "loss": 0.1464, "step": 799000 }, { "epoch": 0.05, "learning_rate": 4.909697627118644e-05, "loss": 0.1468, "step": 799500 }, { "epoch": 0.05, "learning_rate": 4.909641242937854e-05, "loss": 0.1419, "step": 800000 }, { "epoch": 0.05, "learning_rate": 4.909584745762712e-05, "loss": 0.1366, "step": 800500 }, { "epoch": 0.05, "learning_rate": 4.909528248587571e-05, "loss": 0.1289, "step": 801000 }, { "epoch": 0.05, "learning_rate": 4.9094717514124295e-05, "loss": 0.1397, "step": 801500 }, { "epoch": 0.05, "learning_rate": 4.9094152542372886e-05, "loss": 0.1358, "step": 802000 }, { "epoch": 0.05, "learning_rate": 4.909358757062147e-05, "loss": 0.1423, "step": 802500 }, { "epoch": 0.05, "learning_rate": 4.9093023728813564e-05, "loss": 0.1319, "step": 803000 }, { "epoch": 0.05, "learning_rate": 4.909245875706215e-05, "loss": 0.1432, "step": 803500 }, { "epoch": 0.05, "learning_rate": 4.909189378531074e-05, "loss": 0.1382, "step": 804000 }, { "epoch": 0.05, "learning_rate": 4.909132881355932e-05, "loss": 0.1386, "step": 804500 }, { "epoch": 0.05, "learning_rate": 4.909076384180791e-05, "loss": 0.1444, "step": 805000 }, { "epoch": 0.05, "learning_rate": 4.9090198870056496e-05, "loss": 0.1408, "step": 805500 }, { "epoch": 0.05, "learning_rate": 4.908963502824859e-05, "loss": 0.1415, "step": 806000 }, { "epoch": 0.05, "learning_rate": 4.9089070056497174e-05, "loss": 0.1383, "step": 806500 }, { "epoch": 0.05, "learning_rate": 4.9088505084745765e-05, "loss": 0.1477, "step": 807000 }, { "epoch": 0.05, "learning_rate": 4.908794011299435e-05, "loss": 0.1397, "step": 807500 }, { "epoch": 0.05, "learning_rate": 4.908737627118644e-05, "loss": 0.1488, "step": 808000 }, { "epoch": 0.05, "learning_rate": 4.908681129943503e-05, "loss": 0.1354, "step": 808500 }, { "epoch": 0.05, "learning_rate": 4.908624632768362e-05, "loss": 0.1426, "step": 809000 }, { "epoch": 0.05, "learning_rate": 4.908568135593221e-05, "loss": 0.1387, "step": 809500 }, { "epoch": 0.05, "learning_rate": 4.90851163841808e-05, "loss": 0.1446, "step": 810000 }, { "epoch": 0.05, "learning_rate": 4.9084552542372885e-05, "loss": 0.1483, "step": 810500 }, { "epoch": 0.05, "learning_rate": 4.908398757062147e-05, "loss": 0.1386, "step": 811000 }, { "epoch": 0.06, "learning_rate": 4.908342259887006e-05, "loss": 0.1396, "step": 811500 }, { "epoch": 0.06, "learning_rate": 4.9082857627118643e-05, "loss": 0.1432, "step": 812000 }, { "epoch": 0.06, "learning_rate": 4.9082292655367234e-05, "loss": 0.1377, "step": 812500 }, { "epoch": 0.06, "learning_rate": 4.908172881355932e-05, "loss": 0.1397, "step": 813000 }, { "epoch": 0.06, "learning_rate": 4.908116384180791e-05, "loss": 0.1377, "step": 813500 }, { "epoch": 0.06, "learning_rate": 4.9080598870056496e-05, "loss": 0.1439, "step": 814000 }, { "epoch": 0.06, "learning_rate": 4.9080033898305086e-05, "loss": 0.1399, "step": 814500 }, { "epoch": 0.06, "learning_rate": 4.907946892655368e-05, "loss": 0.139, "step": 815000 }, { "epoch": 0.06, "learning_rate": 4.907890395480226e-05, "loss": 0.1415, "step": 815500 }, { "epoch": 0.06, "learning_rate": 4.9078340112994355e-05, "loss": 0.1418, "step": 816000 }, { "epoch": 0.06, "learning_rate": 4.907777514124294e-05, "loss": 0.1441, "step": 816500 }, { "epoch": 0.06, "learning_rate": 4.907721016949153e-05, "loss": 0.1475, "step": 817000 }, { "epoch": 0.06, "learning_rate": 4.907664519774012e-05, "loss": 0.139, "step": 817500 }, { "epoch": 0.06, "learning_rate": 4.907608135593221e-05, "loss": 0.1456, "step": 818000 }, { "epoch": 0.06, "learning_rate": 4.907551638418079e-05, "loss": 0.1398, "step": 818500 }, { "epoch": 0.06, "learning_rate": 4.907495141242938e-05, "loss": 0.1372, "step": 819000 }, { "epoch": 0.06, "learning_rate": 4.9074386440677965e-05, "loss": 0.1484, "step": 819500 }, { "epoch": 0.06, "learning_rate": 4.9073821468926555e-05, "loss": 0.145, "step": 820000 }, { "epoch": 0.06, "learning_rate": 4.907325762711865e-05, "loss": 0.1453, "step": 820500 }, { "epoch": 0.06, "learning_rate": 4.907269265536723e-05, "loss": 0.1416, "step": 821000 }, { "epoch": 0.06, "learning_rate": 4.9072127683615824e-05, "loss": 0.1437, "step": 821500 }, { "epoch": 0.06, "learning_rate": 4.907156271186441e-05, "loss": 0.1418, "step": 822000 }, { "epoch": 0.06, "learning_rate": 4.90709988700565e-05, "loss": 0.141, "step": 822500 }, { "epoch": 0.06, "learning_rate": 4.9070433898305085e-05, "loss": 0.143, "step": 823000 }, { "epoch": 0.06, "learning_rate": 4.9069868926553676e-05, "loss": 0.1391, "step": 823500 }, { "epoch": 0.06, "learning_rate": 4.9069303954802267e-05, "loss": 0.1366, "step": 824000 }, { "epoch": 0.06, "learning_rate": 4.906873898305085e-05, "loss": 0.1362, "step": 824500 }, { "epoch": 0.06, "learning_rate": 4.906817514124294e-05, "loss": 0.1403, "step": 825000 }, { "epoch": 0.06, "learning_rate": 4.906761016949153e-05, "loss": 0.1423, "step": 825500 }, { "epoch": 0.06, "learning_rate": 4.906704519774011e-05, "loss": 0.145, "step": 826000 }, { "epoch": 0.06, "learning_rate": 4.90664802259887e-05, "loss": 0.1436, "step": 826500 }, { "epoch": 0.06, "learning_rate": 4.9065916384180797e-05, "loss": 0.1402, "step": 827000 }, { "epoch": 0.06, "learning_rate": 4.906535141242938e-05, "loss": 0.1424, "step": 827500 }, { "epoch": 0.06, "learning_rate": 4.906478644067797e-05, "loss": 0.1419, "step": 828000 }, { "epoch": 0.06, "learning_rate": 4.9064221468926555e-05, "loss": 0.1368, "step": 828500 }, { "epoch": 0.06, "learning_rate": 4.9063656497175145e-05, "loss": 0.1414, "step": 829000 }, { "epoch": 0.06, "learning_rate": 4.906309265536723e-05, "loss": 0.1412, "step": 829500 }, { "epoch": 0.06, "learning_rate": 4.906252768361582e-05, "loss": 0.133, "step": 830000 }, { "epoch": 0.06, "learning_rate": 4.906196271186441e-05, "loss": 0.138, "step": 830500 }, { "epoch": 0.06, "learning_rate": 4.9061397740113e-05, "loss": 0.1467, "step": 831000 }, { "epoch": 0.06, "learning_rate": 4.906083276836159e-05, "loss": 0.1407, "step": 831500 }, { "epoch": 0.06, "learning_rate": 4.906026779661017e-05, "loss": 0.1361, "step": 832000 }, { "epoch": 0.06, "learning_rate": 4.905970395480226e-05, "loss": 0.1477, "step": 832500 }, { "epoch": 0.06, "learning_rate": 4.905913898305085e-05, "loss": 0.1485, "step": 833000 }, { "epoch": 0.06, "learning_rate": 4.9058574011299433e-05, "loss": 0.1452, "step": 833500 }, { "epoch": 0.06, "learning_rate": 4.9058009039548024e-05, "loss": 0.1355, "step": 834000 }, { "epoch": 0.06, "learning_rate": 4.905744519774012e-05, "loss": 0.1306, "step": 834500 }, { "epoch": 0.06, "learning_rate": 4.9056881355932205e-05, "loss": 0.1333, "step": 835000 }, { "epoch": 0.06, "learning_rate": 4.9056316384180796e-05, "loss": 0.1328, "step": 835500 }, { "epoch": 0.06, "learning_rate": 4.905575141242938e-05, "loss": 0.1399, "step": 836000 }, { "epoch": 0.06, "learning_rate": 4.905518644067797e-05, "loss": 0.1458, "step": 836500 }, { "epoch": 0.06, "learning_rate": 4.9054621468926554e-05, "loss": 0.1423, "step": 837000 }, { "epoch": 0.06, "learning_rate": 4.9054056497175145e-05, "loss": 0.1467, "step": 837500 }, { "epoch": 0.06, "learning_rate": 4.9053491525423735e-05, "loss": 0.142, "step": 838000 }, { "epoch": 0.06, "learning_rate": 4.905292655367232e-05, "loss": 0.1475, "step": 838500 }, { "epoch": 0.06, "learning_rate": 4.905236158192091e-05, "loss": 0.1455, "step": 839000 }, { "epoch": 0.06, "learning_rate": 4.9051797740113e-05, "loss": 0.1431, "step": 839500 }, { "epoch": 0.06, "learning_rate": 4.905123276836158e-05, "loss": 0.1424, "step": 840000 }, { "epoch": 0.06, "learning_rate": 4.905066779661017e-05, "loss": 0.1366, "step": 840500 }, { "epoch": 0.06, "learning_rate": 4.9050102824858755e-05, "loss": 0.1483, "step": 841000 }, { "epoch": 0.06, "learning_rate": 4.9049537853107346e-05, "loss": 0.1353, "step": 841500 }, { "epoch": 0.06, "learning_rate": 4.9048972881355936e-05, "loss": 0.1491, "step": 842000 }, { "epoch": 0.06, "learning_rate": 4.904840903954802e-05, "loss": 0.1344, "step": 842500 }, { "epoch": 0.06, "learning_rate": 4.9047844067796614e-05, "loss": 0.1374, "step": 843000 }, { "epoch": 0.06, "learning_rate": 4.90472790960452e-05, "loss": 0.1376, "step": 843500 }, { "epoch": 0.06, "learning_rate": 4.904671412429379e-05, "loss": 0.1373, "step": 844000 }, { "epoch": 0.06, "learning_rate": 4.9046150282485876e-05, "loss": 0.1433, "step": 844500 }, { "epoch": 0.06, "learning_rate": 4.9045585310734466e-05, "loss": 0.1334, "step": 845000 }, { "epoch": 0.06, "learning_rate": 4.904502033898306e-05, "loss": 0.1409, "step": 845500 }, { "epoch": 0.06, "learning_rate": 4.904445536723164e-05, "loss": 0.1342, "step": 846000 }, { "epoch": 0.06, "learning_rate": 4.904389039548023e-05, "loss": 0.1449, "step": 846500 }, { "epoch": 0.06, "learning_rate": 4.9043325423728815e-05, "loss": 0.1454, "step": 847000 }, { "epoch": 0.06, "learning_rate": 4.9042760451977405e-05, "loss": 0.1392, "step": 847500 }, { "epoch": 0.06, "learning_rate": 4.904219548022599e-05, "loss": 0.147, "step": 848000 }, { "epoch": 0.06, "learning_rate": 4.904163163841808e-05, "loss": 0.1407, "step": 848500 }, { "epoch": 0.06, "learning_rate": 4.904106666666667e-05, "loss": 0.1412, "step": 849000 }, { "epoch": 0.06, "learning_rate": 4.904050169491526e-05, "loss": 0.1369, "step": 849500 }, { "epoch": 0.06, "learning_rate": 4.903993672316384e-05, "loss": 0.1373, "step": 850000 }, { "epoch": 0.06, "learning_rate": 4.903937175141243e-05, "loss": 0.1391, "step": 850500 }, { "epoch": 0.06, "learning_rate": 4.9038806779661016e-05, "loss": 0.1384, "step": 851000 }, { "epoch": 0.06, "learning_rate": 4.903824293785311e-05, "loss": 0.1385, "step": 851500 }, { "epoch": 0.06, "learning_rate": 4.90376779661017e-05, "loss": 0.1392, "step": 852000 }, { "epoch": 0.06, "learning_rate": 4.9037112994350284e-05, "loss": 0.1388, "step": 852500 }, { "epoch": 0.06, "learning_rate": 4.9036548022598875e-05, "loss": 0.135, "step": 853000 }, { "epoch": 0.06, "learning_rate": 4.903598305084746e-05, "loss": 0.1352, "step": 853500 }, { "epoch": 0.06, "learning_rate": 4.903541807909605e-05, "loss": 0.1372, "step": 854000 }, { "epoch": 0.06, "learning_rate": 4.903485310734463e-05, "loss": 0.1452, "step": 854500 }, { "epoch": 0.06, "learning_rate": 4.903428926553673e-05, "loss": 0.1389, "step": 855000 }, { "epoch": 0.06, "learning_rate": 4.903372429378532e-05, "loss": 0.1388, "step": 855500 }, { "epoch": 0.06, "learning_rate": 4.90331593220339e-05, "loss": 0.1422, "step": 856000 }, { "epoch": 0.06, "learning_rate": 4.903259435028249e-05, "loss": 0.1442, "step": 856500 }, { "epoch": 0.06, "learning_rate": 4.9032029378531076e-05, "loss": 0.1444, "step": 857000 }, { "epoch": 0.06, "learning_rate": 4.903146553672316e-05, "loss": 0.1331, "step": 857500 }, { "epoch": 0.06, "learning_rate": 4.9030900564971753e-05, "loss": 0.1399, "step": 858000 }, { "epoch": 0.06, "learning_rate": 4.903033559322034e-05, "loss": 0.1387, "step": 858500 }, { "epoch": 0.06, "learning_rate": 4.902977062146893e-05, "loss": 0.1356, "step": 859000 }, { "epoch": 0.06, "learning_rate": 4.902920677966102e-05, "loss": 0.1459, "step": 859500 }, { "epoch": 0.06, "learning_rate": 4.9028641807909606e-05, "loss": 0.1318, "step": 860000 }, { "epoch": 0.06, "learning_rate": 4.9028076836158196e-05, "loss": 0.1324, "step": 860500 }, { "epoch": 0.06, "learning_rate": 4.902751186440678e-05, "loss": 0.137, "step": 861000 }, { "epoch": 0.06, "learning_rate": 4.9026948022598874e-05, "loss": 0.1473, "step": 861500 }, { "epoch": 0.06, "learning_rate": 4.902638305084746e-05, "loss": 0.1343, "step": 862000 }, { "epoch": 0.06, "learning_rate": 4.902581807909605e-05, "loss": 0.1415, "step": 862500 }, { "epoch": 0.06, "learning_rate": 4.902525310734464e-05, "loss": 0.1366, "step": 863000 }, { "epoch": 0.06, "learning_rate": 4.902468813559322e-05, "loss": 0.1353, "step": 863500 }, { "epoch": 0.06, "learning_rate": 4.9024123163841813e-05, "loss": 0.1386, "step": 864000 }, { "epoch": 0.06, "learning_rate": 4.90235581920904e-05, "loss": 0.1427, "step": 864500 }, { "epoch": 0.06, "learning_rate": 4.902299322033899e-05, "loss": 0.1439, "step": 865000 }, { "epoch": 0.06, "learning_rate": 4.902243050847458e-05, "loss": 0.1339, "step": 865500 }, { "epoch": 0.06, "learning_rate": 4.902186553672317e-05, "loss": 0.1407, "step": 866000 }, { "epoch": 0.06, "learning_rate": 4.902130056497175e-05, "loss": 0.1479, "step": 866500 }, { "epoch": 0.06, "learning_rate": 4.902073559322034e-05, "loss": 0.1383, "step": 867000 }, { "epoch": 0.06, "learning_rate": 4.902017062146893e-05, "loss": 0.1407, "step": 867500 }, { "epoch": 0.06, "learning_rate": 4.901960564971752e-05, "loss": 0.1356, "step": 868000 }, { "epoch": 0.06, "learning_rate": 4.9019041807909605e-05, "loss": 0.129, "step": 868500 }, { "epoch": 0.06, "learning_rate": 4.9018476836158196e-05, "loss": 0.1392, "step": 869000 }, { "epoch": 0.06, "learning_rate": 4.9017911864406786e-05, "loss": 0.1466, "step": 869500 }, { "epoch": 0.06, "learning_rate": 4.901734689265537e-05, "loss": 0.1441, "step": 870000 }, { "epoch": 0.06, "learning_rate": 4.901678192090396e-05, "loss": 0.1337, "step": 870500 }, { "epoch": 0.06, "learning_rate": 4.9016216949152544e-05, "loss": 0.135, "step": 871000 }, { "epoch": 0.06, "learning_rate": 4.901565310734463e-05, "loss": 0.1343, "step": 871500 }, { "epoch": 0.06, "learning_rate": 4.901508813559322e-05, "loss": 0.1426, "step": 872000 }, { "epoch": 0.06, "learning_rate": 4.9014523163841806e-05, "loss": 0.1393, "step": 872500 }, { "epoch": 0.06, "learning_rate": 4.9013958192090396e-05, "loss": 0.1429, "step": 873000 }, { "epoch": 0.06, "learning_rate": 4.901339322033899e-05, "loss": 0.1387, "step": 873500 }, { "epoch": 0.06, "learning_rate": 4.901282937853108e-05, "loss": 0.1387, "step": 874000 }, { "epoch": 0.06, "learning_rate": 4.9012264406779665e-05, "loss": 0.1369, "step": 874500 }, { "epoch": 0.06, "learning_rate": 4.9011699435028255e-05, "loss": 0.1377, "step": 875000 }, { "epoch": 0.06, "learning_rate": 4.901113446327684e-05, "loss": 0.136, "step": 875500 }, { "epoch": 0.06, "learning_rate": 4.901056949152543e-05, "loss": 0.1336, "step": 876000 }, { "epoch": 0.06, "learning_rate": 4.9010004519774014e-05, "loss": 0.1279, "step": 876500 }, { "epoch": 0.06, "learning_rate": 4.900944067796611e-05, "loss": 0.1402, "step": 877000 }, { "epoch": 0.06, "learning_rate": 4.900887570621469e-05, "loss": 0.1368, "step": 877500 }, { "epoch": 0.06, "learning_rate": 4.900831073446328e-05, "loss": 0.1426, "step": 878000 }, { "epoch": 0.06, "learning_rate": 4.9007745762711866e-05, "loss": 0.1376, "step": 878500 }, { "epoch": 0.06, "learning_rate": 4.9007180790960456e-05, "loss": 0.1366, "step": 879000 }, { "epoch": 0.06, "learning_rate": 4.900661581920904e-05, "loss": 0.1372, "step": 879500 }, { "epoch": 0.06, "learning_rate": 4.900605084745763e-05, "loss": 0.1378, "step": 880000 }, { "epoch": 0.06, "learning_rate": 4.900548587570622e-05, "loss": 0.1389, "step": 880500 }, { "epoch": 0.06, "learning_rate": 4.900492203389831e-05, "loss": 0.1381, "step": 881000 }, { "epoch": 0.06, "learning_rate": 4.90043581920904e-05, "loss": 0.1298, "step": 881500 }, { "epoch": 0.06, "learning_rate": 4.9003793220338986e-05, "loss": 0.1443, "step": 882000 }, { "epoch": 0.06, "learning_rate": 4.900322824858758e-05, "loss": 0.1387, "step": 882500 }, { "epoch": 0.06, "learning_rate": 4.900266327683616e-05, "loss": 0.1416, "step": 883000 }, { "epoch": 0.06, "learning_rate": 4.900209830508475e-05, "loss": 0.1431, "step": 883500 }, { "epoch": 0.06, "learning_rate": 4.900153446327684e-05, "loss": 0.1385, "step": 884000 }, { "epoch": 0.06, "learning_rate": 4.900096949152543e-05, "loss": 0.135, "step": 884500 }, { "epoch": 0.06, "learning_rate": 4.900040451977401e-05, "loss": 0.134, "step": 885000 }, { "epoch": 0.06, "learning_rate": 4.8999839548022603e-05, "loss": 0.1344, "step": 885500 }, { "epoch": 0.06, "learning_rate": 4.899927570621469e-05, "loss": 0.1495, "step": 886000 }, { "epoch": 0.06, "learning_rate": 4.8998710734463274e-05, "loss": 0.1401, "step": 886500 }, { "epoch": 0.06, "learning_rate": 4.8998145762711865e-05, "loss": 0.1391, "step": 887000 }, { "epoch": 0.06, "learning_rate": 4.8997580790960456e-05, "loss": 0.1369, "step": 887500 }, { "epoch": 0.06, "learning_rate": 4.899701581920904e-05, "loss": 0.1359, "step": 888000 }, { "epoch": 0.06, "learning_rate": 4.8996451977401133e-05, "loss": 0.1355, "step": 888500 }, { "epoch": 0.06, "learning_rate": 4.8995887005649724e-05, "loss": 0.1366, "step": 889000 }, { "epoch": 0.06, "learning_rate": 4.899532203389831e-05, "loss": 0.1382, "step": 889500 }, { "epoch": 0.06, "learning_rate": 4.89947570621469e-05, "loss": 0.1452, "step": 890000 }, { "epoch": 0.06, "learning_rate": 4.8994193220338986e-05, "loss": 0.1449, "step": 890500 }, { "epoch": 0.06, "learning_rate": 4.8993628248587576e-05, "loss": 0.1363, "step": 891000 }, { "epoch": 0.06, "learning_rate": 4.899306327683616e-05, "loss": 0.1364, "step": 891500 }, { "epoch": 0.06, "learning_rate": 4.899249830508475e-05, "loss": 0.1391, "step": 892000 }, { "epoch": 0.06, "learning_rate": 4.899193446327684e-05, "loss": 0.1424, "step": 892500 }, { "epoch": 0.06, "learning_rate": 4.899136949152542e-05, "loss": 0.1355, "step": 893000 }, { "epoch": 0.06, "learning_rate": 4.899080451977401e-05, "loss": 0.1411, "step": 893500 }, { "epoch": 0.06, "learning_rate": 4.89902395480226e-05, "loss": 0.1376, "step": 894000 }, { "epoch": 0.06, "learning_rate": 4.89896757062147e-05, "loss": 0.138, "step": 894500 }, { "epoch": 0.06, "learning_rate": 4.898911073446328e-05, "loss": 0.1354, "step": 895000 }, { "epoch": 0.06, "learning_rate": 4.898854576271187e-05, "loss": 0.141, "step": 895500 }, { "epoch": 0.06, "learning_rate": 4.8987980790960455e-05, "loss": 0.1366, "step": 896000 }, { "epoch": 0.06, "learning_rate": 4.8987415819209046e-05, "loss": 0.1311, "step": 896500 }, { "epoch": 0.06, "learning_rate": 4.898685197740113e-05, "loss": 0.136, "step": 897000 }, { "epoch": 0.06, "learning_rate": 4.898628700564972e-05, "loss": 0.1373, "step": 897500 }, { "epoch": 0.06, "learning_rate": 4.898572203389831e-05, "loss": 0.1375, "step": 898000 }, { "epoch": 0.06, "learning_rate": 4.89851570621469e-05, "loss": 0.1412, "step": 898500 }, { "epoch": 0.06, "learning_rate": 4.8984593220338985e-05, "loss": 0.1386, "step": 899000 }, { "epoch": 0.06, "learning_rate": 4.898402824858757e-05, "loss": 0.1388, "step": 899500 }, { "epoch": 0.06, "learning_rate": 4.898346327683616e-05, "loss": 0.1386, "step": 900000 }, { "epoch": 0.06, "learning_rate": 4.898289830508474e-05, "loss": 0.1382, "step": 900500 }, { "epoch": 0.06, "learning_rate": 4.8982334463276844e-05, "loss": 0.1365, "step": 901000 }, { "epoch": 0.06, "learning_rate": 4.898176949152543e-05, "loss": 0.137, "step": 901500 }, { "epoch": 0.06, "learning_rate": 4.898120451977402e-05, "loss": 0.1385, "step": 902000 }, { "epoch": 0.06, "learning_rate": 4.89806395480226e-05, "loss": 0.1387, "step": 902500 }, { "epoch": 0.06, "learning_rate": 4.898007457627119e-05, "loss": 0.1481, "step": 903000 }, { "epoch": 0.06, "learning_rate": 4.897951073446328e-05, "loss": 0.1258, "step": 903500 }, { "epoch": 0.06, "learning_rate": 4.8978945762711864e-05, "loss": 0.1343, "step": 904000 }, { "epoch": 0.06, "learning_rate": 4.8978380790960454e-05, "loss": 0.1374, "step": 904500 }, { "epoch": 0.06, "learning_rate": 4.8977815819209045e-05, "loss": 0.1375, "step": 905000 }, { "epoch": 0.06, "learning_rate": 4.897725084745763e-05, "loss": 0.1372, "step": 905500 }, { "epoch": 0.06, "learning_rate": 4.897668587570622e-05, "loss": 0.1402, "step": 906000 }, { "epoch": 0.06, "learning_rate": 4.8976122033898306e-05, "loss": 0.1402, "step": 906500 }, { "epoch": 0.06, "learning_rate": 4.897555706214689e-05, "loss": 0.132, "step": 907000 }, { "epoch": 0.06, "learning_rate": 4.897499209039548e-05, "loss": 0.1431, "step": 907500 }, { "epoch": 0.06, "learning_rate": 4.897442711864407e-05, "loss": 0.1365, "step": 908000 }, { "epoch": 0.06, "learning_rate": 4.8973863276836165e-05, "loss": 0.1333, "step": 908500 }, { "epoch": 0.06, "learning_rate": 4.897329830508475e-05, "loss": 0.143, "step": 909000 }, { "epoch": 0.06, "learning_rate": 4.897273333333334e-05, "loss": 0.1447, "step": 909500 }, { "epoch": 0.06, "learning_rate": 4.8972168361581924e-05, "loss": 0.1287, "step": 910000 }, { "epoch": 0.06, "learning_rate": 4.8971603389830514e-05, "loss": 0.1333, "step": 910500 }, { "epoch": 0.06, "learning_rate": 4.89710395480226e-05, "loss": 0.1391, "step": 911000 }, { "epoch": 0.06, "learning_rate": 4.897047457627119e-05, "loss": 0.1353, "step": 911500 }, { "epoch": 0.06, "learning_rate": 4.8969909604519776e-05, "loss": 0.1402, "step": 912000 }, { "epoch": 0.06, "learning_rate": 4.8969344632768366e-05, "loss": 0.1315, "step": 912500 }, { "epoch": 0.06, "learning_rate": 4.8968780790960453e-05, "loss": 0.1368, "step": 913000 }, { "epoch": 0.06, "learning_rate": 4.896821581920904e-05, "loss": 0.1373, "step": 913500 }, { "epoch": 0.06, "learning_rate": 4.896765084745763e-05, "loss": 0.1407, "step": 914000 }, { "epoch": 0.06, "learning_rate": 4.896708587570621e-05, "loss": 0.1325, "step": 914500 }, { "epoch": 0.06, "learning_rate": 4.896652203389831e-05, "loss": 0.1297, "step": 915000 }, { "epoch": 0.06, "learning_rate": 4.8965957062146896e-05, "loss": 0.1433, "step": 915500 }, { "epoch": 0.06, "learning_rate": 4.896539209039549e-05, "loss": 0.132, "step": 916000 }, { "epoch": 0.06, "learning_rate": 4.8964828248587574e-05, "loss": 0.1359, "step": 916500 }, { "epoch": 0.06, "learning_rate": 4.896426327683616e-05, "loss": 0.1391, "step": 917000 }, { "epoch": 0.06, "learning_rate": 4.896369830508475e-05, "loss": 0.135, "step": 917500 }, { "epoch": 0.06, "learning_rate": 4.896313333333333e-05, "loss": 0.1386, "step": 918000 }, { "epoch": 0.06, "learning_rate": 4.896256836158192e-05, "loss": 0.1382, "step": 918500 }, { "epoch": 0.06, "learning_rate": 4.8962003389830513e-05, "loss": 0.1387, "step": 919000 }, { "epoch": 0.06, "learning_rate": 4.89614384180791e-05, "loss": 0.14, "step": 919500 }, { "epoch": 0.06, "learning_rate": 4.896087344632769e-05, "loss": 0.145, "step": 920000 }, { "epoch": 0.06, "learning_rate": 4.896030847457627e-05, "loss": 0.1375, "step": 920500 }, { "epoch": 0.06, "learning_rate": 4.895974350282486e-05, "loss": 0.1351, "step": 921000 }, { "epoch": 0.06, "learning_rate": 4.895917966101695e-05, "loss": 0.1353, "step": 921500 }, { "epoch": 0.06, "learning_rate": 4.895861468926554e-05, "loss": 0.1312, "step": 922000 }, { "epoch": 0.06, "learning_rate": 4.8958049717514124e-05, "loss": 0.1318, "step": 922500 }, { "epoch": 0.06, "learning_rate": 4.8957484745762714e-05, "loss": 0.1333, "step": 923000 }, { "epoch": 0.06, "learning_rate": 4.89569197740113e-05, "loss": 0.1396, "step": 923500 }, { "epoch": 0.06, "learning_rate": 4.895635593220339e-05, "loss": 0.1382, "step": 924000 }, { "epoch": 0.06, "learning_rate": 4.895579096045198e-05, "loss": 0.1331, "step": 924500 }, { "epoch": 0.06, "learning_rate": 4.8955225988700567e-05, "loss": 0.1367, "step": 925000 }, { "epoch": 0.06, "learning_rate": 4.895466101694916e-05, "loss": 0.1413, "step": 925500 }, { "epoch": 0.06, "learning_rate": 4.8954097175141244e-05, "loss": 0.1361, "step": 926000 }, { "epoch": 0.06, "learning_rate": 4.8953532203389835e-05, "loss": 0.1293, "step": 926500 }, { "epoch": 0.06, "learning_rate": 4.895296723163842e-05, "loss": 0.1336, "step": 927000 }, { "epoch": 0.06, "learning_rate": 4.895240225988701e-05, "loss": 0.1374, "step": 927500 }, { "epoch": 0.06, "learning_rate": 4.895183728813559e-05, "loss": 0.1337, "step": 928000 }, { "epoch": 0.06, "learning_rate": 4.895127344632768e-05, "loss": 0.14, "step": 928500 }, { "epoch": 0.06, "learning_rate": 4.895070847457627e-05, "loss": 0.136, "step": 929000 }, { "epoch": 0.06, "learning_rate": 4.895014350282486e-05, "loss": 0.1421, "step": 929500 }, { "epoch": 0.06, "learning_rate": 4.8949578531073445e-05, "loss": 0.1228, "step": 930000 }, { "epoch": 0.06, "learning_rate": 4.8949013559322036e-05, "loss": 0.1371, "step": 930500 }, { "epoch": 0.06, "learning_rate": 4.894844858757062e-05, "loss": 0.1366, "step": 931000 }, { "epoch": 0.06, "learning_rate": 4.8947884745762714e-05, "loss": 0.1432, "step": 931500 }, { "epoch": 0.06, "learning_rate": 4.8947319774011304e-05, "loss": 0.1366, "step": 932000 }, { "epoch": 0.06, "learning_rate": 4.8946754802259895e-05, "loss": 0.1393, "step": 932500 }, { "epoch": 0.06, "learning_rate": 4.894618983050848e-05, "loss": 0.1373, "step": 933000 }, { "epoch": 0.06, "learning_rate": 4.8945625988700566e-05, "loss": 0.1393, "step": 933500 }, { "epoch": 0.06, "learning_rate": 4.8945061016949156e-05, "loss": 0.1374, "step": 934000 }, { "epoch": 0.06, "learning_rate": 4.894449604519774e-05, "loss": 0.1351, "step": 934500 }, { "epoch": 0.06, "learning_rate": 4.894393107344633e-05, "loss": 0.1412, "step": 935000 }, { "epoch": 0.06, "learning_rate": 4.8943366101694915e-05, "loss": 0.1306, "step": 935500 }, { "epoch": 0.06, "learning_rate": 4.8942801129943505e-05, "loss": 0.1322, "step": 936000 }, { "epoch": 0.06, "learning_rate": 4.8942236158192096e-05, "loss": 0.1371, "step": 936500 }, { "epoch": 0.06, "learning_rate": 4.894167118644068e-05, "loss": 0.1364, "step": 937000 }, { "epoch": 0.06, "learning_rate": 4.894110847457628e-05, "loss": 0.1367, "step": 937500 }, { "epoch": 0.06, "learning_rate": 4.894054350282486e-05, "loss": 0.1377, "step": 938000 }, { "epoch": 0.06, "learning_rate": 4.893997853107345e-05, "loss": 0.1304, "step": 938500 }, { "epoch": 0.06, "learning_rate": 4.8939413559322035e-05, "loss": 0.1357, "step": 939000 }, { "epoch": 0.06, "learning_rate": 4.8938848587570626e-05, "loss": 0.1293, "step": 939500 }, { "epoch": 0.06, "learning_rate": 4.8938283615819216e-05, "loss": 0.138, "step": 940000 }, { "epoch": 0.06, "learning_rate": 4.8937719774011303e-05, "loss": 0.1341, "step": 940500 }, { "epoch": 0.06, "learning_rate": 4.893715480225989e-05, "loss": 0.1354, "step": 941000 }, { "epoch": 0.06, "learning_rate": 4.893658983050848e-05, "loss": 0.1338, "step": 941500 }, { "epoch": 0.06, "learning_rate": 4.893602485875706e-05, "loss": 0.1375, "step": 942000 }, { "epoch": 0.06, "learning_rate": 4.8935461016949156e-05, "loss": 0.1331, "step": 942500 }, { "epoch": 0.06, "learning_rate": 4.893489604519774e-05, "loss": 0.1384, "step": 943000 }, { "epoch": 0.06, "learning_rate": 4.893433107344633e-05, "loss": 0.1413, "step": 943500 }, { "epoch": 0.06, "learning_rate": 4.8933766101694914e-05, "loss": 0.1377, "step": 944000 }, { "epoch": 0.06, "learning_rate": 4.8933201129943504e-05, "loss": 0.1315, "step": 944500 }, { "epoch": 0.06, "learning_rate": 4.89326372881356e-05, "loss": 0.1267, "step": 945000 }, { "epoch": 0.06, "learning_rate": 4.8932073446327686e-05, "loss": 0.1385, "step": 945500 }, { "epoch": 0.06, "learning_rate": 4.893150847457627e-05, "loss": 0.1393, "step": 946000 }, { "epoch": 0.06, "learning_rate": 4.893094350282486e-05, "loss": 0.1288, "step": 946500 }, { "epoch": 0.06, "learning_rate": 4.893037853107345e-05, "loss": 0.133, "step": 947000 }, { "epoch": 0.06, "learning_rate": 4.8929813559322034e-05, "loss": 0.1401, "step": 947500 }, { "epoch": 0.06, "learning_rate": 4.8929248587570625e-05, "loss": 0.1423, "step": 948000 }, { "epoch": 0.06, "learning_rate": 4.892868361581921e-05, "loss": 0.1328, "step": 948500 }, { "epoch": 0.06, "learning_rate": 4.89281197740113e-05, "loss": 0.1333, "step": 949000 }, { "epoch": 0.06, "learning_rate": 4.8927554802259887e-05, "loss": 0.1373, "step": 949500 }, { "epoch": 0.06, "learning_rate": 4.892698983050848e-05, "loss": 0.1349, "step": 950000 }, { "epoch": 0.06, "learning_rate": 4.892642485875706e-05, "loss": 0.1326, "step": 950500 }, { "epoch": 0.06, "learning_rate": 4.892585988700565e-05, "loss": 0.1379, "step": 951000 }, { "epoch": 0.06, "learning_rate": 4.8925294915254235e-05, "loss": 0.1315, "step": 951500 }, { "epoch": 0.06, "learning_rate": 4.8924729943502826e-05, "loss": 0.1369, "step": 952000 }, { "epoch": 0.06, "learning_rate": 4.892416497175141e-05, "loss": 0.1381, "step": 952500 }, { "epoch": 0.06, "learning_rate": 4.89236e-05, "loss": 0.1285, "step": 953000 }, { "epoch": 0.06, "learning_rate": 4.8923036158192094e-05, "loss": 0.1308, "step": 953500 }, { "epoch": 0.06, "learning_rate": 4.8922471186440685e-05, "loss": 0.1307, "step": 954000 }, { "epoch": 0.06, "learning_rate": 4.892190621468927e-05, "loss": 0.1349, "step": 954500 }, { "epoch": 0.06, "learning_rate": 4.892134124293786e-05, "loss": 0.14, "step": 955000 }, { "epoch": 0.06, "learning_rate": 4.892077627118644e-05, "loss": 0.1368, "step": 955500 }, { "epoch": 0.06, "learning_rate": 4.892021242937853e-05, "loss": 0.1325, "step": 956000 }, { "epoch": 0.06, "learning_rate": 4.891964745762712e-05, "loss": 0.1246, "step": 956500 }, { "epoch": 0.06, "learning_rate": 4.891908248587571e-05, "loss": 0.1425, "step": 957000 }, { "epoch": 0.06, "learning_rate": 4.8918517514124295e-05, "loss": 0.1385, "step": 957500 }, { "epoch": 0.06, "learning_rate": 4.8917952542372886e-05, "loss": 0.1419, "step": 958000 }, { "epoch": 0.06, "learning_rate": 4.891738870056497e-05, "loss": 0.1354, "step": 958500 }, { "epoch": 0.07, "learning_rate": 4.891682372881356e-05, "loss": 0.1437, "step": 959000 }, { "epoch": 0.07, "learning_rate": 4.891625875706215e-05, "loss": 0.1353, "step": 959500 }, { "epoch": 0.07, "learning_rate": 4.891569378531073e-05, "loss": 0.1349, "step": 960000 }, { "epoch": 0.07, "learning_rate": 4.891512994350283e-05, "loss": 0.1366, "step": 960500 }, { "epoch": 0.07, "learning_rate": 4.891456610169492e-05, "loss": 0.1316, "step": 961000 }, { "epoch": 0.07, "learning_rate": 4.89140011299435e-05, "loss": 0.1339, "step": 961500 }, { "epoch": 0.07, "learning_rate": 4.8913436158192094e-05, "loss": 0.1292, "step": 962000 }, { "epoch": 0.07, "learning_rate": 4.891287118644068e-05, "loss": 0.1409, "step": 962500 }, { "epoch": 0.07, "learning_rate": 4.891230621468927e-05, "loss": 0.1328, "step": 963000 }, { "epoch": 0.07, "learning_rate": 4.891174124293785e-05, "loss": 0.1379, "step": 963500 }, { "epoch": 0.07, "learning_rate": 4.891117627118644e-05, "loss": 0.141, "step": 964000 }, { "epoch": 0.07, "learning_rate": 4.891061129943503e-05, "loss": 0.1316, "step": 964500 }, { "epoch": 0.07, "learning_rate": 4.891004632768362e-05, "loss": 0.1358, "step": 965000 }, { "epoch": 0.07, "learning_rate": 4.890948135593221e-05, "loss": 0.133, "step": 965500 }, { "epoch": 0.07, "learning_rate": 4.8908917514124295e-05, "loss": 0.1376, "step": 966000 }, { "epoch": 0.07, "learning_rate": 4.890835254237288e-05, "loss": 0.1369, "step": 966500 }, { "epoch": 0.07, "learning_rate": 4.890778757062147e-05, "loss": 0.1339, "step": 967000 }, { "epoch": 0.07, "learning_rate": 4.890722259887005e-05, "loss": 0.1313, "step": 967500 }, { "epoch": 0.07, "learning_rate": 4.8906658757062153e-05, "loss": 0.1418, "step": 968000 }, { "epoch": 0.07, "learning_rate": 4.890609378531074e-05, "loss": 0.1356, "step": 968500 }, { "epoch": 0.07, "learning_rate": 4.890552881355933e-05, "loss": 0.1368, "step": 969000 }, { "epoch": 0.07, "learning_rate": 4.890496384180791e-05, "loss": 0.1414, "step": 969500 }, { "epoch": 0.07, "learning_rate": 4.89044e-05, "loss": 0.1378, "step": 970000 }, { "epoch": 0.07, "learning_rate": 4.890383502824859e-05, "loss": 0.1273, "step": 970500 }, { "epoch": 0.07, "learning_rate": 4.890327005649718e-05, "loss": 0.1345, "step": 971000 }, { "epoch": 0.07, "learning_rate": 4.8902705084745764e-05, "loss": 0.1304, "step": 971500 }, { "epoch": 0.07, "learning_rate": 4.8902140112994354e-05, "loss": 0.1383, "step": 972000 }, { "epoch": 0.07, "learning_rate": 4.890157627118644e-05, "loss": 0.1337, "step": 972500 }, { "epoch": 0.07, "learning_rate": 4.890101129943503e-05, "loss": 0.1366, "step": 973000 }, { "epoch": 0.07, "learning_rate": 4.8900446327683616e-05, "loss": 0.1391, "step": 973500 }, { "epoch": 0.07, "learning_rate": 4.8899881355932207e-05, "loss": 0.1333, "step": 974000 }, { "epoch": 0.07, "learning_rate": 4.88993175141243e-05, "loss": 0.1317, "step": 974500 }, { "epoch": 0.07, "learning_rate": 4.8898752542372884e-05, "loss": 0.1332, "step": 975000 }, { "epoch": 0.07, "learning_rate": 4.8898187570621475e-05, "loss": 0.133, "step": 975500 }, { "epoch": 0.07, "learning_rate": 4.889762259887006e-05, "loss": 0.1232, "step": 976000 }, { "epoch": 0.07, "learning_rate": 4.8897058757062146e-05, "loss": 0.1448, "step": 976500 }, { "epoch": 0.07, "learning_rate": 4.8896493785310737e-05, "loss": 0.132, "step": 977000 }, { "epoch": 0.07, "learning_rate": 4.889592881355932e-05, "loss": 0.1328, "step": 977500 }, { "epoch": 0.07, "learning_rate": 4.889536384180791e-05, "loss": 0.1363, "step": 978000 }, { "epoch": 0.07, "learning_rate": 4.88947988700565e-05, "loss": 0.1284, "step": 978500 }, { "epoch": 0.07, "learning_rate": 4.889423502824859e-05, "loss": 0.1341, "step": 979000 }, { "epoch": 0.07, "learning_rate": 4.889367005649718e-05, "loss": 0.1343, "step": 979500 }, { "epoch": 0.07, "learning_rate": 4.889310508474576e-05, "loss": 0.1365, "step": 980000 }, { "epoch": 0.07, "learning_rate": 4.8892540112994354e-05, "loss": 0.1263, "step": 980500 }, { "epoch": 0.07, "learning_rate": 4.889197514124294e-05, "loss": 0.1351, "step": 981000 }, { "epoch": 0.07, "learning_rate": 4.889141129943503e-05, "loss": 0.1347, "step": 981500 }, { "epoch": 0.07, "learning_rate": 4.889084632768362e-05, "loss": 0.1376, "step": 982000 }, { "epoch": 0.07, "learning_rate": 4.8890281355932206e-05, "loss": 0.1349, "step": 982500 }, { "epoch": 0.07, "learning_rate": 4.888971751412429e-05, "loss": 0.1295, "step": 983000 }, { "epoch": 0.07, "learning_rate": 4.8889152542372884e-05, "loss": 0.1441, "step": 983500 }, { "epoch": 0.07, "learning_rate": 4.888858757062147e-05, "loss": 0.1326, "step": 984000 }, { "epoch": 0.07, "learning_rate": 4.888802259887006e-05, "loss": 0.1383, "step": 984500 }, { "epoch": 0.07, "learning_rate": 4.888745762711865e-05, "loss": 0.1275, "step": 985000 }, { "epoch": 0.07, "learning_rate": 4.888689265536723e-05, "loss": 0.1329, "step": 985500 }, { "epoch": 0.07, "learning_rate": 4.888632768361582e-05, "loss": 0.1386, "step": 986000 }, { "epoch": 0.07, "learning_rate": 4.888576271186441e-05, "loss": 0.1404, "step": 986500 }, { "epoch": 0.07, "learning_rate": 4.8885197740113e-05, "loss": 0.1298, "step": 987000 }, { "epoch": 0.07, "learning_rate": 4.8884633898305085e-05, "loss": 0.1364, "step": 987500 }, { "epoch": 0.07, "learning_rate": 4.8884068926553675e-05, "loss": 0.1311, "step": 988000 }, { "epoch": 0.07, "learning_rate": 4.888350395480226e-05, "loss": 0.1376, "step": 988500 }, { "epoch": 0.07, "learning_rate": 4.888293898305085e-05, "loss": 0.1248, "step": 989000 }, { "epoch": 0.07, "learning_rate": 4.888237401129943e-05, "loss": 0.1327, "step": 989500 }, { "epoch": 0.07, "learning_rate": 4.888181016949153e-05, "loss": 0.1323, "step": 990000 }, { "epoch": 0.07, "learning_rate": 4.888124519774012e-05, "loss": 0.1362, "step": 990500 }, { "epoch": 0.07, "learning_rate": 4.88806802259887e-05, "loss": 0.1393, "step": 991000 }, { "epoch": 0.07, "learning_rate": 4.888011525423729e-05, "loss": 0.129, "step": 991500 }, { "epoch": 0.07, "learning_rate": 4.8879550282485876e-05, "loss": 0.1338, "step": 992000 }, { "epoch": 0.07, "learning_rate": 4.887898531073447e-05, "loss": 0.1271, "step": 992500 }, { "epoch": 0.07, "learning_rate": 4.8878421468926554e-05, "loss": 0.1371, "step": 993000 }, { "epoch": 0.07, "learning_rate": 4.8877856497175144e-05, "loss": 0.1297, "step": 993500 }, { "epoch": 0.07, "learning_rate": 4.887729152542373e-05, "loss": 0.1311, "step": 994000 }, { "epoch": 0.07, "learning_rate": 4.887672655367232e-05, "loss": 0.1259, "step": 994500 }, { "epoch": 0.07, "learning_rate": 4.88761615819209e-05, "loss": 0.1369, "step": 995000 }, { "epoch": 0.07, "learning_rate": 4.8875597740113e-05, "loss": 0.1407, "step": 995500 }, { "epoch": 0.07, "learning_rate": 4.887503276836158e-05, "loss": 0.1365, "step": 996000 }, { "epoch": 0.07, "learning_rate": 4.887446779661017e-05, "loss": 0.1368, "step": 996500 }, { "epoch": 0.07, "learning_rate": 4.887390282485876e-05, "loss": 0.1273, "step": 997000 }, { "epoch": 0.07, "learning_rate": 4.8873337853107345e-05, "loss": 0.1317, "step": 997500 }, { "epoch": 0.07, "learning_rate": 4.8872772881355936e-05, "loss": 0.1252, "step": 998000 }, { "epoch": 0.07, "learning_rate": 4.887220903954802e-05, "loss": 0.1274, "step": 998500 }, { "epoch": 0.07, "learning_rate": 4.8871644067796614e-05, "loss": 0.1267, "step": 999000 }, { "epoch": 0.07, "learning_rate": 4.8871079096045204e-05, "loss": 0.1359, "step": 999500 }, { "epoch": 0.07, "learning_rate": 4.887051412429379e-05, "loss": 0.1305, "step": 1000000 }, { "epoch": 0.07, "learning_rate": 4.8869950282485875e-05, "loss": 0.1324, "step": 1000500 }, { "epoch": 0.07, "learning_rate": 4.8869385310734466e-05, "loss": 0.1318, "step": 1001000 }, { "epoch": 0.07, "learning_rate": 4.886882033898305e-05, "loss": 0.1351, "step": 1001500 }, { "epoch": 0.07, "learning_rate": 4.886825536723164e-05, "loss": 0.1306, "step": 1002000 }, { "epoch": 0.07, "learning_rate": 4.8867691525423734e-05, "loss": 0.1317, "step": 1002500 }, { "epoch": 0.07, "learning_rate": 4.886712655367232e-05, "loss": 0.1301, "step": 1003000 }, { "epoch": 0.07, "learning_rate": 4.886656158192091e-05, "loss": 0.1323, "step": 1003500 }, { "epoch": 0.07, "learning_rate": 4.886599661016949e-05, "loss": 0.1273, "step": 1004000 }, { "epoch": 0.07, "learning_rate": 4.886543163841808e-05, "loss": 0.1412, "step": 1004500 }, { "epoch": 0.07, "learning_rate": 4.886486779661017e-05, "loss": 0.1353, "step": 1005000 }, { "epoch": 0.07, "learning_rate": 4.886430282485876e-05, "loss": 0.1344, "step": 1005500 }, { "epoch": 0.07, "learning_rate": 4.8863737853107345e-05, "loss": 0.1315, "step": 1006000 }, { "epoch": 0.07, "learning_rate": 4.8863172881355935e-05, "loss": 0.1339, "step": 1006500 }, { "epoch": 0.07, "learning_rate": 4.886260903954802e-05, "loss": 0.1287, "step": 1007000 }, { "epoch": 0.07, "learning_rate": 4.886204406779661e-05, "loss": 0.1435, "step": 1007500 }, { "epoch": 0.07, "learning_rate": 4.88614790960452e-05, "loss": 0.1376, "step": 1008000 }, { "epoch": 0.07, "learning_rate": 4.886091412429379e-05, "loss": 0.1362, "step": 1008500 }, { "epoch": 0.07, "learning_rate": 4.886034915254237e-05, "loss": 0.129, "step": 1009000 }, { "epoch": 0.07, "learning_rate": 4.885978418079096e-05, "loss": 0.1389, "step": 1009500 }, { "epoch": 0.07, "learning_rate": 4.8859220338983056e-05, "loss": 0.1349, "step": 1010000 }, { "epoch": 0.07, "learning_rate": 4.885865536723164e-05, "loss": 0.1347, "step": 1010500 }, { "epoch": 0.07, "learning_rate": 4.885809039548023e-05, "loss": 0.1416, "step": 1011000 }, { "epoch": 0.07, "learning_rate": 4.8857525423728814e-05, "loss": 0.1295, "step": 1011500 }, { "epoch": 0.07, "learning_rate": 4.8856960451977405e-05, "loss": 0.1306, "step": 1012000 }, { "epoch": 0.07, "learning_rate": 4.885639661016949e-05, "loss": 0.1337, "step": 1012500 }, { "epoch": 0.07, "learning_rate": 4.885583163841808e-05, "loss": 0.1302, "step": 1013000 }, { "epoch": 0.07, "learning_rate": 4.885526666666667e-05, "loss": 0.133, "step": 1013500 }, { "epoch": 0.07, "learning_rate": 4.885470169491526e-05, "loss": 0.1291, "step": 1014000 }, { "epoch": 0.07, "learning_rate": 4.8854137853107344e-05, "loss": 0.1348, "step": 1014500 }, { "epoch": 0.07, "learning_rate": 4.8853572881355935e-05, "loss": 0.1357, "step": 1015000 }, { "epoch": 0.07, "learning_rate": 4.885300790960452e-05, "loss": 0.1343, "step": 1015500 }, { "epoch": 0.07, "learning_rate": 4.885244293785311e-05, "loss": 0.1345, "step": 1016000 }, { "epoch": 0.07, "learning_rate": 4.885187796610169e-05, "loss": 0.1318, "step": 1016500 }, { "epoch": 0.07, "learning_rate": 4.885131299435028e-05, "loss": 0.1336, "step": 1017000 }, { "epoch": 0.07, "learning_rate": 4.8850748022598874e-05, "loss": 0.1345, "step": 1017500 }, { "epoch": 0.07, "learning_rate": 4.885018418079096e-05, "loss": 0.1305, "step": 1018000 }, { "epoch": 0.07, "learning_rate": 4.884961920903955e-05, "loss": 0.1364, "step": 1018500 }, { "epoch": 0.07, "learning_rate": 4.8849054237288136e-05, "loss": 0.1383, "step": 1019000 }, { "epoch": 0.07, "learning_rate": 4.8848489265536726e-05, "loss": 0.1359, "step": 1019500 }, { "epoch": 0.07, "learning_rate": 4.884792429378531e-05, "loss": 0.1391, "step": 1020000 }, { "epoch": 0.07, "learning_rate": 4.8847360451977404e-05, "loss": 0.1342, "step": 1020500 }, { "epoch": 0.07, "learning_rate": 4.8846795480225994e-05, "loss": 0.1327, "step": 1021000 }, { "epoch": 0.07, "learning_rate": 4.884623050847458e-05, "loss": 0.1309, "step": 1021500 }, { "epoch": 0.07, "learning_rate": 4.884566553672317e-05, "loss": 0.1271, "step": 1022000 }, { "epoch": 0.07, "learning_rate": 4.8845101694915256e-05, "loss": 0.1319, "step": 1022500 }, { "epoch": 0.07, "learning_rate": 4.884453672316384e-05, "loss": 0.1306, "step": 1023000 }, { "epoch": 0.07, "learning_rate": 4.884397175141243e-05, "loss": 0.1357, "step": 1023500 }, { "epoch": 0.07, "learning_rate": 4.884340677966102e-05, "loss": 0.1292, "step": 1024000 }, { "epoch": 0.07, "learning_rate": 4.8842841807909605e-05, "loss": 0.1455, "step": 1024500 }, { "epoch": 0.07, "learning_rate": 4.8842276836158195e-05, "loss": 0.1345, "step": 1025000 }, { "epoch": 0.07, "learning_rate": 4.884171186440678e-05, "loss": 0.135, "step": 1025500 }, { "epoch": 0.07, "learning_rate": 4.884114802259887e-05, "loss": 0.1388, "step": 1026000 }, { "epoch": 0.07, "learning_rate": 4.8840583050847464e-05, "loss": 0.1322, "step": 1026500 }, { "epoch": 0.07, "learning_rate": 4.884001807909605e-05, "loss": 0.1311, "step": 1027000 }, { "epoch": 0.07, "learning_rate": 4.883945310734464e-05, "loss": 0.1395, "step": 1027500 }, { "epoch": 0.07, "learning_rate": 4.883888813559322e-05, "loss": 0.1296, "step": 1028000 }, { "epoch": 0.07, "learning_rate": 4.8838324293785316e-05, "loss": 0.138, "step": 1028500 }, { "epoch": 0.07, "learning_rate": 4.88377593220339e-05, "loss": 0.1333, "step": 1029000 }, { "epoch": 0.07, "learning_rate": 4.883719435028249e-05, "loss": 0.1348, "step": 1029500 }, { "epoch": 0.07, "learning_rate": 4.8836629378531074e-05, "loss": 0.1283, "step": 1030000 }, { "epoch": 0.07, "learning_rate": 4.8836064406779665e-05, "loss": 0.1372, "step": 1030500 }, { "epoch": 0.07, "learning_rate": 4.883550056497175e-05, "loss": 0.1304, "step": 1031000 }, { "epoch": 0.07, "learning_rate": 4.883493559322034e-05, "loss": 0.1369, "step": 1031500 }, { "epoch": 0.07, "learning_rate": 4.8834370621468926e-05, "loss": 0.1281, "step": 1032000 }, { "epoch": 0.07, "learning_rate": 4.883380564971752e-05, "loss": 0.1296, "step": 1032500 }, { "epoch": 0.07, "learning_rate": 4.88332406779661e-05, "loss": 0.1353, "step": 1033000 }, { "epoch": 0.07, "learning_rate": 4.8832676836158195e-05, "loss": 0.1503, "step": 1033500 }, { "epoch": 0.07, "learning_rate": 4.8832111864406785e-05, "loss": 0.1324, "step": 1034000 }, { "epoch": 0.07, "learning_rate": 4.883154689265537e-05, "loss": 0.1332, "step": 1034500 }, { "epoch": 0.07, "learning_rate": 4.883098192090396e-05, "loss": 0.1296, "step": 1035000 }, { "epoch": 0.07, "learning_rate": 4.883041807909605e-05, "loss": 0.1331, "step": 1035500 }, { "epoch": 0.07, "learning_rate": 4.882985310734464e-05, "loss": 0.1338, "step": 1036000 }, { "epoch": 0.07, "learning_rate": 4.882928813559322e-05, "loss": 0.1319, "step": 1036500 }, { "epoch": 0.07, "learning_rate": 4.882872316384181e-05, "loss": 0.1392, "step": 1037000 }, { "epoch": 0.07, "learning_rate": 4.8828158192090396e-05, "loss": 0.1377, "step": 1037500 }, { "epoch": 0.07, "learning_rate": 4.8827593220338986e-05, "loss": 0.1314, "step": 1038000 }, { "epoch": 0.07, "learning_rate": 4.8827029378531073e-05, "loss": 0.1378, "step": 1038500 }, { "epoch": 0.07, "learning_rate": 4.8826464406779664e-05, "loss": 0.1271, "step": 1039000 }, { "epoch": 0.07, "learning_rate": 4.882589943502825e-05, "loss": 0.1364, "step": 1039500 }, { "epoch": 0.07, "learning_rate": 4.882533446327684e-05, "loss": 0.1375, "step": 1040000 }, { "epoch": 0.07, "learning_rate": 4.882476949152542e-05, "loss": 0.1295, "step": 1040500 }, { "epoch": 0.07, "learning_rate": 4.8824205649717516e-05, "loss": 0.1309, "step": 1041000 }, { "epoch": 0.07, "learning_rate": 4.882364067796611e-05, "loss": 0.1439, "step": 1041500 }, { "epoch": 0.07, "learning_rate": 4.882307570621469e-05, "loss": 0.1297, "step": 1042000 }, { "epoch": 0.07, "learning_rate": 4.8822511864406785e-05, "loss": 0.1307, "step": 1042500 }, { "epoch": 0.07, "learning_rate": 4.882194689265537e-05, "loss": 0.1346, "step": 1043000 }, { "epoch": 0.07, "learning_rate": 4.882138192090396e-05, "loss": 0.1324, "step": 1043500 }, { "epoch": 0.07, "learning_rate": 4.882081694915254e-05, "loss": 0.1269, "step": 1044000 }, { "epoch": 0.07, "learning_rate": 4.882025197740113e-05, "loss": 0.1357, "step": 1044500 }, { "epoch": 0.07, "learning_rate": 4.8819687005649724e-05, "loss": 0.1282, "step": 1045000 }, { "epoch": 0.07, "learning_rate": 4.881912203389831e-05, "loss": 0.1307, "step": 1045500 }, { "epoch": 0.07, "learning_rate": 4.88185570621469e-05, "loss": 0.1354, "step": 1046000 }, { "epoch": 0.07, "learning_rate": 4.881799209039548e-05, "loss": 0.1308, "step": 1046500 }, { "epoch": 0.07, "learning_rate": 4.881742711864407e-05, "loss": 0.124, "step": 1047000 }, { "epoch": 0.07, "learning_rate": 4.881686440677966e-05, "loss": 0.1251, "step": 1047500 }, { "epoch": 0.07, "learning_rate": 4.8816299435028254e-05, "loss": 0.1261, "step": 1048000 }, { "epoch": 0.07, "learning_rate": 4.881573446327684e-05, "loss": 0.1329, "step": 1048500 }, { "epoch": 0.07, "learning_rate": 4.881516949152543e-05, "loss": 0.132, "step": 1049000 }, { "epoch": 0.07, "learning_rate": 4.881460451977401e-05, "loss": 0.1322, "step": 1049500 }, { "epoch": 0.07, "learning_rate": 4.88140395480226e-05, "loss": 0.1378, "step": 1050000 }, { "epoch": 0.07, "learning_rate": 4.8813474576271186e-05, "loss": 0.1319, "step": 1050500 }, { "epoch": 0.07, "learning_rate": 4.881290960451978e-05, "loss": 0.1337, "step": 1051000 }, { "epoch": 0.07, "learning_rate": 4.881234463276836e-05, "loss": 0.138, "step": 1051500 }, { "epoch": 0.07, "learning_rate": 4.8811780790960455e-05, "loss": 0.1401, "step": 1052000 }, { "epoch": 0.07, "learning_rate": 4.8811215819209045e-05, "loss": 0.1326, "step": 1052500 }, { "epoch": 0.07, "learning_rate": 4.881065084745763e-05, "loss": 0.1257, "step": 1053000 }, { "epoch": 0.07, "learning_rate": 4.881008587570622e-05, "loss": 0.1339, "step": 1053500 }, { "epoch": 0.07, "learning_rate": 4.8809520903954804e-05, "loss": 0.1323, "step": 1054000 }, { "epoch": 0.07, "learning_rate": 4.8808955932203394e-05, "loss": 0.1296, "step": 1054500 }, { "epoch": 0.07, "learning_rate": 4.880839209039548e-05, "loss": 0.1294, "step": 1055000 }, { "epoch": 0.07, "learning_rate": 4.880782711864407e-05, "loss": 0.1279, "step": 1055500 }, { "epoch": 0.07, "learning_rate": 4.8807262146892656e-05, "loss": 0.1281, "step": 1056000 }, { "epoch": 0.07, "learning_rate": 4.8806697175141246e-05, "loss": 0.1307, "step": 1056500 }, { "epoch": 0.07, "learning_rate": 4.880613220338983e-05, "loss": 0.134, "step": 1057000 }, { "epoch": 0.07, "learning_rate": 4.880556723163842e-05, "loss": 0.13, "step": 1057500 }, { "epoch": 0.07, "learning_rate": 4.8805003389830515e-05, "loss": 0.1291, "step": 1058000 }, { "epoch": 0.07, "learning_rate": 4.88044384180791e-05, "loss": 0.1343, "step": 1058500 }, { "epoch": 0.07, "learning_rate": 4.880387344632769e-05, "loss": 0.141, "step": 1059000 }, { "epoch": 0.07, "learning_rate": 4.880330847457627e-05, "loss": 0.1308, "step": 1059500 }, { "epoch": 0.07, "learning_rate": 4.8802743502824863e-05, "loss": 0.1343, "step": 1060000 }, { "epoch": 0.07, "learning_rate": 4.880217966101695e-05, "loss": 0.1242, "step": 1060500 }, { "epoch": 0.07, "learning_rate": 4.880161468926554e-05, "loss": 0.141, "step": 1061000 }, { "epoch": 0.07, "learning_rate": 4.8801049717514125e-05, "loss": 0.134, "step": 1061500 }, { "epoch": 0.07, "learning_rate": 4.8800484745762716e-05, "loss": 0.1373, "step": 1062000 }, { "epoch": 0.07, "learning_rate": 4.87999209039548e-05, "loss": 0.1283, "step": 1062500 }, { "epoch": 0.07, "learning_rate": 4.8799355932203393e-05, "loss": 0.1302, "step": 1063000 }, { "epoch": 0.07, "learning_rate": 4.879879096045198e-05, "loss": 0.1322, "step": 1063500 }, { "epoch": 0.07, "learning_rate": 4.879822598870057e-05, "loss": 0.1299, "step": 1064000 }, { "epoch": 0.07, "learning_rate": 4.879766214689266e-05, "loss": 0.1283, "step": 1064500 }, { "epoch": 0.07, "learning_rate": 4.8797097175141246e-05, "loss": 0.1386, "step": 1065000 }, { "epoch": 0.07, "learning_rate": 4.8796532203389836e-05, "loss": 0.1363, "step": 1065500 }, { "epoch": 0.07, "learning_rate": 4.879596723163842e-05, "loss": 0.1325, "step": 1066000 }, { "epoch": 0.07, "learning_rate": 4.879540225988701e-05, "loss": 0.1413, "step": 1066500 }, { "epoch": 0.07, "learning_rate": 4.87948384180791e-05, "loss": 0.1319, "step": 1067000 }, { "epoch": 0.07, "learning_rate": 4.879427344632769e-05, "loss": 0.1231, "step": 1067500 }, { "epoch": 0.07, "learning_rate": 4.879370847457627e-05, "loss": 0.132, "step": 1068000 }, { "epoch": 0.07, "learning_rate": 4.879314350282486e-05, "loss": 0.1357, "step": 1068500 }, { "epoch": 0.07, "learning_rate": 4.8792578531073447e-05, "loss": 0.128, "step": 1069000 }, { "epoch": 0.07, "learning_rate": 4.879201468926554e-05, "loss": 0.1279, "step": 1069500 }, { "epoch": 0.07, "learning_rate": 4.8791449717514124e-05, "loss": 0.1292, "step": 1070000 }, { "epoch": 0.07, "learning_rate": 4.8790884745762715e-05, "loss": 0.1377, "step": 1070500 }, { "epoch": 0.07, "learning_rate": 4.87903197740113e-05, "loss": 0.1283, "step": 1071000 }, { "epoch": 0.07, "learning_rate": 4.878975480225989e-05, "loss": 0.1341, "step": 1071500 }, { "epoch": 0.07, "learning_rate": 4.878918983050847e-05, "loss": 0.1305, "step": 1072000 }, { "epoch": 0.07, "learning_rate": 4.8788624858757064e-05, "loss": 0.1351, "step": 1072500 }, { "epoch": 0.07, "learning_rate": 4.878805988700565e-05, "loss": 0.1249, "step": 1073000 }, { "epoch": 0.07, "learning_rate": 4.878749604519774e-05, "loss": 0.1289, "step": 1073500 }, { "epoch": 0.07, "learning_rate": 4.8786932203389835e-05, "loss": 0.132, "step": 1074000 }, { "epoch": 0.07, "learning_rate": 4.878636723163842e-05, "loss": 0.1351, "step": 1074500 }, { "epoch": 0.07, "learning_rate": 4.878580225988701e-05, "loss": 0.1279, "step": 1075000 }, { "epoch": 0.07, "learning_rate": 4.8785237288135594e-05, "loss": 0.1318, "step": 1075500 }, { "epoch": 0.07, "learning_rate": 4.8784672316384184e-05, "loss": 0.1347, "step": 1076000 }, { "epoch": 0.07, "learning_rate": 4.8784107344632775e-05, "loss": 0.1299, "step": 1076500 }, { "epoch": 0.07, "learning_rate": 4.878354350282486e-05, "loss": 0.1273, "step": 1077000 }, { "epoch": 0.07, "learning_rate": 4.8782978531073446e-05, "loss": 0.1285, "step": 1077500 }, { "epoch": 0.07, "learning_rate": 4.8782413559322036e-05, "loss": 0.1315, "step": 1078000 }, { "epoch": 0.07, "learning_rate": 4.878184858757062e-05, "loss": 0.1359, "step": 1078500 }, { "epoch": 0.07, "learning_rate": 4.878128361581921e-05, "loss": 0.1269, "step": 1079000 }, { "epoch": 0.07, "learning_rate": 4.8780719774011305e-05, "loss": 0.1339, "step": 1079500 }, { "epoch": 0.07, "learning_rate": 4.878015480225989e-05, "loss": 0.1269, "step": 1080000 }, { "epoch": 0.07, "learning_rate": 4.877958983050848e-05, "loss": 0.1257, "step": 1080500 }, { "epoch": 0.07, "learning_rate": 4.877902485875707e-05, "loss": 0.1214, "step": 1081000 }, { "epoch": 0.07, "learning_rate": 4.8778459887005654e-05, "loss": 0.1332, "step": 1081500 }, { "epoch": 0.07, "learning_rate": 4.877789604519774e-05, "loss": 0.1296, "step": 1082000 }, { "epoch": 0.07, "learning_rate": 4.877733107344633e-05, "loss": 0.1367, "step": 1082500 }, { "epoch": 0.07, "learning_rate": 4.8776766101694915e-05, "loss": 0.1337, "step": 1083000 }, { "epoch": 0.07, "learning_rate": 4.8776201129943506e-05, "loss": 0.1281, "step": 1083500 }, { "epoch": 0.07, "learning_rate": 4.8775636158192096e-05, "loss": 0.1325, "step": 1084000 }, { "epoch": 0.07, "learning_rate": 4.8775072316384184e-05, "loss": 0.136, "step": 1084500 }, { "epoch": 0.07, "learning_rate": 4.877450734463277e-05, "loss": 0.1342, "step": 1085000 }, { "epoch": 0.07, "learning_rate": 4.877394237288136e-05, "loss": 0.1302, "step": 1085500 }, { "epoch": 0.07, "learning_rate": 4.877337740112994e-05, "loss": 0.1326, "step": 1086000 }, { "epoch": 0.07, "learning_rate": 4.877281242937853e-05, "loss": 0.1352, "step": 1086500 }, { "epoch": 0.07, "learning_rate": 4.8772248587570626e-05, "loss": 0.1295, "step": 1087000 }, { "epoch": 0.07, "learning_rate": 4.877168361581922e-05, "loss": 0.1285, "step": 1087500 }, { "epoch": 0.07, "learning_rate": 4.87711186440678e-05, "loss": 0.1352, "step": 1088000 }, { "epoch": 0.07, "learning_rate": 4.877055367231639e-05, "loss": 0.1317, "step": 1088500 }, { "epoch": 0.07, "learning_rate": 4.8769988700564975e-05, "loss": 0.1283, "step": 1089000 }, { "epoch": 0.07, "learning_rate": 4.876942485875706e-05, "loss": 0.1278, "step": 1089500 }, { "epoch": 0.07, "learning_rate": 4.876885988700565e-05, "loss": 0.1306, "step": 1090000 }, { "epoch": 0.07, "learning_rate": 4.8768294915254243e-05, "loss": 0.1301, "step": 1090500 }, { "epoch": 0.07, "learning_rate": 4.876772994350283e-05, "loss": 0.1334, "step": 1091000 }, { "epoch": 0.07, "learning_rate": 4.876716497175142e-05, "loss": 0.1302, "step": 1091500 }, { "epoch": 0.07, "learning_rate": 4.87666e-05, "loss": 0.1272, "step": 1092000 }, { "epoch": 0.07, "learning_rate": 4.876603615819209e-05, "loss": 0.135, "step": 1092500 }, { "epoch": 0.07, "learning_rate": 4.876547118644068e-05, "loss": 0.1291, "step": 1093000 }, { "epoch": 0.07, "learning_rate": 4.876490621468926e-05, "loss": 0.1308, "step": 1093500 }, { "epoch": 0.07, "learning_rate": 4.8764341242937854e-05, "loss": 0.122, "step": 1094000 }, { "epoch": 0.07, "learning_rate": 4.8763776271186444e-05, "loss": 0.1375, "step": 1094500 }, { "epoch": 0.07, "learning_rate": 4.876321129943503e-05, "loss": 0.1313, "step": 1095000 }, { "epoch": 0.07, "learning_rate": 4.876264745762712e-05, "loss": 0.1245, "step": 1095500 }, { "epoch": 0.07, "learning_rate": 4.876208248587571e-05, "loss": 0.1374, "step": 1096000 }, { "epoch": 0.07, "learning_rate": 4.8761517514124297e-05, "loss": 0.1266, "step": 1096500 }, { "epoch": 0.07, "learning_rate": 4.876095254237289e-05, "loss": 0.1316, "step": 1097000 }, { "epoch": 0.07, "learning_rate": 4.876038757062147e-05, "loss": 0.1307, "step": 1097500 }, { "epoch": 0.07, "learning_rate": 4.875982259887006e-05, "loss": 0.1234, "step": 1098000 }, { "epoch": 0.07, "learning_rate": 4.8759257627118645e-05, "loss": 0.1318, "step": 1098500 }, { "epoch": 0.07, "learning_rate": 4.8758692655367236e-05, "loss": 0.1271, "step": 1099000 }, { "epoch": 0.07, "learning_rate": 4.875812881355932e-05, "loss": 0.1351, "step": 1099500 }, { "epoch": 0.07, "learning_rate": 4.875756497175141e-05, "loss": 0.1328, "step": 1100000 }, { "epoch": 0.07, "learning_rate": 4.8757e-05, "loss": 0.1288, "step": 1100500 }, { "epoch": 0.07, "learning_rate": 4.875643502824859e-05, "loss": 0.1222, "step": 1101000 }, { "epoch": 0.07, "learning_rate": 4.8755870056497175e-05, "loss": 0.1337, "step": 1101500 }, { "epoch": 0.07, "learning_rate": 4.8755305084745766e-05, "loss": 0.133, "step": 1102000 }, { "epoch": 0.07, "learning_rate": 4.875474124293786e-05, "loss": 0.1233, "step": 1102500 }, { "epoch": 0.07, "learning_rate": 4.8754176271186444e-05, "loss": 0.1247, "step": 1103000 }, { "epoch": 0.07, "learning_rate": 4.8753611299435034e-05, "loss": 0.1348, "step": 1103500 }, { "epoch": 0.07, "learning_rate": 4.875304632768362e-05, "loss": 0.1373, "step": 1104000 }, { "epoch": 0.07, "learning_rate": 4.875248248587571e-05, "loss": 0.1337, "step": 1104500 }, { "epoch": 0.07, "learning_rate": 4.8751917514124296e-05, "loss": 0.1316, "step": 1105000 }, { "epoch": 0.07, "learning_rate": 4.8751352542372886e-05, "loss": 0.1273, "step": 1105500 }, { "epoch": 0.07, "learning_rate": 4.875078757062147e-05, "loss": 0.1295, "step": 1106000 }, { "epoch": 0.08, "learning_rate": 4.875022259887006e-05, "loss": 0.1344, "step": 1106500 }, { "epoch": 0.08, "learning_rate": 4.874965875706215e-05, "loss": 0.1289, "step": 1107000 }, { "epoch": 0.08, "learning_rate": 4.874909378531073e-05, "loss": 0.135, "step": 1107500 }, { "epoch": 0.08, "learning_rate": 4.874852881355932e-05, "loss": 0.1257, "step": 1108000 }, { "epoch": 0.08, "learning_rate": 4.874796384180791e-05, "loss": 0.1265, "step": 1108500 }, { "epoch": 0.08, "learning_rate": 4.874740000000001e-05, "loss": 0.1226, "step": 1109000 }, { "epoch": 0.08, "learning_rate": 4.874683502824859e-05, "loss": 0.1336, "step": 1109500 }, { "epoch": 0.08, "learning_rate": 4.874627005649718e-05, "loss": 0.1274, "step": 1110000 }, { "epoch": 0.08, "learning_rate": 4.8745705084745765e-05, "loss": 0.128, "step": 1110500 }, { "epoch": 0.08, "learning_rate": 4.8745140112994356e-05, "loss": 0.1338, "step": 1111000 }, { "epoch": 0.08, "learning_rate": 4.874457627118644e-05, "loss": 0.1278, "step": 1111500 }, { "epoch": 0.08, "learning_rate": 4.8744011299435034e-05, "loss": 0.1304, "step": 1112000 }, { "epoch": 0.08, "learning_rate": 4.874344632768362e-05, "loss": 0.127, "step": 1112500 }, { "epoch": 0.08, "learning_rate": 4.874288135593221e-05, "loss": 0.1292, "step": 1113000 }, { "epoch": 0.08, "learning_rate": 4.874231638418079e-05, "loss": 0.1283, "step": 1113500 }, { "epoch": 0.08, "learning_rate": 4.874175141242938e-05, "loss": 0.1305, "step": 1114000 }, { "epoch": 0.08, "learning_rate": 4.8741186440677966e-05, "loss": 0.1329, "step": 1114500 }, { "epoch": 0.08, "learning_rate": 4.874062259887006e-05, "loss": 0.1288, "step": 1115000 }, { "epoch": 0.08, "learning_rate": 4.8740057627118644e-05, "loss": 0.1265, "step": 1115500 }, { "epoch": 0.08, "learning_rate": 4.8739492655367234e-05, "loss": 0.1265, "step": 1116000 }, { "epoch": 0.08, "learning_rate": 4.873892768361582e-05, "loss": 0.1302, "step": 1116500 }, { "epoch": 0.08, "learning_rate": 4.873836271186441e-05, "loss": 0.1305, "step": 1117000 }, { "epoch": 0.08, "learning_rate": 4.873779774011299e-05, "loss": 0.1292, "step": 1117500 }, { "epoch": 0.08, "learning_rate": 4.873723389830509e-05, "loss": 0.14, "step": 1118000 }, { "epoch": 0.08, "learning_rate": 4.873666892655368e-05, "loss": 0.1345, "step": 1118500 }, { "epoch": 0.08, "learning_rate": 4.873610395480227e-05, "loss": 0.1322, "step": 1119000 }, { "epoch": 0.08, "learning_rate": 4.8735540112994355e-05, "loss": 0.1265, "step": 1119500 }, { "epoch": 0.08, "learning_rate": 4.873497514124294e-05, "loss": 0.1288, "step": 1120000 }, { "epoch": 0.08, "learning_rate": 4.873441016949153e-05, "loss": 0.1208, "step": 1120500 }, { "epoch": 0.08, "learning_rate": 4.873384519774011e-05, "loss": 0.1272, "step": 1121000 }, { "epoch": 0.08, "learning_rate": 4.8733280225988704e-05, "loss": 0.1243, "step": 1121500 }, { "epoch": 0.08, "learning_rate": 4.873271525423729e-05, "loss": 0.1319, "step": 1122000 }, { "epoch": 0.08, "learning_rate": 4.873215028248588e-05, "loss": 0.1362, "step": 1122500 }, { "epoch": 0.08, "learning_rate": 4.873158531073447e-05, "loss": 0.1346, "step": 1123000 }, { "epoch": 0.08, "learning_rate": 4.873102033898305e-05, "loss": 0.1297, "step": 1123500 }, { "epoch": 0.08, "learning_rate": 4.873045649717514e-05, "loss": 0.1345, "step": 1124000 }, { "epoch": 0.08, "learning_rate": 4.872989152542373e-05, "loss": 0.1288, "step": 1124500 }, { "epoch": 0.08, "learning_rate": 4.8729326553672314e-05, "loss": 0.1249, "step": 1125000 }, { "epoch": 0.08, "learning_rate": 4.8728761581920905e-05, "loss": 0.1259, "step": 1125500 }, { "epoch": 0.08, "learning_rate": 4.8728196610169495e-05, "loss": 0.1334, "step": 1126000 }, { "epoch": 0.08, "learning_rate": 4.872763163841808e-05, "loss": 0.1357, "step": 1126500 }, { "epoch": 0.08, "learning_rate": 4.872706666666667e-05, "loss": 0.1282, "step": 1127000 }, { "epoch": 0.08, "learning_rate": 4.8726502824858764e-05, "loss": 0.1397, "step": 1127500 }, { "epoch": 0.08, "learning_rate": 4.872593785310735e-05, "loss": 0.1244, "step": 1128000 }, { "epoch": 0.08, "learning_rate": 4.872537288135594e-05, "loss": 0.1324, "step": 1128500 }, { "epoch": 0.08, "learning_rate": 4.872480790960452e-05, "loss": 0.1343, "step": 1129000 }, { "epoch": 0.08, "learning_rate": 4.8724244067796616e-05, "loss": 0.1223, "step": 1129500 }, { "epoch": 0.08, "learning_rate": 4.87236790960452e-05, "loss": 0.1335, "step": 1130000 }, { "epoch": 0.08, "learning_rate": 4.872311412429379e-05, "loss": 0.1265, "step": 1130500 }, { "epoch": 0.08, "learning_rate": 4.8722549152542374e-05, "loss": 0.1305, "step": 1131000 }, { "epoch": 0.08, "learning_rate": 4.8721984180790965e-05, "loss": 0.1272, "step": 1131500 }, { "epoch": 0.08, "learning_rate": 4.872142033898305e-05, "loss": 0.1343, "step": 1132000 }, { "epoch": 0.08, "learning_rate": 4.8720855367231636e-05, "loss": 0.125, "step": 1132500 }, { "epoch": 0.08, "learning_rate": 4.8720290395480226e-05, "loss": 0.1344, "step": 1133000 }, { "epoch": 0.08, "learning_rate": 4.871972542372882e-05, "loss": 0.1294, "step": 1133500 }, { "epoch": 0.08, "learning_rate": 4.87191604519774e-05, "loss": 0.1277, "step": 1134000 }, { "epoch": 0.08, "learning_rate": 4.8718596610169495e-05, "loss": 0.1335, "step": 1134500 }, { "epoch": 0.08, "learning_rate": 4.8718031638418085e-05, "loss": 0.1323, "step": 1135000 }, { "epoch": 0.08, "learning_rate": 4.871746666666667e-05, "loss": 0.1327, "step": 1135500 }, { "epoch": 0.08, "learning_rate": 4.871690169491526e-05, "loss": 0.1231, "step": 1136000 }, { "epoch": 0.08, "learning_rate": 4.871633785310735e-05, "loss": 0.1329, "step": 1136500 }, { "epoch": 0.08, "learning_rate": 4.871577288135594e-05, "loss": 0.136, "step": 1137000 }, { "epoch": 0.08, "learning_rate": 4.871520790960452e-05, "loss": 0.1275, "step": 1137500 }, { "epoch": 0.08, "learning_rate": 4.871464293785311e-05, "loss": 0.126, "step": 1138000 }, { "epoch": 0.08, "learning_rate": 4.8714077966101696e-05, "loss": 0.134, "step": 1138500 }, { "epoch": 0.08, "learning_rate": 4.871351412429378e-05, "loss": 0.1352, "step": 1139000 }, { "epoch": 0.08, "learning_rate": 4.871294915254237e-05, "loss": 0.126, "step": 1139500 }, { "epoch": 0.08, "learning_rate": 4.8712384180790964e-05, "loss": 0.1271, "step": 1140000 }, { "epoch": 0.08, "learning_rate": 4.871181920903955e-05, "loss": 0.1316, "step": 1140500 }, { "epoch": 0.08, "learning_rate": 4.871125423728814e-05, "loss": 0.1309, "step": 1141000 }, { "epoch": 0.08, "learning_rate": 4.871068926553672e-05, "loss": 0.1293, "step": 1141500 }, { "epoch": 0.08, "learning_rate": 4.871012429378531e-05, "loss": 0.127, "step": 1142000 }, { "epoch": 0.08, "learning_rate": 4.870956045197741e-05, "loss": 0.133, "step": 1142500 }, { "epoch": 0.08, "learning_rate": 4.870899548022599e-05, "loss": 0.1303, "step": 1143000 }, { "epoch": 0.08, "learning_rate": 4.870843050847458e-05, "loss": 0.1344, "step": 1143500 }, { "epoch": 0.08, "learning_rate": 4.870786553672317e-05, "loss": 0.1373, "step": 1144000 }, { "epoch": 0.08, "learning_rate": 4.8707300564971755e-05, "loss": 0.13, "step": 1144500 }, { "epoch": 0.08, "learning_rate": 4.8706735593220346e-05, "loss": 0.1327, "step": 1145000 }, { "epoch": 0.08, "learning_rate": 4.870617175141243e-05, "loss": 0.1336, "step": 1145500 }, { "epoch": 0.08, "learning_rate": 4.870560677966102e-05, "loss": 0.1321, "step": 1146000 }, { "epoch": 0.08, "learning_rate": 4.870504180790961e-05, "loss": 0.121, "step": 1146500 }, { "epoch": 0.08, "learning_rate": 4.87044768361582e-05, "loss": 0.1282, "step": 1147000 }, { "epoch": 0.08, "learning_rate": 4.870391186440678e-05, "loss": 0.13, "step": 1147500 }, { "epoch": 0.08, "learning_rate": 4.870334802259887e-05, "loss": 0.1286, "step": 1148000 }, { "epoch": 0.08, "learning_rate": 4.870278305084746e-05, "loss": 0.1317, "step": 1148500 }, { "epoch": 0.08, "learning_rate": 4.8702218079096044e-05, "loss": 0.1284, "step": 1149000 }, { "epoch": 0.08, "learning_rate": 4.8701653107344634e-05, "loss": 0.1257, "step": 1149500 }, { "epoch": 0.08, "learning_rate": 4.870108813559322e-05, "loss": 0.128, "step": 1150000 }, { "epoch": 0.08, "learning_rate": 4.870052429378532e-05, "loss": 0.1172, "step": 1150500 }, { "epoch": 0.08, "learning_rate": 4.86999593220339e-05, "loss": 0.1276, "step": 1151000 }, { "epoch": 0.08, "learning_rate": 4.869939435028249e-05, "loss": 0.1315, "step": 1151500 }, { "epoch": 0.08, "learning_rate": 4.869882937853108e-05, "loss": 0.1259, "step": 1152000 }, { "epoch": 0.08, "learning_rate": 4.8698265536723164e-05, "loss": 0.1291, "step": 1152500 }, { "epoch": 0.08, "learning_rate": 4.8697700564971755e-05, "loss": 0.1273, "step": 1153000 }, { "epoch": 0.08, "learning_rate": 4.869713559322034e-05, "loss": 0.1262, "step": 1153500 }, { "epoch": 0.08, "learning_rate": 4.869657062146893e-05, "loss": 0.1306, "step": 1154000 }, { "epoch": 0.08, "learning_rate": 4.8696006779661016e-05, "loss": 0.1232, "step": 1154500 }, { "epoch": 0.08, "learning_rate": 4.869544180790961e-05, "loss": 0.1294, "step": 1155000 }, { "epoch": 0.08, "learning_rate": 4.869487683615819e-05, "loss": 0.1259, "step": 1155500 }, { "epoch": 0.08, "learning_rate": 4.869431186440678e-05, "loss": 0.1291, "step": 1156000 }, { "epoch": 0.08, "learning_rate": 4.8693746892655365e-05, "loss": 0.1259, "step": 1156500 }, { "epoch": 0.08, "learning_rate": 4.869318305084746e-05, "loss": 0.1355, "step": 1157000 }, { "epoch": 0.08, "learning_rate": 4.869261807909605e-05, "loss": 0.1304, "step": 1157500 }, { "epoch": 0.08, "learning_rate": 4.869205310734464e-05, "loss": 0.134, "step": 1158000 }, { "epoch": 0.08, "learning_rate": 4.8691488135593224e-05, "loss": 0.1269, "step": 1158500 }, { "epoch": 0.08, "learning_rate": 4.8690923163841815e-05, "loss": 0.1308, "step": 1159000 }, { "epoch": 0.08, "learning_rate": 4.86903581920904e-05, "loss": 0.1263, "step": 1159500 }, { "epoch": 0.08, "learning_rate": 4.8689794350282486e-05, "loss": 0.1248, "step": 1160000 }, { "epoch": 0.08, "learning_rate": 4.8689229378531076e-05, "loss": 0.1391, "step": 1160500 }, { "epoch": 0.08, "learning_rate": 4.868866440677967e-05, "loss": 0.1282, "step": 1161000 }, { "epoch": 0.08, "learning_rate": 4.868809943502825e-05, "loss": 0.1269, "step": 1161500 }, { "epoch": 0.08, "learning_rate": 4.868753559322034e-05, "loss": 0.1292, "step": 1162000 }, { "epoch": 0.08, "learning_rate": 4.868697062146893e-05, "loss": 0.1269, "step": 1162500 }, { "epoch": 0.08, "learning_rate": 4.868640564971751e-05, "loss": 0.1333, "step": 1163000 }, { "epoch": 0.08, "learning_rate": 4.86858406779661e-05, "loss": 0.1332, "step": 1163500 }, { "epoch": 0.08, "learning_rate": 4.86852768361582e-05, "loss": 0.1222, "step": 1164000 }, { "epoch": 0.08, "learning_rate": 4.868471186440679e-05, "loss": 0.1307, "step": 1164500 }, { "epoch": 0.08, "learning_rate": 4.868414689265537e-05, "loss": 0.1289, "step": 1165000 }, { "epoch": 0.08, "learning_rate": 4.868358192090396e-05, "loss": 0.1288, "step": 1165500 }, { "epoch": 0.08, "learning_rate": 4.8683016949152546e-05, "loss": 0.1281, "step": 1166000 }, { "epoch": 0.08, "learning_rate": 4.8682451977401136e-05, "loss": 0.1345, "step": 1166500 }, { "epoch": 0.08, "learning_rate": 4.868188813559322e-05, "loss": 0.1329, "step": 1167000 }, { "epoch": 0.08, "learning_rate": 4.868132316384181e-05, "loss": 0.1288, "step": 1167500 }, { "epoch": 0.08, "learning_rate": 4.86807581920904e-05, "loss": 0.124, "step": 1168000 }, { "epoch": 0.08, "learning_rate": 4.868019322033899e-05, "loss": 0.1254, "step": 1168500 }, { "epoch": 0.08, "learning_rate": 4.8679629378531075e-05, "loss": 0.128, "step": 1169000 }, { "epoch": 0.08, "learning_rate": 4.867906440677966e-05, "loss": 0.1367, "step": 1169500 }, { "epoch": 0.08, "learning_rate": 4.867849943502825e-05, "loss": 0.1275, "step": 1170000 }, { "epoch": 0.08, "learning_rate": 4.8677934463276834e-05, "loss": 0.1317, "step": 1170500 }, { "epoch": 0.08, "learning_rate": 4.8677369491525424e-05, "loss": 0.1275, "step": 1171000 }, { "epoch": 0.08, "learning_rate": 4.867680564971752e-05, "loss": 0.1275, "step": 1171500 }, { "epoch": 0.08, "learning_rate": 4.867624067796611e-05, "loss": 0.1287, "step": 1172000 }, { "epoch": 0.08, "learning_rate": 4.867567570621469e-05, "loss": 0.1265, "step": 1172500 }, { "epoch": 0.08, "learning_rate": 4.867511073446328e-05, "loss": 0.1319, "step": 1173000 }, { "epoch": 0.08, "learning_rate": 4.867454689265537e-05, "loss": 0.1251, "step": 1173500 }, { "epoch": 0.08, "learning_rate": 4.8673981920903954e-05, "loss": 0.1292, "step": 1174000 }, { "epoch": 0.08, "learning_rate": 4.8673416949152545e-05, "loss": 0.1299, "step": 1174500 }, { "epoch": 0.08, "learning_rate": 4.8672851977401135e-05, "loss": 0.1328, "step": 1175000 }, { "epoch": 0.08, "learning_rate": 4.867228700564972e-05, "loss": 0.1293, "step": 1175500 }, { "epoch": 0.08, "learning_rate": 4.867172203389831e-05, "loss": 0.1287, "step": 1176000 }, { "epoch": 0.08, "learning_rate": 4.86711581920904e-05, "loss": 0.1238, "step": 1176500 }, { "epoch": 0.08, "learning_rate": 4.867059322033898e-05, "loss": 0.1211, "step": 1177000 }, { "epoch": 0.08, "learning_rate": 4.867002824858757e-05, "loss": 0.1271, "step": 1177500 }, { "epoch": 0.08, "learning_rate": 4.8669463276836155e-05, "loss": 0.1255, "step": 1178000 }, { "epoch": 0.08, "learning_rate": 4.8668898305084746e-05, "loss": 0.1258, "step": 1178500 }, { "epoch": 0.08, "learning_rate": 4.866833446327684e-05, "loss": 0.134, "step": 1179000 }, { "epoch": 0.08, "learning_rate": 4.866776949152543e-05, "loss": 0.1213, "step": 1179500 }, { "epoch": 0.08, "learning_rate": 4.8667204519774014e-05, "loss": 0.1346, "step": 1180000 }, { "epoch": 0.08, "learning_rate": 4.8666639548022605e-05, "loss": 0.1301, "step": 1180500 }, { "epoch": 0.08, "learning_rate": 4.866607570621469e-05, "loss": 0.1303, "step": 1181000 }, { "epoch": 0.08, "learning_rate": 4.8665510734463276e-05, "loss": 0.1304, "step": 1181500 }, { "epoch": 0.08, "learning_rate": 4.8664945762711866e-05, "loss": 0.1353, "step": 1182000 }, { "epoch": 0.08, "learning_rate": 4.866438079096046e-05, "loss": 0.1367, "step": 1182500 }, { "epoch": 0.08, "learning_rate": 4.8663816949152544e-05, "loss": 0.123, "step": 1183000 }, { "epoch": 0.08, "learning_rate": 4.866325197740113e-05, "loss": 0.1258, "step": 1183500 }, { "epoch": 0.08, "learning_rate": 4.866268700564972e-05, "loss": 0.1306, "step": 1184000 }, { "epoch": 0.08, "learning_rate": 4.86621220338983e-05, "loss": 0.1284, "step": 1184500 }, { "epoch": 0.08, "learning_rate": 4.866155706214689e-05, "loss": 0.1282, "step": 1185000 }, { "epoch": 0.08, "learning_rate": 4.866099322033899e-05, "loss": 0.1218, "step": 1185500 }, { "epoch": 0.08, "learning_rate": 4.866042824858758e-05, "loss": 0.1175, "step": 1186000 }, { "epoch": 0.08, "learning_rate": 4.865986327683616e-05, "loss": 0.1312, "step": 1186500 }, { "epoch": 0.08, "learning_rate": 4.865929830508475e-05, "loss": 0.1278, "step": 1187000 }, { "epoch": 0.08, "learning_rate": 4.8658733333333336e-05, "loss": 0.1312, "step": 1187500 }, { "epoch": 0.08, "learning_rate": 4.8658168361581926e-05, "loss": 0.1224, "step": 1188000 }, { "epoch": 0.08, "learning_rate": 4.8657604519774013e-05, "loss": 0.1213, "step": 1188500 }, { "epoch": 0.08, "learning_rate": 4.8657039548022604e-05, "loss": 0.1274, "step": 1189000 }, { "epoch": 0.08, "learning_rate": 4.865647457627119e-05, "loss": 0.1227, "step": 1189500 }, { "epoch": 0.08, "learning_rate": 4.865590960451978e-05, "loss": 0.1212, "step": 1190000 }, { "epoch": 0.08, "learning_rate": 4.865534463276836e-05, "loss": 0.1314, "step": 1190500 }, { "epoch": 0.08, "learning_rate": 4.865478079096045e-05, "loss": 0.1296, "step": 1191000 }, { "epoch": 0.08, "learning_rate": 4.865421581920904e-05, "loss": 0.1301, "step": 1191500 }, { "epoch": 0.08, "learning_rate": 4.8653650847457624e-05, "loss": 0.132, "step": 1192000 }, { "epoch": 0.08, "learning_rate": 4.8653085875706214e-05, "loss": 0.1274, "step": 1192500 }, { "epoch": 0.08, "learning_rate": 4.865252203389831e-05, "loss": 0.1192, "step": 1193000 }, { "epoch": 0.08, "learning_rate": 4.86519570621469e-05, "loss": 0.1242, "step": 1193500 }, { "epoch": 0.08, "learning_rate": 4.865139209039548e-05, "loss": 0.1258, "step": 1194000 }, { "epoch": 0.08, "learning_rate": 4.865082711864407e-05, "loss": 0.1297, "step": 1194500 }, { "epoch": 0.08, "learning_rate": 4.865026214689266e-05, "loss": 0.1279, "step": 1195000 }, { "epoch": 0.08, "learning_rate": 4.864969717514125e-05, "loss": 0.131, "step": 1195500 }, { "epoch": 0.08, "learning_rate": 4.8649133333333335e-05, "loss": 0.1304, "step": 1196000 }, { "epoch": 0.08, "learning_rate": 4.8648568361581925e-05, "loss": 0.1299, "step": 1196500 }, { "epoch": 0.08, "learning_rate": 4.864800338983051e-05, "loss": 0.1284, "step": 1197000 }, { "epoch": 0.08, "learning_rate": 4.86474384180791e-05, "loss": 0.127, "step": 1197500 }, { "epoch": 0.08, "learning_rate": 4.8646873446327684e-05, "loss": 0.1289, "step": 1198000 }, { "epoch": 0.08, "learning_rate": 4.8646308474576274e-05, "loss": 0.1209, "step": 1198500 }, { "epoch": 0.08, "learning_rate": 4.864574350282486e-05, "loss": 0.1319, "step": 1199000 }, { "epoch": 0.08, "learning_rate": 4.864517966101695e-05, "loss": 0.1249, "step": 1199500 }, { "epoch": 0.08, "learning_rate": 4.8644614689265536e-05, "loss": 0.1175, "step": 1200000 }, { "epoch": 0.08, "learning_rate": 4.8644049717514126e-05, "loss": 0.1355, "step": 1200500 }, { "epoch": 0.08, "learning_rate": 4.864348474576271e-05, "loss": 0.1267, "step": 1201000 }, { "epoch": 0.08, "learning_rate": 4.86429197740113e-05, "loss": 0.1251, "step": 1201500 }, { "epoch": 0.08, "learning_rate": 4.8642354802259885e-05, "loss": 0.1305, "step": 1202000 }, { "epoch": 0.08, "learning_rate": 4.864179096045198e-05, "loss": 0.1246, "step": 1202500 }, { "epoch": 0.08, "learning_rate": 4.864122598870057e-05, "loss": 0.1309, "step": 1203000 }, { "epoch": 0.08, "learning_rate": 4.864066101694916e-05, "loss": 0.124, "step": 1203500 }, { "epoch": 0.08, "learning_rate": 4.8640096045197744e-05, "loss": 0.1297, "step": 1204000 }, { "epoch": 0.08, "learning_rate": 4.8639531073446334e-05, "loss": 0.1289, "step": 1204500 }, { "epoch": 0.08, "learning_rate": 4.863896723163842e-05, "loss": 0.1336, "step": 1205000 }, { "epoch": 0.08, "learning_rate": 4.8638402259887005e-05, "loss": 0.1335, "step": 1205500 }, { "epoch": 0.08, "learning_rate": 4.8637837288135596e-05, "loss": 0.1294, "step": 1206000 }, { "epoch": 0.08, "learning_rate": 4.863727231638418e-05, "loss": 0.13, "step": 1206500 }, { "epoch": 0.08, "learning_rate": 4.8636708474576274e-05, "loss": 0.1206, "step": 1207000 }, { "epoch": 0.08, "learning_rate": 4.863614350282486e-05, "loss": 0.1235, "step": 1207500 }, { "epoch": 0.08, "learning_rate": 4.863557853107345e-05, "loss": 0.1276, "step": 1208000 }, { "epoch": 0.08, "learning_rate": 4.863501355932203e-05, "loss": 0.1218, "step": 1208500 }, { "epoch": 0.08, "learning_rate": 4.863444858757062e-05, "loss": 0.1219, "step": 1209000 }, { "epoch": 0.08, "learning_rate": 4.8633883615819206e-05, "loss": 0.1319, "step": 1209500 }, { "epoch": 0.08, "learning_rate": 4.86333186440678e-05, "loss": 0.1284, "step": 1210000 }, { "epoch": 0.08, "learning_rate": 4.863275480225989e-05, "loss": 0.1312, "step": 1210500 }, { "epoch": 0.08, "learning_rate": 4.863218983050848e-05, "loss": 0.1258, "step": 1211000 }, { "epoch": 0.08, "learning_rate": 4.8631624858757065e-05, "loss": 0.1315, "step": 1211500 }, { "epoch": 0.08, "learning_rate": 4.8631059887005656e-05, "loss": 0.129, "step": 1212000 }, { "epoch": 0.08, "learning_rate": 4.863049491525424e-05, "loss": 0.1323, "step": 1212500 }, { "epoch": 0.08, "learning_rate": 4.862992994350283e-05, "loss": 0.1369, "step": 1213000 }, { "epoch": 0.08, "learning_rate": 4.8629364971751414e-05, "loss": 0.1342, "step": 1213500 }, { "epoch": 0.08, "learning_rate": 4.862880112994351e-05, "loss": 0.1314, "step": 1214000 }, { "epoch": 0.08, "learning_rate": 4.862823615819209e-05, "loss": 0.1206, "step": 1214500 }, { "epoch": 0.08, "learning_rate": 4.862767118644068e-05, "loss": 0.1223, "step": 1215000 }, { "epoch": 0.08, "learning_rate": 4.8627106214689266e-05, "loss": 0.1208, "step": 1215500 }, { "epoch": 0.08, "learning_rate": 4.8626541242937857e-05, "loss": 0.1306, "step": 1216000 }, { "epoch": 0.08, "learning_rate": 4.8625977401129944e-05, "loss": 0.1225, "step": 1216500 }, { "epoch": 0.08, "learning_rate": 4.862541242937853e-05, "loss": 0.1336, "step": 1217000 }, { "epoch": 0.08, "learning_rate": 4.862484745762712e-05, "loss": 0.1273, "step": 1217500 }, { "epoch": 0.08, "learning_rate": 4.862428248587571e-05, "loss": 0.1272, "step": 1218000 }, { "epoch": 0.08, "learning_rate": 4.862371751412429e-05, "loss": 0.1246, "step": 1218500 }, { "epoch": 0.08, "learning_rate": 4.8623153672316387e-05, "loss": 0.1192, "step": 1219000 }, { "epoch": 0.08, "learning_rate": 4.862258870056498e-05, "loss": 0.1279, "step": 1219500 }, { "epoch": 0.08, "learning_rate": 4.862202372881356e-05, "loss": 0.1216, "step": 1220000 }, { "epoch": 0.08, "learning_rate": 4.862145875706215e-05, "loss": 0.1276, "step": 1220500 }, { "epoch": 0.08, "learning_rate": 4.862089491525424e-05, "loss": 0.1289, "step": 1221000 }, { "epoch": 0.08, "learning_rate": 4.862032994350283e-05, "loss": 0.1312, "step": 1221500 }, { "epoch": 0.08, "learning_rate": 4.861976497175141e-05, "loss": 0.1294, "step": 1222000 }, { "epoch": 0.08, "learning_rate": 4.8619200000000004e-05, "loss": 0.1288, "step": 1222500 }, { "epoch": 0.08, "learning_rate": 4.861863502824859e-05, "loss": 0.1325, "step": 1223000 }, { "epoch": 0.08, "learning_rate": 4.8618071186440675e-05, "loss": 0.1316, "step": 1223500 }, { "epoch": 0.08, "learning_rate": 4.8617506214689265e-05, "loss": 0.1318, "step": 1224000 }, { "epoch": 0.08, "learning_rate": 4.8616941242937856e-05, "loss": 0.127, "step": 1224500 }, { "epoch": 0.08, "learning_rate": 4.861637627118644e-05, "loss": 0.1319, "step": 1225000 }, { "epoch": 0.08, "learning_rate": 4.8615812429378534e-05, "loss": 0.1273, "step": 1225500 }, { "epoch": 0.08, "learning_rate": 4.8615247457627124e-05, "loss": 0.1273, "step": 1226000 }, { "epoch": 0.08, "learning_rate": 4.861468248587571e-05, "loss": 0.13, "step": 1226500 }, { "epoch": 0.08, "learning_rate": 4.86141175141243e-05, "loss": 0.123, "step": 1227000 }, { "epoch": 0.08, "learning_rate": 4.861355254237288e-05, "loss": 0.1255, "step": 1227500 }, { "epoch": 0.08, "learning_rate": 4.861298757062147e-05, "loss": 0.1254, "step": 1228000 }, { "epoch": 0.08, "learning_rate": 4.8612422598870064e-05, "loss": 0.1255, "step": 1228500 }, { "epoch": 0.08, "learning_rate": 4.861185762711865e-05, "loss": 0.1202, "step": 1229000 }, { "epoch": 0.08, "learning_rate": 4.861129265536724e-05, "loss": 0.1299, "step": 1229500 }, { "epoch": 0.08, "learning_rate": 4.861072994350282e-05, "loss": 0.1282, "step": 1230000 }, { "epoch": 0.08, "learning_rate": 4.861016497175141e-05, "loss": 0.1218, "step": 1230500 }, { "epoch": 0.08, "learning_rate": 4.86096e-05, "loss": 0.1211, "step": 1231000 }, { "epoch": 0.08, "learning_rate": 4.860903502824859e-05, "loss": 0.1318, "step": 1231500 }, { "epoch": 0.08, "learning_rate": 4.860847005649718e-05, "loss": 0.1258, "step": 1232000 }, { "epoch": 0.08, "learning_rate": 4.860790621468927e-05, "loss": 0.1235, "step": 1232500 }, { "epoch": 0.08, "learning_rate": 4.8607341242937855e-05, "loss": 0.1254, "step": 1233000 }, { "epoch": 0.08, "learning_rate": 4.8606776271186446e-05, "loss": 0.121, "step": 1233500 }, { "epoch": 0.08, "learning_rate": 4.860621129943503e-05, "loss": 0.1307, "step": 1234000 }, { "epoch": 0.08, "learning_rate": 4.860564632768362e-05, "loss": 0.1261, "step": 1234500 }, { "epoch": 0.08, "learning_rate": 4.860508248587571e-05, "loss": 0.1367, "step": 1235000 }, { "epoch": 0.08, "learning_rate": 4.86045175141243e-05, "loss": 0.1242, "step": 1235500 }, { "epoch": 0.08, "learning_rate": 4.860395254237288e-05, "loss": 0.1255, "step": 1236000 }, { "epoch": 0.08, "learning_rate": 4.860338757062147e-05, "loss": 0.1348, "step": 1236500 }, { "epoch": 0.08, "learning_rate": 4.860282372881356e-05, "loss": 0.1246, "step": 1237000 }, { "epoch": 0.08, "learning_rate": 4.860225875706215e-05, "loss": 0.1233, "step": 1237500 }, { "epoch": 0.08, "learning_rate": 4.8601693785310734e-05, "loss": 0.1256, "step": 1238000 }, { "epoch": 0.08, "learning_rate": 4.8601128813559324e-05, "loss": 0.1291, "step": 1238500 }, { "epoch": 0.08, "learning_rate": 4.860056384180791e-05, "loss": 0.1263, "step": 1239000 }, { "epoch": 0.08, "learning_rate": 4.86e-05, "loss": 0.128, "step": 1239500 }, { "epoch": 0.08, "learning_rate": 4.859943502824859e-05, "loss": 0.1309, "step": 1240000 }, { "epoch": 0.08, "learning_rate": 4.859887005649718e-05, "loss": 0.1199, "step": 1240500 }, { "epoch": 0.08, "learning_rate": 4.859830508474577e-05, "loss": 0.1249, "step": 1241000 }, { "epoch": 0.08, "learning_rate": 4.859774011299435e-05, "loss": 0.1334, "step": 1241500 }, { "epoch": 0.08, "learning_rate": 4.8597176271186445e-05, "loss": 0.1321, "step": 1242000 }, { "epoch": 0.08, "learning_rate": 4.859661242937853e-05, "loss": 0.1289, "step": 1242500 }, { "epoch": 0.08, "learning_rate": 4.859604745762712e-05, "loss": 0.1169, "step": 1243000 }, { "epoch": 0.08, "learning_rate": 4.8595482485875707e-05, "loss": 0.1195, "step": 1243500 }, { "epoch": 0.08, "learning_rate": 4.85949175141243e-05, "loss": 0.1187, "step": 1244000 }, { "epoch": 0.08, "learning_rate": 4.859435254237288e-05, "loss": 0.1221, "step": 1244500 }, { "epoch": 0.08, "learning_rate": 4.859378757062147e-05, "loss": 0.1208, "step": 1245000 }, { "epoch": 0.08, "learning_rate": 4.8593222598870055e-05, "loss": 0.1197, "step": 1245500 }, { "epoch": 0.08, "learning_rate": 4.8592657627118646e-05, "loss": 0.127, "step": 1246000 }, { "epoch": 0.08, "learning_rate": 4.859209265536723e-05, "loss": 0.1285, "step": 1246500 }, { "epoch": 0.08, "learning_rate": 4.859152768361582e-05, "loss": 0.134, "step": 1247000 }, { "epoch": 0.08, "learning_rate": 4.8590963841807914e-05, "loss": 0.1305, "step": 1247500 }, { "epoch": 0.08, "learning_rate": 4.85903988700565e-05, "loss": 0.1261, "step": 1248000 }, { "epoch": 0.08, "learning_rate": 4.858983389830509e-05, "loss": 0.1251, "step": 1248500 }, { "epoch": 0.08, "learning_rate": 4.858926892655368e-05, "loss": 0.1252, "step": 1249000 }, { "epoch": 0.08, "learning_rate": 4.8588705084745766e-05, "loss": 0.1318, "step": 1249500 }, { "epoch": 0.08, "learning_rate": 4.858814011299435e-05, "loss": 0.1294, "step": 1250000 }, { "epoch": 0.08, "learning_rate": 4.858757514124294e-05, "loss": 0.1232, "step": 1250500 }, { "epoch": 0.08, "learning_rate": 4.8587010169491525e-05, "loss": 0.127, "step": 1251000 }, { "epoch": 0.08, "learning_rate": 4.858644632768362e-05, "loss": 0.1294, "step": 1251500 }, { "epoch": 0.08, "learning_rate": 4.85858813559322e-05, "loss": 0.1309, "step": 1252000 }, { "epoch": 0.08, "learning_rate": 4.858531638418079e-05, "loss": 0.1234, "step": 1252500 }, { "epoch": 0.08, "learning_rate": 4.858475141242938e-05, "loss": 0.1229, "step": 1253000 }, { "epoch": 0.08, "learning_rate": 4.858418644067797e-05, "loss": 0.1304, "step": 1253500 }, { "epoch": 0.09, "learning_rate": 4.858362259887006e-05, "loss": 0.1318, "step": 1254000 }, { "epoch": 0.09, "learning_rate": 4.8583057627118645e-05, "loss": 0.1266, "step": 1254500 }, { "epoch": 0.09, "learning_rate": 4.8582492655367236e-05, "loss": 0.1234, "step": 1255000 }, { "epoch": 0.09, "learning_rate": 4.858192768361582e-05, "loss": 0.1327, "step": 1255500 }, { "epoch": 0.09, "learning_rate": 4.8581363841807914e-05, "loss": 0.127, "step": 1256000 }, { "epoch": 0.09, "learning_rate": 4.85807988700565e-05, "loss": 0.1273, "step": 1256500 }, { "epoch": 0.09, "learning_rate": 4.858023389830509e-05, "loss": 0.1347, "step": 1257000 }, { "epoch": 0.09, "learning_rate": 4.857966892655367e-05, "loss": 0.128, "step": 1257500 }, { "epoch": 0.09, "learning_rate": 4.857910395480226e-05, "loss": 0.1219, "step": 1258000 }, { "epoch": 0.09, "learning_rate": 4.8578538983050846e-05, "loss": 0.1291, "step": 1258500 }, { "epoch": 0.09, "learning_rate": 4.857797514124294e-05, "loss": 0.1231, "step": 1259000 }, { "epoch": 0.09, "learning_rate": 4.8577410169491524e-05, "loss": 0.1236, "step": 1259500 }, { "epoch": 0.09, "learning_rate": 4.8576845197740115e-05, "loss": 0.1244, "step": 1260000 }, { "epoch": 0.09, "learning_rate": 4.8576280225988705e-05, "loss": 0.1255, "step": 1260500 }, { "epoch": 0.09, "learning_rate": 4.857571638418079e-05, "loss": 0.1264, "step": 1261000 }, { "epoch": 0.09, "learning_rate": 4.857515141242938e-05, "loss": 0.1254, "step": 1261500 }, { "epoch": 0.09, "learning_rate": 4.857458644067797e-05, "loss": 0.1295, "step": 1262000 }, { "epoch": 0.09, "learning_rate": 4.857402146892656e-05, "loss": 0.1344, "step": 1262500 }, { "epoch": 0.09, "learning_rate": 4.857345649717515e-05, "loss": 0.1204, "step": 1263000 }, { "epoch": 0.09, "learning_rate": 4.857289152542373e-05, "loss": 0.1203, "step": 1263500 }, { "epoch": 0.09, "learning_rate": 4.857232655367232e-05, "loss": 0.1294, "step": 1264000 }, { "epoch": 0.09, "learning_rate": 4.857176271186441e-05, "loss": 0.1349, "step": 1264500 }, { "epoch": 0.09, "learning_rate": 4.857119774011299e-05, "loss": 0.1285, "step": 1265000 }, { "epoch": 0.09, "learning_rate": 4.8570632768361584e-05, "loss": 0.1129, "step": 1265500 }, { "epoch": 0.09, "learning_rate": 4.857006779661017e-05, "loss": 0.1241, "step": 1266000 }, { "epoch": 0.09, "learning_rate": 4.856950282485876e-05, "loss": 0.1233, "step": 1266500 }, { "epoch": 0.09, "learning_rate": 4.856893898305085e-05, "loss": 0.1293, "step": 1267000 }, { "epoch": 0.09, "learning_rate": 4.8568374011299436e-05, "loss": 0.1248, "step": 1267500 }, { "epoch": 0.09, "learning_rate": 4.856780903954803e-05, "loss": 0.1185, "step": 1268000 }, { "epoch": 0.09, "learning_rate": 4.856724406779661e-05, "loss": 0.1273, "step": 1268500 }, { "epoch": 0.09, "learning_rate": 4.85666790960452e-05, "loss": 0.1326, "step": 1269000 }, { "epoch": 0.09, "learning_rate": 4.856611525423729e-05, "loss": 0.1214, "step": 1269500 }, { "epoch": 0.09, "learning_rate": 4.856555028248588e-05, "loss": 0.1146, "step": 1270000 }, { "epoch": 0.09, "learning_rate": 4.856498531073447e-05, "loss": 0.1321, "step": 1270500 }, { "epoch": 0.09, "learning_rate": 4.856442033898305e-05, "loss": 0.129, "step": 1271000 }, { "epoch": 0.09, "learning_rate": 4.856385649717514e-05, "loss": 0.1314, "step": 1271500 }, { "epoch": 0.09, "learning_rate": 4.856329152542373e-05, "loss": 0.1298, "step": 1272000 }, { "epoch": 0.09, "learning_rate": 4.8562726553672315e-05, "loss": 0.1183, "step": 1272500 }, { "epoch": 0.09, "learning_rate": 4.8562161581920905e-05, "loss": 0.1231, "step": 1273000 }, { "epoch": 0.09, "learning_rate": 4.8561596610169496e-05, "loss": 0.1269, "step": 1273500 }, { "epoch": 0.09, "learning_rate": 4.856103163841808e-05, "loss": 0.1349, "step": 1274000 }, { "epoch": 0.09, "learning_rate": 4.8560467796610174e-05, "loss": 0.1243, "step": 1274500 }, { "epoch": 0.09, "learning_rate": 4.855990282485876e-05, "loss": 0.133, "step": 1275000 }, { "epoch": 0.09, "learning_rate": 4.855933785310735e-05, "loss": 0.1235, "step": 1275500 }, { "epoch": 0.09, "learning_rate": 4.855877288135593e-05, "loss": 0.1316, "step": 1276000 }, { "epoch": 0.09, "learning_rate": 4.855820790960452e-05, "loss": 0.1249, "step": 1276500 }, { "epoch": 0.09, "learning_rate": 4.8557644067796616e-05, "loss": 0.1276, "step": 1277000 }, { "epoch": 0.09, "learning_rate": 4.85570790960452e-05, "loss": 0.128, "step": 1277500 }, { "epoch": 0.09, "learning_rate": 4.855651412429379e-05, "loss": 0.1286, "step": 1278000 }, { "epoch": 0.09, "learning_rate": 4.8555949152542375e-05, "loss": 0.127, "step": 1278500 }, { "epoch": 0.09, "learning_rate": 4.8555384180790965e-05, "loss": 0.1265, "step": 1279000 }, { "epoch": 0.09, "learning_rate": 4.855481920903955e-05, "loss": 0.1275, "step": 1279500 }, { "epoch": 0.09, "learning_rate": 4.855425423728814e-05, "loss": 0.1272, "step": 1280000 }, { "epoch": 0.09, "learning_rate": 4.855368926553673e-05, "loss": 0.1216, "step": 1280500 }, { "epoch": 0.09, "learning_rate": 4.855312655367232e-05, "loss": 0.1198, "step": 1281000 }, { "epoch": 0.09, "learning_rate": 4.8552561581920905e-05, "loss": 0.1246, "step": 1281500 }, { "epoch": 0.09, "learning_rate": 4.8551996610169495e-05, "loss": 0.1263, "step": 1282000 }, { "epoch": 0.09, "learning_rate": 4.855143163841808e-05, "loss": 0.1325, "step": 1282500 }, { "epoch": 0.09, "learning_rate": 4.855086666666667e-05, "loss": 0.1243, "step": 1283000 }, { "epoch": 0.09, "learning_rate": 4.8550301694915253e-05, "loss": 0.1314, "step": 1283500 }, { "epoch": 0.09, "learning_rate": 4.854973785310735e-05, "loss": 0.1333, "step": 1284000 }, { "epoch": 0.09, "learning_rate": 4.854917288135594e-05, "loss": 0.1288, "step": 1284500 }, { "epoch": 0.09, "learning_rate": 4.854860790960452e-05, "loss": 0.1252, "step": 1285000 }, { "epoch": 0.09, "learning_rate": 4.854804293785311e-05, "loss": 0.1291, "step": 1285500 }, { "epoch": 0.09, "learning_rate": 4.85474790960452e-05, "loss": 0.1243, "step": 1286000 }, { "epoch": 0.09, "learning_rate": 4.8546915254237294e-05, "loss": 0.1215, "step": 1286500 }, { "epoch": 0.09, "learning_rate": 4.854635028248588e-05, "loss": 0.1258, "step": 1287000 }, { "epoch": 0.09, "learning_rate": 4.854578531073447e-05, "loss": 0.1287, "step": 1287500 }, { "epoch": 0.09, "learning_rate": 4.854522033898305e-05, "loss": 0.1243, "step": 1288000 }, { "epoch": 0.09, "learning_rate": 4.854465536723164e-05, "loss": 0.1301, "step": 1288500 }, { "epoch": 0.09, "learning_rate": 4.854409039548023e-05, "loss": 0.1217, "step": 1289000 }, { "epoch": 0.09, "learning_rate": 4.854352542372882e-05, "loss": 0.1274, "step": 1289500 }, { "epoch": 0.09, "learning_rate": 4.854296045197741e-05, "loss": 0.1278, "step": 1290000 }, { "epoch": 0.09, "learning_rate": 4.854239548022599e-05, "loss": 0.1259, "step": 1290500 }, { "epoch": 0.09, "learning_rate": 4.8541831638418085e-05, "loss": 0.1295, "step": 1291000 }, { "epoch": 0.09, "learning_rate": 4.854126666666667e-05, "loss": 0.1241, "step": 1291500 }, { "epoch": 0.09, "learning_rate": 4.854070169491526e-05, "loss": 0.124, "step": 1292000 }, { "epoch": 0.09, "learning_rate": 4.854013672316384e-05, "loss": 0.1284, "step": 1292500 }, { "epoch": 0.09, "learning_rate": 4.8539571751412434e-05, "loss": 0.1279, "step": 1293000 }, { "epoch": 0.09, "learning_rate": 4.853900790960452e-05, "loss": 0.1284, "step": 1293500 }, { "epoch": 0.09, "learning_rate": 4.8538442937853105e-05, "loss": 0.1236, "step": 1294000 }, { "epoch": 0.09, "learning_rate": 4.8537877966101695e-05, "loss": 0.1289, "step": 1294500 }, { "epoch": 0.09, "learning_rate": 4.8537312994350286e-05, "loss": 0.13, "step": 1295000 }, { "epoch": 0.09, "learning_rate": 4.853674915254238e-05, "loss": 0.1239, "step": 1295500 }, { "epoch": 0.09, "learning_rate": 4.8536184180790964e-05, "loss": 0.1283, "step": 1296000 }, { "epoch": 0.09, "learning_rate": 4.8535619209039554e-05, "loss": 0.128, "step": 1296500 }, { "epoch": 0.09, "learning_rate": 4.853505423728814e-05, "loss": 0.1247, "step": 1297000 }, { "epoch": 0.09, "learning_rate": 4.853448926553673e-05, "loss": 0.1181, "step": 1297500 }, { "epoch": 0.09, "learning_rate": 4.853392429378531e-05, "loss": 0.1221, "step": 1298000 }, { "epoch": 0.09, "learning_rate": 4.8533360451977407e-05, "loss": 0.1277, "step": 1298500 }, { "epoch": 0.09, "learning_rate": 4.853279548022599e-05, "loss": 0.1167, "step": 1299000 }, { "epoch": 0.09, "learning_rate": 4.853223050847458e-05, "loss": 0.1272, "step": 1299500 }, { "epoch": 0.09, "learning_rate": 4.8531665536723165e-05, "loss": 0.1334, "step": 1300000 }, { "epoch": 0.09, "learning_rate": 4.853110169491525e-05, "loss": 0.1243, "step": 1300500 }, { "epoch": 0.09, "learning_rate": 4.853053672316384e-05, "loss": 0.1196, "step": 1301000 }, { "epoch": 0.09, "learning_rate": 4.852997175141243e-05, "loss": 0.1318, "step": 1301500 }, { "epoch": 0.09, "learning_rate": 4.852940677966102e-05, "loss": 0.1272, "step": 1302000 }, { "epoch": 0.09, "learning_rate": 4.852884180790961e-05, "loss": 0.1301, "step": 1302500 }, { "epoch": 0.09, "learning_rate": 4.852827683615819e-05, "loss": 0.1274, "step": 1303000 }, { "epoch": 0.09, "learning_rate": 4.852771186440678e-05, "loss": 0.1316, "step": 1303500 }, { "epoch": 0.09, "learning_rate": 4.8527148022598876e-05, "loss": 0.119, "step": 1304000 }, { "epoch": 0.09, "learning_rate": 4.852658305084746e-05, "loss": 0.1291, "step": 1304500 }, { "epoch": 0.09, "learning_rate": 4.852601807909605e-05, "loss": 0.1254, "step": 1305000 }, { "epoch": 0.09, "learning_rate": 4.8525453107344634e-05, "loss": 0.1253, "step": 1305500 }, { "epoch": 0.09, "learning_rate": 4.8524888135593225e-05, "loss": 0.126, "step": 1306000 }, { "epoch": 0.09, "learning_rate": 4.852432429378531e-05, "loss": 0.1234, "step": 1306500 }, { "epoch": 0.09, "learning_rate": 4.85237593220339e-05, "loss": 0.1244, "step": 1307000 }, { "epoch": 0.09, "learning_rate": 4.8523194350282486e-05, "loss": 0.1243, "step": 1307500 }, { "epoch": 0.09, "learning_rate": 4.852262937853108e-05, "loss": 0.1161, "step": 1308000 }, { "epoch": 0.09, "learning_rate": 4.8522065536723164e-05, "loss": 0.1236, "step": 1308500 }, { "epoch": 0.09, "learning_rate": 4.8521500564971755e-05, "loss": 0.1259, "step": 1309000 }, { "epoch": 0.09, "learning_rate": 4.852093559322034e-05, "loss": 0.1271, "step": 1309500 }, { "epoch": 0.09, "learning_rate": 4.852037062146893e-05, "loss": 0.1287, "step": 1310000 }, { "epoch": 0.09, "learning_rate": 4.851980564971751e-05, "loss": 0.1265, "step": 1310500 }, { "epoch": 0.09, "learning_rate": 4.851924180790961e-05, "loss": 0.1267, "step": 1311000 }, { "epoch": 0.09, "learning_rate": 4.85186768361582e-05, "loss": 0.1253, "step": 1311500 }, { "epoch": 0.09, "learning_rate": 4.851811186440678e-05, "loss": 0.131, "step": 1312000 }, { "epoch": 0.09, "learning_rate": 4.851754689265537e-05, "loss": 0.1184, "step": 1312500 }, { "epoch": 0.09, "learning_rate": 4.8516981920903956e-05, "loss": 0.1184, "step": 1313000 }, { "epoch": 0.09, "learning_rate": 4.8516416949152546e-05, "loss": 0.1234, "step": 1313500 }, { "epoch": 0.09, "learning_rate": 4.851585310734463e-05, "loss": 0.124, "step": 1314000 }, { "epoch": 0.09, "learning_rate": 4.8515288135593224e-05, "loss": 0.1296, "step": 1314500 }, { "epoch": 0.09, "learning_rate": 4.851472316384181e-05, "loss": 0.1198, "step": 1315000 }, { "epoch": 0.09, "learning_rate": 4.85141581920904e-05, "loss": 0.1326, "step": 1315500 }, { "epoch": 0.09, "learning_rate": 4.8513594350282486e-05, "loss": 0.1292, "step": 1316000 }, { "epoch": 0.09, "learning_rate": 4.8513029378531076e-05, "loss": 0.1205, "step": 1316500 }, { "epoch": 0.09, "learning_rate": 4.851246440677966e-05, "loss": 0.1227, "step": 1317000 }, { "epoch": 0.09, "learning_rate": 4.851189943502825e-05, "loss": 0.1296, "step": 1317500 }, { "epoch": 0.09, "learning_rate": 4.8511334463276834e-05, "loss": 0.127, "step": 1318000 }, { "epoch": 0.09, "learning_rate": 4.8510769491525425e-05, "loss": 0.12, "step": 1318500 }, { "epoch": 0.09, "learning_rate": 4.851020564971752e-05, "loss": 0.1208, "step": 1319000 }, { "epoch": 0.09, "learning_rate": 4.850964067796611e-05, "loss": 0.1226, "step": 1319500 }, { "epoch": 0.09, "learning_rate": 4.850907570621469e-05, "loss": 0.124, "step": 1320000 }, { "epoch": 0.09, "learning_rate": 4.8508510734463284e-05, "loss": 0.1223, "step": 1320500 }, { "epoch": 0.09, "learning_rate": 4.850794576271187e-05, "loss": 0.122, "step": 1321000 }, { "epoch": 0.09, "learning_rate": 4.850738079096046e-05, "loss": 0.119, "step": 1321500 }, { "epoch": 0.09, "learning_rate": 4.8506816949152545e-05, "loss": 0.1213, "step": 1322000 }, { "epoch": 0.09, "learning_rate": 4.8506251977401136e-05, "loss": 0.1158, "step": 1322500 }, { "epoch": 0.09, "learning_rate": 4.850568700564972e-05, "loss": 0.1265, "step": 1323000 }, { "epoch": 0.09, "learning_rate": 4.850512203389831e-05, "loss": 0.1295, "step": 1323500 }, { "epoch": 0.09, "learning_rate": 4.8504557062146894e-05, "loss": 0.1283, "step": 1324000 }, { "epoch": 0.09, "learning_rate": 4.850399322033898e-05, "loss": 0.1175, "step": 1324500 }, { "epoch": 0.09, "learning_rate": 4.850342824858757e-05, "loss": 0.1203, "step": 1325000 }, { "epoch": 0.09, "learning_rate": 4.8502863276836156e-05, "loss": 0.1221, "step": 1325500 }, { "epoch": 0.09, "learning_rate": 4.8502298305084746e-05, "loss": 0.1238, "step": 1326000 }, { "epoch": 0.09, "learning_rate": 4.850173446327684e-05, "loss": 0.1279, "step": 1326500 }, { "epoch": 0.09, "learning_rate": 4.850116949152543e-05, "loss": 0.1263, "step": 1327000 }, { "epoch": 0.09, "learning_rate": 4.8500604519774015e-05, "loss": 0.12, "step": 1327500 }, { "epoch": 0.09, "learning_rate": 4.8500039548022605e-05, "loss": 0.1262, "step": 1328000 }, { "epoch": 0.09, "learning_rate": 4.849947457627119e-05, "loss": 0.124, "step": 1328500 }, { "epoch": 0.09, "learning_rate": 4.849890960451978e-05, "loss": 0.1284, "step": 1329000 }, { "epoch": 0.09, "learning_rate": 4.8498344632768364e-05, "loss": 0.1266, "step": 1329500 }, { "epoch": 0.09, "learning_rate": 4.849778079096046e-05, "loss": 0.1222, "step": 1330000 }, { "epoch": 0.09, "learning_rate": 4.849721581920904e-05, "loss": 0.1234, "step": 1330500 }, { "epoch": 0.09, "learning_rate": 4.849665084745763e-05, "loss": 0.113, "step": 1331000 }, { "epoch": 0.09, "learning_rate": 4.8496085875706216e-05, "loss": 0.127, "step": 1331500 }, { "epoch": 0.09, "learning_rate": 4.84955220338983e-05, "loss": 0.1319, "step": 1332000 }, { "epoch": 0.09, "learning_rate": 4.8494957062146893e-05, "loss": 0.1246, "step": 1332500 }, { "epoch": 0.09, "learning_rate": 4.8494392090395484e-05, "loss": 0.1252, "step": 1333000 }, { "epoch": 0.09, "learning_rate": 4.849382711864407e-05, "loss": 0.1278, "step": 1333500 }, { "epoch": 0.09, "learning_rate": 4.849326214689266e-05, "loss": 0.1194, "step": 1334000 }, { "epoch": 0.09, "learning_rate": 4.849269717514124e-05, "loss": 0.1287, "step": 1334500 }, { "epoch": 0.09, "learning_rate": 4.849213220338983e-05, "loss": 0.1274, "step": 1335000 }, { "epoch": 0.09, "learning_rate": 4.849156836158193e-05, "loss": 0.1263, "step": 1335500 }, { "epoch": 0.09, "learning_rate": 4.849100338983051e-05, "loss": 0.1171, "step": 1336000 }, { "epoch": 0.09, "learning_rate": 4.84904384180791e-05, "loss": 0.1225, "step": 1336500 }, { "epoch": 0.09, "learning_rate": 4.8489873446327685e-05, "loss": 0.1237, "step": 1337000 }, { "epoch": 0.09, "learning_rate": 4.8489308474576276e-05, "loss": 0.1243, "step": 1337500 }, { "epoch": 0.09, "learning_rate": 4.848874350282486e-05, "loss": 0.1289, "step": 1338000 }, { "epoch": 0.09, "learning_rate": 4.848817853107345e-05, "loss": 0.1266, "step": 1338500 }, { "epoch": 0.09, "learning_rate": 4.848761468926554e-05, "loss": 0.1193, "step": 1339000 }, { "epoch": 0.09, "learning_rate": 4.848704971751413e-05, "loss": 0.1204, "step": 1339500 }, { "epoch": 0.09, "learning_rate": 4.848648474576272e-05, "loss": 0.1194, "step": 1340000 }, { "epoch": 0.09, "learning_rate": 4.84859197740113e-05, "loss": 0.1279, "step": 1340500 }, { "epoch": 0.09, "learning_rate": 4.848535593220339e-05, "loss": 0.1257, "step": 1341000 }, { "epoch": 0.09, "learning_rate": 4.848479096045198e-05, "loss": 0.127, "step": 1341500 }, { "epoch": 0.09, "learning_rate": 4.8484225988700564e-05, "loss": 0.1203, "step": 1342000 }, { "epoch": 0.09, "learning_rate": 4.8483661016949154e-05, "loss": 0.1198, "step": 1342500 }, { "epoch": 0.09, "learning_rate": 4.848309717514125e-05, "loss": 0.1254, "step": 1343000 }, { "epoch": 0.09, "learning_rate": 4.848253220338983e-05, "loss": 0.1203, "step": 1343500 }, { "epoch": 0.09, "learning_rate": 4.848196723163842e-05, "loss": 0.1298, "step": 1344000 }, { "epoch": 0.09, "learning_rate": 4.848140225988701e-05, "loss": 0.1247, "step": 1344500 }, { "epoch": 0.09, "learning_rate": 4.84808372881356e-05, "loss": 0.1286, "step": 1345000 }, { "epoch": 0.09, "learning_rate": 4.848027231638419e-05, "loss": 0.1192, "step": 1345500 }, { "epoch": 0.09, "learning_rate": 4.847970734463277e-05, "loss": 0.1314, "step": 1346000 }, { "epoch": 0.09, "learning_rate": 4.847914237288136e-05, "loss": 0.1342, "step": 1346500 }, { "epoch": 0.09, "learning_rate": 4.847857853107345e-05, "loss": 0.1272, "step": 1347000 }, { "epoch": 0.09, "learning_rate": 4.847801355932204e-05, "loss": 0.1242, "step": 1347500 }, { "epoch": 0.09, "learning_rate": 4.8477448587570624e-05, "loss": 0.1251, "step": 1348000 }, { "epoch": 0.09, "learning_rate": 4.8476883615819214e-05, "loss": 0.1228, "step": 1348500 }, { "epoch": 0.09, "learning_rate": 4.84763197740113e-05, "loss": 0.1222, "step": 1349000 }, { "epoch": 0.09, "learning_rate": 4.8475754802259885e-05, "loss": 0.1232, "step": 1349500 }, { "epoch": 0.09, "learning_rate": 4.8475189830508476e-05, "loss": 0.1293, "step": 1350000 }, { "epoch": 0.09, "learning_rate": 4.847462485875706e-05, "loss": 0.1213, "step": 1350500 }, { "epoch": 0.09, "learning_rate": 4.847405988700565e-05, "loss": 0.125, "step": 1351000 }, { "epoch": 0.09, "learning_rate": 4.8473496045197744e-05, "loss": 0.1275, "step": 1351500 }, { "epoch": 0.09, "learning_rate": 4.8472931073446335e-05, "loss": 0.125, "step": 1352000 }, { "epoch": 0.09, "learning_rate": 4.847236610169492e-05, "loss": 0.123, "step": 1352500 }, { "epoch": 0.09, "learning_rate": 4.847180112994351e-05, "loss": 0.1152, "step": 1353000 }, { "epoch": 0.09, "learning_rate": 4.847123615819209e-05, "loss": 0.1175, "step": 1353500 }, { "epoch": 0.09, "learning_rate": 4.847067231638419e-05, "loss": 0.1229, "step": 1354000 }, { "epoch": 0.09, "learning_rate": 4.847010734463277e-05, "loss": 0.1189, "step": 1354500 }, { "epoch": 0.09, "learning_rate": 4.846954237288136e-05, "loss": 0.1279, "step": 1355000 }, { "epoch": 0.09, "learning_rate": 4.8468977401129945e-05, "loss": 0.1234, "step": 1355500 }, { "epoch": 0.09, "learning_rate": 4.846841355932203e-05, "loss": 0.1272, "step": 1356000 }, { "epoch": 0.09, "learning_rate": 4.846784858757062e-05, "loss": 0.1299, "step": 1356500 }, { "epoch": 0.09, "learning_rate": 4.846728361581921e-05, "loss": 0.1299, "step": 1357000 }, { "epoch": 0.09, "learning_rate": 4.84667186440678e-05, "loss": 0.1232, "step": 1357500 }, { "epoch": 0.09, "learning_rate": 4.846615367231639e-05, "loss": 0.1318, "step": 1358000 }, { "epoch": 0.09, "learning_rate": 4.846558983050848e-05, "loss": 0.1255, "step": 1358500 }, { "epoch": 0.09, "learning_rate": 4.8465024858757066e-05, "loss": 0.1216, "step": 1359000 }, { "epoch": 0.09, "learning_rate": 4.8464459887005656e-05, "loss": 0.1305, "step": 1359500 }, { "epoch": 0.09, "learning_rate": 4.846389491525424e-05, "loss": 0.123, "step": 1360000 }, { "epoch": 0.09, "learning_rate": 4.846332994350283e-05, "loss": 0.1359, "step": 1360500 }, { "epoch": 0.09, "learning_rate": 4.846276610169492e-05, "loss": 0.1234, "step": 1361000 }, { "epoch": 0.09, "learning_rate": 4.846220112994351e-05, "loss": 0.1285, "step": 1361500 }, { "epoch": 0.09, "learning_rate": 4.846163615819209e-05, "loss": 0.1258, "step": 1362000 }, { "epoch": 0.09, "learning_rate": 4.846107118644068e-05, "loss": 0.1264, "step": 1362500 }, { "epoch": 0.09, "learning_rate": 4.846050621468927e-05, "loss": 0.1172, "step": 1363000 }, { "epoch": 0.09, "learning_rate": 4.845994124293786e-05, "loss": 0.1275, "step": 1363500 }, { "epoch": 0.09, "learning_rate": 4.845937627118644e-05, "loss": 0.1284, "step": 1364000 }, { "epoch": 0.09, "learning_rate": 4.845881129943503e-05, "loss": 0.1212, "step": 1364500 }, { "epoch": 0.09, "learning_rate": 4.845824745762712e-05, "loss": 0.1174, "step": 1365000 }, { "epoch": 0.09, "learning_rate": 4.845768248587571e-05, "loss": 0.1143, "step": 1365500 }, { "epoch": 0.09, "learning_rate": 4.845711751412429e-05, "loss": 0.1217, "step": 1366000 }, { "epoch": 0.09, "learning_rate": 4.8456552542372884e-05, "loss": 0.1248, "step": 1366500 }, { "epoch": 0.09, "learning_rate": 4.845598870056498e-05, "loss": 0.1208, "step": 1367000 }, { "epoch": 0.09, "learning_rate": 4.845542372881356e-05, "loss": 0.1188, "step": 1367500 }, { "epoch": 0.09, "learning_rate": 4.845485875706215e-05, "loss": 0.126, "step": 1368000 }, { "epoch": 0.09, "learning_rate": 4.8454293785310736e-05, "loss": 0.1232, "step": 1368500 }, { "epoch": 0.09, "learning_rate": 4.8453728813559327e-05, "loss": 0.1269, "step": 1369000 }, { "epoch": 0.09, "learning_rate": 4.845316384180791e-05, "loss": 0.1342, "step": 1369500 }, { "epoch": 0.09, "learning_rate": 4.8452600000000004e-05, "loss": 0.1206, "step": 1370000 }, { "epoch": 0.09, "learning_rate": 4.845203502824859e-05, "loss": 0.1227, "step": 1370500 }, { "epoch": 0.09, "learning_rate": 4.845147005649718e-05, "loss": 0.1214, "step": 1371000 }, { "epoch": 0.09, "learning_rate": 4.845090508474576e-05, "loss": 0.1252, "step": 1371500 }, { "epoch": 0.09, "learning_rate": 4.845034011299435e-05, "loss": 0.129, "step": 1372000 }, { "epoch": 0.09, "learning_rate": 4.844977627118644e-05, "loss": 0.1244, "step": 1372500 }, { "epoch": 0.09, "learning_rate": 4.844921129943503e-05, "loss": 0.1189, "step": 1373000 }, { "epoch": 0.09, "learning_rate": 4.8448646327683615e-05, "loss": 0.1212, "step": 1373500 }, { "epoch": 0.09, "learning_rate": 4.8448081355932205e-05, "loss": 0.12, "step": 1374000 }, { "epoch": 0.09, "learning_rate": 4.844751638418079e-05, "loss": 0.1173, "step": 1374500 }, { "epoch": 0.09, "learning_rate": 4.844695254237288e-05, "loss": 0.1221, "step": 1375000 }, { "epoch": 0.09, "learning_rate": 4.844638870056498e-05, "loss": 0.1294, "step": 1375500 }, { "epoch": 0.09, "learning_rate": 4.844582372881356e-05, "loss": 0.1246, "step": 1376000 }, { "epoch": 0.09, "learning_rate": 4.844525875706215e-05, "loss": 0.1244, "step": 1376500 }, { "epoch": 0.09, "learning_rate": 4.8444693785310735e-05, "loss": 0.127, "step": 1377000 }, { "epoch": 0.09, "learning_rate": 4.8444128813559326e-05, "loss": 0.1218, "step": 1377500 }, { "epoch": 0.09, "learning_rate": 4.844356384180791e-05, "loss": 0.126, "step": 1378000 }, { "epoch": 0.09, "learning_rate": 4.84429988700565e-05, "loss": 0.1209, "step": 1378500 }, { "epoch": 0.09, "learning_rate": 4.844243389830509e-05, "loss": 0.1216, "step": 1379000 }, { "epoch": 0.09, "learning_rate": 4.8441868926553675e-05, "loss": 0.122, "step": 1379500 }, { "epoch": 0.09, "learning_rate": 4.8441303954802265e-05, "loss": 0.1271, "step": 1380000 }, { "epoch": 0.09, "learning_rate": 4.844073898305085e-05, "loss": 0.1251, "step": 1380500 }, { "epoch": 0.09, "learning_rate": 4.8440175141242936e-05, "loss": 0.1201, "step": 1381000 }, { "epoch": 0.09, "learning_rate": 4.843961016949153e-05, "loss": 0.1222, "step": 1381500 }, { "epoch": 0.09, "learning_rate": 4.843904519774011e-05, "loss": 0.1238, "step": 1382000 }, { "epoch": 0.09, "learning_rate": 4.84384802259887e-05, "loss": 0.1263, "step": 1382500 }, { "epoch": 0.09, "learning_rate": 4.843791525423729e-05, "loss": 0.1183, "step": 1383000 }, { "epoch": 0.09, "learning_rate": 4.8437351412429386e-05, "loss": 0.1204, "step": 1383500 }, { "epoch": 0.09, "learning_rate": 4.843678644067797e-05, "loss": 0.1263, "step": 1384000 }, { "epoch": 0.09, "learning_rate": 4.843622146892656e-05, "loss": 0.1201, "step": 1384500 }, { "epoch": 0.09, "learning_rate": 4.8435656497175144e-05, "loss": 0.1202, "step": 1385000 }, { "epoch": 0.09, "learning_rate": 4.8435091525423734e-05, "loss": 0.1255, "step": 1385500 }, { "epoch": 0.09, "learning_rate": 4.843452655367232e-05, "loss": 0.1241, "step": 1386000 }, { "epoch": 0.09, "learning_rate": 4.843396271186441e-05, "loss": 0.1224, "step": 1386500 }, { "epoch": 0.09, "learning_rate": 4.8433397740112996e-05, "loss": 0.1144, "step": 1387000 }, { "epoch": 0.09, "learning_rate": 4.843283276836159e-05, "loss": 0.1267, "step": 1387500 }, { "epoch": 0.09, "learning_rate": 4.843226779661017e-05, "loss": 0.1258, "step": 1388000 }, { "epoch": 0.09, "learning_rate": 4.843170282485876e-05, "loss": 0.1203, "step": 1388500 }, { "epoch": 0.09, "learning_rate": 4.843113898305085e-05, "loss": 0.1229, "step": 1389000 }, { "epoch": 0.09, "learning_rate": 4.843057401129944e-05, "loss": 0.1312, "step": 1389500 }, { "epoch": 0.09, "learning_rate": 4.843000903954802e-05, "loss": 0.1235, "step": 1390000 }, { "epoch": 0.09, "learning_rate": 4.842944406779661e-05, "loss": 0.1256, "step": 1390500 }, { "epoch": 0.09, "learning_rate": 4.84288790960452e-05, "loss": 0.1228, "step": 1391000 }, { "epoch": 0.09, "learning_rate": 4.842831525423729e-05, "loss": 0.1149, "step": 1391500 }, { "epoch": 0.09, "learning_rate": 4.842775028248588e-05, "loss": 0.1238, "step": 1392000 }, { "epoch": 0.09, "learning_rate": 4.8427185310734465e-05, "loss": 0.1345, "step": 1392500 }, { "epoch": 0.09, "learning_rate": 4.8426620338983056e-05, "loss": 0.1253, "step": 1393000 }, { "epoch": 0.09, "learning_rate": 4.842605536723164e-05, "loss": 0.1214, "step": 1393500 }, { "epoch": 0.09, "learning_rate": 4.842549039548023e-05, "loss": 0.1224, "step": 1394000 }, { "epoch": 0.09, "learning_rate": 4.8424925423728814e-05, "loss": 0.1226, "step": 1394500 }, { "epoch": 0.09, "learning_rate": 4.842436158192091e-05, "loss": 0.1199, "step": 1395000 }, { "epoch": 0.09, "learning_rate": 4.842379661016949e-05, "loss": 0.1198, "step": 1395500 }, { "epoch": 0.09, "learning_rate": 4.842323163841808e-05, "loss": 0.1203, "step": 1396000 }, { "epoch": 0.09, "learning_rate": 4.842266666666667e-05, "loss": 0.1189, "step": 1396500 }, { "epoch": 0.09, "learning_rate": 4.842210169491526e-05, "loss": 0.124, "step": 1397000 }, { "epoch": 0.09, "learning_rate": 4.8421537853107344e-05, "loss": 0.1272, "step": 1397500 }, { "epoch": 0.09, "learning_rate": 4.8420972881355935e-05, "loss": 0.1271, "step": 1398000 }, { "epoch": 0.09, "learning_rate": 4.842040790960452e-05, "loss": 0.1254, "step": 1398500 }, { "epoch": 0.09, "learning_rate": 4.841984293785311e-05, "loss": 0.1176, "step": 1399000 }, { "epoch": 0.09, "learning_rate": 4.84192790960452e-05, "loss": 0.1153, "step": 1399500 }, { "epoch": 0.09, "learning_rate": 4.8418714124293794e-05, "loss": 0.1177, "step": 1400000 }, { "epoch": 0.09, "learning_rate": 4.841814915254238e-05, "loss": 0.1219, "step": 1400500 }, { "epoch": 0.09, "learning_rate": 4.841758418079097e-05, "loss": 0.1267, "step": 1401000 }, { "epoch": 0.1, "learning_rate": 4.841701920903955e-05, "loss": 0.1206, "step": 1401500 }, { "epoch": 0.1, "learning_rate": 4.841645423728814e-05, "loss": 0.1215, "step": 1402000 }, { "epoch": 0.1, "learning_rate": 4.841589039548023e-05, "loss": 0.1204, "step": 1402500 }, { "epoch": 0.1, "learning_rate": 4.8415325423728813e-05, "loss": 0.1204, "step": 1403000 }, { "epoch": 0.1, "learning_rate": 4.8414760451977404e-05, "loss": 0.1216, "step": 1403500 }, { "epoch": 0.1, "learning_rate": 4.8414195480225995e-05, "loss": 0.1221, "step": 1404000 }, { "epoch": 0.1, "learning_rate": 4.841363163841808e-05, "loss": 0.1197, "step": 1404500 }, { "epoch": 0.1, "learning_rate": 4.8413066666666666e-05, "loss": 0.1247, "step": 1405000 }, { "epoch": 0.1, "learning_rate": 4.8412501694915256e-05, "loss": 0.1244, "step": 1405500 }, { "epoch": 0.1, "learning_rate": 4.841193672316384e-05, "loss": 0.1185, "step": 1406000 }, { "epoch": 0.1, "learning_rate": 4.841137175141243e-05, "loss": 0.1266, "step": 1406500 }, { "epoch": 0.1, "learning_rate": 4.8410807909604525e-05, "loss": 0.1212, "step": 1407000 }, { "epoch": 0.1, "learning_rate": 4.8410242937853115e-05, "loss": 0.1194, "step": 1407500 }, { "epoch": 0.1, "learning_rate": 4.84096779661017e-05, "loss": 0.1165, "step": 1408000 }, { "epoch": 0.1, "learning_rate": 4.840911299435029e-05, "loss": 0.1252, "step": 1408500 }, { "epoch": 0.1, "learning_rate": 4.840854915254238e-05, "loss": 0.1266, "step": 1409000 }, { "epoch": 0.1, "learning_rate": 4.840798418079096e-05, "loss": 0.127, "step": 1409500 }, { "epoch": 0.1, "learning_rate": 4.840741920903955e-05, "loss": 0.124, "step": 1410000 }, { "epoch": 0.1, "learning_rate": 4.840685423728814e-05, "loss": 0.1207, "step": 1410500 }, { "epoch": 0.1, "learning_rate": 4.8406289265536725e-05, "loss": 0.1282, "step": 1411000 }, { "epoch": 0.1, "learning_rate": 4.840572542372881e-05, "loss": 0.1229, "step": 1411500 }, { "epoch": 0.1, "learning_rate": 4.84051604519774e-05, "loss": 0.1311, "step": 1412000 }, { "epoch": 0.1, "learning_rate": 4.840459548022599e-05, "loss": 0.1174, "step": 1412500 }, { "epoch": 0.1, "learning_rate": 4.840403050847458e-05, "loss": 0.1202, "step": 1413000 }, { "epoch": 0.1, "learning_rate": 4.840346666666667e-05, "loss": 0.1295, "step": 1413500 }, { "epoch": 0.1, "learning_rate": 4.840290169491526e-05, "loss": 0.128, "step": 1414000 }, { "epoch": 0.1, "learning_rate": 4.8402336723163846e-05, "loss": 0.1228, "step": 1414500 }, { "epoch": 0.1, "learning_rate": 4.840177175141244e-05, "loss": 0.1177, "step": 1415000 }, { "epoch": 0.1, "learning_rate": 4.840120677966102e-05, "loss": 0.1257, "step": 1415500 }, { "epoch": 0.1, "learning_rate": 4.840064293785311e-05, "loss": 0.12, "step": 1416000 }, { "epoch": 0.1, "learning_rate": 4.84000779661017e-05, "loss": 0.1296, "step": 1416500 }, { "epoch": 0.1, "learning_rate": 4.839951299435028e-05, "loss": 0.1142, "step": 1417000 }, { "epoch": 0.1, "learning_rate": 4.839894802259887e-05, "loss": 0.1222, "step": 1417500 }, { "epoch": 0.1, "learning_rate": 4.839838305084746e-05, "loss": 0.1265, "step": 1418000 }, { "epoch": 0.1, "learning_rate": 4.839781920903955e-05, "loss": 0.1183, "step": 1418500 }, { "epoch": 0.1, "learning_rate": 4.8397254237288134e-05, "loss": 0.1298, "step": 1419000 }, { "epoch": 0.1, "learning_rate": 4.8396689265536725e-05, "loss": 0.1334, "step": 1419500 }, { "epoch": 0.1, "learning_rate": 4.839612429378531e-05, "loss": 0.1139, "step": 1420000 }, { "epoch": 0.1, "learning_rate": 4.83955593220339e-05, "loss": 0.1181, "step": 1420500 }, { "epoch": 0.1, "learning_rate": 4.839499435028249e-05, "loss": 0.1294, "step": 1421000 }, { "epoch": 0.1, "learning_rate": 4.8394430508474584e-05, "loss": 0.129, "step": 1421500 }, { "epoch": 0.1, "learning_rate": 4.839386553672317e-05, "loss": 0.1271, "step": 1422000 }, { "epoch": 0.1, "learning_rate": 4.839330056497176e-05, "loss": 0.123, "step": 1422500 }, { "epoch": 0.1, "learning_rate": 4.839273559322034e-05, "loss": 0.1195, "step": 1423000 }, { "epoch": 0.1, "learning_rate": 4.839217175141243e-05, "loss": 0.1188, "step": 1423500 }, { "epoch": 0.1, "learning_rate": 4.839160677966102e-05, "loss": 0.1203, "step": 1424000 }, { "epoch": 0.1, "learning_rate": 4.839104180790961e-05, "loss": 0.1279, "step": 1424500 }, { "epoch": 0.1, "learning_rate": 4.8390476836158194e-05, "loss": 0.1183, "step": 1425000 }, { "epoch": 0.1, "learning_rate": 4.8389911864406785e-05, "loss": 0.1181, "step": 1425500 }, { "epoch": 0.1, "learning_rate": 4.838934802259887e-05, "loss": 0.1233, "step": 1426000 }, { "epoch": 0.1, "learning_rate": 4.8388783050847456e-05, "loss": 0.1238, "step": 1426500 }, { "epoch": 0.1, "learning_rate": 4.8388218079096046e-05, "loss": 0.1249, "step": 1427000 }, { "epoch": 0.1, "learning_rate": 4.838765423728814e-05, "loss": 0.1191, "step": 1427500 }, { "epoch": 0.1, "learning_rate": 4.838708926553673e-05, "loss": 0.121, "step": 1428000 }, { "epoch": 0.1, "learning_rate": 4.8386524293785315e-05, "loss": 0.131, "step": 1428500 }, { "epoch": 0.1, "learning_rate": 4.8385959322033905e-05, "loss": 0.1272, "step": 1429000 }, { "epoch": 0.1, "learning_rate": 4.838539435028249e-05, "loss": 0.1227, "step": 1429500 }, { "epoch": 0.1, "learning_rate": 4.838482937853108e-05, "loss": 0.1236, "step": 1430000 }, { "epoch": 0.1, "learning_rate": 4.8384264406779663e-05, "loss": 0.129, "step": 1430500 }, { "epoch": 0.1, "learning_rate": 4.8383699435028254e-05, "loss": 0.1214, "step": 1431000 }, { "epoch": 0.1, "learning_rate": 4.838313446327684e-05, "loss": 0.123, "step": 1431500 }, { "epoch": 0.1, "learning_rate": 4.838256949152543e-05, "loss": 0.1246, "step": 1432000 }, { "epoch": 0.1, "learning_rate": 4.8382005649717516e-05, "loss": 0.12, "step": 1432500 }, { "epoch": 0.1, "learning_rate": 4.8381440677966106e-05, "loss": 0.1181, "step": 1433000 }, { "epoch": 0.1, "learning_rate": 4.838087570621469e-05, "loss": 0.1218, "step": 1433500 }, { "epoch": 0.1, "learning_rate": 4.838031073446328e-05, "loss": 0.1364, "step": 1434000 }, { "epoch": 0.1, "learning_rate": 4.8379745762711864e-05, "loss": 0.1262, "step": 1434500 }, { "epoch": 0.1, "learning_rate": 4.8379180790960455e-05, "loss": 0.1191, "step": 1435000 }, { "epoch": 0.1, "learning_rate": 4.8378615819209046e-05, "loss": 0.1228, "step": 1435500 }, { "epoch": 0.1, "learning_rate": 4.837805084745763e-05, "loss": 0.1175, "step": 1436000 }, { "epoch": 0.1, "learning_rate": 4.8377487005649717e-05, "loss": 0.1206, "step": 1436500 }, { "epoch": 0.1, "learning_rate": 4.837692203389831e-05, "loss": 0.1279, "step": 1437000 }, { "epoch": 0.1, "learning_rate": 4.837635706214689e-05, "loss": 0.1273, "step": 1437500 }, { "epoch": 0.1, "learning_rate": 4.837579209039548e-05, "loss": 0.13, "step": 1438000 }, { "epoch": 0.1, "learning_rate": 4.8375228248587575e-05, "loss": 0.1241, "step": 1438500 }, { "epoch": 0.1, "learning_rate": 4.8374663276836166e-05, "loss": 0.1217, "step": 1439000 }, { "epoch": 0.1, "learning_rate": 4.837409830508475e-05, "loss": 0.1265, "step": 1439500 }, { "epoch": 0.1, "learning_rate": 4.837353333333334e-05, "loss": 0.1179, "step": 1440000 }, { "epoch": 0.1, "learning_rate": 4.8372968361581924e-05, "loss": 0.1211, "step": 1440500 }, { "epoch": 0.1, "learning_rate": 4.837240451977401e-05, "loss": 0.1279, "step": 1441000 }, { "epoch": 0.1, "learning_rate": 4.83718395480226e-05, "loss": 0.1198, "step": 1441500 }, { "epoch": 0.1, "learning_rate": 4.8371274576271186e-05, "loss": 0.1239, "step": 1442000 }, { "epoch": 0.1, "learning_rate": 4.8370709604519776e-05, "loss": 0.1265, "step": 1442500 }, { "epoch": 0.1, "learning_rate": 4.837014463276837e-05, "loss": 0.117, "step": 1443000 }, { "epoch": 0.1, "learning_rate": 4.8369580790960454e-05, "loss": 0.1161, "step": 1443500 }, { "epoch": 0.1, "learning_rate": 4.836901581920904e-05, "loss": 0.1219, "step": 1444000 }, { "epoch": 0.1, "learning_rate": 4.836845084745763e-05, "loss": 0.1223, "step": 1444500 }, { "epoch": 0.1, "learning_rate": 4.836788587570621e-05, "loss": 0.1194, "step": 1445000 }, { "epoch": 0.1, "learning_rate": 4.83673209039548e-05, "loss": 0.1197, "step": 1445500 }, { "epoch": 0.1, "learning_rate": 4.83667570621469e-05, "loss": 0.1269, "step": 1446000 }, { "epoch": 0.1, "learning_rate": 4.836619209039549e-05, "loss": 0.1267, "step": 1446500 }, { "epoch": 0.1, "learning_rate": 4.836562711864407e-05, "loss": 0.1205, "step": 1447000 }, { "epoch": 0.1, "learning_rate": 4.836506214689266e-05, "loss": 0.1218, "step": 1447500 }, { "epoch": 0.1, "learning_rate": 4.836449830508475e-05, "loss": 0.1206, "step": 1448000 }, { "epoch": 0.1, "learning_rate": 4.836393333333333e-05, "loss": 0.1234, "step": 1448500 }, { "epoch": 0.1, "learning_rate": 4.8363368361581924e-05, "loss": 0.1251, "step": 1449000 }, { "epoch": 0.1, "learning_rate": 4.8362803389830514e-05, "loss": 0.1232, "step": 1449500 }, { "epoch": 0.1, "learning_rate": 4.83622384180791e-05, "loss": 0.1173, "step": 1450000 }, { "epoch": 0.1, "learning_rate": 4.836167344632769e-05, "loss": 0.1217, "step": 1450500 }, { "epoch": 0.1, "learning_rate": 4.836110847457627e-05, "loss": 0.1229, "step": 1451000 }, { "epoch": 0.1, "learning_rate": 4.836054350282486e-05, "loss": 0.1192, "step": 1451500 }, { "epoch": 0.1, "learning_rate": 4.835997966101695e-05, "loss": 0.1231, "step": 1452000 }, { "epoch": 0.1, "learning_rate": 4.8359415819209044e-05, "loss": 0.1217, "step": 1452500 }, { "epoch": 0.1, "learning_rate": 4.8358850847457635e-05, "loss": 0.1262, "step": 1453000 }, { "epoch": 0.1, "learning_rate": 4.835828587570622e-05, "loss": 0.1198, "step": 1453500 }, { "epoch": 0.1, "learning_rate": 4.835772090395481e-05, "loss": 0.1196, "step": 1454000 }, { "epoch": 0.1, "learning_rate": 4.835715593220339e-05, "loss": 0.1319, "step": 1454500 }, { "epoch": 0.1, "learning_rate": 4.8356590960451983e-05, "loss": 0.1195, "step": 1455000 }, { "epoch": 0.1, "learning_rate": 4.835602711864407e-05, "loss": 0.1213, "step": 1455500 }, { "epoch": 0.1, "learning_rate": 4.8355462146892654e-05, "loss": 0.1249, "step": 1456000 }, { "epoch": 0.1, "learning_rate": 4.8354897175141245e-05, "loss": 0.1179, "step": 1456500 }, { "epoch": 0.1, "learning_rate": 4.8354332203389836e-05, "loss": 0.1138, "step": 1457000 }, { "epoch": 0.1, "learning_rate": 4.835376723163842e-05, "loss": 0.1207, "step": 1457500 }, { "epoch": 0.1, "learning_rate": 4.835320338983051e-05, "loss": 0.1214, "step": 1458000 }, { "epoch": 0.1, "learning_rate": 4.83526384180791e-05, "loss": 0.1213, "step": 1458500 }, { "epoch": 0.1, "learning_rate": 4.835207457627119e-05, "loss": 0.1203, "step": 1459000 }, { "epoch": 0.1, "learning_rate": 4.835150960451978e-05, "loss": 0.1202, "step": 1459500 }, { "epoch": 0.1, "learning_rate": 4.8350944632768366e-05, "loss": 0.1171, "step": 1460000 }, { "epoch": 0.1, "learning_rate": 4.8350379661016956e-05, "loss": 0.1193, "step": 1460500 }, { "epoch": 0.1, "learning_rate": 4.834981468926554e-05, "loss": 0.1255, "step": 1461000 }, { "epoch": 0.1, "learning_rate": 4.834924971751413e-05, "loss": 0.1239, "step": 1461500 }, { "epoch": 0.1, "learning_rate": 4.8348684745762714e-05, "loss": 0.1178, "step": 1462000 }, { "epoch": 0.1, "learning_rate": 4.8348119774011305e-05, "loss": 0.1249, "step": 1462500 }, { "epoch": 0.1, "learning_rate": 4.834755480225989e-05, "loss": 0.1141, "step": 1463000 }, { "epoch": 0.1, "learning_rate": 4.834699096045198e-05, "loss": 0.1208, "step": 1463500 }, { "epoch": 0.1, "learning_rate": 4.8346425988700567e-05, "loss": 0.1202, "step": 1464000 }, { "epoch": 0.1, "learning_rate": 4.834586101694916e-05, "loss": 0.126, "step": 1464500 }, { "epoch": 0.1, "learning_rate": 4.834529604519774e-05, "loss": 0.1202, "step": 1465000 }, { "epoch": 0.1, "learning_rate": 4.834473107344633e-05, "loss": 0.1261, "step": 1465500 }, { "epoch": 0.1, "learning_rate": 4.834416723163842e-05, "loss": 0.1177, "step": 1466000 }, { "epoch": 0.1, "learning_rate": 4.8343602259887e-05, "loss": 0.1257, "step": 1466500 }, { "epoch": 0.1, "learning_rate": 4.834303728813559e-05, "loss": 0.1236, "step": 1467000 }, { "epoch": 0.1, "learning_rate": 4.8342472316384184e-05, "loss": 0.1115, "step": 1467500 }, { "epoch": 0.1, "learning_rate": 4.834190734463277e-05, "loss": 0.1203, "step": 1468000 }, { "epoch": 0.1, "learning_rate": 4.834134350282486e-05, "loss": 0.1239, "step": 1468500 }, { "epoch": 0.1, "learning_rate": 4.834077853107345e-05, "loss": 0.1227, "step": 1469000 }, { "epoch": 0.1, "learning_rate": 4.8340213559322036e-05, "loss": 0.1158, "step": 1469500 }, { "epoch": 0.1, "learning_rate": 4.8339648587570626e-05, "loss": 0.1246, "step": 1470000 }, { "epoch": 0.1, "learning_rate": 4.833908361581922e-05, "loss": 0.1231, "step": 1470500 }, { "epoch": 0.1, "learning_rate": 4.83385186440678e-05, "loss": 0.1205, "step": 1471000 }, { "epoch": 0.1, "learning_rate": 4.833795367231639e-05, "loss": 0.1241, "step": 1471500 }, { "epoch": 0.1, "learning_rate": 4.8337388700564975e-05, "loss": 0.1148, "step": 1472000 }, { "epoch": 0.1, "learning_rate": 4.833682485875706e-05, "loss": 0.122, "step": 1472500 }, { "epoch": 0.1, "learning_rate": 4.833625988700565e-05, "loss": 0.1296, "step": 1473000 }, { "epoch": 0.1, "learning_rate": 4.833569491525424e-05, "loss": 0.1212, "step": 1473500 }, { "epoch": 0.1, "learning_rate": 4.833512994350283e-05, "loss": 0.1196, "step": 1474000 }, { "epoch": 0.1, "learning_rate": 4.833456497175142e-05, "loss": 0.1153, "step": 1474500 }, { "epoch": 0.1, "learning_rate": 4.8334e-05, "loss": 0.1286, "step": 1475000 }, { "epoch": 0.1, "learning_rate": 4.833343502824859e-05, "loss": 0.1258, "step": 1475500 }, { "epoch": 0.1, "learning_rate": 4.833287118644068e-05, "loss": 0.1275, "step": 1476000 }, { "epoch": 0.1, "learning_rate": 4.833230621468926e-05, "loss": 0.1229, "step": 1476500 }, { "epoch": 0.1, "learning_rate": 4.8331741242937854e-05, "loss": 0.1139, "step": 1477000 }, { "epoch": 0.1, "learning_rate": 4.8331176271186445e-05, "loss": 0.1179, "step": 1477500 }, { "epoch": 0.1, "learning_rate": 4.833061242937854e-05, "loss": 0.116, "step": 1478000 }, { "epoch": 0.1, "learning_rate": 4.833004745762712e-05, "loss": 0.117, "step": 1478500 }, { "epoch": 0.1, "learning_rate": 4.832948248587571e-05, "loss": 0.1211, "step": 1479000 }, { "epoch": 0.1, "learning_rate": 4.83289175141243e-05, "loss": 0.1201, "step": 1479500 }, { "epoch": 0.1, "learning_rate": 4.832835254237289e-05, "loss": 0.1207, "step": 1480000 }, { "epoch": 0.1, "learning_rate": 4.832778757062147e-05, "loss": 0.1168, "step": 1480500 }, { "epoch": 0.1, "learning_rate": 4.8327223728813565e-05, "loss": 0.1187, "step": 1481000 }, { "epoch": 0.1, "learning_rate": 4.832665875706215e-05, "loss": 0.1208, "step": 1481500 }, { "epoch": 0.1, "learning_rate": 4.832609378531074e-05, "loss": 0.1281, "step": 1482000 }, { "epoch": 0.1, "learning_rate": 4.832552881355932e-05, "loss": 0.1306, "step": 1482500 }, { "epoch": 0.1, "learning_rate": 4.832496497175141e-05, "loss": 0.1196, "step": 1483000 }, { "epoch": 0.1, "learning_rate": 4.83244e-05, "loss": 0.125, "step": 1483500 }, { "epoch": 0.1, "learning_rate": 4.8323835028248585e-05, "loss": 0.1208, "step": 1484000 }, { "epoch": 0.1, "learning_rate": 4.8323270056497175e-05, "loss": 0.1169, "step": 1484500 }, { "epoch": 0.1, "learning_rate": 4.8322705084745766e-05, "loss": 0.1197, "step": 1485000 }, { "epoch": 0.1, "learning_rate": 4.832214124293786e-05, "loss": 0.1247, "step": 1485500 }, { "epoch": 0.1, "learning_rate": 4.8321576271186444e-05, "loss": 0.1168, "step": 1486000 }, { "epoch": 0.1, "learning_rate": 4.8321011299435034e-05, "loss": 0.1282, "step": 1486500 }, { "epoch": 0.1, "learning_rate": 4.832044632768362e-05, "loss": 0.1162, "step": 1487000 }, { "epoch": 0.1, "learning_rate": 4.8319882485875705e-05, "loss": 0.1191, "step": 1487500 }, { "epoch": 0.1, "learning_rate": 4.8319317514124296e-05, "loss": 0.1243, "step": 1488000 }, { "epoch": 0.1, "learning_rate": 4.8318752542372887e-05, "loss": 0.1137, "step": 1488500 }, { "epoch": 0.1, "learning_rate": 4.831818757062147e-05, "loss": 0.128, "step": 1489000 }, { "epoch": 0.1, "learning_rate": 4.831762259887006e-05, "loss": 0.1211, "step": 1489500 }, { "epoch": 0.1, "learning_rate": 4.8317057627118645e-05, "loss": 0.1187, "step": 1490000 }, { "epoch": 0.1, "learning_rate": 4.831649378531073e-05, "loss": 0.1221, "step": 1490500 }, { "epoch": 0.1, "learning_rate": 4.831592881355932e-05, "loss": 0.1246, "step": 1491000 }, { "epoch": 0.1, "learning_rate": 4.831536384180791e-05, "loss": 0.1145, "step": 1491500 }, { "epoch": 0.1, "learning_rate": 4.83147988700565e-05, "loss": 0.1219, "step": 1492000 }, { "epoch": 0.1, "learning_rate": 4.831423502824859e-05, "loss": 0.129, "step": 1492500 }, { "epoch": 0.1, "learning_rate": 4.831367005649718e-05, "loss": 0.12, "step": 1493000 }, { "epoch": 0.1, "learning_rate": 4.8313105084745765e-05, "loss": 0.1179, "step": 1493500 }, { "epoch": 0.1, "learning_rate": 4.8312540112994356e-05, "loss": 0.1235, "step": 1494000 }, { "epoch": 0.1, "learning_rate": 4.831197514124294e-05, "loss": 0.1237, "step": 1494500 }, { "epoch": 0.1, "learning_rate": 4.8311411299435034e-05, "loss": 0.112, "step": 1495000 }, { "epoch": 0.1, "learning_rate": 4.831084632768362e-05, "loss": 0.1169, "step": 1495500 }, { "epoch": 0.1, "learning_rate": 4.831028135593221e-05, "loss": 0.1266, "step": 1496000 }, { "epoch": 0.1, "learning_rate": 4.830971638418079e-05, "loss": 0.1272, "step": 1496500 }, { "epoch": 0.1, "learning_rate": 4.830915141242938e-05, "loss": 0.1219, "step": 1497000 }, { "epoch": 0.1, "learning_rate": 4.8308586440677966e-05, "loss": 0.1229, "step": 1497500 }, { "epoch": 0.1, "learning_rate": 4.830802146892656e-05, "loss": 0.1158, "step": 1498000 }, { "epoch": 0.1, "learning_rate": 4.830745649717514e-05, "loss": 0.1169, "step": 1498500 }, { "epoch": 0.1, "learning_rate": 4.8306892655367235e-05, "loss": 0.1218, "step": 1499000 }, { "epoch": 0.1, "learning_rate": 4.830632881355933e-05, "loss": 0.1238, "step": 1499500 }, { "epoch": 0.1, "learning_rate": 4.830576384180791e-05, "loss": 0.119, "step": 1500000 }, { "epoch": 0.1, "learning_rate": 4.83051988700565e-05, "loss": 0.1214, "step": 1500500 }, { "epoch": 0.1, "learning_rate": 4.830463389830509e-05, "loss": 0.1243, "step": 1501000 }, { "epoch": 0.1, "learning_rate": 4.830406892655368e-05, "loss": 0.1241, "step": 1501500 }, { "epoch": 0.1, "learning_rate": 4.830350395480226e-05, "loss": 0.1173, "step": 1502000 }, { "epoch": 0.1, "learning_rate": 4.8302940112994355e-05, "loss": 0.1221, "step": 1502500 }, { "epoch": 0.1, "learning_rate": 4.830237514124294e-05, "loss": 0.1192, "step": 1503000 }, { "epoch": 0.1, "learning_rate": 4.830181016949153e-05, "loss": 0.1164, "step": 1503500 }, { "epoch": 0.1, "learning_rate": 4.830124519774011e-05, "loss": 0.1175, "step": 1504000 }, { "epoch": 0.1, "learning_rate": 4.8300680225988704e-05, "loss": 0.1186, "step": 1504500 }, { "epoch": 0.1, "learning_rate": 4.830011638418079e-05, "loss": 0.115, "step": 1505000 }, { "epoch": 0.1, "learning_rate": 4.8299552542372885e-05, "loss": 0.122, "step": 1505500 }, { "epoch": 0.1, "learning_rate": 4.8298987570621476e-05, "loss": 0.1258, "step": 1506000 }, { "epoch": 0.1, "learning_rate": 4.829842259887006e-05, "loss": 0.1205, "step": 1506500 }, { "epoch": 0.1, "learning_rate": 4.829785762711865e-05, "loss": 0.1225, "step": 1507000 }, { "epoch": 0.1, "learning_rate": 4.8297292655367234e-05, "loss": 0.1212, "step": 1507500 }, { "epoch": 0.1, "learning_rate": 4.8296727683615824e-05, "loss": 0.1223, "step": 1508000 }, { "epoch": 0.1, "learning_rate": 4.829616271186441e-05, "loss": 0.118, "step": 1508500 }, { "epoch": 0.1, "learning_rate": 4.8295597740113e-05, "loss": 0.1178, "step": 1509000 }, { "epoch": 0.1, "learning_rate": 4.829503276836159e-05, "loss": 0.1235, "step": 1509500 }, { "epoch": 0.1, "learning_rate": 4.829446779661017e-05, "loss": 0.1189, "step": 1510000 }, { "epoch": 0.1, "learning_rate": 4.8293902824858764e-05, "loss": 0.1262, "step": 1510500 }, { "epoch": 0.1, "learning_rate": 4.829333785310735e-05, "loss": 0.1203, "step": 1511000 }, { "epoch": 0.1, "learning_rate": 4.8292774011299435e-05, "loss": 0.1262, "step": 1511500 }, { "epoch": 0.1, "learning_rate": 4.829221016949152e-05, "loss": 0.1159, "step": 1512000 }, { "epoch": 0.1, "learning_rate": 4.829164519774011e-05, "loss": 0.1217, "step": 1512500 }, { "epoch": 0.1, "learning_rate": 4.82910802259887e-05, "loss": 0.1177, "step": 1513000 }, { "epoch": 0.1, "learning_rate": 4.829051525423729e-05, "loss": 0.1202, "step": 1513500 }, { "epoch": 0.1, "learning_rate": 4.828995028248588e-05, "loss": 0.1232, "step": 1514000 }, { "epoch": 0.1, "learning_rate": 4.828938644067797e-05, "loss": 0.1207, "step": 1514500 }, { "epoch": 0.1, "learning_rate": 4.8288821468926555e-05, "loss": 0.124, "step": 1515000 }, { "epoch": 0.1, "learning_rate": 4.8288256497175146e-05, "loss": 0.1231, "step": 1515500 }, { "epoch": 0.1, "learning_rate": 4.8287691525423737e-05, "loss": 0.1193, "step": 1516000 }, { "epoch": 0.1, "learning_rate": 4.828712655367232e-05, "loss": 0.1197, "step": 1516500 }, { "epoch": 0.1, "learning_rate": 4.828656158192091e-05, "loss": 0.118, "step": 1517000 }, { "epoch": 0.1, "learning_rate": 4.8285997740113e-05, "loss": 0.1251, "step": 1517500 }, { "epoch": 0.1, "learning_rate": 4.828543276836158e-05, "loss": 0.1222, "step": 1518000 }, { "epoch": 0.1, "learning_rate": 4.828486779661017e-05, "loss": 0.1182, "step": 1518500 }, { "epoch": 0.1, "learning_rate": 4.8284302824858756e-05, "loss": 0.1269, "step": 1519000 }, { "epoch": 0.1, "learning_rate": 4.828373785310735e-05, "loss": 0.1132, "step": 1519500 }, { "epoch": 0.1, "learning_rate": 4.8283174011299434e-05, "loss": 0.1217, "step": 1520000 }, { "epoch": 0.1, "learning_rate": 4.8282609039548025e-05, "loss": 0.1223, "step": 1520500 }, { "epoch": 0.1, "learning_rate": 4.828204406779661e-05, "loss": 0.1294, "step": 1521000 }, { "epoch": 0.1, "learning_rate": 4.82814790960452e-05, "loss": 0.1234, "step": 1521500 }, { "epoch": 0.1, "learning_rate": 4.828091638418079e-05, "loss": 0.1274, "step": 1522000 }, { "epoch": 0.1, "learning_rate": 4.828035141242938e-05, "loss": 0.1231, "step": 1522500 }, { "epoch": 0.1, "learning_rate": 4.827978644067797e-05, "loss": 0.1205, "step": 1523000 }, { "epoch": 0.1, "learning_rate": 4.8279221468926555e-05, "loss": 0.1204, "step": 1523500 }, { "epoch": 0.1, "learning_rate": 4.8278656497175145e-05, "loss": 0.118, "step": 1524000 }, { "epoch": 0.1, "learning_rate": 4.827809152542373e-05, "loss": 0.1162, "step": 1524500 }, { "epoch": 0.1, "learning_rate": 4.827752655367232e-05, "loss": 0.115, "step": 1525000 }, { "epoch": 0.1, "learning_rate": 4.8276961581920903e-05, "loss": 0.1184, "step": 1525500 }, { "epoch": 0.1, "learning_rate": 4.8276396610169494e-05, "loss": 0.1197, "step": 1526000 }, { "epoch": 0.1, "learning_rate": 4.827583276836158e-05, "loss": 0.1185, "step": 1526500 }, { "epoch": 0.1, "learning_rate": 4.827526779661017e-05, "loss": 0.1236, "step": 1527000 }, { "epoch": 0.1, "learning_rate": 4.8274702824858756e-05, "loss": 0.126, "step": 1527500 }, { "epoch": 0.1, "learning_rate": 4.8274137853107346e-05, "loss": 0.119, "step": 1528000 }, { "epoch": 0.1, "learning_rate": 4.827357288135593e-05, "loss": 0.1219, "step": 1528500 }, { "epoch": 0.1, "learning_rate": 4.8273009039548024e-05, "loss": 0.1195, "step": 1529000 }, { "epoch": 0.1, "learning_rate": 4.8272444067796615e-05, "loss": 0.1149, "step": 1529500 }, { "epoch": 0.1, "learning_rate": 4.8271879096045205e-05, "loss": 0.1268, "step": 1530000 }, { "epoch": 0.1, "learning_rate": 4.827131412429379e-05, "loss": 0.1181, "step": 1530500 }, { "epoch": 0.1, "learning_rate": 4.827074915254238e-05, "loss": 0.122, "step": 1531000 }, { "epoch": 0.1, "learning_rate": 4.827018531073447e-05, "loss": 0.1219, "step": 1531500 }, { "epoch": 0.1, "learning_rate": 4.826962033898305e-05, "loss": 0.1236, "step": 1532000 }, { "epoch": 0.1, "learning_rate": 4.826905536723164e-05, "loss": 0.1189, "step": 1532500 }, { "epoch": 0.1, "learning_rate": 4.8268490395480225e-05, "loss": 0.1246, "step": 1533000 }, { "epoch": 0.1, "learning_rate": 4.826792655367232e-05, "loss": 0.1191, "step": 1533500 }, { "epoch": 0.1, "learning_rate": 4.82673615819209e-05, "loss": 0.1169, "step": 1534000 }, { "epoch": 0.1, "learning_rate": 4.826679661016949e-05, "loss": 0.1178, "step": 1534500 }, { "epoch": 0.1, "learning_rate": 4.826623163841808e-05, "loss": 0.1141, "step": 1535000 }, { "epoch": 0.1, "learning_rate": 4.826566666666667e-05, "loss": 0.117, "step": 1535500 }, { "epoch": 0.1, "learning_rate": 4.826510282485876e-05, "loss": 0.123, "step": 1536000 }, { "epoch": 0.1, "learning_rate": 4.8264537853107345e-05, "loss": 0.1192, "step": 1536500 }, { "epoch": 0.1, "learning_rate": 4.8263972881355936e-05, "loss": 0.1188, "step": 1537000 }, { "epoch": 0.1, "learning_rate": 4.8263407909604527e-05, "loss": 0.12, "step": 1537500 }, { "epoch": 0.1, "learning_rate": 4.826284293785311e-05, "loss": 0.126, "step": 1538000 }, { "epoch": 0.1, "learning_rate": 4.82622779661017e-05, "loss": 0.1104, "step": 1538500 }, { "epoch": 0.1, "learning_rate": 4.826171412429379e-05, "loss": 0.1192, "step": 1539000 }, { "epoch": 0.1, "learning_rate": 4.826114915254237e-05, "loss": 0.1194, "step": 1539500 }, { "epoch": 0.1, "learning_rate": 4.826058418079096e-05, "loss": 0.1185, "step": 1540000 }, { "epoch": 0.1, "learning_rate": 4.826001920903955e-05, "loss": 0.118, "step": 1540500 }, { "epoch": 0.1, "learning_rate": 4.825945423728814e-05, "loss": 0.1306, "step": 1541000 }, { "epoch": 0.1, "learning_rate": 4.825888926553673e-05, "loss": 0.1145, "step": 1541500 }, { "epoch": 0.1, "learning_rate": 4.8258325423728815e-05, "loss": 0.1224, "step": 1542000 }, { "epoch": 0.1, "learning_rate": 4.82577604519774e-05, "loss": 0.1216, "step": 1542500 }, { "epoch": 0.1, "learning_rate": 4.825719548022599e-05, "loss": 0.1214, "step": 1543000 }, { "epoch": 0.1, "learning_rate": 4.825663050847457e-05, "loss": 0.1185, "step": 1543500 }, { "epoch": 0.1, "learning_rate": 4.8256066666666674e-05, "loss": 0.1193, "step": 1544000 }, { "epoch": 0.1, "learning_rate": 4.825550169491526e-05, "loss": 0.1239, "step": 1544500 }, { "epoch": 0.1, "learning_rate": 4.825493672316385e-05, "loss": 0.1165, "step": 1545000 }, { "epoch": 0.1, "learning_rate": 4.825437175141243e-05, "loss": 0.1236, "step": 1545500 }, { "epoch": 0.1, "learning_rate": 4.825380790960452e-05, "loss": 0.1202, "step": 1546000 }, { "epoch": 0.1, "learning_rate": 4.825324293785311e-05, "loss": 0.1173, "step": 1546500 }, { "epoch": 0.1, "learning_rate": 4.8252677966101693e-05, "loss": 0.1168, "step": 1547000 }, { "epoch": 0.1, "learning_rate": 4.8252112994350284e-05, "loss": 0.1184, "step": 1547500 }, { "epoch": 0.1, "learning_rate": 4.8251548022598875e-05, "loss": 0.1164, "step": 1548000 }, { "epoch": 0.1, "learning_rate": 4.825098305084746e-05, "loss": 0.1226, "step": 1548500 }, { "epoch": 0.11, "learning_rate": 4.825041807909605e-05, "loss": 0.1161, "step": 1549000 }, { "epoch": 0.11, "learning_rate": 4.8249854237288136e-05, "loss": 0.1192, "step": 1549500 }, { "epoch": 0.11, "learning_rate": 4.824928926553673e-05, "loss": 0.1196, "step": 1550000 }, { "epoch": 0.11, "learning_rate": 4.824872429378531e-05, "loss": 0.1211, "step": 1550500 }, { "epoch": 0.11, "learning_rate": 4.82481593220339e-05, "loss": 0.1226, "step": 1551000 }, { "epoch": 0.11, "learning_rate": 4.8247594350282485e-05, "loss": 0.1203, "step": 1551500 }, { "epoch": 0.11, "learning_rate": 4.8247029378531076e-05, "loss": 0.1183, "step": 1552000 }, { "epoch": 0.11, "learning_rate": 4.824646553672317e-05, "loss": 0.1224, "step": 1552500 }, { "epoch": 0.11, "learning_rate": 4.8245900564971753e-05, "loss": 0.122, "step": 1553000 }, { "epoch": 0.11, "learning_rate": 4.8245335593220344e-05, "loss": 0.1213, "step": 1553500 }, { "epoch": 0.11, "learning_rate": 4.824477062146893e-05, "loss": 0.1198, "step": 1554000 }, { "epoch": 0.11, "learning_rate": 4.824420564971752e-05, "loss": 0.1122, "step": 1554500 }, { "epoch": 0.11, "learning_rate": 4.8243641807909606e-05, "loss": 0.1229, "step": 1555000 }, { "epoch": 0.11, "learning_rate": 4.8243076836158196e-05, "loss": 0.1162, "step": 1555500 }, { "epoch": 0.11, "learning_rate": 4.824251186440678e-05, "loss": 0.1221, "step": 1556000 }, { "epoch": 0.11, "learning_rate": 4.824194689265537e-05, "loss": 0.1174, "step": 1556500 }, { "epoch": 0.11, "learning_rate": 4.824138305084746e-05, "loss": 0.1179, "step": 1557000 }, { "epoch": 0.11, "learning_rate": 4.824081807909605e-05, "loss": 0.1265, "step": 1557500 }, { "epoch": 0.11, "learning_rate": 4.824025310734463e-05, "loss": 0.122, "step": 1558000 }, { "epoch": 0.11, "learning_rate": 4.823968813559322e-05, "loss": 0.1266, "step": 1558500 }, { "epoch": 0.11, "learning_rate": 4.8239123163841807e-05, "loss": 0.116, "step": 1559000 }, { "epoch": 0.11, "learning_rate": 4.82385581920904e-05, "loss": 0.1167, "step": 1559500 }, { "epoch": 0.11, "learning_rate": 4.823799322033898e-05, "loss": 0.114, "step": 1560000 }, { "epoch": 0.11, "learning_rate": 4.823742824858757e-05, "loss": 0.1193, "step": 1560500 }, { "epoch": 0.11, "learning_rate": 4.8236864406779665e-05, "loss": 0.1167, "step": 1561000 }, { "epoch": 0.11, "learning_rate": 4.823630056497175e-05, "loss": 0.1195, "step": 1561500 }, { "epoch": 0.11, "learning_rate": 4.823573559322034e-05, "loss": 0.1165, "step": 1562000 }, { "epoch": 0.11, "learning_rate": 4.823517062146893e-05, "loss": 0.1195, "step": 1562500 }, { "epoch": 0.11, "learning_rate": 4.823460564971752e-05, "loss": 0.1221, "step": 1563000 }, { "epoch": 0.11, "learning_rate": 4.82340406779661e-05, "loss": 0.1205, "step": 1563500 }, { "epoch": 0.11, "learning_rate": 4.8233476836158195e-05, "loss": 0.1172, "step": 1564000 }, { "epoch": 0.11, "learning_rate": 4.823291186440678e-05, "loss": 0.1252, "step": 1564500 }, { "epoch": 0.11, "learning_rate": 4.823234689265537e-05, "loss": 0.1229, "step": 1565000 }, { "epoch": 0.11, "learning_rate": 4.8231781920903954e-05, "loss": 0.1229, "step": 1565500 }, { "epoch": 0.11, "learning_rate": 4.8231216949152544e-05, "loss": 0.1185, "step": 1566000 }, { "epoch": 0.11, "learning_rate": 4.823065197740113e-05, "loss": 0.1166, "step": 1566500 }, { "epoch": 0.11, "learning_rate": 4.823008700564972e-05, "loss": 0.1234, "step": 1567000 }, { "epoch": 0.11, "learning_rate": 4.82295220338983e-05, "loss": 0.1232, "step": 1567500 }, { "epoch": 0.11, "learning_rate": 4.8228958192090396e-05, "loss": 0.1199, "step": 1568000 }, { "epoch": 0.11, "learning_rate": 4.822839435028249e-05, "loss": 0.1146, "step": 1568500 }, { "epoch": 0.11, "learning_rate": 4.8227829378531074e-05, "loss": 0.1144, "step": 1569000 }, { "epoch": 0.11, "learning_rate": 4.8227264406779665e-05, "loss": 0.1214, "step": 1569500 }, { "epoch": 0.11, "learning_rate": 4.822669943502825e-05, "loss": 0.1175, "step": 1570000 }, { "epoch": 0.11, "learning_rate": 4.822613446327684e-05, "loss": 0.1151, "step": 1570500 }, { "epoch": 0.11, "learning_rate": 4.8225570621468926e-05, "loss": 0.1279, "step": 1571000 }, { "epoch": 0.11, "learning_rate": 4.822500564971752e-05, "loss": 0.117, "step": 1571500 }, { "epoch": 0.11, "learning_rate": 4.822444180790961e-05, "loss": 0.1162, "step": 1572000 }, { "epoch": 0.11, "learning_rate": 4.8223876836158195e-05, "loss": 0.1254, "step": 1572500 }, { "epoch": 0.11, "learning_rate": 4.8223311864406785e-05, "loss": 0.1177, "step": 1573000 }, { "epoch": 0.11, "learning_rate": 4.822274689265537e-05, "loss": 0.1199, "step": 1573500 }, { "epoch": 0.11, "learning_rate": 4.822218192090396e-05, "loss": 0.1152, "step": 1574000 }, { "epoch": 0.11, "learning_rate": 4.8221616949152543e-05, "loss": 0.1193, "step": 1574500 }, { "epoch": 0.11, "learning_rate": 4.8221051977401134e-05, "loss": 0.1196, "step": 1575000 }, { "epoch": 0.11, "learning_rate": 4.822048700564972e-05, "loss": 0.1262, "step": 1575500 }, { "epoch": 0.11, "learning_rate": 4.821992203389831e-05, "loss": 0.1192, "step": 1576000 }, { "epoch": 0.11, "learning_rate": 4.82193570621469e-05, "loss": 0.1139, "step": 1576500 }, { "epoch": 0.11, "learning_rate": 4.821879209039548e-05, "loss": 0.1279, "step": 1577000 }, { "epoch": 0.11, "learning_rate": 4.8218227118644073e-05, "loss": 0.1171, "step": 1577500 }, { "epoch": 0.11, "learning_rate": 4.821766214689266e-05, "loss": 0.1142, "step": 1578000 }, { "epoch": 0.11, "learning_rate": 4.8217098305084744e-05, "loss": 0.1207, "step": 1578500 }, { "epoch": 0.11, "learning_rate": 4.821653446327684e-05, "loss": 0.1174, "step": 1579000 }, { "epoch": 0.11, "learning_rate": 4.821596949152543e-05, "loss": 0.122, "step": 1579500 }, { "epoch": 0.11, "learning_rate": 4.821540451977401e-05, "loss": 0.1228, "step": 1580000 }, { "epoch": 0.11, "learning_rate": 4.82148395480226e-05, "loss": 0.1181, "step": 1580500 }, { "epoch": 0.11, "learning_rate": 4.821427457627119e-05, "loss": 0.12, "step": 1581000 }, { "epoch": 0.11, "learning_rate": 4.821371073446328e-05, "loss": 0.1174, "step": 1581500 }, { "epoch": 0.11, "learning_rate": 4.8213145762711865e-05, "loss": 0.1201, "step": 1582000 }, { "epoch": 0.11, "learning_rate": 4.8212580790960456e-05, "loss": 0.1192, "step": 1582500 }, { "epoch": 0.11, "learning_rate": 4.8212015819209046e-05, "loss": 0.1145, "step": 1583000 }, { "epoch": 0.11, "learning_rate": 4.821145197740113e-05, "loss": 0.1248, "step": 1583500 }, { "epoch": 0.11, "learning_rate": 4.821088700564972e-05, "loss": 0.1217, "step": 1584000 }, { "epoch": 0.11, "learning_rate": 4.821032203389831e-05, "loss": 0.1158, "step": 1584500 }, { "epoch": 0.11, "learning_rate": 4.820975706214689e-05, "loss": 0.1237, "step": 1585000 }, { "epoch": 0.11, "learning_rate": 4.820919209039548e-05, "loss": 0.1203, "step": 1585500 }, { "epoch": 0.11, "learning_rate": 4.8208627118644066e-05, "loss": 0.1217, "step": 1586000 }, { "epoch": 0.11, "learning_rate": 4.820806327683616e-05, "loss": 0.1258, "step": 1586500 }, { "epoch": 0.11, "learning_rate": 4.820749830508475e-05, "loss": 0.1188, "step": 1587000 }, { "epoch": 0.11, "learning_rate": 4.8206933333333334e-05, "loss": 0.1146, "step": 1587500 }, { "epoch": 0.11, "learning_rate": 4.8206368361581925e-05, "loss": 0.1179, "step": 1588000 }, { "epoch": 0.11, "learning_rate": 4.820580338983051e-05, "loss": 0.1141, "step": 1588500 }, { "epoch": 0.11, "learning_rate": 4.82052395480226e-05, "loss": 0.1186, "step": 1589000 }, { "epoch": 0.11, "learning_rate": 4.8204674576271186e-05, "loss": 0.1151, "step": 1589500 }, { "epoch": 0.11, "learning_rate": 4.820410960451978e-05, "loss": 0.116, "step": 1590000 }, { "epoch": 0.11, "learning_rate": 4.820354463276837e-05, "loss": 0.1158, "step": 1590500 }, { "epoch": 0.11, "learning_rate": 4.8202980790960455e-05, "loss": 0.1229, "step": 1591000 }, { "epoch": 0.11, "learning_rate": 4.820241581920904e-05, "loss": 0.113, "step": 1591500 }, { "epoch": 0.11, "learning_rate": 4.820185084745763e-05, "loss": 0.1135, "step": 1592000 }, { "epoch": 0.11, "learning_rate": 4.820128587570621e-05, "loss": 0.1208, "step": 1592500 }, { "epoch": 0.11, "learning_rate": 4.820072203389831e-05, "loss": 0.1216, "step": 1593000 }, { "epoch": 0.11, "learning_rate": 4.82001570621469e-05, "loss": 0.1155, "step": 1593500 }, { "epoch": 0.11, "learning_rate": 4.819959209039548e-05, "loss": 0.1221, "step": 1594000 }, { "epoch": 0.11, "learning_rate": 4.819902711864407e-05, "loss": 0.1153, "step": 1594500 }, { "epoch": 0.11, "learning_rate": 4.8198462146892656e-05, "loss": 0.1233, "step": 1595000 }, { "epoch": 0.11, "learning_rate": 4.819789830508475e-05, "loss": 0.1164, "step": 1595500 }, { "epoch": 0.11, "learning_rate": 4.8197333333333334e-05, "loss": 0.1236, "step": 1596000 }, { "epoch": 0.11, "learning_rate": 4.8196768361581924e-05, "loss": 0.1225, "step": 1596500 }, { "epoch": 0.11, "learning_rate": 4.8196203389830515e-05, "loss": 0.1244, "step": 1597000 }, { "epoch": 0.11, "learning_rate": 4.81956395480226e-05, "loss": 0.1156, "step": 1597500 }, { "epoch": 0.11, "learning_rate": 4.8195074576271186e-05, "loss": 0.1223, "step": 1598000 }, { "epoch": 0.11, "learning_rate": 4.8194509604519776e-05, "loss": 0.1162, "step": 1598500 }, { "epoch": 0.11, "learning_rate": 4.819394463276836e-05, "loss": 0.112, "step": 1599000 }, { "epoch": 0.11, "learning_rate": 4.8193380790960454e-05, "loss": 0.1128, "step": 1599500 }, { "epoch": 0.11, "learning_rate": 4.8192815819209045e-05, "loss": 0.119, "step": 1600000 }, { "epoch": 0.11, "learning_rate": 4.819225084745763e-05, "loss": 0.1148, "step": 1600500 }, { "epoch": 0.11, "learning_rate": 4.819168587570622e-05, "loss": 0.1212, "step": 1601000 }, { "epoch": 0.11, "learning_rate": 4.81911209039548e-05, "loss": 0.1174, "step": 1601500 }, { "epoch": 0.11, "learning_rate": 4.81905570621469e-05, "loss": 0.1258, "step": 1602000 }, { "epoch": 0.11, "learning_rate": 4.818999209039548e-05, "loss": 0.1137, "step": 1602500 }, { "epoch": 0.11, "learning_rate": 4.818942711864407e-05, "loss": 0.1196, "step": 1603000 }, { "epoch": 0.11, "learning_rate": 4.818886214689266e-05, "loss": 0.1269, "step": 1603500 }, { "epoch": 0.11, "learning_rate": 4.8188297175141246e-05, "loss": 0.1134, "step": 1604000 }, { "epoch": 0.11, "learning_rate": 4.8187732203389836e-05, "loss": 0.1161, "step": 1604500 }, { "epoch": 0.11, "learning_rate": 4.8187168361581923e-05, "loss": 0.1116, "step": 1605000 }, { "epoch": 0.11, "learning_rate": 4.818660338983051e-05, "loss": 0.118, "step": 1605500 }, { "epoch": 0.11, "learning_rate": 4.81860384180791e-05, "loss": 0.1224, "step": 1606000 }, { "epoch": 0.11, "learning_rate": 4.818547344632768e-05, "loss": 0.122, "step": 1606500 }, { "epoch": 0.11, "learning_rate": 4.818490847457627e-05, "loss": 0.1222, "step": 1607000 }, { "epoch": 0.11, "learning_rate": 4.8184344632768366e-05, "loss": 0.1138, "step": 1607500 }, { "epoch": 0.11, "learning_rate": 4.818377966101696e-05, "loss": 0.1158, "step": 1608000 }, { "epoch": 0.11, "learning_rate": 4.818321468926554e-05, "loss": 0.1263, "step": 1608500 }, { "epoch": 0.11, "learning_rate": 4.818264971751413e-05, "loss": 0.1142, "step": 1609000 }, { "epoch": 0.11, "learning_rate": 4.818208587570622e-05, "loss": 0.1205, "step": 1609500 }, { "epoch": 0.11, "learning_rate": 4.81815209039548e-05, "loss": 0.1151, "step": 1610000 }, { "epoch": 0.11, "learning_rate": 4.818095593220339e-05, "loss": 0.1239, "step": 1610500 }, { "epoch": 0.11, "learning_rate": 4.818039096045198e-05, "loss": 0.1211, "step": 1611000 }, { "epoch": 0.11, "learning_rate": 4.817982598870057e-05, "loss": 0.1213, "step": 1611500 }, { "epoch": 0.11, "learning_rate": 4.8179262146892654e-05, "loss": 0.1206, "step": 1612000 }, { "epoch": 0.11, "learning_rate": 4.8178697175141245e-05, "loss": 0.128, "step": 1612500 }, { "epoch": 0.11, "learning_rate": 4.817813220338983e-05, "loss": 0.1171, "step": 1613000 }, { "epoch": 0.11, "learning_rate": 4.817756723163842e-05, "loss": 0.1192, "step": 1613500 }, { "epoch": 0.11, "learning_rate": 4.8177002259887e-05, "loss": 0.1212, "step": 1614000 }, { "epoch": 0.11, "learning_rate": 4.8176437288135594e-05, "loss": 0.1238, "step": 1614500 }, { "epoch": 0.11, "learning_rate": 4.8175872316384184e-05, "loss": 0.1256, "step": 1615000 }, { "epoch": 0.11, "learning_rate": 4.817530847457628e-05, "loss": 0.118, "step": 1615500 }, { "epoch": 0.11, "learning_rate": 4.817474350282486e-05, "loss": 0.1156, "step": 1616000 }, { "epoch": 0.11, "learning_rate": 4.817417853107345e-05, "loss": 0.1169, "step": 1616500 }, { "epoch": 0.11, "learning_rate": 4.8173613559322036e-05, "loss": 0.1172, "step": 1617000 }, { "epoch": 0.11, "learning_rate": 4.817304971751413e-05, "loss": 0.1145, "step": 1617500 }, { "epoch": 0.11, "learning_rate": 4.8172484745762714e-05, "loss": 0.1269, "step": 1618000 }, { "epoch": 0.11, "learning_rate": 4.8171919774011305e-05, "loss": 0.1164, "step": 1618500 }, { "epoch": 0.11, "learning_rate": 4.817135480225989e-05, "loss": 0.1252, "step": 1619000 }, { "epoch": 0.11, "learning_rate": 4.8170790960451976e-05, "loss": 0.1277, "step": 1619500 }, { "epoch": 0.11, "learning_rate": 4.8170225988700566e-05, "loss": 0.1163, "step": 1620000 }, { "epoch": 0.11, "learning_rate": 4.816966101694915e-05, "loss": 0.1289, "step": 1620500 }, { "epoch": 0.11, "learning_rate": 4.816909604519774e-05, "loss": 0.1191, "step": 1621000 }, { "epoch": 0.11, "learning_rate": 4.816853107344633e-05, "loss": 0.1192, "step": 1621500 }, { "epoch": 0.11, "learning_rate": 4.8167967231638425e-05, "loss": 0.1162, "step": 1622000 }, { "epoch": 0.11, "learning_rate": 4.816740225988701e-05, "loss": 0.1214, "step": 1622500 }, { "epoch": 0.11, "learning_rate": 4.81668372881356e-05, "loss": 0.119, "step": 1623000 }, { "epoch": 0.11, "learning_rate": 4.8166272316384184e-05, "loss": 0.12, "step": 1623500 }, { "epoch": 0.11, "learning_rate": 4.8165707344632774e-05, "loss": 0.1136, "step": 1624000 }, { "epoch": 0.11, "learning_rate": 4.816514237288136e-05, "loss": 0.1182, "step": 1624500 }, { "epoch": 0.11, "learning_rate": 4.816457740112995e-05, "loss": 0.1253, "step": 1625000 }, { "epoch": 0.11, "learning_rate": 4.8164013559322036e-05, "loss": 0.1191, "step": 1625500 }, { "epoch": 0.11, "learning_rate": 4.8163448587570626e-05, "loss": 0.1126, "step": 1626000 }, { "epoch": 0.11, "learning_rate": 4.816288361581921e-05, "loss": 0.1194, "step": 1626500 }, { "epoch": 0.11, "learning_rate": 4.81623186440678e-05, "loss": 0.1189, "step": 1627000 }, { "epoch": 0.11, "learning_rate": 4.8161753672316384e-05, "loss": 0.1174, "step": 1627500 }, { "epoch": 0.11, "learning_rate": 4.816118983050848e-05, "loss": 0.1301, "step": 1628000 }, { "epoch": 0.11, "learning_rate": 4.816062485875706e-05, "loss": 0.1177, "step": 1628500 }, { "epoch": 0.11, "learning_rate": 4.816005988700565e-05, "loss": 0.1235, "step": 1629000 }, { "epoch": 0.11, "learning_rate": 4.815949491525424e-05, "loss": 0.1153, "step": 1629500 }, { "epoch": 0.11, "learning_rate": 4.815892994350283e-05, "loss": 0.1131, "step": 1630000 }, { "epoch": 0.11, "learning_rate": 4.815836497175141e-05, "loss": 0.1279, "step": 1630500 }, { "epoch": 0.11, "learning_rate": 4.8157801129943505e-05, "loss": 0.1186, "step": 1631000 }, { "epoch": 0.11, "learning_rate": 4.8157236158192096e-05, "loss": 0.1207, "step": 1631500 }, { "epoch": 0.11, "learning_rate": 4.815667118644068e-05, "loss": 0.1164, "step": 1632000 }, { "epoch": 0.11, "learning_rate": 4.815610621468927e-05, "loss": 0.1166, "step": 1632500 }, { "epoch": 0.11, "learning_rate": 4.815554237288136e-05, "loss": 0.1194, "step": 1633000 }, { "epoch": 0.11, "learning_rate": 4.815497740112995e-05, "loss": 0.1128, "step": 1633500 }, { "epoch": 0.11, "learning_rate": 4.815441242937853e-05, "loss": 0.1189, "step": 1634000 }, { "epoch": 0.11, "learning_rate": 4.815384745762712e-05, "loss": 0.1232, "step": 1634500 }, { "epoch": 0.11, "learning_rate": 4.8153282485875706e-05, "loss": 0.1242, "step": 1635000 }, { "epoch": 0.11, "learning_rate": 4.8152717514124297e-05, "loss": 0.1265, "step": 1635500 }, { "epoch": 0.11, "learning_rate": 4.815215254237289e-05, "loss": 0.1226, "step": 1636000 }, { "epoch": 0.11, "learning_rate": 4.8151588700564974e-05, "loss": 0.1247, "step": 1636500 }, { "epoch": 0.11, "learning_rate": 4.815102372881356e-05, "loss": 0.114, "step": 1637000 }, { "epoch": 0.11, "learning_rate": 4.815045875706215e-05, "loss": 0.1172, "step": 1637500 }, { "epoch": 0.11, "learning_rate": 4.814989378531073e-05, "loss": 0.1144, "step": 1638000 }, { "epoch": 0.11, "learning_rate": 4.8149329943502827e-05, "loss": 0.1224, "step": 1638500 }, { "epoch": 0.11, "learning_rate": 4.814876497175142e-05, "loss": 0.1209, "step": 1639000 }, { "epoch": 0.11, "learning_rate": 4.814820000000001e-05, "loss": 0.1154, "step": 1639500 }, { "epoch": 0.11, "learning_rate": 4.814763502824859e-05, "loss": 0.114, "step": 1640000 }, { "epoch": 0.11, "learning_rate": 4.814707005649718e-05, "loss": 0.121, "step": 1640500 }, { "epoch": 0.11, "learning_rate": 4.814650621468927e-05, "loss": 0.1188, "step": 1641000 }, { "epoch": 0.11, "learning_rate": 4.814594124293785e-05, "loss": 0.122, "step": 1641500 }, { "epoch": 0.11, "learning_rate": 4.8145376271186444e-05, "loss": 0.121, "step": 1642000 }, { "epoch": 0.11, "learning_rate": 4.814481242937853e-05, "loss": 0.1225, "step": 1642500 }, { "epoch": 0.11, "learning_rate": 4.814424745762712e-05, "loss": 0.1223, "step": 1643000 }, { "epoch": 0.11, "learning_rate": 4.8143682485875705e-05, "loss": 0.1227, "step": 1643500 }, { "epoch": 0.11, "learning_rate": 4.8143117514124296e-05, "loss": 0.1175, "step": 1644000 }, { "epoch": 0.11, "learning_rate": 4.814255254237288e-05, "loss": 0.1125, "step": 1644500 }, { "epoch": 0.11, "learning_rate": 4.814198757062147e-05, "loss": 0.119, "step": 1645000 }, { "epoch": 0.11, "learning_rate": 4.8141422598870054e-05, "loss": 0.1156, "step": 1645500 }, { "epoch": 0.11, "learning_rate": 4.8140857627118645e-05, "loss": 0.1141, "step": 1646000 }, { "epoch": 0.11, "learning_rate": 4.8140292655367235e-05, "loss": 0.1143, "step": 1646500 }, { "epoch": 0.11, "learning_rate": 4.813972768361582e-05, "loss": 0.122, "step": 1647000 }, { "epoch": 0.11, "learning_rate": 4.813916271186441e-05, "loss": 0.1127, "step": 1647500 }, { "epoch": 0.11, "learning_rate": 4.8138598870056504e-05, "loss": 0.1156, "step": 1648000 }, { "epoch": 0.11, "learning_rate": 4.813803389830509e-05, "loss": 0.1073, "step": 1648500 }, { "epoch": 0.11, "learning_rate": 4.813746892655368e-05, "loss": 0.1198, "step": 1649000 }, { "epoch": 0.11, "learning_rate": 4.813690395480226e-05, "loss": 0.1213, "step": 1649500 }, { "epoch": 0.11, "learning_rate": 4.813633898305085e-05, "loss": 0.12, "step": 1650000 }, { "epoch": 0.11, "learning_rate": 4.8135774011299436e-05, "loss": 0.1209, "step": 1650500 }, { "epoch": 0.11, "learning_rate": 4.813521016949153e-05, "loss": 0.1182, "step": 1651000 }, { "epoch": 0.11, "learning_rate": 4.8134645197740114e-05, "loss": 0.1226, "step": 1651500 }, { "epoch": 0.11, "learning_rate": 4.8134080225988705e-05, "loss": 0.1147, "step": 1652000 }, { "epoch": 0.11, "learning_rate": 4.813351525423729e-05, "loss": 0.1247, "step": 1652500 }, { "epoch": 0.11, "learning_rate": 4.813295028248588e-05, "loss": 0.1162, "step": 1653000 }, { "epoch": 0.11, "learning_rate": 4.8132386440677966e-05, "loss": 0.1151, "step": 1653500 }, { "epoch": 0.11, "learning_rate": 4.813182146892656e-05, "loss": 0.1171, "step": 1654000 }, { "epoch": 0.11, "learning_rate": 4.813125649717514e-05, "loss": 0.1257, "step": 1654500 }, { "epoch": 0.11, "learning_rate": 4.813069152542373e-05, "loss": 0.1167, "step": 1655000 }, { "epoch": 0.11, "learning_rate": 4.8130127683615825e-05, "loss": 0.1228, "step": 1655500 }, { "epoch": 0.11, "learning_rate": 4.812956271186441e-05, "loss": 0.1199, "step": 1656000 }, { "epoch": 0.11, "learning_rate": 4.8128997740113e-05, "loss": 0.1157, "step": 1656500 }, { "epoch": 0.11, "learning_rate": 4.812843276836158e-05, "loss": 0.1174, "step": 1657000 }, { "epoch": 0.11, "learning_rate": 4.8127867796610174e-05, "loss": 0.1142, "step": 1657500 }, { "epoch": 0.11, "learning_rate": 4.812730282485876e-05, "loss": 0.117, "step": 1658000 }, { "epoch": 0.11, "learning_rate": 4.812673785310735e-05, "loss": 0.1248, "step": 1658500 }, { "epoch": 0.11, "learning_rate": 4.812617288135593e-05, "loss": 0.1263, "step": 1659000 }, { "epoch": 0.11, "learning_rate": 4.812561016949152e-05, "loss": 0.1248, "step": 1659500 }, { "epoch": 0.11, "learning_rate": 4.812504519774011e-05, "loss": 0.1179, "step": 1660000 }, { "epoch": 0.11, "learning_rate": 4.8124480225988704e-05, "loss": 0.1152, "step": 1660500 }, { "epoch": 0.11, "learning_rate": 4.812391525423729e-05, "loss": 0.1199, "step": 1661000 }, { "epoch": 0.11, "learning_rate": 4.812335028248588e-05, "loss": 0.1195, "step": 1661500 }, { "epoch": 0.11, "learning_rate": 4.812278644067797e-05, "loss": 0.1247, "step": 1662000 }, { "epoch": 0.11, "learning_rate": 4.8122221468926556e-05, "loss": 0.1119, "step": 1662500 }, { "epoch": 0.11, "learning_rate": 4.8121656497175147e-05, "loss": 0.1141, "step": 1663000 }, { "epoch": 0.11, "learning_rate": 4.812109152542374e-05, "loss": 0.1174, "step": 1663500 }, { "epoch": 0.11, "learning_rate": 4.812052655367232e-05, "loss": 0.1273, "step": 1664000 }, { "epoch": 0.11, "learning_rate": 4.811996158192091e-05, "loss": 0.1119, "step": 1664500 }, { "epoch": 0.11, "learning_rate": 4.8119396610169495e-05, "loss": 0.1173, "step": 1665000 }, { "epoch": 0.11, "learning_rate": 4.8118831638418086e-05, "loss": 0.1173, "step": 1665500 }, { "epoch": 0.11, "learning_rate": 4.811826779661017e-05, "loss": 0.1103, "step": 1666000 }, { "epoch": 0.11, "learning_rate": 4.811770395480226e-05, "loss": 0.1122, "step": 1666500 }, { "epoch": 0.11, "learning_rate": 4.811713898305085e-05, "loss": 0.1192, "step": 1667000 }, { "epoch": 0.11, "learning_rate": 4.8116574011299435e-05, "loss": 0.1115, "step": 1667500 }, { "epoch": 0.11, "learning_rate": 4.8116009039548025e-05, "loss": 0.1221, "step": 1668000 }, { "epoch": 0.11, "learning_rate": 4.811544519774012e-05, "loss": 0.1198, "step": 1668500 }, { "epoch": 0.11, "learning_rate": 4.81148802259887e-05, "loss": 0.1175, "step": 1669000 }, { "epoch": 0.11, "learning_rate": 4.8114315254237294e-05, "loss": 0.1192, "step": 1669500 }, { "epoch": 0.11, "learning_rate": 4.811375028248588e-05, "loss": 0.1074, "step": 1670000 }, { "epoch": 0.11, "learning_rate": 4.811318531073447e-05, "loss": 0.1179, "step": 1670500 }, { "epoch": 0.11, "learning_rate": 4.8112621468926555e-05, "loss": 0.1195, "step": 1671000 }, { "epoch": 0.11, "learning_rate": 4.8112056497175146e-05, "loss": 0.1147, "step": 1671500 }, { "epoch": 0.11, "learning_rate": 4.811149152542373e-05, "loss": 0.1165, "step": 1672000 }, { "epoch": 0.11, "learning_rate": 4.811092655367232e-05, "loss": 0.1207, "step": 1672500 }, { "epoch": 0.11, "learning_rate": 4.8110361581920904e-05, "loss": 0.1157, "step": 1673000 }, { "epoch": 0.11, "learning_rate": 4.8109796610169495e-05, "loss": 0.1181, "step": 1673500 }, { "epoch": 0.11, "learning_rate": 4.8109231638418085e-05, "loss": 0.1155, "step": 1674000 }, { "epoch": 0.11, "learning_rate": 4.810866779661017e-05, "loss": 0.1143, "step": 1674500 }, { "epoch": 0.11, "learning_rate": 4.8108102824858756e-05, "loss": 0.1183, "step": 1675000 }, { "epoch": 0.11, "learning_rate": 4.810753785310735e-05, "loss": 0.1165, "step": 1675500 }, { "epoch": 0.11, "learning_rate": 4.810697288135593e-05, "loss": 0.1179, "step": 1676000 }, { "epoch": 0.11, "learning_rate": 4.810640790960452e-05, "loss": 0.1175, "step": 1676500 }, { "epoch": 0.11, "learning_rate": 4.8105842937853105e-05, "loss": 0.1168, "step": 1677000 }, { "epoch": 0.11, "learning_rate": 4.8105279096045206e-05, "loss": 0.1183, "step": 1677500 }, { "epoch": 0.11, "learning_rate": 4.810471412429379e-05, "loss": 0.1232, "step": 1678000 }, { "epoch": 0.11, "learning_rate": 4.810414915254238e-05, "loss": 0.1181, "step": 1678500 }, { "epoch": 0.11, "learning_rate": 4.8103584180790964e-05, "loss": 0.1139, "step": 1679000 }, { "epoch": 0.11, "learning_rate": 4.8103019209039554e-05, "loss": 0.1205, "step": 1679500 }, { "epoch": 0.11, "learning_rate": 4.810245536723164e-05, "loss": 0.1286, "step": 1680000 }, { "epoch": 0.11, "learning_rate": 4.8101890395480226e-05, "loss": 0.1157, "step": 1680500 }, { "epoch": 0.11, "learning_rate": 4.8101325423728816e-05, "loss": 0.1179, "step": 1681000 }, { "epoch": 0.11, "learning_rate": 4.810076045197741e-05, "loss": 0.1221, "step": 1681500 }, { "epoch": 0.11, "learning_rate": 4.810019548022599e-05, "loss": 0.1142, "step": 1682000 }, { "epoch": 0.11, "learning_rate": 4.809963050847458e-05, "loss": 0.121, "step": 1682500 }, { "epoch": 0.11, "learning_rate": 4.8099065536723165e-05, "loss": 0.1181, "step": 1683000 }, { "epoch": 0.11, "learning_rate": 4.809850169491525e-05, "loss": 0.112, "step": 1683500 }, { "epoch": 0.11, "learning_rate": 4.809793672316384e-05, "loss": 0.1158, "step": 1684000 }, { "epoch": 0.11, "learning_rate": 4.809737175141243e-05, "loss": 0.1093, "step": 1684500 }, { "epoch": 0.11, "learning_rate": 4.809680677966102e-05, "loss": 0.1109, "step": 1685000 }, { "epoch": 0.11, "learning_rate": 4.809624293785311e-05, "loss": 0.1152, "step": 1685500 }, { "epoch": 0.11, "learning_rate": 4.80956779661017e-05, "loss": 0.1151, "step": 1686000 }, { "epoch": 0.11, "learning_rate": 4.8095112994350285e-05, "loss": 0.1061, "step": 1686500 }, { "epoch": 0.11, "learning_rate": 4.8094548022598876e-05, "loss": 0.1216, "step": 1687000 }, { "epoch": 0.11, "learning_rate": 4.809398305084746e-05, "loss": 0.1229, "step": 1687500 }, { "epoch": 0.11, "learning_rate": 4.809341807909605e-05, "loss": 0.1216, "step": 1688000 }, { "epoch": 0.11, "learning_rate": 4.8092853107344634e-05, "loss": 0.1171, "step": 1688500 }, { "epoch": 0.11, "learning_rate": 4.809228926553673e-05, "loss": 0.1169, "step": 1689000 }, { "epoch": 0.11, "learning_rate": 4.809172429378531e-05, "loss": 0.1157, "step": 1689500 }, { "epoch": 0.11, "learning_rate": 4.80911593220339e-05, "loss": 0.1211, "step": 1690000 }, { "epoch": 0.11, "learning_rate": 4.8090594350282486e-05, "loss": 0.1181, "step": 1690500 }, { "epoch": 0.11, "learning_rate": 4.809002937853108e-05, "loss": 0.1138, "step": 1691000 }, { "epoch": 0.11, "learning_rate": 4.808946440677966e-05, "loss": 0.1169, "step": 1691500 }, { "epoch": 0.11, "learning_rate": 4.8088900564971755e-05, "loss": 0.1156, "step": 1692000 }, { "epoch": 0.11, "learning_rate": 4.808833559322034e-05, "loss": 0.1164, "step": 1692500 }, { "epoch": 0.11, "learning_rate": 4.808777062146893e-05, "loss": 0.1227, "step": 1693000 }, { "epoch": 0.11, "learning_rate": 4.808720564971751e-05, "loss": 0.121, "step": 1693500 }, { "epoch": 0.11, "learning_rate": 4.808664180790961e-05, "loss": 0.1151, "step": 1694000 }, { "epoch": 0.11, "learning_rate": 4.80860768361582e-05, "loss": 0.1173, "step": 1694500 }, { "epoch": 0.11, "learning_rate": 4.808551186440678e-05, "loss": 0.1244, "step": 1695000 }, { "epoch": 0.11, "learning_rate": 4.808494689265537e-05, "loss": 0.1206, "step": 1695500 }, { "epoch": 0.11, "learning_rate": 4.808438192090396e-05, "loss": 0.1174, "step": 1696000 }, { "epoch": 0.12, "learning_rate": 4.8083816949152546e-05, "loss": 0.1222, "step": 1696500 }, { "epoch": 0.12, "learning_rate": 4.8083253107344633e-05, "loss": 0.1183, "step": 1697000 }, { "epoch": 0.12, "learning_rate": 4.8082688135593224e-05, "loss": 0.1183, "step": 1697500 }, { "epoch": 0.12, "learning_rate": 4.808212316384181e-05, "loss": 0.1176, "step": 1698000 }, { "epoch": 0.12, "learning_rate": 4.80815581920904e-05, "loss": 0.1142, "step": 1698500 }, { "epoch": 0.12, "learning_rate": 4.8080994350282486e-05, "loss": 0.1226, "step": 1699000 }, { "epoch": 0.12, "learning_rate": 4.8080429378531076e-05, "loss": 0.1158, "step": 1699500 }, { "epoch": 0.12, "learning_rate": 4.807986440677966e-05, "loss": 0.115, "step": 1700000 }, { "epoch": 0.12, "learning_rate": 4.807929943502825e-05, "loss": 0.1157, "step": 1700500 }, { "epoch": 0.12, "learning_rate": 4.8078734463276834e-05, "loss": 0.115, "step": 1701000 }, { "epoch": 0.12, "learning_rate": 4.807817062146893e-05, "loss": 0.1209, "step": 1701500 }, { "epoch": 0.12, "learning_rate": 4.807760564971752e-05, "loss": 0.1174, "step": 1702000 }, { "epoch": 0.12, "learning_rate": 4.807704067796611e-05, "loss": 0.1122, "step": 1702500 }, { "epoch": 0.12, "learning_rate": 4.807647570621469e-05, "loss": 0.1149, "step": 1703000 }, { "epoch": 0.12, "learning_rate": 4.8075910734463284e-05, "loss": 0.1168, "step": 1703500 }, { "epoch": 0.12, "learning_rate": 4.807534689265537e-05, "loss": 0.1217, "step": 1704000 }, { "epoch": 0.12, "learning_rate": 4.8074781920903955e-05, "loss": 0.1174, "step": 1704500 }, { "epoch": 0.12, "learning_rate": 4.8074216949152546e-05, "loss": 0.1175, "step": 1705000 }, { "epoch": 0.12, "learning_rate": 4.807365197740113e-05, "loss": 0.1174, "step": 1705500 }, { "epoch": 0.12, "learning_rate": 4.807308700564972e-05, "loss": 0.1125, "step": 1706000 }, { "epoch": 0.12, "learning_rate": 4.807252316384181e-05, "loss": 0.1154, "step": 1706500 }, { "epoch": 0.12, "learning_rate": 4.80719581920904e-05, "loss": 0.125, "step": 1707000 }, { "epoch": 0.12, "learning_rate": 4.807139322033898e-05, "loss": 0.1189, "step": 1707500 }, { "epoch": 0.12, "learning_rate": 4.807082824858757e-05, "loss": 0.118, "step": 1708000 }, { "epoch": 0.12, "learning_rate": 4.8070264406779666e-05, "loss": 0.1151, "step": 1708500 }, { "epoch": 0.12, "learning_rate": 4.806969943502825e-05, "loss": 0.1171, "step": 1709000 }, { "epoch": 0.12, "learning_rate": 4.806913446327684e-05, "loss": 0.1159, "step": 1709500 }, { "epoch": 0.12, "learning_rate": 4.806856949152543e-05, "loss": 0.1173, "step": 1710000 }, { "epoch": 0.12, "learning_rate": 4.806800564971752e-05, "loss": 0.1238, "step": 1710500 }, { "epoch": 0.12, "learning_rate": 4.80674406779661e-05, "loss": 0.1183, "step": 1711000 }, { "epoch": 0.12, "learning_rate": 4.806687570621469e-05, "loss": 0.1164, "step": 1711500 }, { "epoch": 0.12, "learning_rate": 4.8066310734463276e-05, "loss": 0.1176, "step": 1712000 }, { "epoch": 0.12, "learning_rate": 4.806574576271187e-05, "loss": 0.1213, "step": 1712500 }, { "epoch": 0.12, "learning_rate": 4.8065181920903954e-05, "loss": 0.1149, "step": 1713000 }, { "epoch": 0.12, "learning_rate": 4.8064616949152545e-05, "loss": 0.1206, "step": 1713500 }, { "epoch": 0.12, "learning_rate": 4.806405197740113e-05, "loss": 0.1167, "step": 1714000 }, { "epoch": 0.12, "learning_rate": 4.806348700564972e-05, "loss": 0.1231, "step": 1714500 }, { "epoch": 0.12, "learning_rate": 4.80629220338983e-05, "loss": 0.1142, "step": 1715000 }, { "epoch": 0.12, "learning_rate": 4.80623581920904e-05, "loss": 0.1193, "step": 1715500 }, { "epoch": 0.12, "learning_rate": 4.806179322033899e-05, "loss": 0.1125, "step": 1716000 }, { "epoch": 0.12, "learning_rate": 4.806122824858758e-05, "loss": 0.1106, "step": 1716500 }, { "epoch": 0.12, "learning_rate": 4.806066327683616e-05, "loss": 0.1162, "step": 1717000 }, { "epoch": 0.12, "learning_rate": 4.806009943502825e-05, "loss": 0.1191, "step": 1717500 }, { "epoch": 0.12, "learning_rate": 4.805953446327684e-05, "loss": 0.1214, "step": 1718000 }, { "epoch": 0.12, "learning_rate": 4.8058969491525424e-05, "loss": 0.1205, "step": 1718500 }, { "epoch": 0.12, "learning_rate": 4.8058404519774014e-05, "loss": 0.117, "step": 1719000 }, { "epoch": 0.12, "learning_rate": 4.80578395480226e-05, "loss": 0.1177, "step": 1719500 }, { "epoch": 0.12, "learning_rate": 4.805727570621469e-05, "loss": 0.1159, "step": 1720000 }, { "epoch": 0.12, "learning_rate": 4.8056710734463276e-05, "loss": 0.1218, "step": 1720500 }, { "epoch": 0.12, "learning_rate": 4.8056145762711866e-05, "loss": 0.1108, "step": 1721000 }, { "epoch": 0.12, "learning_rate": 4.805558079096045e-05, "loss": 0.1118, "step": 1721500 }, { "epoch": 0.12, "learning_rate": 4.805501581920904e-05, "loss": 0.1145, "step": 1722000 }, { "epoch": 0.12, "learning_rate": 4.8054450847457624e-05, "loss": 0.1133, "step": 1722500 }, { "epoch": 0.12, "learning_rate": 4.8053887005649725e-05, "loss": 0.1215, "step": 1723000 }, { "epoch": 0.12, "learning_rate": 4.805332203389831e-05, "loss": 0.1167, "step": 1723500 }, { "epoch": 0.12, "learning_rate": 4.80527570621469e-05, "loss": 0.1122, "step": 1724000 }, { "epoch": 0.12, "learning_rate": 4.8052192090395483e-05, "loss": 0.1234, "step": 1724500 }, { "epoch": 0.12, "learning_rate": 4.8051627118644074e-05, "loss": 0.1221, "step": 1725000 }, { "epoch": 0.12, "learning_rate": 4.805106327683616e-05, "loss": 0.1112, "step": 1725500 }, { "epoch": 0.12, "learning_rate": 4.8050498305084745e-05, "loss": 0.1214, "step": 1726000 }, { "epoch": 0.12, "learning_rate": 4.8049933333333336e-05, "loss": 0.11, "step": 1726500 }, { "epoch": 0.12, "learning_rate": 4.8049368361581926e-05, "loss": 0.1202, "step": 1727000 }, { "epoch": 0.12, "learning_rate": 4.8048804519774013e-05, "loss": 0.1188, "step": 1727500 }, { "epoch": 0.12, "learning_rate": 4.80482395480226e-05, "loss": 0.1087, "step": 1728000 }, { "epoch": 0.12, "learning_rate": 4.804767457627119e-05, "loss": 0.1138, "step": 1728500 }, { "epoch": 0.12, "learning_rate": 4.804710960451977e-05, "loss": 0.1237, "step": 1729000 }, { "epoch": 0.12, "learning_rate": 4.804654463276836e-05, "loss": 0.1137, "step": 1729500 }, { "epoch": 0.12, "learning_rate": 4.8045979661016946e-05, "loss": 0.1173, "step": 1730000 }, { "epoch": 0.12, "learning_rate": 4.8045414689265537e-05, "loss": 0.1191, "step": 1730500 }, { "epoch": 0.12, "learning_rate": 4.804485084745763e-05, "loss": 0.119, "step": 1731000 }, { "epoch": 0.12, "learning_rate": 4.804428587570622e-05, "loss": 0.1161, "step": 1731500 }, { "epoch": 0.12, "learning_rate": 4.8043720903954805e-05, "loss": 0.117, "step": 1732000 }, { "epoch": 0.12, "learning_rate": 4.8043155932203396e-05, "loss": 0.1218, "step": 1732500 }, { "epoch": 0.12, "learning_rate": 4.804259209039548e-05, "loss": 0.1187, "step": 1733000 }, { "epoch": 0.12, "learning_rate": 4.8042027118644067e-05, "loss": 0.1178, "step": 1733500 }, { "epoch": 0.12, "learning_rate": 4.804146214689266e-05, "loss": 0.1143, "step": 1734000 }, { "epoch": 0.12, "learning_rate": 4.804089717514125e-05, "loss": 0.1269, "step": 1734500 }, { "epoch": 0.12, "learning_rate": 4.804033220338983e-05, "loss": 0.123, "step": 1735000 }, { "epoch": 0.12, "learning_rate": 4.803976836158192e-05, "loss": 0.1183, "step": 1735500 }, { "epoch": 0.12, "learning_rate": 4.803920338983051e-05, "loss": 0.1134, "step": 1736000 }, { "epoch": 0.12, "learning_rate": 4.803863841807909e-05, "loss": 0.1126, "step": 1736500 }, { "epoch": 0.12, "learning_rate": 4.8038073446327684e-05, "loss": 0.112, "step": 1737000 }, { "epoch": 0.12, "learning_rate": 4.803750960451978e-05, "loss": 0.1124, "step": 1737500 }, { "epoch": 0.12, "learning_rate": 4.803694463276837e-05, "loss": 0.1169, "step": 1738000 }, { "epoch": 0.12, "learning_rate": 4.803637966101695e-05, "loss": 0.1193, "step": 1738500 }, { "epoch": 0.12, "learning_rate": 4.803581468926554e-05, "loss": 0.1141, "step": 1739000 }, { "epoch": 0.12, "learning_rate": 4.8035249717514126e-05, "loss": 0.1183, "step": 1739500 }, { "epoch": 0.12, "learning_rate": 4.803468474576272e-05, "loss": 0.1139, "step": 1740000 }, { "epoch": 0.12, "learning_rate": 4.8034120903954804e-05, "loss": 0.1194, "step": 1740500 }, { "epoch": 0.12, "learning_rate": 4.8033555932203395e-05, "loss": 0.1168, "step": 1741000 }, { "epoch": 0.12, "learning_rate": 4.803299096045198e-05, "loss": 0.1198, "step": 1741500 }, { "epoch": 0.12, "learning_rate": 4.803242598870057e-05, "loss": 0.1214, "step": 1742000 }, { "epoch": 0.12, "learning_rate": 4.8031862146892656e-05, "loss": 0.1139, "step": 1742500 }, { "epoch": 0.12, "learning_rate": 4.803129830508475e-05, "loss": 0.1225, "step": 1743000 }, { "epoch": 0.12, "learning_rate": 4.8030733333333334e-05, "loss": 0.1152, "step": 1743500 }, { "epoch": 0.12, "learning_rate": 4.8030168361581925e-05, "loss": 0.1187, "step": 1744000 }, { "epoch": 0.12, "learning_rate": 4.8029603389830515e-05, "loss": 0.1142, "step": 1744500 }, { "epoch": 0.12, "learning_rate": 4.80290384180791e-05, "loss": 0.126, "step": 1745000 }, { "epoch": 0.12, "learning_rate": 4.802847344632769e-05, "loss": 0.1155, "step": 1745500 }, { "epoch": 0.12, "learning_rate": 4.8027908474576274e-05, "loss": 0.1178, "step": 1746000 }, { "epoch": 0.12, "learning_rate": 4.8027343502824864e-05, "loss": 0.1185, "step": 1746500 }, { "epoch": 0.12, "learning_rate": 4.802677853107345e-05, "loss": 0.1199, "step": 1747000 }, { "epoch": 0.12, "learning_rate": 4.802621468926554e-05, "loss": 0.1159, "step": 1747500 }, { "epoch": 0.12, "learning_rate": 4.8025649717514126e-05, "loss": 0.1146, "step": 1748000 }, { "epoch": 0.12, "learning_rate": 4.8025084745762716e-05, "loss": 0.1176, "step": 1748500 }, { "epoch": 0.12, "learning_rate": 4.80245197740113e-05, "loss": 0.1173, "step": 1749000 }, { "epoch": 0.12, "learning_rate": 4.802395480225989e-05, "loss": 0.1194, "step": 1749500 }, { "epoch": 0.12, "learning_rate": 4.8023389830508474e-05, "loss": 0.1241, "step": 1750000 }, { "epoch": 0.12, "learning_rate": 4.802282598870056e-05, "loss": 0.1134, "step": 1750500 }, { "epoch": 0.12, "learning_rate": 4.802226101694915e-05, "loss": 0.119, "step": 1751000 }, { "epoch": 0.12, "learning_rate": 4.802169604519774e-05, "loss": 0.1182, "step": 1751500 }, { "epoch": 0.12, "learning_rate": 4.802113107344633e-05, "loss": 0.1222, "step": 1752000 }, { "epoch": 0.12, "learning_rate": 4.802056723163842e-05, "loss": 0.1138, "step": 1752500 }, { "epoch": 0.12, "learning_rate": 4.802000225988701e-05, "loss": 0.1231, "step": 1753000 }, { "epoch": 0.12, "learning_rate": 4.8019437288135595e-05, "loss": 0.1174, "step": 1753500 }, { "epoch": 0.12, "learning_rate": 4.8018872316384186e-05, "loss": 0.1099, "step": 1754000 }, { "epoch": 0.12, "learning_rate": 4.801830734463277e-05, "loss": 0.1214, "step": 1754500 }, { "epoch": 0.12, "learning_rate": 4.8017743502824863e-05, "loss": 0.119, "step": 1755000 }, { "epoch": 0.12, "learning_rate": 4.801717853107345e-05, "loss": 0.1124, "step": 1755500 }, { "epoch": 0.12, "learning_rate": 4.801661355932204e-05, "loss": 0.1199, "step": 1756000 }, { "epoch": 0.12, "learning_rate": 4.801604858757062e-05, "loss": 0.1252, "step": 1756500 }, { "epoch": 0.12, "learning_rate": 4.801548361581921e-05, "loss": 0.1173, "step": 1757000 }, { "epoch": 0.12, "learning_rate": 4.8014918644067796e-05, "loss": 0.1178, "step": 1757500 }, { "epoch": 0.12, "learning_rate": 4.8014353672316387e-05, "loss": 0.1178, "step": 1758000 }, { "epoch": 0.12, "learning_rate": 4.8013789830508474e-05, "loss": 0.1108, "step": 1758500 }, { "epoch": 0.12, "learning_rate": 4.8013224858757064e-05, "loss": 0.1182, "step": 1759000 }, { "epoch": 0.12, "learning_rate": 4.801265988700565e-05, "loss": 0.1205, "step": 1759500 }, { "epoch": 0.12, "learning_rate": 4.801209491525424e-05, "loss": 0.116, "step": 1760000 }, { "epoch": 0.12, "learning_rate": 4.801152994350282e-05, "loss": 0.121, "step": 1760500 }, { "epoch": 0.12, "learning_rate": 4.801096497175141e-05, "loss": 0.1096, "step": 1761000 }, { "epoch": 0.12, "learning_rate": 4.801040112994351e-05, "loss": 0.1179, "step": 1761500 }, { "epoch": 0.12, "learning_rate": 4.80098361581921e-05, "loss": 0.1159, "step": 1762000 }, { "epoch": 0.12, "learning_rate": 4.800927118644068e-05, "loss": 0.1144, "step": 1762500 }, { "epoch": 0.12, "learning_rate": 4.800870621468927e-05, "loss": 0.1234, "step": 1763000 }, { "epoch": 0.12, "learning_rate": 4.800814237288136e-05, "loss": 0.1134, "step": 1763500 }, { "epoch": 0.12, "learning_rate": 4.800757740112994e-05, "loss": 0.1118, "step": 1764000 }, { "epoch": 0.12, "learning_rate": 4.8007012429378534e-05, "loss": 0.1139, "step": 1764500 }, { "epoch": 0.12, "learning_rate": 4.800644745762712e-05, "loss": 0.1107, "step": 1765000 }, { "epoch": 0.12, "learning_rate": 4.800588248587571e-05, "loss": 0.1119, "step": 1765500 }, { "epoch": 0.12, "learning_rate": 4.80053175141243e-05, "loss": 0.1132, "step": 1766000 }, { "epoch": 0.12, "learning_rate": 4.8004753672316386e-05, "loss": 0.1182, "step": 1766500 }, { "epoch": 0.12, "learning_rate": 4.800418870056497e-05, "loss": 0.1146, "step": 1767000 }, { "epoch": 0.12, "learning_rate": 4.800362372881356e-05, "loss": 0.1207, "step": 1767500 }, { "epoch": 0.12, "learning_rate": 4.8003058757062144e-05, "loss": 0.1155, "step": 1768000 }, { "epoch": 0.12, "learning_rate": 4.800249491525424e-05, "loss": 0.1213, "step": 1768500 }, { "epoch": 0.12, "learning_rate": 4.800192994350283e-05, "loss": 0.1174, "step": 1769000 }, { "epoch": 0.12, "learning_rate": 4.800136497175142e-05, "loss": 0.114, "step": 1769500 }, { "epoch": 0.12, "learning_rate": 4.80008e-05, "loss": 0.1094, "step": 1770000 }, { "epoch": 0.12, "learning_rate": 4.800023615819209e-05, "loss": 0.1135, "step": 1770500 }, { "epoch": 0.12, "learning_rate": 4.799967118644068e-05, "loss": 0.1156, "step": 1771000 }, { "epoch": 0.12, "learning_rate": 4.7999106214689265e-05, "loss": 0.1156, "step": 1771500 }, { "epoch": 0.12, "learning_rate": 4.7998541242937855e-05, "loss": 0.1208, "step": 1772000 }, { "epoch": 0.12, "learning_rate": 4.7997976271186446e-05, "loss": 0.1108, "step": 1772500 }, { "epoch": 0.12, "learning_rate": 4.799741242937853e-05, "loss": 0.1133, "step": 1773000 }, { "epoch": 0.12, "learning_rate": 4.799684745762712e-05, "loss": 0.1139, "step": 1773500 }, { "epoch": 0.12, "learning_rate": 4.799628248587571e-05, "loss": 0.1138, "step": 1774000 }, { "epoch": 0.12, "learning_rate": 4.799571751412429e-05, "loss": 0.1139, "step": 1774500 }, { "epoch": 0.12, "learning_rate": 4.799515254237288e-05, "loss": 0.1148, "step": 1775000 }, { "epoch": 0.12, "learning_rate": 4.7994587570621466e-05, "loss": 0.1108, "step": 1775500 }, { "epoch": 0.12, "learning_rate": 4.7994023728813566e-05, "loss": 0.1228, "step": 1776000 }, { "epoch": 0.12, "learning_rate": 4.799345875706215e-05, "loss": 0.1173, "step": 1776500 }, { "epoch": 0.12, "learning_rate": 4.799289491525424e-05, "loss": 0.1089, "step": 1777000 }, { "epoch": 0.12, "learning_rate": 4.799232994350283e-05, "loss": 0.1164, "step": 1777500 }, { "epoch": 0.12, "learning_rate": 4.799176497175141e-05, "loss": 0.1129, "step": 1778000 }, { "epoch": 0.12, "learning_rate": 4.79912e-05, "loss": 0.1246, "step": 1778500 }, { "epoch": 0.12, "learning_rate": 4.7990635028248586e-05, "loss": 0.1099, "step": 1779000 }, { "epoch": 0.12, "learning_rate": 4.799007005649718e-05, "loss": 0.1197, "step": 1779500 }, { "epoch": 0.12, "learning_rate": 4.798950508474577e-05, "loss": 0.1188, "step": 1780000 }, { "epoch": 0.12, "learning_rate": 4.798894011299435e-05, "loss": 0.114, "step": 1780500 }, { "epoch": 0.12, "learning_rate": 4.798837627118644e-05, "loss": 0.125, "step": 1781000 }, { "epoch": 0.12, "learning_rate": 4.798781129943503e-05, "loss": 0.1155, "step": 1781500 }, { "epoch": 0.12, "learning_rate": 4.798724632768361e-05, "loss": 0.1229, "step": 1782000 }, { "epoch": 0.12, "learning_rate": 4.79866813559322e-05, "loss": 0.1192, "step": 1782500 }, { "epoch": 0.12, "learning_rate": 4.7986116384180794e-05, "loss": 0.1071, "step": 1783000 }, { "epoch": 0.12, "learning_rate": 4.798555254237289e-05, "loss": 0.1131, "step": 1783500 }, { "epoch": 0.12, "learning_rate": 4.798498757062147e-05, "loss": 0.1166, "step": 1784000 }, { "epoch": 0.12, "learning_rate": 4.798442259887006e-05, "loss": 0.1141, "step": 1784500 }, { "epoch": 0.12, "learning_rate": 4.7983857627118646e-05, "loss": 0.1195, "step": 1785000 }, { "epoch": 0.12, "learning_rate": 4.7983292655367237e-05, "loss": 0.1188, "step": 1785500 }, { "epoch": 0.12, "learning_rate": 4.798272768361582e-05, "loss": 0.1163, "step": 1786000 }, { "epoch": 0.12, "learning_rate": 4.798216271186441e-05, "loss": 0.1177, "step": 1786500 }, { "epoch": 0.12, "learning_rate": 4.7981597740113e-05, "loss": 0.1189, "step": 1787000 }, { "epoch": 0.12, "learning_rate": 4.798103389830509e-05, "loss": 0.1076, "step": 1787500 }, { "epoch": 0.12, "learning_rate": 4.798046892655367e-05, "loss": 0.1167, "step": 1788000 }, { "epoch": 0.12, "learning_rate": 4.797990395480226e-05, "loss": 0.1204, "step": 1788500 }, { "epoch": 0.12, "learning_rate": 4.797933898305085e-05, "loss": 0.1182, "step": 1789000 }, { "epoch": 0.12, "learning_rate": 4.797877514124294e-05, "loss": 0.1224, "step": 1789500 }, { "epoch": 0.12, "learning_rate": 4.7978210169491525e-05, "loss": 0.1168, "step": 1790000 }, { "epoch": 0.12, "learning_rate": 4.7977645197740115e-05, "loss": 0.1139, "step": 1790500 }, { "epoch": 0.12, "learning_rate": 4.79770802259887e-05, "loss": 0.1139, "step": 1791000 }, { "epoch": 0.12, "learning_rate": 4.797651525423729e-05, "loss": 0.1202, "step": 1791500 }, { "epoch": 0.12, "learning_rate": 4.7975951412429384e-05, "loss": 0.1168, "step": 1792000 }, { "epoch": 0.12, "learning_rate": 4.797538644067797e-05, "loss": 0.1146, "step": 1792500 }, { "epoch": 0.12, "learning_rate": 4.797482146892656e-05, "loss": 0.1119, "step": 1793000 }, { "epoch": 0.12, "learning_rate": 4.797425649717515e-05, "loss": 0.1098, "step": 1793500 }, { "epoch": 0.12, "learning_rate": 4.7973692655367236e-05, "loss": 0.1155, "step": 1794000 }, { "epoch": 0.12, "learning_rate": 4.797312768361582e-05, "loss": 0.1189, "step": 1794500 }, { "epoch": 0.12, "learning_rate": 4.797256271186441e-05, "loss": 0.1162, "step": 1795000 }, { "epoch": 0.12, "learning_rate": 4.7971997740112994e-05, "loss": 0.1074, "step": 1795500 }, { "epoch": 0.12, "learning_rate": 4.7971432768361585e-05, "loss": 0.1102, "step": 1796000 }, { "epoch": 0.12, "learning_rate": 4.797086779661017e-05, "loss": 0.116, "step": 1796500 }, { "epoch": 0.12, "learning_rate": 4.797030282485876e-05, "loss": 0.1113, "step": 1797000 }, { "epoch": 0.12, "learning_rate": 4.7969738983050846e-05, "loss": 0.1089, "step": 1797500 }, { "epoch": 0.12, "learning_rate": 4.796917401129944e-05, "loss": 0.1174, "step": 1798000 }, { "epoch": 0.12, "learning_rate": 4.796860903954802e-05, "loss": 0.1126, "step": 1798500 }, { "epoch": 0.12, "learning_rate": 4.796804406779661e-05, "loss": 0.1155, "step": 1799000 }, { "epoch": 0.12, "learning_rate": 4.7967480225988705e-05, "loss": 0.1134, "step": 1799500 }, { "epoch": 0.12, "learning_rate": 4.796691525423729e-05, "loss": 0.1159, "step": 1800000 }, { "epoch": 0.12, "learning_rate": 4.796635028248588e-05, "loss": 0.1205, "step": 1800500 }, { "epoch": 0.12, "learning_rate": 4.796578531073447e-05, "loss": 0.1073, "step": 1801000 }, { "epoch": 0.12, "learning_rate": 4.7965220338983054e-05, "loss": 0.1139, "step": 1801500 }, { "epoch": 0.12, "learning_rate": 4.7964655367231644e-05, "loss": 0.1182, "step": 1802000 }, { "epoch": 0.12, "learning_rate": 4.796409152542373e-05, "loss": 0.1211, "step": 1802500 }, { "epoch": 0.12, "learning_rate": 4.7963526553672315e-05, "loss": 0.1122, "step": 1803000 }, { "epoch": 0.12, "learning_rate": 4.7962961581920906e-05, "loss": 0.1181, "step": 1803500 }, { "epoch": 0.12, "learning_rate": 4.79623966101695e-05, "loss": 0.123, "step": 1804000 }, { "epoch": 0.12, "learning_rate": 4.796183163841808e-05, "loss": 0.1153, "step": 1804500 }, { "epoch": 0.12, "learning_rate": 4.796126779661017e-05, "loss": 0.1221, "step": 1805000 }, { "epoch": 0.12, "learning_rate": 4.796070282485876e-05, "loss": 0.1116, "step": 1805500 }, { "epoch": 0.12, "learning_rate": 4.796013785310734e-05, "loss": 0.1208, "step": 1806000 }, { "epoch": 0.12, "learning_rate": 4.795957288135593e-05, "loss": 0.1095, "step": 1806500 }, { "epoch": 0.12, "learning_rate": 4.7959007909604516e-05, "loss": 0.1242, "step": 1807000 }, { "epoch": 0.12, "learning_rate": 4.795844406779662e-05, "loss": 0.119, "step": 1807500 }, { "epoch": 0.12, "learning_rate": 4.79578790960452e-05, "loss": 0.1122, "step": 1808000 }, { "epoch": 0.12, "learning_rate": 4.795731412429379e-05, "loss": 0.1173, "step": 1808500 }, { "epoch": 0.12, "learning_rate": 4.7956749152542375e-05, "loss": 0.1115, "step": 1809000 }, { "epoch": 0.12, "learning_rate": 4.7956184180790966e-05, "loss": 0.1193, "step": 1809500 }, { "epoch": 0.12, "learning_rate": 4.795562033898305e-05, "loss": 0.1143, "step": 1810000 }, { "epoch": 0.12, "learning_rate": 4.795505536723164e-05, "loss": 0.1143, "step": 1810500 }, { "epoch": 0.12, "learning_rate": 4.795449039548023e-05, "loss": 0.116, "step": 1811000 }, { "epoch": 0.12, "learning_rate": 4.795392542372882e-05, "loss": 0.1093, "step": 1811500 }, { "epoch": 0.12, "learning_rate": 4.7953361581920905e-05, "loss": 0.1125, "step": 1812000 }, { "epoch": 0.12, "learning_rate": 4.795279661016949e-05, "loss": 0.1088, "step": 1812500 }, { "epoch": 0.12, "learning_rate": 4.795223163841808e-05, "loss": 0.1232, "step": 1813000 }, { "epoch": 0.12, "learning_rate": 4.795166666666667e-05, "loss": 0.1235, "step": 1813500 }, { "epoch": 0.12, "learning_rate": 4.7951101694915254e-05, "loss": 0.119, "step": 1814000 }, { "epoch": 0.12, "learning_rate": 4.7950536723163845e-05, "loss": 0.1162, "step": 1814500 }, { "epoch": 0.12, "learning_rate": 4.794997288135594e-05, "loss": 0.1121, "step": 1815000 }, { "epoch": 0.12, "learning_rate": 4.794940790960452e-05, "loss": 0.1205, "step": 1815500 }, { "epoch": 0.12, "learning_rate": 4.794884293785311e-05, "loss": 0.1174, "step": 1816000 }, { "epoch": 0.12, "learning_rate": 4.79482779661017e-05, "loss": 0.1134, "step": 1816500 }, { "epoch": 0.12, "learning_rate": 4.794771299435029e-05, "loss": 0.1123, "step": 1817000 }, { "epoch": 0.12, "learning_rate": 4.794714802259887e-05, "loss": 0.1134, "step": 1817500 }, { "epoch": 0.12, "learning_rate": 4.7946584180790965e-05, "loss": 0.1179, "step": 1818000 }, { "epoch": 0.12, "learning_rate": 4.794601920903955e-05, "loss": 0.1095, "step": 1818500 }, { "epoch": 0.12, "learning_rate": 4.794545423728814e-05, "loss": 0.1196, "step": 1819000 }, { "epoch": 0.12, "learning_rate": 4.7944889265536723e-05, "loss": 0.1202, "step": 1819500 }, { "epoch": 0.12, "learning_rate": 4.7944324293785314e-05, "loss": 0.1212, "step": 1820000 }, { "epoch": 0.12, "learning_rate": 4.79437593220339e-05, "loss": 0.1156, "step": 1820500 }, { "epoch": 0.12, "learning_rate": 4.794319548022599e-05, "loss": 0.1161, "step": 1821000 }, { "epoch": 0.12, "learning_rate": 4.7942630508474576e-05, "loss": 0.1195, "step": 1821500 }, { "epoch": 0.12, "learning_rate": 4.794206666666667e-05, "loss": 0.1162, "step": 1822000 }, { "epoch": 0.12, "learning_rate": 4.794150169491526e-05, "loss": 0.121, "step": 1822500 }, { "epoch": 0.12, "learning_rate": 4.7940936723163844e-05, "loss": 0.1129, "step": 1823000 }, { "epoch": 0.12, "learning_rate": 4.7940371751412435e-05, "loss": 0.1177, "step": 1823500 }, { "epoch": 0.12, "learning_rate": 4.793980677966102e-05, "loss": 0.1134, "step": 1824000 }, { "epoch": 0.12, "learning_rate": 4.793924180790961e-05, "loss": 0.1112, "step": 1824500 }, { "epoch": 0.12, "learning_rate": 4.793867683615819e-05, "loss": 0.1168, "step": 1825000 }, { "epoch": 0.12, "learning_rate": 4.793811186440678e-05, "loss": 0.1171, "step": 1825500 }, { "epoch": 0.12, "learning_rate": 4.7937546892655374e-05, "loss": 0.1152, "step": 1826000 }, { "epoch": 0.12, "learning_rate": 4.793698192090396e-05, "loss": 0.1153, "step": 1826500 }, { "epoch": 0.12, "learning_rate": 4.793641694915255e-05, "loss": 0.1177, "step": 1827000 }, { "epoch": 0.12, "learning_rate": 4.7935853107344636e-05, "loss": 0.1153, "step": 1827500 }, { "epoch": 0.12, "learning_rate": 4.793528813559322e-05, "loss": 0.1226, "step": 1828000 }, { "epoch": 0.12, "learning_rate": 4.793472316384181e-05, "loss": 0.1136, "step": 1828500 }, { "epoch": 0.12, "learning_rate": 4.79341581920904e-05, "loss": 0.1168, "step": 1829000 }, { "epoch": 0.12, "learning_rate": 4.7933593220338984e-05, "loss": 0.1151, "step": 1829500 }, { "epoch": 0.12, "learning_rate": 4.7933028248587575e-05, "loss": 0.1159, "step": 1830000 }, { "epoch": 0.12, "learning_rate": 4.793246327683616e-05, "loss": 0.114, "step": 1830500 }, { "epoch": 0.12, "learning_rate": 4.793189830508475e-05, "loss": 0.1121, "step": 1831000 }, { "epoch": 0.12, "learning_rate": 4.793133559322034e-05, "loss": 0.1157, "step": 1831500 }, { "epoch": 0.12, "learning_rate": 4.793077062146893e-05, "loss": 0.1131, "step": 1832000 }, { "epoch": 0.12, "learning_rate": 4.793020564971752e-05, "loss": 0.1145, "step": 1832500 }, { "epoch": 0.12, "learning_rate": 4.7929640677966105e-05, "loss": 0.1161, "step": 1833000 }, { "epoch": 0.12, "learning_rate": 4.7929075706214695e-05, "loss": 0.1154, "step": 1833500 }, { "epoch": 0.12, "learning_rate": 4.792851073446328e-05, "loss": 0.1086, "step": 1834000 }, { "epoch": 0.12, "learning_rate": 4.792794576271187e-05, "loss": 0.1218, "step": 1834500 }, { "epoch": 0.12, "learning_rate": 4.792738192090396e-05, "loss": 0.1147, "step": 1835000 }, { "epoch": 0.12, "learning_rate": 4.792681694915254e-05, "loss": 0.1161, "step": 1835500 }, { "epoch": 0.12, "learning_rate": 4.792625197740113e-05, "loss": 0.1168, "step": 1836000 }, { "epoch": 0.12, "learning_rate": 4.792568700564972e-05, "loss": 0.1184, "step": 1836500 }, { "epoch": 0.12, "learning_rate": 4.7925122033898306e-05, "loss": 0.1242, "step": 1837000 }, { "epoch": 0.12, "learning_rate": 4.7924557062146896e-05, "loss": 0.116, "step": 1837500 }, { "epoch": 0.12, "learning_rate": 4.792399209039548e-05, "loss": 0.1113, "step": 1838000 }, { "epoch": 0.12, "learning_rate": 4.792342711864407e-05, "loss": 0.1201, "step": 1838500 }, { "epoch": 0.12, "learning_rate": 4.792286327683616e-05, "loss": 0.1115, "step": 1839000 }, { "epoch": 0.12, "learning_rate": 4.792229943502825e-05, "loss": 0.1166, "step": 1839500 }, { "epoch": 0.12, "learning_rate": 4.792173446327684e-05, "loss": 0.1124, "step": 1840000 }, { "epoch": 0.12, "learning_rate": 4.7921169491525426e-05, "loss": 0.1132, "step": 1840500 }, { "epoch": 0.12, "learning_rate": 4.792060451977402e-05, "loss": 0.1086, "step": 1841000 }, { "epoch": 0.12, "learning_rate": 4.79200395480226e-05, "loss": 0.1177, "step": 1841500 }, { "epoch": 0.12, "learning_rate": 4.791947457627119e-05, "loss": 0.1137, "step": 1842000 }, { "epoch": 0.12, "learning_rate": 4.791891073446328e-05, "loss": 0.1166, "step": 1842500 }, { "epoch": 0.12, "learning_rate": 4.791834576271187e-05, "loss": 0.1118, "step": 1843000 }, { "epoch": 0.12, "learning_rate": 4.791778079096045e-05, "loss": 0.1139, "step": 1843500 }, { "epoch": 0.13, "learning_rate": 4.7917215819209043e-05, "loss": 0.1066, "step": 1844000 }, { "epoch": 0.13, "learning_rate": 4.791665084745763e-05, "loss": 0.1133, "step": 1844500 }, { "epoch": 0.13, "learning_rate": 4.791608587570622e-05, "loss": 0.1135, "step": 1845000 }, { "epoch": 0.13, "learning_rate": 4.7915522033898305e-05, "loss": 0.1048, "step": 1845500 }, { "epoch": 0.13, "learning_rate": 4.7914957062146896e-05, "loss": 0.1167, "step": 1846000 }, { "epoch": 0.13, "learning_rate": 4.791439209039548e-05, "loss": 0.111, "step": 1846500 }, { "epoch": 0.13, "learning_rate": 4.791382711864407e-05, "loss": 0.1107, "step": 1847000 }, { "epoch": 0.13, "learning_rate": 4.7913262146892654e-05, "loss": 0.1099, "step": 1847500 }, { "epoch": 0.13, "learning_rate": 4.7912697175141244e-05, "loss": 0.1143, "step": 1848000 }, { "epoch": 0.13, "learning_rate": 4.791213333333334e-05, "loss": 0.1101, "step": 1848500 }, { "epoch": 0.13, "learning_rate": 4.791156836158192e-05, "loss": 0.1085, "step": 1849000 }, { "epoch": 0.13, "learning_rate": 4.791100338983051e-05, "loss": 0.1203, "step": 1849500 }, { "epoch": 0.13, "learning_rate": 4.79104384180791e-05, "loss": 0.1232, "step": 1850000 }, { "epoch": 0.13, "learning_rate": 4.790987457627119e-05, "loss": 0.1124, "step": 1850500 }, { "epoch": 0.13, "learning_rate": 4.7909309604519774e-05, "loss": 0.1137, "step": 1851000 }, { "epoch": 0.13, "learning_rate": 4.7908744632768365e-05, "loss": 0.1173, "step": 1851500 }, { "epoch": 0.13, "learning_rate": 4.790817966101695e-05, "loss": 0.1106, "step": 1852000 }, { "epoch": 0.13, "learning_rate": 4.790761468926554e-05, "loss": 0.1192, "step": 1852500 }, { "epoch": 0.13, "learning_rate": 4.7907050847457627e-05, "loss": 0.1058, "step": 1853000 }, { "epoch": 0.13, "learning_rate": 4.790648587570622e-05, "loss": 0.1153, "step": 1853500 }, { "epoch": 0.13, "learning_rate": 4.79059209039548e-05, "loss": 0.1153, "step": 1854000 }, { "epoch": 0.13, "learning_rate": 4.790535593220339e-05, "loss": 0.1232, "step": 1854500 }, { "epoch": 0.13, "learning_rate": 4.7904792090395485e-05, "loss": 0.1185, "step": 1855000 }, { "epoch": 0.13, "learning_rate": 4.790422711864407e-05, "loss": 0.1198, "step": 1855500 }, { "epoch": 0.13, "learning_rate": 4.790366214689266e-05, "loss": 0.1114, "step": 1856000 }, { "epoch": 0.13, "learning_rate": 4.7903097175141244e-05, "loss": 0.1189, "step": 1856500 }, { "epoch": 0.13, "learning_rate": 4.7902532203389834e-05, "loss": 0.1166, "step": 1857000 }, { "epoch": 0.13, "learning_rate": 4.7901967231638425e-05, "loss": 0.1155, "step": 1857500 }, { "epoch": 0.13, "learning_rate": 4.790140338983051e-05, "loss": 0.1124, "step": 1858000 }, { "epoch": 0.13, "learning_rate": 4.7900838418079096e-05, "loss": 0.1131, "step": 1858500 }, { "epoch": 0.13, "learning_rate": 4.7900273446327686e-05, "loss": 0.1117, "step": 1859000 }, { "epoch": 0.13, "learning_rate": 4.789970847457627e-05, "loss": 0.119, "step": 1859500 }, { "epoch": 0.13, "learning_rate": 4.789914350282486e-05, "loss": 0.1182, "step": 1860000 }, { "epoch": 0.13, "learning_rate": 4.789857966101695e-05, "loss": 0.1179, "step": 1860500 }, { "epoch": 0.13, "learning_rate": 4.789801468926554e-05, "loss": 0.1181, "step": 1861000 }, { "epoch": 0.13, "learning_rate": 4.789744971751412e-05, "loss": 0.1175, "step": 1861500 }, { "epoch": 0.13, "learning_rate": 4.789688474576271e-05, "loss": 0.1091, "step": 1862000 }, { "epoch": 0.13, "learning_rate": 4.789632090395481e-05, "loss": 0.117, "step": 1862500 }, { "epoch": 0.13, "learning_rate": 4.789575593220339e-05, "loss": 0.1223, "step": 1863000 }, { "epoch": 0.13, "learning_rate": 4.789519096045198e-05, "loss": 0.1159, "step": 1863500 }, { "epoch": 0.13, "learning_rate": 4.789462598870057e-05, "loss": 0.1089, "step": 1864000 }, { "epoch": 0.13, "learning_rate": 4.7894061016949156e-05, "loss": 0.1144, "step": 1864500 }, { "epoch": 0.13, "learning_rate": 4.7893496045197746e-05, "loss": 0.1074, "step": 1865000 }, { "epoch": 0.13, "learning_rate": 4.789293107344633e-05, "loss": 0.1139, "step": 1865500 }, { "epoch": 0.13, "learning_rate": 4.789236723163842e-05, "loss": 0.1184, "step": 1866000 }, { "epoch": 0.13, "learning_rate": 4.789180225988701e-05, "loss": 0.1054, "step": 1866500 }, { "epoch": 0.13, "learning_rate": 4.789123728813559e-05, "loss": 0.1174, "step": 1867000 }, { "epoch": 0.13, "learning_rate": 4.789067231638418e-05, "loss": 0.1151, "step": 1867500 }, { "epoch": 0.13, "learning_rate": 4.7890108474576276e-05, "loss": 0.1115, "step": 1868000 }, { "epoch": 0.13, "learning_rate": 4.788954350282486e-05, "loss": 0.1184, "step": 1868500 }, { "epoch": 0.13, "learning_rate": 4.788897853107345e-05, "loss": 0.1139, "step": 1869000 }, { "epoch": 0.13, "learning_rate": 4.7888413559322034e-05, "loss": 0.118, "step": 1869500 }, { "epoch": 0.13, "learning_rate": 4.7887848587570625e-05, "loss": 0.1112, "step": 1870000 }, { "epoch": 0.13, "learning_rate": 4.788728474576271e-05, "loss": 0.1123, "step": 1870500 }, { "epoch": 0.13, "learning_rate": 4.78867197740113e-05, "loss": 0.1167, "step": 1871000 }, { "epoch": 0.13, "learning_rate": 4.7886154802259893e-05, "loss": 0.1131, "step": 1871500 }, { "epoch": 0.13, "learning_rate": 4.788558983050848e-05, "loss": 0.1136, "step": 1872000 }, { "epoch": 0.13, "learning_rate": 4.7885025988700564e-05, "loss": 0.1196, "step": 1872500 }, { "epoch": 0.13, "learning_rate": 4.7884461016949155e-05, "loss": 0.1208, "step": 1873000 }, { "epoch": 0.13, "learning_rate": 4.788389604519774e-05, "loss": 0.1072, "step": 1873500 }, { "epoch": 0.13, "learning_rate": 4.788333107344633e-05, "loss": 0.1172, "step": 1874000 }, { "epoch": 0.13, "learning_rate": 4.7882767231638423e-05, "loss": 0.1157, "step": 1874500 }, { "epoch": 0.13, "learning_rate": 4.788220225988701e-05, "loss": 0.1181, "step": 1875000 }, { "epoch": 0.13, "learning_rate": 4.78816372881356e-05, "loss": 0.1135, "step": 1875500 }, { "epoch": 0.13, "learning_rate": 4.788107231638418e-05, "loss": 0.116, "step": 1876000 }, { "epoch": 0.13, "learning_rate": 4.788050734463277e-05, "loss": 0.1149, "step": 1876500 }, { "epoch": 0.13, "learning_rate": 4.7879942372881356e-05, "loss": 0.1056, "step": 1877000 }, { "epoch": 0.13, "learning_rate": 4.787937853107345e-05, "loss": 0.1056, "step": 1877500 }, { "epoch": 0.13, "learning_rate": 4.787881355932204e-05, "loss": 0.1063, "step": 1878000 }, { "epoch": 0.13, "learning_rate": 4.7878248587570624e-05, "loss": 0.1135, "step": 1878500 }, { "epoch": 0.13, "learning_rate": 4.7877683615819215e-05, "loss": 0.1148, "step": 1879000 }, { "epoch": 0.13, "learning_rate": 4.78771186440678e-05, "loss": 0.1138, "step": 1879500 }, { "epoch": 0.13, "learning_rate": 4.7876554802259886e-05, "loss": 0.1155, "step": 1880000 }, { "epoch": 0.13, "learning_rate": 4.7875989830508477e-05, "loss": 0.1117, "step": 1880500 }, { "epoch": 0.13, "learning_rate": 4.787542485875706e-05, "loss": 0.12, "step": 1881000 }, { "epoch": 0.13, "learning_rate": 4.787485988700565e-05, "loss": 0.1099, "step": 1881500 }, { "epoch": 0.13, "learning_rate": 4.7874296045197745e-05, "loss": 0.119, "step": 1882000 }, { "epoch": 0.13, "learning_rate": 4.787373107344633e-05, "loss": 0.1154, "step": 1882500 }, { "epoch": 0.13, "learning_rate": 4.787316610169492e-05, "loss": 0.1181, "step": 1883000 }, { "epoch": 0.13, "learning_rate": 4.78726011299435e-05, "loss": 0.1246, "step": 1883500 }, { "epoch": 0.13, "learning_rate": 4.7872036158192094e-05, "loss": 0.1171, "step": 1884000 }, { "epoch": 0.13, "learning_rate": 4.787147118644068e-05, "loss": 0.1159, "step": 1884500 }, { "epoch": 0.13, "learning_rate": 4.787090734463277e-05, "loss": 0.1171, "step": 1885000 }, { "epoch": 0.13, "learning_rate": 4.787034237288136e-05, "loss": 0.1152, "step": 1885500 }, { "epoch": 0.13, "learning_rate": 4.7869777401129946e-05, "loss": 0.1094, "step": 1886000 }, { "epoch": 0.13, "learning_rate": 4.7869212429378536e-05, "loss": 0.1182, "step": 1886500 }, { "epoch": 0.13, "learning_rate": 4.786864971751413e-05, "loss": 0.1178, "step": 1887000 }, { "epoch": 0.13, "learning_rate": 4.786808474576272e-05, "loss": 0.1252, "step": 1887500 }, { "epoch": 0.13, "learning_rate": 4.78675197740113e-05, "loss": 0.1127, "step": 1888000 }, { "epoch": 0.13, "learning_rate": 4.786695480225989e-05, "loss": 0.1131, "step": 1888500 }, { "epoch": 0.13, "learning_rate": 4.7866389830508476e-05, "loss": 0.1175, "step": 1889000 }, { "epoch": 0.13, "learning_rate": 4.7865824858757066e-05, "loss": 0.1127, "step": 1889500 }, { "epoch": 0.13, "learning_rate": 4.786525988700565e-05, "loss": 0.1086, "step": 1890000 }, { "epoch": 0.13, "learning_rate": 4.786469491525424e-05, "loss": 0.1136, "step": 1890500 }, { "epoch": 0.13, "learning_rate": 4.786413107344633e-05, "loss": 0.1087, "step": 1891000 }, { "epoch": 0.13, "learning_rate": 4.786356610169492e-05, "loss": 0.1168, "step": 1891500 }, { "epoch": 0.13, "learning_rate": 4.786300112994351e-05, "loss": 0.1147, "step": 1892000 }, { "epoch": 0.13, "learning_rate": 4.786243615819209e-05, "loss": 0.1124, "step": 1892500 }, { "epoch": 0.13, "learning_rate": 4.7861871186440684e-05, "loss": 0.1045, "step": 1893000 }, { "epoch": 0.13, "learning_rate": 4.786130734463277e-05, "loss": 0.1148, "step": 1893500 }, { "epoch": 0.13, "learning_rate": 4.7860742372881355e-05, "loss": 0.1142, "step": 1894000 }, { "epoch": 0.13, "learning_rate": 4.7860177401129945e-05, "loss": 0.1068, "step": 1894500 }, { "epoch": 0.13, "learning_rate": 4.785961242937853e-05, "loss": 0.1117, "step": 1895000 }, { "epoch": 0.13, "learning_rate": 4.785904745762712e-05, "loss": 0.1155, "step": 1895500 }, { "epoch": 0.13, "learning_rate": 4.7858483615819213e-05, "loss": 0.1067, "step": 1896000 }, { "epoch": 0.13, "learning_rate": 4.78579186440678e-05, "loss": 0.1043, "step": 1896500 }, { "epoch": 0.13, "learning_rate": 4.785735367231639e-05, "loss": 0.1098, "step": 1897000 }, { "epoch": 0.13, "learning_rate": 4.785678870056498e-05, "loss": 0.109, "step": 1897500 }, { "epoch": 0.13, "learning_rate": 4.785622372881356e-05, "loss": 0.1141, "step": 1898000 }, { "epoch": 0.13, "learning_rate": 4.785565875706215e-05, "loss": 0.1075, "step": 1898500 }, { "epoch": 0.13, "learning_rate": 4.785509378531074e-05, "loss": 0.1177, "step": 1899000 }, { "epoch": 0.13, "learning_rate": 4.785452881355933e-05, "loss": 0.1134, "step": 1899500 }, { "epoch": 0.13, "learning_rate": 4.7853964971751414e-05, "loss": 0.1168, "step": 1900000 }, { "epoch": 0.13, "learning_rate": 4.7853400000000005e-05, "loss": 0.1118, "step": 1900500 }, { "epoch": 0.13, "learning_rate": 4.785283502824859e-05, "loss": 0.117, "step": 1901000 }, { "epoch": 0.13, "learning_rate": 4.7852271186440676e-05, "loss": 0.1124, "step": 1901500 }, { "epoch": 0.13, "learning_rate": 4.785170621468927e-05, "loss": 0.1146, "step": 1902000 }, { "epoch": 0.13, "learning_rate": 4.785114124293786e-05, "loss": 0.1134, "step": 1902500 }, { "epoch": 0.13, "learning_rate": 4.785057627118644e-05, "loss": 0.1149, "step": 1903000 }, { "epoch": 0.13, "learning_rate": 4.785001129943503e-05, "loss": 0.1107, "step": 1903500 }, { "epoch": 0.13, "learning_rate": 4.7849446327683615e-05, "loss": 0.1142, "step": 1904000 }, { "epoch": 0.13, "learning_rate": 4.7848881355932206e-05, "loss": 0.1133, "step": 1904500 }, { "epoch": 0.13, "learning_rate": 4.784831638418079e-05, "loss": 0.1121, "step": 1905000 }, { "epoch": 0.13, "learning_rate": 4.784775367231639e-05, "loss": 0.1154, "step": 1905500 }, { "epoch": 0.13, "learning_rate": 4.784718870056498e-05, "loss": 0.1145, "step": 1906000 }, { "epoch": 0.13, "learning_rate": 4.784662372881356e-05, "loss": 0.1145, "step": 1906500 }, { "epoch": 0.13, "learning_rate": 4.784605875706215e-05, "loss": 0.1157, "step": 1907000 }, { "epoch": 0.13, "learning_rate": 4.7845493785310736e-05, "loss": 0.1153, "step": 1907500 }, { "epoch": 0.13, "learning_rate": 4.7844928813559327e-05, "loss": 0.1075, "step": 1908000 }, { "epoch": 0.13, "learning_rate": 4.784436384180791e-05, "loss": 0.1183, "step": 1908500 }, { "epoch": 0.13, "learning_rate": 4.78437988700565e-05, "loss": 0.1159, "step": 1909000 }, { "epoch": 0.13, "learning_rate": 4.784323502824859e-05, "loss": 0.1126, "step": 1909500 }, { "epoch": 0.13, "learning_rate": 4.784267005649718e-05, "loss": 0.1135, "step": 1910000 }, { "epoch": 0.13, "learning_rate": 4.784210508474576e-05, "loss": 0.1154, "step": 1910500 }, { "epoch": 0.13, "learning_rate": 4.784154011299435e-05, "loss": 0.1039, "step": 1911000 }, { "epoch": 0.13, "learning_rate": 4.784097514124294e-05, "loss": 0.1095, "step": 1911500 }, { "epoch": 0.13, "learning_rate": 4.784041129943503e-05, "loss": 0.1161, "step": 1912000 }, { "epoch": 0.13, "learning_rate": 4.783984632768362e-05, "loss": 0.1093, "step": 1912500 }, { "epoch": 0.13, "learning_rate": 4.7839281355932205e-05, "loss": 0.1175, "step": 1913000 }, { "epoch": 0.13, "learning_rate": 4.7838716384180796e-05, "loss": 0.1115, "step": 1913500 }, { "epoch": 0.13, "learning_rate": 4.783815254237288e-05, "loss": 0.1182, "step": 1914000 }, { "epoch": 0.13, "learning_rate": 4.7837587570621474e-05, "loss": 0.1116, "step": 1914500 }, { "epoch": 0.13, "learning_rate": 4.783702259887006e-05, "loss": 0.112, "step": 1915000 }, { "epoch": 0.13, "learning_rate": 4.783645762711865e-05, "loss": 0.115, "step": 1915500 }, { "epoch": 0.13, "learning_rate": 4.783589265536723e-05, "loss": 0.1168, "step": 1916000 }, { "epoch": 0.13, "learning_rate": 4.7835328813559326e-05, "loss": 0.1085, "step": 1916500 }, { "epoch": 0.13, "learning_rate": 4.783476384180791e-05, "loss": 0.1151, "step": 1917000 }, { "epoch": 0.13, "learning_rate": 4.78341988700565e-05, "loss": 0.1195, "step": 1917500 }, { "epoch": 0.13, "learning_rate": 4.7833633898305084e-05, "loss": 0.1147, "step": 1918000 }, { "epoch": 0.13, "learning_rate": 4.783307005649718e-05, "loss": 0.1093, "step": 1918500 }, { "epoch": 0.13, "learning_rate": 4.783250508474577e-05, "loss": 0.1125, "step": 1919000 }, { "epoch": 0.13, "learning_rate": 4.783194011299435e-05, "loss": 0.1101, "step": 1919500 }, { "epoch": 0.13, "learning_rate": 4.783137514124294e-05, "loss": 0.1184, "step": 1920000 }, { "epoch": 0.13, "learning_rate": 4.783081016949153e-05, "loss": 0.1156, "step": 1920500 }, { "epoch": 0.13, "learning_rate": 4.783024632768362e-05, "loss": 0.112, "step": 1921000 }, { "epoch": 0.13, "learning_rate": 4.7829681355932205e-05, "loss": 0.1166, "step": 1921500 }, { "epoch": 0.13, "learning_rate": 4.7829116384180795e-05, "loss": 0.1092, "step": 1922000 }, { "epoch": 0.13, "learning_rate": 4.782855141242938e-05, "loss": 0.1124, "step": 1922500 }, { "epoch": 0.13, "learning_rate": 4.782798644067797e-05, "loss": 0.1097, "step": 1923000 }, { "epoch": 0.13, "learning_rate": 4.782742259887006e-05, "loss": 0.1154, "step": 1923500 }, { "epoch": 0.13, "learning_rate": 4.782685762711865e-05, "loss": 0.1163, "step": 1924000 }, { "epoch": 0.13, "learning_rate": 4.782629265536723e-05, "loss": 0.1135, "step": 1924500 }, { "epoch": 0.13, "learning_rate": 4.782572768361582e-05, "loss": 0.1095, "step": 1925000 }, { "epoch": 0.13, "learning_rate": 4.7825162711864405e-05, "loss": 0.1081, "step": 1925500 }, { "epoch": 0.13, "learning_rate": 4.78245988700565e-05, "loss": 0.1122, "step": 1926000 }, { "epoch": 0.13, "learning_rate": 4.782403389830509e-05, "loss": 0.1142, "step": 1926500 }, { "epoch": 0.13, "learning_rate": 4.782346892655368e-05, "loss": 0.1124, "step": 1927000 }, { "epoch": 0.13, "learning_rate": 4.7822903954802264e-05, "loss": 0.1079, "step": 1927500 }, { "epoch": 0.13, "learning_rate": 4.782234011299435e-05, "loss": 0.1076, "step": 1928000 }, { "epoch": 0.13, "learning_rate": 4.782177514124294e-05, "loss": 0.1125, "step": 1928500 }, { "epoch": 0.13, "learning_rate": 4.7821210169491526e-05, "loss": 0.1199, "step": 1929000 }, { "epoch": 0.13, "learning_rate": 4.7820645197740117e-05, "loss": 0.1075, "step": 1929500 }, { "epoch": 0.13, "learning_rate": 4.7820081355932204e-05, "loss": 0.1176, "step": 1930000 }, { "epoch": 0.13, "learning_rate": 4.7819516384180794e-05, "loss": 0.1137, "step": 1930500 }, { "epoch": 0.13, "learning_rate": 4.781895141242938e-05, "loss": 0.1148, "step": 1931000 }, { "epoch": 0.13, "learning_rate": 4.781838644067797e-05, "loss": 0.1097, "step": 1931500 }, { "epoch": 0.13, "learning_rate": 4.781782259887006e-05, "loss": 0.1139, "step": 1932000 }, { "epoch": 0.13, "learning_rate": 4.7817257627118647e-05, "loss": 0.1156, "step": 1932500 }, { "epoch": 0.13, "learning_rate": 4.781669265536724e-05, "loss": 0.1125, "step": 1933000 }, { "epoch": 0.13, "learning_rate": 4.781612768361582e-05, "loss": 0.1149, "step": 1933500 }, { "epoch": 0.13, "learning_rate": 4.781556271186441e-05, "loss": 0.1213, "step": 1934000 }, { "epoch": 0.13, "learning_rate": 4.78149988700565e-05, "loss": 0.1188, "step": 1934500 }, { "epoch": 0.13, "learning_rate": 4.781443389830509e-05, "loss": 0.1168, "step": 1935000 }, { "epoch": 0.13, "learning_rate": 4.781386892655367e-05, "loss": 0.1166, "step": 1935500 }, { "epoch": 0.13, "learning_rate": 4.7813303954802264e-05, "loss": 0.1182, "step": 1936000 }, { "epoch": 0.13, "learning_rate": 4.781273898305085e-05, "loss": 0.1175, "step": 1936500 }, { "epoch": 0.13, "learning_rate": 4.7812175141242935e-05, "loss": 0.1153, "step": 1937000 }, { "epoch": 0.13, "learning_rate": 4.7811610169491525e-05, "loss": 0.1157, "step": 1937500 }, { "epoch": 0.13, "learning_rate": 4.7811045197740116e-05, "loss": 0.1098, "step": 1938000 }, { "epoch": 0.13, "learning_rate": 4.78104802259887e-05, "loss": 0.118, "step": 1938500 }, { "epoch": 0.13, "learning_rate": 4.7809916384180794e-05, "loss": 0.112, "step": 1939000 }, { "epoch": 0.13, "learning_rate": 4.7809351412429384e-05, "loss": 0.109, "step": 1939500 }, { "epoch": 0.13, "learning_rate": 4.780878644067797e-05, "loss": 0.1111, "step": 1940000 }, { "epoch": 0.13, "learning_rate": 4.780822146892656e-05, "loss": 0.1096, "step": 1940500 }, { "epoch": 0.13, "learning_rate": 4.7807657627118646e-05, "loss": 0.1174, "step": 1941000 }, { "epoch": 0.13, "learning_rate": 4.7807092655367236e-05, "loss": 0.1189, "step": 1941500 }, { "epoch": 0.13, "learning_rate": 4.780652768361582e-05, "loss": 0.1081, "step": 1942000 }, { "epoch": 0.13, "learning_rate": 4.780596271186441e-05, "loss": 0.1153, "step": 1942500 }, { "epoch": 0.13, "learning_rate": 4.7805397740112995e-05, "loss": 0.1106, "step": 1943000 }, { "epoch": 0.13, "learning_rate": 4.7804832768361585e-05, "loss": 0.1191, "step": 1943500 }, { "epoch": 0.13, "learning_rate": 4.780426779661017e-05, "loss": 0.1149, "step": 1944000 }, { "epoch": 0.13, "learning_rate": 4.780370282485876e-05, "loss": 0.1123, "step": 1944500 }, { "epoch": 0.13, "learning_rate": 4.780313898305085e-05, "loss": 0.1138, "step": 1945000 }, { "epoch": 0.13, "learning_rate": 4.780257401129944e-05, "loss": 0.11, "step": 1945500 }, { "epoch": 0.13, "learning_rate": 4.780200903954802e-05, "loss": 0.111, "step": 1946000 }, { "epoch": 0.13, "learning_rate": 4.780144406779661e-05, "loss": 0.1221, "step": 1946500 }, { "epoch": 0.13, "learning_rate": 4.7800879096045196e-05, "loss": 0.112, "step": 1947000 }, { "epoch": 0.13, "learning_rate": 4.780031525423729e-05, "loss": 0.1145, "step": 1947500 }, { "epoch": 0.13, "learning_rate": 4.779975028248588e-05, "loss": 0.116, "step": 1948000 }, { "epoch": 0.13, "learning_rate": 4.779918531073447e-05, "loss": 0.1077, "step": 1948500 }, { "epoch": 0.13, "learning_rate": 4.7798620338983055e-05, "loss": 0.1198, "step": 1949000 }, { "epoch": 0.13, "learning_rate": 4.7798055367231645e-05, "loss": 0.1255, "step": 1949500 }, { "epoch": 0.13, "learning_rate": 4.779749152542373e-05, "loss": 0.1134, "step": 1950000 }, { "epoch": 0.13, "learning_rate": 4.7796926553672316e-05, "loss": 0.1089, "step": 1950500 }, { "epoch": 0.13, "learning_rate": 4.77963627118644e-05, "loss": 0.1192, "step": 1951000 }, { "epoch": 0.13, "learning_rate": 4.7795797740112994e-05, "loss": 0.1151, "step": 1951500 }, { "epoch": 0.13, "learning_rate": 4.7795232768361584e-05, "loss": 0.1088, "step": 1952000 }, { "epoch": 0.13, "learning_rate": 4.779466779661017e-05, "loss": 0.115, "step": 1952500 }, { "epoch": 0.13, "learning_rate": 4.779410282485876e-05, "loss": 0.1109, "step": 1953000 }, { "epoch": 0.13, "learning_rate": 4.779353785310734e-05, "loss": 0.1115, "step": 1953500 }, { "epoch": 0.13, "learning_rate": 4.779297288135593e-05, "loss": 0.1131, "step": 1954000 }, { "epoch": 0.13, "learning_rate": 4.779240790960452e-05, "loss": 0.1135, "step": 1954500 }, { "epoch": 0.13, "learning_rate": 4.779184406779662e-05, "loss": 0.1156, "step": 1955000 }, { "epoch": 0.13, "learning_rate": 4.77912790960452e-05, "loss": 0.1113, "step": 1955500 }, { "epoch": 0.13, "learning_rate": 4.779071412429379e-05, "loss": 0.1128, "step": 1956000 }, { "epoch": 0.13, "learning_rate": 4.7790149152542376e-05, "loss": 0.113, "step": 1956500 }, { "epoch": 0.13, "learning_rate": 4.778958531073446e-05, "loss": 0.1134, "step": 1957000 }, { "epoch": 0.13, "learning_rate": 4.7789020338983054e-05, "loss": 0.1126, "step": 1957500 }, { "epoch": 0.13, "learning_rate": 4.778845536723164e-05, "loss": 0.1158, "step": 1958000 }, { "epoch": 0.13, "learning_rate": 4.778789039548023e-05, "loss": 0.1121, "step": 1958500 }, { "epoch": 0.13, "learning_rate": 4.778732542372882e-05, "loss": 0.1138, "step": 1959000 }, { "epoch": 0.13, "learning_rate": 4.77867604519774e-05, "loss": 0.1151, "step": 1959500 }, { "epoch": 0.13, "learning_rate": 4.778619661016949e-05, "loss": 0.1194, "step": 1960000 }, { "epoch": 0.13, "learning_rate": 4.778563163841808e-05, "loss": 0.1191, "step": 1960500 }, { "epoch": 0.13, "learning_rate": 4.7785066666666664e-05, "loss": 0.1122, "step": 1961000 }, { "epoch": 0.13, "learning_rate": 4.7784501694915255e-05, "loss": 0.1102, "step": 1961500 }, { "epoch": 0.13, "learning_rate": 4.7783936723163845e-05, "loss": 0.1108, "step": 1962000 }, { "epoch": 0.13, "learning_rate": 4.778337288135594e-05, "loss": 0.1108, "step": 1962500 }, { "epoch": 0.13, "learning_rate": 4.778280790960452e-05, "loss": 0.1115, "step": 1963000 }, { "epoch": 0.13, "learning_rate": 4.7782242937853114e-05, "loss": 0.1097, "step": 1963500 }, { "epoch": 0.13, "learning_rate": 4.77816779661017e-05, "loss": 0.1125, "step": 1964000 }, { "epoch": 0.13, "learning_rate": 4.778111299435029e-05, "loss": 0.1187, "step": 1964500 }, { "epoch": 0.13, "learning_rate": 4.778054802259887e-05, "loss": 0.1176, "step": 1965000 }, { "epoch": 0.13, "learning_rate": 4.7779984180790966e-05, "loss": 0.1145, "step": 1965500 }, { "epoch": 0.13, "learning_rate": 4.777941920903955e-05, "loss": 0.1102, "step": 1966000 }, { "epoch": 0.13, "learning_rate": 4.777885423728814e-05, "loss": 0.1179, "step": 1966500 }, { "epoch": 0.13, "learning_rate": 4.7778289265536724e-05, "loss": 0.1101, "step": 1967000 }, { "epoch": 0.13, "learning_rate": 4.7777724293785315e-05, "loss": 0.1073, "step": 1967500 }, { "epoch": 0.13, "learning_rate": 4.77771604519774e-05, "loss": 0.1164, "step": 1968000 }, { "epoch": 0.13, "learning_rate": 4.7776595480225986e-05, "loss": 0.1109, "step": 1968500 }, { "epoch": 0.13, "learning_rate": 4.7776030508474576e-05, "loss": 0.112, "step": 1969000 }, { "epoch": 0.13, "learning_rate": 4.777546553672317e-05, "loss": 0.1263, "step": 1969500 }, { "epoch": 0.13, "learning_rate": 4.777490169491526e-05, "loss": 0.1161, "step": 1970000 }, { "epoch": 0.13, "learning_rate": 4.7774336723163845e-05, "loss": 0.1188, "step": 1970500 }, { "epoch": 0.13, "learning_rate": 4.7773771751412435e-05, "loss": 0.1114, "step": 1971000 }, { "epoch": 0.13, "learning_rate": 4.777320677966102e-05, "loss": 0.1138, "step": 1971500 }, { "epoch": 0.13, "learning_rate": 4.7772642937853106e-05, "loss": 0.1134, "step": 1972000 }, { "epoch": 0.13, "learning_rate": 4.77720779661017e-05, "loss": 0.1161, "step": 1972500 }, { "epoch": 0.13, "learning_rate": 4.777151299435029e-05, "loss": 0.1118, "step": 1973000 }, { "epoch": 0.13, "learning_rate": 4.777094802259887e-05, "loss": 0.12, "step": 1973500 }, { "epoch": 0.13, "learning_rate": 4.777038305084746e-05, "loss": 0.1117, "step": 1974000 }, { "epoch": 0.13, "learning_rate": 4.7769818079096046e-05, "loss": 0.1097, "step": 1974500 }, { "epoch": 0.13, "learning_rate": 4.7769253107344636e-05, "loss": 0.1052, "step": 1975000 }, { "epoch": 0.13, "learning_rate": 4.776868926553672e-05, "loss": 0.1155, "step": 1975500 }, { "epoch": 0.13, "learning_rate": 4.7768124293785314e-05, "loss": 0.1082, "step": 1976000 }, { "epoch": 0.13, "learning_rate": 4.77675593220339e-05, "loss": 0.1163, "step": 1976500 }, { "epoch": 0.13, "learning_rate": 4.776699435028249e-05, "loss": 0.112, "step": 1977000 }, { "epoch": 0.13, "learning_rate": 4.776642937853107e-05, "loss": 0.1073, "step": 1977500 }, { "epoch": 0.13, "learning_rate": 4.7765865536723166e-05, "loss": 0.1059, "step": 1978000 }, { "epoch": 0.13, "learning_rate": 4.776530056497176e-05, "loss": 0.1104, "step": 1978500 }, { "epoch": 0.13, "learning_rate": 4.776473559322034e-05, "loss": 0.1067, "step": 1979000 }, { "epoch": 0.13, "learning_rate": 4.776417062146893e-05, "loss": 0.1124, "step": 1979500 }, { "epoch": 0.13, "learning_rate": 4.776360564971752e-05, "loss": 0.1166, "step": 1980000 }, { "epoch": 0.13, "learning_rate": 4.776304180790961e-05, "loss": 0.1168, "step": 1980500 }, { "epoch": 0.13, "learning_rate": 4.776247683615819e-05, "loss": 0.1121, "step": 1981000 }, { "epoch": 0.13, "learning_rate": 4.776191186440678e-05, "loss": 0.1111, "step": 1981500 }, { "epoch": 0.13, "learning_rate": 4.776134689265537e-05, "loss": 0.1108, "step": 1982000 }, { "epoch": 0.13, "learning_rate": 4.776078192090396e-05, "loss": 0.1098, "step": 1982500 }, { "epoch": 0.13, "learning_rate": 4.7760218079096045e-05, "loss": 0.114, "step": 1983000 }, { "epoch": 0.13, "learning_rate": 4.7759653107344635e-05, "loss": 0.1184, "step": 1983500 }, { "epoch": 0.13, "learning_rate": 4.775908813559322e-05, "loss": 0.1111, "step": 1984000 }, { "epoch": 0.13, "learning_rate": 4.775852316384181e-05, "loss": 0.112, "step": 1984500 }, { "epoch": 0.13, "learning_rate": 4.7757958192090394e-05, "loss": 0.1165, "step": 1985000 }, { "epoch": 0.13, "learning_rate": 4.775739435028249e-05, "loss": 0.115, "step": 1985500 }, { "epoch": 0.13, "learning_rate": 4.775682937853108e-05, "loss": 0.1165, "step": 1986000 }, { "epoch": 0.13, "learning_rate": 4.775626440677967e-05, "loss": 0.1087, "step": 1986500 }, { "epoch": 0.13, "learning_rate": 4.775569943502825e-05, "loss": 0.1104, "step": 1987000 }, { "epoch": 0.13, "learning_rate": 4.775513559322034e-05, "loss": 0.1138, "step": 1987500 }, { "epoch": 0.13, "learning_rate": 4.775457062146893e-05, "loss": 0.1078, "step": 1988000 }, { "epoch": 0.13, "learning_rate": 4.7754005649717514e-05, "loss": 0.1124, "step": 1988500 }, { "epoch": 0.13, "learning_rate": 4.7753440677966105e-05, "loss": 0.1127, "step": 1989000 }, { "epoch": 0.13, "learning_rate": 4.775287570621469e-05, "loss": 0.1164, "step": 1989500 }, { "epoch": 0.13, "learning_rate": 4.775231186440678e-05, "loss": 0.114, "step": 1990000 }, { "epoch": 0.13, "learning_rate": 4.7751746892655366e-05, "loss": 0.1141, "step": 1990500 }, { "epoch": 0.13, "learning_rate": 4.775118192090396e-05, "loss": 0.1135, "step": 1991000 }, { "epoch": 0.14, "learning_rate": 4.775061694915254e-05, "loss": 0.1137, "step": 1991500 }, { "epoch": 0.14, "learning_rate": 4.775005197740113e-05, "loss": 0.1126, "step": 1992000 }, { "epoch": 0.14, "learning_rate": 4.7749487005649715e-05, "loss": 0.1064, "step": 1992500 }, { "epoch": 0.14, "learning_rate": 4.774892316384181e-05, "loss": 0.116, "step": 1993000 }, { "epoch": 0.14, "learning_rate": 4.77483581920904e-05, "loss": 0.1154, "step": 1993500 }, { "epoch": 0.14, "learning_rate": 4.774779322033899e-05, "loss": 0.115, "step": 1994000 }, { "epoch": 0.14, "learning_rate": 4.7747228248587574e-05, "loss": 0.1169, "step": 1994500 }, { "epoch": 0.14, "learning_rate": 4.774666440677966e-05, "loss": 0.1132, "step": 1995000 }, { "epoch": 0.14, "learning_rate": 4.774609943502825e-05, "loss": 0.1148, "step": 1995500 }, { "epoch": 0.14, "learning_rate": 4.7745534463276836e-05, "loss": 0.1137, "step": 1996000 }, { "epoch": 0.14, "learning_rate": 4.7744969491525426e-05, "loss": 0.1138, "step": 1996500 }, { "epoch": 0.14, "learning_rate": 4.774440451977401e-05, "loss": 0.1075, "step": 1997000 }, { "epoch": 0.14, "learning_rate": 4.7743840677966104e-05, "loss": 0.1149, "step": 1997500 }, { "epoch": 0.14, "learning_rate": 4.774327570621469e-05, "loss": 0.1229, "step": 1998000 }, { "epoch": 0.14, "learning_rate": 4.774271073446328e-05, "loss": 0.1154, "step": 1998500 }, { "epoch": 0.14, "learning_rate": 4.774214576271186e-05, "loss": 0.1116, "step": 1999000 }, { "epoch": 0.14, "learning_rate": 4.774158079096045e-05, "loss": 0.1075, "step": 1999500 }, { "epoch": 0.14, "learning_rate": 4.7741015819209037e-05, "loss": 0.1138, "step": 2000000 }, { "epoch": 0.14, "learning_rate": 4.774045197740114e-05, "loss": 0.109, "step": 2000500 }, { "epoch": 0.14, "learning_rate": 4.773988700564972e-05, "loss": 0.1105, "step": 2001000 }, { "epoch": 0.14, "learning_rate": 4.773932203389831e-05, "loss": 0.1106, "step": 2001500 }, { "epoch": 0.14, "learning_rate": 4.7738757062146896e-05, "loss": 0.1138, "step": 2002000 }, { "epoch": 0.14, "learning_rate": 4.7738192090395486e-05, "loss": 0.1105, "step": 2002500 }, { "epoch": 0.14, "learning_rate": 4.773762824858757e-05, "loss": 0.1062, "step": 2003000 }, { "epoch": 0.14, "learning_rate": 4.773706327683616e-05, "loss": 0.1127, "step": 2003500 }, { "epoch": 0.14, "learning_rate": 4.773649830508475e-05, "loss": 0.1047, "step": 2004000 }, { "epoch": 0.14, "learning_rate": 4.773593333333334e-05, "loss": 0.1124, "step": 2004500 }, { "epoch": 0.14, "learning_rate": 4.773536836158192e-05, "loss": 0.1119, "step": 2005000 }, { "epoch": 0.14, "learning_rate": 4.773480338983051e-05, "loss": 0.112, "step": 2005500 }, { "epoch": 0.14, "learning_rate": 4.77342395480226e-05, "loss": 0.1107, "step": 2006000 }, { "epoch": 0.14, "learning_rate": 4.7733674576271184e-05, "loss": 0.1098, "step": 2006500 }, { "epoch": 0.14, "learning_rate": 4.7733109604519774e-05, "loss": 0.1155, "step": 2007000 }, { "epoch": 0.14, "learning_rate": 4.773254463276836e-05, "loss": 0.1112, "step": 2007500 }, { "epoch": 0.14, "learning_rate": 4.773197966101695e-05, "loss": 0.1115, "step": 2008000 }, { "epoch": 0.14, "learning_rate": 4.773141468926554e-05, "loss": 0.1031, "step": 2008500 }, { "epoch": 0.14, "learning_rate": 4.773085084745763e-05, "loss": 0.1141, "step": 2009000 }, { "epoch": 0.14, "learning_rate": 4.773028587570622e-05, "loss": 0.105, "step": 2009500 }, { "epoch": 0.14, "learning_rate": 4.772972090395481e-05, "loss": 0.1098, "step": 2010000 }, { "epoch": 0.14, "learning_rate": 4.772915593220339e-05, "loss": 0.111, "step": 2010500 }, { "epoch": 0.14, "learning_rate": 4.772859096045198e-05, "loss": 0.1184, "step": 2011000 }, { "epoch": 0.14, "learning_rate": 4.772802598870057e-05, "loss": 0.1116, "step": 2011500 }, { "epoch": 0.14, "learning_rate": 4.772746214689266e-05, "loss": 0.1126, "step": 2012000 }, { "epoch": 0.14, "learning_rate": 4.7726897175141244e-05, "loss": 0.101, "step": 2012500 }, { "epoch": 0.14, "learning_rate": 4.7726332203389834e-05, "loss": 0.1118, "step": 2013000 }, { "epoch": 0.14, "learning_rate": 4.772576723163842e-05, "loss": 0.1067, "step": 2013500 }, { "epoch": 0.14, "learning_rate": 4.772520225988701e-05, "loss": 0.1063, "step": 2014000 }, { "epoch": 0.14, "learning_rate": 4.772463728813559e-05, "loss": 0.1167, "step": 2014500 }, { "epoch": 0.14, "learning_rate": 4.7724073446327686e-05, "loss": 0.1113, "step": 2015000 }, { "epoch": 0.14, "learning_rate": 4.772350847457627e-05, "loss": 0.1103, "step": 2015500 }, { "epoch": 0.14, "learning_rate": 4.772294350282486e-05, "loss": 0.1128, "step": 2016000 }, { "epoch": 0.14, "learning_rate": 4.7722378531073445e-05, "loss": 0.1132, "step": 2016500 }, { "epoch": 0.14, "learning_rate": 4.772181468926554e-05, "loss": 0.1158, "step": 2017000 }, { "epoch": 0.14, "learning_rate": 4.772124971751413e-05, "loss": 0.111, "step": 2017500 }, { "epoch": 0.14, "learning_rate": 4.772068474576271e-05, "loss": 0.1142, "step": 2018000 }, { "epoch": 0.14, "learning_rate": 4.7720119774011303e-05, "loss": 0.1131, "step": 2018500 }, { "epoch": 0.14, "learning_rate": 4.771955593220339e-05, "loss": 0.1116, "step": 2019000 }, { "epoch": 0.14, "learning_rate": 4.771899096045198e-05, "loss": 0.1168, "step": 2019500 }, { "epoch": 0.14, "learning_rate": 4.7718425988700565e-05, "loss": 0.1128, "step": 2020000 }, { "epoch": 0.14, "learning_rate": 4.7717861016949156e-05, "loss": 0.1114, "step": 2020500 }, { "epoch": 0.14, "learning_rate": 4.771729604519774e-05, "loss": 0.11, "step": 2021000 }, { "epoch": 0.14, "learning_rate": 4.771673107344633e-05, "loss": 0.1141, "step": 2021500 }, { "epoch": 0.14, "learning_rate": 4.771616610169492e-05, "loss": 0.1155, "step": 2022000 }, { "epoch": 0.14, "learning_rate": 4.771560225988701e-05, "loss": 0.1165, "step": 2022500 }, { "epoch": 0.14, "learning_rate": 4.771503728813559e-05, "loss": 0.1151, "step": 2023000 }, { "epoch": 0.14, "learning_rate": 4.771447231638418e-05, "loss": 0.1097, "step": 2023500 }, { "epoch": 0.14, "learning_rate": 4.7713907344632766e-05, "loss": 0.1114, "step": 2024000 }, { "epoch": 0.14, "learning_rate": 4.7713342372881357e-05, "loss": 0.1079, "step": 2024500 }, { "epoch": 0.14, "learning_rate": 4.771277853107345e-05, "loss": 0.1141, "step": 2025000 }, { "epoch": 0.14, "learning_rate": 4.771221355932204e-05, "loss": 0.1252, "step": 2025500 }, { "epoch": 0.14, "learning_rate": 4.7711648587570625e-05, "loss": 0.1163, "step": 2026000 }, { "epoch": 0.14, "learning_rate": 4.7711083615819216e-05, "loss": 0.1136, "step": 2026500 }, { "epoch": 0.14, "learning_rate": 4.77105197740113e-05, "loss": 0.1057, "step": 2027000 }, { "epoch": 0.14, "learning_rate": 4.7709954802259887e-05, "loss": 0.1045, "step": 2027500 }, { "epoch": 0.14, "learning_rate": 4.770938983050848e-05, "loss": 0.1127, "step": 2028000 }, { "epoch": 0.14, "learning_rate": 4.770882485875706e-05, "loss": 0.1141, "step": 2028500 }, { "epoch": 0.14, "learning_rate": 4.7708261016949155e-05, "loss": 0.112, "step": 2029000 }, { "epoch": 0.14, "learning_rate": 4.770769604519774e-05, "loss": 0.1162, "step": 2029500 }, { "epoch": 0.14, "learning_rate": 4.770713107344633e-05, "loss": 0.1124, "step": 2030000 }, { "epoch": 0.14, "learning_rate": 4.770656610169491e-05, "loss": 0.1142, "step": 2030500 }, { "epoch": 0.14, "learning_rate": 4.7706001129943504e-05, "loss": 0.1132, "step": 2031000 }, { "epoch": 0.14, "learning_rate": 4.770543615819209e-05, "loss": 0.1164, "step": 2031500 }, { "epoch": 0.14, "learning_rate": 4.770487118644068e-05, "loss": 0.1132, "step": 2032000 }, { "epoch": 0.14, "learning_rate": 4.770430734463277e-05, "loss": 0.1207, "step": 2032500 }, { "epoch": 0.14, "learning_rate": 4.770374237288136e-05, "loss": 0.1118, "step": 2033000 }, { "epoch": 0.14, "learning_rate": 4.7703177401129946e-05, "loss": 0.1119, "step": 2033500 }, { "epoch": 0.14, "learning_rate": 4.770261242937854e-05, "loss": 0.1081, "step": 2034000 }, { "epoch": 0.14, "learning_rate": 4.770204745762712e-05, "loss": 0.1141, "step": 2034500 }, { "epoch": 0.14, "learning_rate": 4.770148361581921e-05, "loss": 0.1099, "step": 2035000 }, { "epoch": 0.14, "learning_rate": 4.77009186440678e-05, "loss": 0.1115, "step": 2035500 }, { "epoch": 0.14, "learning_rate": 4.770035367231639e-05, "loss": 0.1172, "step": 2036000 }, { "epoch": 0.14, "learning_rate": 4.769978870056497e-05, "loss": 0.112, "step": 2036500 }, { "epoch": 0.14, "learning_rate": 4.769922485875706e-05, "loss": 0.1149, "step": 2037000 }, { "epoch": 0.14, "learning_rate": 4.769865988700565e-05, "loss": 0.1079, "step": 2037500 }, { "epoch": 0.14, "learning_rate": 4.7698094915254235e-05, "loss": 0.1054, "step": 2038000 }, { "epoch": 0.14, "learning_rate": 4.7697529943502825e-05, "loss": 0.11, "step": 2038500 }, { "epoch": 0.14, "learning_rate": 4.769696497175141e-05, "loss": 0.1152, "step": 2039000 }, { "epoch": 0.14, "learning_rate": 4.769640112994351e-05, "loss": 0.1152, "step": 2039500 }, { "epoch": 0.14, "learning_rate": 4.7695836158192094e-05, "loss": 0.1172, "step": 2040000 }, { "epoch": 0.14, "learning_rate": 4.7695271186440684e-05, "loss": 0.1073, "step": 2040500 }, { "epoch": 0.14, "learning_rate": 4.769470621468927e-05, "loss": 0.1111, "step": 2041000 }, { "epoch": 0.14, "learning_rate": 4.769414124293786e-05, "loss": 0.1043, "step": 2041500 }, { "epoch": 0.14, "learning_rate": 4.7693577401129946e-05, "loss": 0.1152, "step": 2042000 }, { "epoch": 0.14, "learning_rate": 4.769301242937853e-05, "loss": 0.1138, "step": 2042500 }, { "epoch": 0.14, "learning_rate": 4.769244745762712e-05, "loss": 0.1098, "step": 2043000 }, { "epoch": 0.14, "learning_rate": 4.769188248587571e-05, "loss": 0.1029, "step": 2043500 }, { "epoch": 0.14, "learning_rate": 4.76913186440678e-05, "loss": 0.1152, "step": 2044000 }, { "epoch": 0.14, "learning_rate": 4.769075367231638e-05, "loss": 0.1053, "step": 2044500 }, { "epoch": 0.14, "learning_rate": 4.769018870056497e-05, "loss": 0.1157, "step": 2045000 }, { "epoch": 0.14, "learning_rate": 4.7689623728813556e-05, "loss": 0.108, "step": 2045500 }, { "epoch": 0.14, "learning_rate": 4.768905875706215e-05, "loss": 0.1044, "step": 2046000 }, { "epoch": 0.14, "learning_rate": 4.768849378531074e-05, "loss": 0.1131, "step": 2046500 }, { "epoch": 0.14, "learning_rate": 4.768792881355932e-05, "loss": 0.1085, "step": 2047000 }, { "epoch": 0.14, "learning_rate": 4.7687364971751415e-05, "loss": 0.1057, "step": 2047500 }, { "epoch": 0.14, "learning_rate": 4.7686800000000006e-05, "loss": 0.1068, "step": 2048000 }, { "epoch": 0.14, "learning_rate": 4.768623502824859e-05, "loss": 0.1125, "step": 2048500 }, { "epoch": 0.14, "learning_rate": 4.768567005649718e-05, "loss": 0.1143, "step": 2049000 }, { "epoch": 0.14, "learning_rate": 4.7685105084745764e-05, "loss": 0.1094, "step": 2049500 }, { "epoch": 0.14, "learning_rate": 4.7684540112994354e-05, "loss": 0.1147, "step": 2050000 }, { "epoch": 0.14, "learning_rate": 4.768397627118644e-05, "loss": 0.1179, "step": 2050500 }, { "epoch": 0.14, "learning_rate": 4.768341129943503e-05, "loss": 0.1179, "step": 2051000 }, { "epoch": 0.14, "learning_rate": 4.7682846327683616e-05, "loss": 0.1118, "step": 2051500 }, { "epoch": 0.14, "learning_rate": 4.7682281355932207e-05, "loss": 0.1193, "step": 2052000 }, { "epoch": 0.14, "learning_rate": 4.7681717514124294e-05, "loss": 0.1153, "step": 2052500 }, { "epoch": 0.14, "learning_rate": 4.7681152542372884e-05, "loss": 0.1144, "step": 2053000 }, { "epoch": 0.14, "learning_rate": 4.768058757062147e-05, "loss": 0.1182, "step": 2053500 }, { "epoch": 0.14, "learning_rate": 4.768002259887006e-05, "loss": 0.1111, "step": 2054000 }, { "epoch": 0.14, "learning_rate": 4.767945875706215e-05, "loss": 0.1208, "step": 2054500 }, { "epoch": 0.14, "learning_rate": 4.7678893785310737e-05, "loss": 0.1078, "step": 2055000 }, { "epoch": 0.14, "learning_rate": 4.767832881355933e-05, "loss": 0.1077, "step": 2055500 }, { "epoch": 0.14, "learning_rate": 4.767776384180791e-05, "loss": 0.1084, "step": 2056000 }, { "epoch": 0.14, "learning_rate": 4.76771988700565e-05, "loss": 0.1076, "step": 2056500 }, { "epoch": 0.14, "learning_rate": 4.767663389830509e-05, "loss": 0.1148, "step": 2057000 }, { "epoch": 0.14, "learning_rate": 4.767607005649718e-05, "loss": 0.1171, "step": 2057500 }, { "epoch": 0.14, "learning_rate": 4.767550508474576e-05, "loss": 0.1092, "step": 2058000 }, { "epoch": 0.14, "learning_rate": 4.7674940112994354e-05, "loss": 0.1099, "step": 2058500 }, { "epoch": 0.14, "learning_rate": 4.767437514124294e-05, "loss": 0.1109, "step": 2059000 }, { "epoch": 0.14, "learning_rate": 4.767381129943503e-05, "loss": 0.104, "step": 2059500 }, { "epoch": 0.14, "learning_rate": 4.7673246327683615e-05, "loss": 0.102, "step": 2060000 }, { "epoch": 0.14, "learning_rate": 4.7672681355932206e-05, "loss": 0.1175, "step": 2060500 }, { "epoch": 0.14, "learning_rate": 4.767211638418079e-05, "loss": 0.1182, "step": 2061000 }, { "epoch": 0.14, "learning_rate": 4.767155141242938e-05, "loss": 0.1203, "step": 2061500 }, { "epoch": 0.14, "learning_rate": 4.7670987570621474e-05, "loss": 0.112, "step": 2062000 }, { "epoch": 0.14, "learning_rate": 4.767042259887006e-05, "loss": 0.1184, "step": 2062500 }, { "epoch": 0.14, "learning_rate": 4.766985762711865e-05, "loss": 0.1127, "step": 2063000 }, { "epoch": 0.14, "learning_rate": 4.766929265536723e-05, "loss": 0.1106, "step": 2063500 }, { "epoch": 0.14, "learning_rate": 4.7668728813559326e-05, "loss": 0.1109, "step": 2064000 }, { "epoch": 0.14, "learning_rate": 4.766816384180791e-05, "loss": 0.1084, "step": 2064500 }, { "epoch": 0.14, "learning_rate": 4.76675988700565e-05, "loss": 0.1065, "step": 2065000 }, { "epoch": 0.14, "learning_rate": 4.7667033898305085e-05, "loss": 0.1146, "step": 2065500 }, { "epoch": 0.14, "learning_rate": 4.7666468926553675e-05, "loss": 0.109, "step": 2066000 }, { "epoch": 0.14, "learning_rate": 4.766590508474576e-05, "loss": 0.1117, "step": 2066500 }, { "epoch": 0.14, "learning_rate": 4.766534011299435e-05, "loss": 0.1141, "step": 2067000 }, { "epoch": 0.14, "learning_rate": 4.766477514124294e-05, "loss": 0.1099, "step": 2067500 }, { "epoch": 0.14, "learning_rate": 4.766421016949153e-05, "loss": 0.1061, "step": 2068000 }, { "epoch": 0.14, "learning_rate": 4.766364519774011e-05, "loss": 0.1122, "step": 2068500 }, { "epoch": 0.14, "learning_rate": 4.7663081355932205e-05, "loss": 0.1145, "step": 2069000 }, { "epoch": 0.14, "learning_rate": 4.7662516384180796e-05, "loss": 0.1161, "step": 2069500 }, { "epoch": 0.14, "learning_rate": 4.766195141242938e-05, "loss": 0.1058, "step": 2070000 }, { "epoch": 0.14, "learning_rate": 4.766138644067797e-05, "loss": 0.1055, "step": 2070500 }, { "epoch": 0.14, "learning_rate": 4.766082146892656e-05, "loss": 0.1106, "step": 2071000 }, { "epoch": 0.14, "learning_rate": 4.766025762711865e-05, "loss": 0.1095, "step": 2071500 }, { "epoch": 0.14, "learning_rate": 4.7659693785310735e-05, "loss": 0.1116, "step": 2072000 }, { "epoch": 0.14, "learning_rate": 4.7659128813559326e-05, "loss": 0.1094, "step": 2072500 }, { "epoch": 0.14, "learning_rate": 4.765856384180791e-05, "loss": 0.1144, "step": 2073000 }, { "epoch": 0.14, "learning_rate": 4.76579988700565e-05, "loss": 0.1146, "step": 2073500 }, { "epoch": 0.14, "learning_rate": 4.7657433898305084e-05, "loss": 0.1056, "step": 2074000 }, { "epoch": 0.14, "learning_rate": 4.7656868926553674e-05, "loss": 0.1045, "step": 2074500 }, { "epoch": 0.14, "learning_rate": 4.765630395480226e-05, "loss": 0.1089, "step": 2075000 }, { "epoch": 0.14, "learning_rate": 4.765573898305085e-05, "loss": 0.1137, "step": 2075500 }, { "epoch": 0.14, "learning_rate": 4.765517514124294e-05, "loss": 0.1107, "step": 2076000 }, { "epoch": 0.14, "learning_rate": 4.765461016949153e-05, "loss": 0.1127, "step": 2076500 }, { "epoch": 0.14, "learning_rate": 4.765404519774012e-05, "loss": 0.1156, "step": 2077000 }, { "epoch": 0.14, "learning_rate": 4.76534802259887e-05, "loss": 0.1134, "step": 2077500 }, { "epoch": 0.14, "learning_rate": 4.765291525423729e-05, "loss": 0.1122, "step": 2078000 }, { "epoch": 0.14, "learning_rate": 4.765235141242938e-05, "loss": 0.1064, "step": 2078500 }, { "epoch": 0.14, "learning_rate": 4.765178757062147e-05, "loss": 0.1067, "step": 2079000 }, { "epoch": 0.14, "learning_rate": 4.765122259887006e-05, "loss": 0.1037, "step": 2079500 }, { "epoch": 0.14, "learning_rate": 4.765065762711865e-05, "loss": 0.1092, "step": 2080000 }, { "epoch": 0.14, "learning_rate": 4.765009265536724e-05, "loss": 0.118, "step": 2080500 }, { "epoch": 0.14, "learning_rate": 4.764952768361582e-05, "loss": 0.1117, "step": 2081000 }, { "epoch": 0.14, "learning_rate": 4.764896271186441e-05, "loss": 0.1106, "step": 2081500 }, { "epoch": 0.14, "learning_rate": 4.7648397740112996e-05, "loss": 0.1054, "step": 2082000 }, { "epoch": 0.14, "learning_rate": 4.7647832768361587e-05, "loss": 0.1125, "step": 2082500 }, { "epoch": 0.14, "learning_rate": 4.7647268926553674e-05, "loss": 0.1078, "step": 2083000 }, { "epoch": 0.14, "learning_rate": 4.7646703954802264e-05, "loss": 0.1107, "step": 2083500 }, { "epoch": 0.14, "learning_rate": 4.764613898305085e-05, "loss": 0.1159, "step": 2084000 }, { "epoch": 0.14, "learning_rate": 4.764557401129944e-05, "loss": 0.1083, "step": 2084500 }, { "epoch": 0.14, "learning_rate": 4.764500903954803e-05, "loss": 0.112, "step": 2085000 }, { "epoch": 0.14, "learning_rate": 4.7644445197740116e-05, "loss": 0.1166, "step": 2085500 }, { "epoch": 0.14, "learning_rate": 4.76438802259887e-05, "loss": 0.1209, "step": 2086000 }, { "epoch": 0.14, "learning_rate": 4.764331525423729e-05, "loss": 0.1109, "step": 2086500 }, { "epoch": 0.14, "learning_rate": 4.7642750282485875e-05, "loss": 0.1104, "step": 2087000 }, { "epoch": 0.14, "learning_rate": 4.7642185310734465e-05, "loss": 0.11, "step": 2087500 }, { "epoch": 0.14, "learning_rate": 4.764162146892656e-05, "loss": 0.1073, "step": 2088000 }, { "epoch": 0.14, "learning_rate": 4.764105649717514e-05, "loss": 0.111, "step": 2088500 }, { "epoch": 0.14, "learning_rate": 4.7640491525423734e-05, "loss": 0.1154, "step": 2089000 }, { "epoch": 0.14, "learning_rate": 4.763992655367232e-05, "loss": 0.1073, "step": 2089500 }, { "epoch": 0.14, "learning_rate": 4.763936271186441e-05, "loss": 0.109, "step": 2090000 }, { "epoch": 0.14, "learning_rate": 4.7638797740112995e-05, "loss": 0.1102, "step": 2090500 }, { "epoch": 0.14, "learning_rate": 4.7638232768361586e-05, "loss": 0.111, "step": 2091000 }, { "epoch": 0.14, "learning_rate": 4.763766779661017e-05, "loss": 0.1083, "step": 2091500 }, { "epoch": 0.14, "learning_rate": 4.7637103954802264e-05, "loss": 0.1105, "step": 2092000 }, { "epoch": 0.14, "learning_rate": 4.763653898305085e-05, "loss": 0.1172, "step": 2092500 }, { "epoch": 0.14, "learning_rate": 4.763597401129944e-05, "loss": 0.1103, "step": 2093000 }, { "epoch": 0.14, "learning_rate": 4.763540903954802e-05, "loss": 0.109, "step": 2093500 }, { "epoch": 0.14, "learning_rate": 4.763484406779661e-05, "loss": 0.113, "step": 2094000 }, { "epoch": 0.14, "learning_rate": 4.7634279096045196e-05, "loss": 0.1146, "step": 2094500 }, { "epoch": 0.14, "learning_rate": 4.763371525423729e-05, "loss": 0.1033, "step": 2095000 }, { "epoch": 0.14, "learning_rate": 4.763315028248588e-05, "loss": 0.1152, "step": 2095500 }, { "epoch": 0.14, "learning_rate": 4.7632585310734465e-05, "loss": 0.1064, "step": 2096000 }, { "epoch": 0.14, "learning_rate": 4.7632020338983055e-05, "loss": 0.1102, "step": 2096500 }, { "epoch": 0.14, "learning_rate": 4.763145536723164e-05, "loss": 0.1196, "step": 2097000 }, { "epoch": 0.14, "learning_rate": 4.763089152542373e-05, "loss": 0.1098, "step": 2097500 }, { "epoch": 0.14, "learning_rate": 4.763032655367232e-05, "loss": 0.108, "step": 2098000 }, { "epoch": 0.14, "learning_rate": 4.762976158192091e-05, "loss": 0.1077, "step": 2098500 }, { "epoch": 0.14, "learning_rate": 4.76291966101695e-05, "loss": 0.1063, "step": 2099000 }, { "epoch": 0.14, "learning_rate": 4.762863163841808e-05, "loss": 0.1139, "step": 2099500 }, { "epoch": 0.14, "learning_rate": 4.762806666666667e-05, "loss": 0.114, "step": 2100000 }, { "epoch": 0.14, "learning_rate": 4.762750282485876e-05, "loss": 0.1197, "step": 2100500 }, { "epoch": 0.14, "learning_rate": 4.762693785310734e-05, "loss": 0.1124, "step": 2101000 }, { "epoch": 0.14, "learning_rate": 4.7626372881355934e-05, "loss": 0.1149, "step": 2101500 }, { "epoch": 0.14, "learning_rate": 4.762580790960452e-05, "loss": 0.1117, "step": 2102000 }, { "epoch": 0.14, "learning_rate": 4.762524293785311e-05, "loss": 0.1176, "step": 2102500 }, { "epoch": 0.14, "learning_rate": 4.76246779661017e-05, "loss": 0.1127, "step": 2103000 }, { "epoch": 0.14, "learning_rate": 4.7624114124293786e-05, "loss": 0.1075, "step": 2103500 }, { "epoch": 0.14, "learning_rate": 4.762354915254238e-05, "loss": 0.1121, "step": 2104000 }, { "epoch": 0.14, "learning_rate": 4.762298418079096e-05, "loss": 0.1093, "step": 2104500 }, { "epoch": 0.14, "learning_rate": 4.762241920903955e-05, "loss": 0.1112, "step": 2105000 }, { "epoch": 0.14, "learning_rate": 4.762185423728814e-05, "loss": 0.1082, "step": 2105500 }, { "epoch": 0.14, "learning_rate": 4.762129039548023e-05, "loss": 0.1143, "step": 2106000 }, { "epoch": 0.14, "learning_rate": 4.762072542372882e-05, "loss": 0.1074, "step": 2106500 }, { "epoch": 0.14, "learning_rate": 4.76201604519774e-05, "loss": 0.1113, "step": 2107000 }, { "epoch": 0.14, "learning_rate": 4.7619595480225994e-05, "loss": 0.1069, "step": 2107500 }, { "epoch": 0.14, "learning_rate": 4.761903050847458e-05, "loss": 0.1069, "step": 2108000 }, { "epoch": 0.14, "learning_rate": 4.7618466666666665e-05, "loss": 0.1148, "step": 2108500 }, { "epoch": 0.14, "learning_rate": 4.7617901694915255e-05, "loss": 0.1096, "step": 2109000 }, { "epoch": 0.14, "learning_rate": 4.7617336723163846e-05, "loss": 0.1127, "step": 2109500 }, { "epoch": 0.14, "learning_rate": 4.761677175141243e-05, "loss": 0.1074, "step": 2110000 }, { "epoch": 0.14, "learning_rate": 4.761620677966102e-05, "loss": 0.1056, "step": 2110500 }, { "epoch": 0.14, "learning_rate": 4.7615641807909604e-05, "loss": 0.1077, "step": 2111000 }, { "epoch": 0.14, "learning_rate": 4.76150779661017e-05, "loss": 0.104, "step": 2111500 }, { "epoch": 0.14, "learning_rate": 4.761451299435029e-05, "loss": 0.1118, "step": 2112000 }, { "epoch": 0.14, "learning_rate": 4.761394802259887e-05, "loss": 0.1071, "step": 2112500 }, { "epoch": 0.14, "learning_rate": 4.761338305084746e-05, "loss": 0.1121, "step": 2113000 }, { "epoch": 0.14, "learning_rate": 4.761281920903955e-05, "loss": 0.1118, "step": 2113500 }, { "epoch": 0.14, "learning_rate": 4.761225423728814e-05, "loss": 0.1058, "step": 2114000 }, { "epoch": 0.14, "learning_rate": 4.7611689265536725e-05, "loss": 0.1101, "step": 2114500 }, { "epoch": 0.14, "learning_rate": 4.7611124293785315e-05, "loss": 0.1103, "step": 2115000 }, { "epoch": 0.14, "learning_rate": 4.76105593220339e-05, "loss": 0.1107, "step": 2115500 }, { "epoch": 0.14, "learning_rate": 4.7609995480225986e-05, "loss": 0.113, "step": 2116000 }, { "epoch": 0.14, "learning_rate": 4.760943050847458e-05, "loss": 0.1095, "step": 2116500 }, { "epoch": 0.14, "learning_rate": 4.760886553672317e-05, "loss": 0.1118, "step": 2117000 }, { "epoch": 0.14, "learning_rate": 4.760830056497175e-05, "loss": 0.1097, "step": 2117500 }, { "epoch": 0.14, "learning_rate": 4.760773559322034e-05, "loss": 0.1138, "step": 2118000 }, { "epoch": 0.14, "learning_rate": 4.7607171751412436e-05, "loss": 0.1097, "step": 2118500 }, { "epoch": 0.14, "learning_rate": 4.760660677966102e-05, "loss": 0.1151, "step": 2119000 }, { "epoch": 0.14, "learning_rate": 4.760604180790961e-05, "loss": 0.1074, "step": 2119500 }, { "epoch": 0.14, "learning_rate": 4.7605476836158194e-05, "loss": 0.1153, "step": 2120000 }, { "epoch": 0.14, "learning_rate": 4.760491299435029e-05, "loss": 0.1067, "step": 2120500 }, { "epoch": 0.14, "learning_rate": 4.760434802259887e-05, "loss": 0.1092, "step": 2121000 }, { "epoch": 0.14, "learning_rate": 4.760378305084746e-05, "loss": 0.1091, "step": 2121500 }, { "epoch": 0.14, "learning_rate": 4.7603218079096046e-05, "loss": 0.1112, "step": 2122000 }, { "epoch": 0.14, "learning_rate": 4.760265423728813e-05, "loss": 0.1185, "step": 2122500 }, { "epoch": 0.14, "learning_rate": 4.7602089265536724e-05, "loss": 0.113, "step": 2123000 }, { "epoch": 0.14, "learning_rate": 4.760152542372882e-05, "loss": 0.1115, "step": 2123500 }, { "epoch": 0.14, "learning_rate": 4.760096045197741e-05, "loss": 0.1139, "step": 2124000 }, { "epoch": 0.14, "learning_rate": 4.760039548022599e-05, "loss": 0.1116, "step": 2124500 }, { "epoch": 0.14, "learning_rate": 4.759983163841808e-05, "loss": 0.111, "step": 2125000 }, { "epoch": 0.14, "learning_rate": 4.759926666666667e-05, "loss": 0.1068, "step": 2125500 }, { "epoch": 0.14, "learning_rate": 4.7598701694915254e-05, "loss": 0.1068, "step": 2126000 }, { "epoch": 0.14, "learning_rate": 4.7598136723163844e-05, "loss": 0.1066, "step": 2126500 }, { "epoch": 0.14, "learning_rate": 4.7597571751412435e-05, "loss": 0.1138, "step": 2127000 }, { "epoch": 0.14, "learning_rate": 4.759700677966102e-05, "loss": 0.1023, "step": 2127500 }, { "epoch": 0.14, "learning_rate": 4.759644180790961e-05, "loss": 0.1079, "step": 2128000 }, { "epoch": 0.14, "learning_rate": 4.759587683615819e-05, "loss": 0.1118, "step": 2128500 }, { "epoch": 0.14, "learning_rate": 4.7595311864406784e-05, "loss": 0.1115, "step": 2129000 }, { "epoch": 0.14, "learning_rate": 4.759474689265537e-05, "loss": 0.1115, "step": 2129500 }, { "epoch": 0.14, "learning_rate": 4.759418192090396e-05, "loss": 0.1129, "step": 2130000 }, { "epoch": 0.14, "learning_rate": 4.759361694915255e-05, "loss": 0.1116, "step": 2130500 }, { "epoch": 0.14, "learning_rate": 4.759305197740113e-05, "loss": 0.1084, "step": 2131000 }, { "epoch": 0.14, "learning_rate": 4.759248700564972e-05, "loss": 0.1063, "step": 2131500 }, { "epoch": 0.14, "learning_rate": 4.759192203389831e-05, "loss": 0.1135, "step": 2132000 }, { "epoch": 0.14, "learning_rate": 4.75913570621469e-05, "loss": 0.117, "step": 2132500 }, { "epoch": 0.14, "learning_rate": 4.759079209039548e-05, "loss": 0.1045, "step": 2133000 }, { "epoch": 0.14, "learning_rate": 4.759022824858757e-05, "loss": 0.1114, "step": 2133500 }, { "epoch": 0.14, "learning_rate": 4.758966440677967e-05, "loss": 0.1076, "step": 2134000 }, { "epoch": 0.14, "learning_rate": 4.758909943502825e-05, "loss": 0.1077, "step": 2134500 }, { "epoch": 0.14, "learning_rate": 4.7588534463276844e-05, "loss": 0.1106, "step": 2135000 }, { "epoch": 0.14, "learning_rate": 4.758796949152543e-05, "loss": 0.113, "step": 2135500 }, { "epoch": 0.14, "learning_rate": 4.758740451977402e-05, "loss": 0.1126, "step": 2136000 }, { "epoch": 0.14, "learning_rate": 4.7586840677966105e-05, "loss": 0.1053, "step": 2136500 }, { "epoch": 0.14, "learning_rate": 4.758627570621469e-05, "loss": 0.1156, "step": 2137000 }, { "epoch": 0.14, "learning_rate": 4.758571073446328e-05, "loss": 0.108, "step": 2137500 }, { "epoch": 0.14, "learning_rate": 4.758514576271187e-05, "loss": 0.1116, "step": 2138000 }, { "epoch": 0.14, "learning_rate": 4.758458192090396e-05, "loss": 0.1097, "step": 2138500 }, { "epoch": 0.15, "learning_rate": 4.758401694915254e-05, "loss": 0.1115, "step": 2139000 }, { "epoch": 0.15, "learning_rate": 4.758345197740113e-05, "loss": 0.1078, "step": 2139500 }, { "epoch": 0.15, "learning_rate": 4.7582887005649716e-05, "loss": 0.1174, "step": 2140000 }, { "epoch": 0.15, "learning_rate": 4.7582322033898306e-05, "loss": 0.1081, "step": 2140500 }, { "epoch": 0.15, "learning_rate": 4.758175706214689e-05, "loss": 0.1146, "step": 2141000 }, { "epoch": 0.15, "learning_rate": 4.758119322033899e-05, "loss": 0.1148, "step": 2141500 }, { "epoch": 0.15, "learning_rate": 4.7580628248587575e-05, "loss": 0.1119, "step": 2142000 }, { "epoch": 0.15, "learning_rate": 4.7580063276836165e-05, "loss": 0.1136, "step": 2142500 }, { "epoch": 0.15, "learning_rate": 4.757949830508475e-05, "loss": 0.109, "step": 2143000 }, { "epoch": 0.15, "learning_rate": 4.7578934463276836e-05, "loss": 0.1089, "step": 2143500 }, { "epoch": 0.15, "learning_rate": 4.757836949152543e-05, "loss": 0.1091, "step": 2144000 }, { "epoch": 0.15, "learning_rate": 4.757780451977402e-05, "loss": 0.1073, "step": 2144500 }, { "epoch": 0.15, "learning_rate": 4.75772395480226e-05, "loss": 0.1068, "step": 2145000 }, { "epoch": 0.15, "learning_rate": 4.757667570621469e-05, "loss": 0.1076, "step": 2145500 }, { "epoch": 0.15, "learning_rate": 4.757611073446328e-05, "loss": 0.1085, "step": 2146000 }, { "epoch": 0.15, "learning_rate": 4.757554576271186e-05, "loss": 0.107, "step": 2146500 }, { "epoch": 0.15, "learning_rate": 4.757498079096045e-05, "loss": 0.1083, "step": 2147000 }, { "epoch": 0.15, "learning_rate": 4.757441581920904e-05, "loss": 0.1113, "step": 2147500 }, { "epoch": 0.15, "learning_rate": 4.757385197740114e-05, "loss": 0.1068, "step": 2148000 }, { "epoch": 0.15, "learning_rate": 4.757328700564972e-05, "loss": 0.1097, "step": 2148500 }, { "epoch": 0.15, "learning_rate": 4.757272203389831e-05, "loss": 0.0999, "step": 2149000 }, { "epoch": 0.15, "learning_rate": 4.7572157062146896e-05, "loss": 0.1024, "step": 2149500 }, { "epoch": 0.15, "learning_rate": 4.757159322033898e-05, "loss": 0.1047, "step": 2150000 }, { "epoch": 0.15, "learning_rate": 4.7571028248587574e-05, "loss": 0.1108, "step": 2150500 }, { "epoch": 0.15, "learning_rate": 4.757046327683616e-05, "loss": 0.1139, "step": 2151000 } ], "max_steps": 44250000, "num_train_epochs": 3, "total_flos": 1.530035227432059e+17, "trial_name": null, "trial_params": null }