{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14816352251585413, "eval_steps": 500, "global_step": 118500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.765625e-08, "loss": 10.5314, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.8828125e-06, "loss": 8.9917, "step": 50 }, { "epoch": 0.0, "learning_rate": 9.765625e-06, "loss": 7.5141, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.4648437500000001e-05, "loss": 7.2521, "step": 150 }, { "epoch": 0.0, "learning_rate": 1.953125e-05, "loss": 6.9931, "step": 200 }, { "epoch": 0.0, "learning_rate": 2.44140625e-05, "loss": 6.9094, "step": 250 }, { "epoch": 0.0, "learning_rate": 2.9296875000000002e-05, "loss": 6.8692, "step": 300 }, { "epoch": 0.0, "learning_rate": 3.41796875e-05, "loss": 6.7538, "step": 350 }, { "epoch": 0.0, "learning_rate": 3.90625e-05, "loss": 6.6989, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.3945312500000005e-05, "loss": 6.7315, "step": 450 }, { "epoch": 0.0, "learning_rate": 4.8828125e-05, "loss": 6.7081, "step": 500 }, { "epoch": 0.0, "learning_rate": 5.37109375e-05, "loss": 6.5596, "step": 550 }, { "epoch": 0.0, "learning_rate": 5.8593750000000005e-05, "loss": 6.6213, "step": 600 }, { "epoch": 0.0, "learning_rate": 6.34765625e-05, "loss": 6.5238, "step": 650 }, { "epoch": 0.0, "learning_rate": 6.8359375e-05, "loss": 6.4714, "step": 700 }, { "epoch": 0.0, "learning_rate": 7.32421875e-05, "loss": 6.3938, "step": 750 }, { "epoch": 0.0, "learning_rate": 7.8125e-05, "loss": 6.4915, "step": 800 }, { "epoch": 0.0, "learning_rate": 8.30078125e-05, "loss": 6.3645, "step": 850 }, { "epoch": 0.0, "learning_rate": 8.789062500000001e-05, "loss": 6.2997, "step": 900 }, { "epoch": 0.0, "learning_rate": 9.27734375e-05, "loss": 6.3072, "step": 950 }, { "epoch": 0.0, "learning_rate": 9.765625e-05, "loss": 6.2465, "step": 1000 }, { "epoch": 0.0, "learning_rate": 9.999674498728043e-05, "loss": 6.2556, "step": 1050 }, { "epoch": 0.0, "learning_rate": 9.999048534743506e-05, "loss": 6.0951, "step": 1100 }, { "epoch": 0.0, "learning_rate": 9.99842257075897e-05, "loss": 6.1544, "step": 1150 }, { "epoch": 0.0, "learning_rate": 9.997796606774434e-05, "loss": 6.1193, "step": 1200 }, { "epoch": 0.0, "learning_rate": 9.997170642789897e-05, "loss": 6.0761, "step": 1250 }, { "epoch": 0.0, "learning_rate": 9.996544678805361e-05, "loss": 6.06, "step": 1300 }, { "epoch": 0.0, "learning_rate": 9.995918714820825e-05, "loss": 6.0118, "step": 1350 }, { "epoch": 0.0, "learning_rate": 9.995292750836289e-05, "loss": 5.9435, "step": 1400 }, { "epoch": 0.0, "learning_rate": 9.994666786851752e-05, "loss": 5.917, "step": 1450 }, { "epoch": 0.0, "learning_rate": 9.994040822867216e-05, "loss": 5.9276, "step": 1500 }, { "epoch": 0.0, "learning_rate": 9.99341485888268e-05, "loss": 5.9058, "step": 1550 }, { "epoch": 0.0, "learning_rate": 9.992788894898144e-05, "loss": 5.8115, "step": 1600 }, { "epoch": 0.0, "learning_rate": 9.992162930913607e-05, "loss": 5.889, "step": 1650 }, { "epoch": 0.0, "learning_rate": 9.991536966929071e-05, "loss": 5.8272, "step": 1700 }, { "epoch": 0.0, "learning_rate": 9.990911002944535e-05, "loss": 5.8239, "step": 1750 }, { "epoch": 0.0, "learning_rate": 9.990285038959999e-05, "loss": 5.7675, "step": 1800 }, { "epoch": 0.0, "learning_rate": 9.989659074975462e-05, "loss": 5.7345, "step": 1850 }, { "epoch": 0.0, "learning_rate": 9.989033110990926e-05, "loss": 5.7704, "step": 1900 }, { "epoch": 0.0, "learning_rate": 9.98840714700639e-05, "loss": 5.6971, "step": 1950 }, { "epoch": 0.0, "learning_rate": 9.987781183021853e-05, "loss": 5.7833, "step": 2000 }, { "epoch": 0.0, "learning_rate": 9.987155219037317e-05, "loss": 5.781, "step": 2050 }, { "epoch": 0.0, "learning_rate": 9.986529255052782e-05, "loss": 5.6441, "step": 2100 }, { "epoch": 0.0, "learning_rate": 9.985903291068246e-05, "loss": 5.7542, "step": 2150 }, { "epoch": 0.0, "learning_rate": 9.98527732708371e-05, "loss": 5.6033, "step": 2200 }, { "epoch": 0.0, "learning_rate": 9.984651363099173e-05, "loss": 5.6416, "step": 2250 }, { "epoch": 0.0, "learning_rate": 9.984025399114637e-05, "loss": 5.5772, "step": 2300 }, { "epoch": 0.0, "learning_rate": 9.983399435130101e-05, "loss": 5.6965, "step": 2350 }, { "epoch": 0.0, "learning_rate": 9.982773471145565e-05, "loss": 5.5223, "step": 2400 }, { "epoch": 0.0, "learning_rate": 9.982147507161028e-05, "loss": 5.6218, "step": 2450 }, { "epoch": 0.0, "learning_rate": 9.981521543176492e-05, "loss": 5.5474, "step": 2500 }, { "epoch": 0.0, "learning_rate": 9.980895579191956e-05, "loss": 5.5644, "step": 2550 }, { "epoch": 0.0, "learning_rate": 9.980269615207421e-05, "loss": 5.5995, "step": 2600 }, { "epoch": 0.0, "learning_rate": 9.979643651222885e-05, "loss": 5.6271, "step": 2650 }, { "epoch": 0.0, "learning_rate": 9.979017687238348e-05, "loss": 5.5348, "step": 2700 }, { "epoch": 0.0, "learning_rate": 9.978391723253812e-05, "loss": 5.5212, "step": 2750 }, { "epoch": 0.0, "learning_rate": 9.977765759269276e-05, "loss": 5.4433, "step": 2800 }, { "epoch": 0.0, "learning_rate": 9.97713979528474e-05, "loss": 5.4321, "step": 2850 }, { "epoch": 0.0, "learning_rate": 9.976513831300202e-05, "loss": 5.5834, "step": 2900 }, { "epoch": 0.0, "learning_rate": 9.975887867315666e-05, "loss": 5.5172, "step": 2950 }, { "epoch": 0.0, "learning_rate": 9.97526190333113e-05, "loss": 5.4591, "step": 3000 }, { "epoch": 0.0, "learning_rate": 9.974635939346593e-05, "loss": 5.5175, "step": 3050 }, { "epoch": 0.0, "learning_rate": 9.974009975362057e-05, "loss": 5.4572, "step": 3100 }, { "epoch": 0.0, "learning_rate": 9.973384011377522e-05, "loss": 5.3919, "step": 3150 }, { "epoch": 0.0, "learning_rate": 9.972758047392986e-05, "loss": 5.4764, "step": 3200 }, { "epoch": 0.0, "learning_rate": 9.97213208340845e-05, "loss": 5.49, "step": 3250 }, { "epoch": 0.0, "learning_rate": 9.971506119423913e-05, "loss": 5.4918, "step": 3300 }, { "epoch": 0.0, "learning_rate": 9.970880155439377e-05, "loss": 5.4713, "step": 3350 }, { "epoch": 0.0, "learning_rate": 9.970254191454841e-05, "loss": 5.4313, "step": 3400 }, { "epoch": 0.0, "learning_rate": 9.969628227470304e-05, "loss": 5.4729, "step": 3450 }, { "epoch": 0.0, "learning_rate": 9.969002263485768e-05, "loss": 5.5127, "step": 3500 }, { "epoch": 0.0, "learning_rate": 9.968376299501232e-05, "loss": 5.4661, "step": 3550 }, { "epoch": 0.0, "learning_rate": 9.967750335516696e-05, "loss": 5.3822, "step": 3600 }, { "epoch": 0.0, "learning_rate": 9.967124371532161e-05, "loss": 5.4726, "step": 3650 }, { "epoch": 0.0, "learning_rate": 9.966498407547624e-05, "loss": 5.4201, "step": 3700 }, { "epoch": 0.0, "learning_rate": 9.965872443563088e-05, "loss": 5.3728, "step": 3750 }, { "epoch": 0.0, "learning_rate": 9.965246479578552e-05, "loss": 5.4065, "step": 3800 }, { "epoch": 0.0, "learning_rate": 9.964620515594016e-05, "loss": 5.4338, "step": 3850 }, { "epoch": 0.0, "learning_rate": 9.96399455160948e-05, "loss": 5.4468, "step": 3900 }, { "epoch": 0.0, "learning_rate": 9.963368587624943e-05, "loss": 5.4349, "step": 3950 }, { "epoch": 0.01, "learning_rate": 9.962742623640407e-05, "loss": 5.4514, "step": 4000 }, { "epoch": 0.01, "learning_rate": 9.96211665965587e-05, "loss": 5.4015, "step": 4050 }, { "epoch": 0.01, "learning_rate": 9.961490695671334e-05, "loss": 5.4213, "step": 4100 }, { "epoch": 0.01, "learning_rate": 9.960864731686798e-05, "loss": 5.4106, "step": 4150 }, { "epoch": 0.01, "learning_rate": 9.960238767702262e-05, "loss": 5.3752, "step": 4200 }, { "epoch": 0.01, "learning_rate": 9.959612803717726e-05, "loss": 5.4173, "step": 4250 }, { "epoch": 0.01, "learning_rate": 9.958986839733189e-05, "loss": 5.3093, "step": 4300 }, { "epoch": 0.01, "learning_rate": 9.958360875748653e-05, "loss": 5.3797, "step": 4350 }, { "epoch": 0.01, "learning_rate": 9.957734911764117e-05, "loss": 5.3453, "step": 4400 }, { "epoch": 0.01, "learning_rate": 9.95710894777958e-05, "loss": 5.3681, "step": 4450 }, { "epoch": 0.01, "learning_rate": 9.956482983795044e-05, "loss": 5.4038, "step": 4500 }, { "epoch": 0.01, "learning_rate": 9.955857019810508e-05, "loss": 5.4484, "step": 4550 }, { "epoch": 0.01, "learning_rate": 9.955231055825972e-05, "loss": 5.2668, "step": 4600 }, { "epoch": 0.01, "learning_rate": 9.954605091841435e-05, "loss": 5.3614, "step": 4650 }, { "epoch": 0.01, "learning_rate": 9.9539791278569e-05, "loss": 5.3342, "step": 4700 }, { "epoch": 0.01, "learning_rate": 9.953353163872364e-05, "loss": 5.2445, "step": 4750 }, { "epoch": 0.01, "learning_rate": 9.952727199887828e-05, "loss": 5.3382, "step": 4800 }, { "epoch": 0.01, "learning_rate": 9.952101235903292e-05, "loss": 5.2986, "step": 4850 }, { "epoch": 0.01, "learning_rate": 9.951475271918755e-05, "loss": 5.3645, "step": 4900 }, { "epoch": 0.01, "learning_rate": 9.950849307934219e-05, "loss": 5.3216, "step": 4950 }, { "epoch": 0.01, "learning_rate": 9.950223343949683e-05, "loss": 5.3751, "step": 5000 }, { "epoch": 0.01, "learning_rate": 9.949597379965147e-05, "loss": 5.3573, "step": 5050 }, { "epoch": 0.01, "learning_rate": 9.94897141598061e-05, "loss": 5.3258, "step": 5100 }, { "epoch": 0.01, "learning_rate": 9.948345451996074e-05, "loss": 5.3585, "step": 5150 }, { "epoch": 0.01, "learning_rate": 9.947719488011539e-05, "loss": 5.3239, "step": 5200 }, { "epoch": 0.01, "learning_rate": 9.947093524027003e-05, "loss": 5.3074, "step": 5250 }, { "epoch": 0.01, "learning_rate": 9.946467560042467e-05, "loss": 5.3173, "step": 5300 }, { "epoch": 0.01, "learning_rate": 9.94584159605793e-05, "loss": 5.3274, "step": 5350 }, { "epoch": 0.01, "learning_rate": 9.945215632073394e-05, "loss": 5.3407, "step": 5400 }, { "epoch": 0.01, "learning_rate": 9.944589668088858e-05, "loss": 5.3076, "step": 5450 }, { "epoch": 0.01, "learning_rate": 9.943963704104322e-05, "loss": 5.3157, "step": 5500 }, { "epoch": 0.01, "learning_rate": 9.943337740119785e-05, "loss": 5.3547, "step": 5550 }, { "epoch": 0.01, "learning_rate": 9.942711776135249e-05, "loss": 5.2748, "step": 5600 }, { "epoch": 0.01, "learning_rate": 9.942085812150713e-05, "loss": 5.3272, "step": 5650 }, { "epoch": 0.01, "learning_rate": 9.941459848166176e-05, "loss": 5.2803, "step": 5700 }, { "epoch": 0.01, "learning_rate": 9.94083388418164e-05, "loss": 5.3449, "step": 5750 }, { "epoch": 0.01, "learning_rate": 9.940207920197104e-05, "loss": 5.3271, "step": 5800 }, { "epoch": 0.01, "learning_rate": 9.939581956212568e-05, "loss": 5.242, "step": 5850 }, { "epoch": 0.01, "learning_rate": 9.938955992228031e-05, "loss": 5.2954, "step": 5900 }, { "epoch": 0.01, "learning_rate": 9.938330028243495e-05, "loss": 5.2743, "step": 5950 }, { "epoch": 0.01, "learning_rate": 9.937704064258959e-05, "loss": 5.2799, "step": 6000 }, { "epoch": 0.01, "learning_rate": 9.937078100274423e-05, "loss": 5.2536, "step": 6050 }, { "epoch": 0.01, "learning_rate": 9.936452136289886e-05, "loss": 5.2376, "step": 6100 }, { "epoch": 0.01, "learning_rate": 9.93582617230535e-05, "loss": 5.2576, "step": 6150 }, { "epoch": 0.01, "learning_rate": 9.935200208320814e-05, "loss": 5.3282, "step": 6200 }, { "epoch": 0.01, "learning_rate": 9.934574244336279e-05, "loss": 5.2802, "step": 6250 }, { "epoch": 0.01, "learning_rate": 9.933948280351743e-05, "loss": 5.3182, "step": 6300 }, { "epoch": 0.01, "learning_rate": 9.933322316367206e-05, "loss": 5.2235, "step": 6350 }, { "epoch": 0.01, "learning_rate": 9.93269635238267e-05, "loss": 5.2087, "step": 6400 }, { "epoch": 0.01, "learning_rate": 9.932070388398134e-05, "loss": 5.2417, "step": 6450 }, { "epoch": 0.01, "learning_rate": 9.931444424413598e-05, "loss": 5.248, "step": 6500 }, { "epoch": 0.01, "learning_rate": 9.930818460429061e-05, "loss": 5.2896, "step": 6550 }, { "epoch": 0.01, "learning_rate": 9.930192496444525e-05, "loss": 5.3225, "step": 6600 }, { "epoch": 0.01, "learning_rate": 9.929566532459989e-05, "loss": 5.2403, "step": 6650 }, { "epoch": 0.01, "learning_rate": 9.928940568475453e-05, "loss": 5.2837, "step": 6700 }, { "epoch": 0.01, "learning_rate": 9.928314604490918e-05, "loss": 5.2571, "step": 6750 }, { "epoch": 0.01, "learning_rate": 9.927688640506381e-05, "loss": 5.3359, "step": 6800 }, { "epoch": 0.01, "learning_rate": 9.927062676521845e-05, "loss": 5.156, "step": 6850 }, { "epoch": 0.01, "learning_rate": 9.926436712537307e-05, "loss": 5.2564, "step": 6900 }, { "epoch": 0.01, "learning_rate": 9.925810748552771e-05, "loss": 5.2463, "step": 6950 }, { "epoch": 0.01, "learning_rate": 9.925184784568235e-05, "loss": 5.2254, "step": 7000 }, { "epoch": 0.01, "learning_rate": 9.924558820583699e-05, "loss": 5.2658, "step": 7050 }, { "epoch": 0.01, "learning_rate": 9.923932856599162e-05, "loss": 5.29, "step": 7100 }, { "epoch": 0.01, "learning_rate": 9.923306892614626e-05, "loss": 5.2442, "step": 7150 }, { "epoch": 0.01, "learning_rate": 9.92268092863009e-05, "loss": 5.2496, "step": 7200 }, { "epoch": 0.01, "learning_rate": 9.922054964645555e-05, "loss": 5.1477, "step": 7250 }, { "epoch": 0.01, "learning_rate": 9.921429000661019e-05, "loss": 5.2259, "step": 7300 }, { "epoch": 0.01, "learning_rate": 9.920803036676482e-05, "loss": 5.2793, "step": 7350 }, { "epoch": 0.01, "learning_rate": 9.920177072691946e-05, "loss": 5.2581, "step": 7400 }, { "epoch": 0.01, "learning_rate": 9.91955110870741e-05, "loss": 5.135, "step": 7450 }, { "epoch": 0.01, "learning_rate": 9.918925144722874e-05, "loss": 5.159, "step": 7500 }, { "epoch": 0.01, "learning_rate": 9.918299180738337e-05, "loss": 5.188, "step": 7550 }, { "epoch": 0.01, "learning_rate": 9.917673216753801e-05, "loss": 5.1451, "step": 7600 }, { "epoch": 0.01, "learning_rate": 9.917047252769265e-05, "loss": 5.2135, "step": 7650 }, { "epoch": 0.01, "learning_rate": 9.916421288784729e-05, "loss": 5.1554, "step": 7700 }, { "epoch": 0.01, "learning_rate": 9.915795324800192e-05, "loss": 5.1475, "step": 7750 }, { "epoch": 0.01, "learning_rate": 9.915169360815657e-05, "loss": 5.1787, "step": 7800 }, { "epoch": 0.01, "learning_rate": 9.914543396831121e-05, "loss": 5.1475, "step": 7850 }, { "epoch": 0.01, "learning_rate": 9.913917432846585e-05, "loss": 5.1687, "step": 7900 }, { "epoch": 0.01, "learning_rate": 9.913291468862049e-05, "loss": 5.2084, "step": 7950 }, { "epoch": 0.01, "learning_rate": 9.912665504877512e-05, "loss": 5.136, "step": 8000 }, { "epoch": 0.01, "learning_rate": 9.912039540892976e-05, "loss": 5.1358, "step": 8050 }, { "epoch": 0.01, "learning_rate": 9.91141357690844e-05, "loss": 5.2394, "step": 8100 }, { "epoch": 0.01, "learning_rate": 9.910787612923903e-05, "loss": 5.1529, "step": 8150 }, { "epoch": 0.01, "learning_rate": 9.910161648939367e-05, "loss": 5.1991, "step": 8200 }, { "epoch": 0.01, "learning_rate": 9.909535684954831e-05, "loss": 5.2609, "step": 8250 }, { "epoch": 0.01, "learning_rate": 9.908909720970295e-05, "loss": 5.1962, "step": 8300 }, { "epoch": 0.01, "learning_rate": 9.908283756985758e-05, "loss": 5.1939, "step": 8350 }, { "epoch": 0.01, "learning_rate": 9.907657793001222e-05, "loss": 5.3289, "step": 8400 }, { "epoch": 0.01, "learning_rate": 9.907031829016686e-05, "loss": 5.2048, "step": 8450 }, { "epoch": 0.01, "learning_rate": 9.90640586503215e-05, "loss": 5.2757, "step": 8500 }, { "epoch": 0.01, "learning_rate": 9.905779901047613e-05, "loss": 5.1927, "step": 8550 }, { "epoch": 0.01, "learning_rate": 9.905153937063077e-05, "loss": 5.2142, "step": 8600 }, { "epoch": 0.01, "learning_rate": 9.904527973078541e-05, "loss": 5.2504, "step": 8650 }, { "epoch": 0.01, "learning_rate": 9.903902009094005e-05, "loss": 5.2259, "step": 8700 }, { "epoch": 0.01, "learning_rate": 9.903276045109468e-05, "loss": 5.1221, "step": 8750 }, { "epoch": 0.01, "learning_rate": 9.902650081124933e-05, "loss": 5.2464, "step": 8800 }, { "epoch": 0.01, "learning_rate": 9.902024117140397e-05, "loss": 5.2496, "step": 8850 }, { "epoch": 0.01, "learning_rate": 9.901398153155861e-05, "loss": 5.1249, "step": 8900 }, { "epoch": 0.01, "learning_rate": 9.900772189171325e-05, "loss": 5.1619, "step": 8950 }, { "epoch": 0.01, "learning_rate": 9.900146225186788e-05, "loss": 5.1336, "step": 9000 }, { "epoch": 0.01, "learning_rate": 9.899520261202252e-05, "loss": 5.2792, "step": 9050 }, { "epoch": 0.01, "learning_rate": 9.898894297217716e-05, "loss": 5.2257, "step": 9100 }, { "epoch": 0.01, "learning_rate": 9.89826833323318e-05, "loss": 5.1522, "step": 9150 }, { "epoch": 0.01, "learning_rate": 9.897642369248643e-05, "loss": 5.1053, "step": 9200 }, { "epoch": 0.01, "learning_rate": 9.897016405264107e-05, "loss": 5.1274, "step": 9250 }, { "epoch": 0.01, "learning_rate": 9.896390441279571e-05, "loss": 5.1827, "step": 9300 }, { "epoch": 0.01, "learning_rate": 9.895764477295036e-05, "loss": 5.2096, "step": 9350 }, { "epoch": 0.01, "learning_rate": 9.8951385133105e-05, "loss": 5.1465, "step": 9400 }, { "epoch": 0.01, "learning_rate": 9.894512549325963e-05, "loss": 5.1816, "step": 9450 }, { "epoch": 0.01, "learning_rate": 9.893886585341427e-05, "loss": 5.0591, "step": 9500 }, { "epoch": 0.01, "learning_rate": 9.893260621356891e-05, "loss": 5.1462, "step": 9550 }, { "epoch": 0.01, "learning_rate": 9.892634657372354e-05, "loss": 5.1869, "step": 9600 }, { "epoch": 0.01, "learning_rate": 9.892008693387818e-05, "loss": 5.1178, "step": 9650 }, { "epoch": 0.01, "learning_rate": 9.891382729403282e-05, "loss": 5.1577, "step": 9700 }, { "epoch": 0.01, "learning_rate": 9.890756765418744e-05, "loss": 5.1557, "step": 9750 }, { "epoch": 0.01, "learning_rate": 9.890130801434208e-05, "loss": 5.1689, "step": 9800 }, { "epoch": 0.01, "learning_rate": 9.889504837449673e-05, "loss": 5.1602, "step": 9850 }, { "epoch": 0.01, "learning_rate": 9.888878873465137e-05, "loss": 5.1228, "step": 9900 }, { "epoch": 0.01, "learning_rate": 9.8882529094806e-05, "loss": 5.1584, "step": 9950 }, { "epoch": 0.01, "learning_rate": 9.887626945496064e-05, "loss": 5.1452, "step": 10000 }, { "epoch": 0.01, "learning_rate": 9.887000981511528e-05, "loss": 5.151, "step": 10050 }, { "epoch": 0.01, "learning_rate": 9.886375017526992e-05, "loss": 5.0252, "step": 10100 }, { "epoch": 0.01, "learning_rate": 9.885749053542456e-05, "loss": 5.1309, "step": 10150 }, { "epoch": 0.01, "learning_rate": 9.885123089557919e-05, "loss": 5.1565, "step": 10200 }, { "epoch": 0.01, "learning_rate": 9.884497125573383e-05, "loss": 5.2388, "step": 10250 }, { "epoch": 0.01, "learning_rate": 9.883871161588847e-05, "loss": 5.1605, "step": 10300 }, { "epoch": 0.01, "learning_rate": 9.88324519760431e-05, "loss": 5.111, "step": 10350 }, { "epoch": 0.01, "learning_rate": 9.882619233619776e-05, "loss": 5.0965, "step": 10400 }, { "epoch": 0.01, "learning_rate": 9.881993269635239e-05, "loss": 5.1391, "step": 10450 }, { "epoch": 0.01, "learning_rate": 9.881367305650703e-05, "loss": 5.1788, "step": 10500 }, { "epoch": 0.01, "learning_rate": 9.880741341666167e-05, "loss": 5.1208, "step": 10550 }, { "epoch": 0.01, "learning_rate": 9.88011537768163e-05, "loss": 5.2192, "step": 10600 }, { "epoch": 0.01, "learning_rate": 9.879489413697094e-05, "loss": 5.1437, "step": 10650 }, { "epoch": 0.01, "learning_rate": 9.878863449712558e-05, "loss": 5.118, "step": 10700 }, { "epoch": 0.01, "learning_rate": 9.878237485728022e-05, "loss": 5.1279, "step": 10750 }, { "epoch": 0.01, "learning_rate": 9.877611521743485e-05, "loss": 5.22, "step": 10800 }, { "epoch": 0.01, "learning_rate": 9.876985557758949e-05, "loss": 5.163, "step": 10850 }, { "epoch": 0.01, "learning_rate": 9.876359593774414e-05, "loss": 5.1439, "step": 10900 }, { "epoch": 0.01, "learning_rate": 9.875733629789877e-05, "loss": 5.1024, "step": 10950 }, { "epoch": 0.01, "learning_rate": 9.87510766580534e-05, "loss": 5.1739, "step": 11000 }, { "epoch": 0.01, "learning_rate": 9.874481701820804e-05, "loss": 5.1034, "step": 11050 }, { "epoch": 0.01, "learning_rate": 9.873855737836268e-05, "loss": 5.19, "step": 11100 }, { "epoch": 0.01, "learning_rate": 9.873229773851732e-05, "loss": 5.2172, "step": 11150 }, { "epoch": 0.01, "learning_rate": 9.872603809867195e-05, "loss": 5.1166, "step": 11200 }, { "epoch": 0.01, "learning_rate": 9.871977845882659e-05, "loss": 5.1715, "step": 11250 }, { "epoch": 0.01, "learning_rate": 9.871351881898123e-05, "loss": 5.0846, "step": 11300 }, { "epoch": 0.01, "learning_rate": 9.870725917913586e-05, "loss": 5.1278, "step": 11350 }, { "epoch": 0.01, "learning_rate": 9.870099953929052e-05, "loss": 4.9778, "step": 11400 }, { "epoch": 0.01, "learning_rate": 9.869473989944515e-05, "loss": 5.1595, "step": 11450 }, { "epoch": 0.01, "learning_rate": 9.868848025959979e-05, "loss": 5.16, "step": 11500 }, { "epoch": 0.01, "learning_rate": 9.868222061975443e-05, "loss": 5.1694, "step": 11550 }, { "epoch": 0.01, "learning_rate": 9.867596097990906e-05, "loss": 5.2079, "step": 11600 }, { "epoch": 0.01, "learning_rate": 9.86697013400637e-05, "loss": 5.1775, "step": 11650 }, { "epoch": 0.01, "learning_rate": 9.866344170021834e-05, "loss": 5.0909, "step": 11700 }, { "epoch": 0.01, "learning_rate": 9.865718206037298e-05, "loss": 5.1012, "step": 11750 }, { "epoch": 0.01, "learning_rate": 9.865092242052761e-05, "loss": 5.1335, "step": 11800 }, { "epoch": 0.01, "learning_rate": 9.864466278068225e-05, "loss": 5.0912, "step": 11850 }, { "epoch": 0.01, "learning_rate": 9.863840314083689e-05, "loss": 5.1194, "step": 11900 }, { "epoch": 0.01, "learning_rate": 9.863214350099154e-05, "loss": 5.0931, "step": 11950 }, { "epoch": 0.02, "learning_rate": 9.862588386114618e-05, "loss": 5.0602, "step": 12000 }, { "epoch": 0.02, "learning_rate": 9.861962422130081e-05, "loss": 5.125, "step": 12050 }, { "epoch": 0.02, "learning_rate": 9.861336458145545e-05, "loss": 5.1661, "step": 12100 }, { "epoch": 0.02, "learning_rate": 9.860710494161009e-05, "loss": 5.1776, "step": 12150 }, { "epoch": 0.02, "learning_rate": 9.860084530176473e-05, "loss": 5.1397, "step": 12200 }, { "epoch": 0.02, "learning_rate": 9.859458566191936e-05, "loss": 5.1623, "step": 12250 }, { "epoch": 0.02, "learning_rate": 9.8588326022074e-05, "loss": 5.0727, "step": 12300 }, { "epoch": 0.02, "learning_rate": 9.858206638222864e-05, "loss": 5.1286, "step": 12350 }, { "epoch": 0.02, "learning_rate": 9.857580674238328e-05, "loss": 5.0841, "step": 12400 }, { "epoch": 0.02, "learning_rate": 9.856954710253791e-05, "loss": 5.0967, "step": 12450 }, { "epoch": 0.02, "learning_rate": 9.856328746269255e-05, "loss": 5.0078, "step": 12500 }, { "epoch": 0.02, "learning_rate": 9.855702782284719e-05, "loss": 5.1039, "step": 12550 }, { "epoch": 0.02, "learning_rate": 9.855076818300182e-05, "loss": 5.0574, "step": 12600 }, { "epoch": 0.02, "learning_rate": 9.854450854315646e-05, "loss": 5.0747, "step": 12650 }, { "epoch": 0.02, "learning_rate": 9.85382489033111e-05, "loss": 5.1479, "step": 12700 }, { "epoch": 0.02, "learning_rate": 9.853198926346574e-05, "loss": 5.073, "step": 12750 }, { "epoch": 0.02, "learning_rate": 9.852572962362037e-05, "loss": 5.037, "step": 12800 }, { "epoch": 0.02, "learning_rate": 9.851946998377501e-05, "loss": 5.1986, "step": 12850 }, { "epoch": 0.02, "learning_rate": 9.851321034392965e-05, "loss": 5.0764, "step": 12900 }, { "epoch": 0.02, "learning_rate": 9.85069507040843e-05, "loss": 5.1051, "step": 12950 }, { "epoch": 0.02, "learning_rate": 9.850069106423894e-05, "loss": 5.0971, "step": 13000 }, { "epoch": 0.02, "learning_rate": 9.849443142439357e-05, "loss": 5.0805, "step": 13050 }, { "epoch": 0.02, "learning_rate": 9.848817178454821e-05, "loss": 4.9856, "step": 13100 }, { "epoch": 0.02, "learning_rate": 9.848191214470285e-05, "loss": 5.0483, "step": 13150 }, { "epoch": 0.02, "learning_rate": 9.847565250485749e-05, "loss": 5.1007, "step": 13200 }, { "epoch": 0.02, "learning_rate": 9.846939286501212e-05, "loss": 5.1224, "step": 13250 }, { "epoch": 0.02, "learning_rate": 9.846313322516676e-05, "loss": 5.0343, "step": 13300 }, { "epoch": 0.02, "learning_rate": 9.84568735853214e-05, "loss": 5.0529, "step": 13350 }, { "epoch": 0.02, "learning_rate": 9.845061394547604e-05, "loss": 5.1647, "step": 13400 }, { "epoch": 0.02, "learning_rate": 9.844435430563067e-05, "loss": 4.9785, "step": 13450 }, { "epoch": 0.02, "learning_rate": 9.843809466578532e-05, "loss": 5.0798, "step": 13500 }, { "epoch": 0.02, "learning_rate": 9.843183502593996e-05, "loss": 5.0689, "step": 13550 }, { "epoch": 0.02, "learning_rate": 9.84255753860946e-05, "loss": 5.119, "step": 13600 }, { "epoch": 0.02, "learning_rate": 9.841931574624924e-05, "loss": 5.125, "step": 13650 }, { "epoch": 0.02, "learning_rate": 9.841305610640387e-05, "loss": 5.1155, "step": 13700 }, { "epoch": 0.02, "learning_rate": 9.840679646655851e-05, "loss": 5.0851, "step": 13750 }, { "epoch": 0.02, "learning_rate": 9.840053682671313e-05, "loss": 5.0952, "step": 13800 }, { "epoch": 0.02, "learning_rate": 9.839427718686777e-05, "loss": 5.0983, "step": 13850 }, { "epoch": 0.02, "learning_rate": 9.838801754702241e-05, "loss": 5.0124, "step": 13900 }, { "epoch": 0.02, "learning_rate": 9.838175790717705e-05, "loss": 5.1179, "step": 13950 }, { "epoch": 0.02, "learning_rate": 9.83754982673317e-05, "loss": 5.0917, "step": 14000 }, { "epoch": 0.02, "learning_rate": 9.836923862748633e-05, "loss": 5.0688, "step": 14050 }, { "epoch": 0.02, "learning_rate": 9.836297898764097e-05, "loss": 5.1203, "step": 14100 }, { "epoch": 0.02, "learning_rate": 9.835671934779561e-05, "loss": 5.014, "step": 14150 }, { "epoch": 0.02, "learning_rate": 9.835045970795025e-05, "loss": 5.1154, "step": 14200 }, { "epoch": 0.02, "learning_rate": 9.834420006810488e-05, "loss": 5.02, "step": 14250 }, { "epoch": 0.02, "learning_rate": 9.833794042825952e-05, "loss": 5.1522, "step": 14300 }, { "epoch": 0.02, "learning_rate": 9.833168078841416e-05, "loss": 5.0825, "step": 14350 }, { "epoch": 0.02, "learning_rate": 9.83254211485688e-05, "loss": 4.9917, "step": 14400 }, { "epoch": 0.02, "learning_rate": 9.831916150872343e-05, "loss": 5.1461, "step": 14450 }, { "epoch": 0.02, "learning_rate": 9.831290186887808e-05, "loss": 5.0798, "step": 14500 }, { "epoch": 0.02, "learning_rate": 9.830664222903272e-05, "loss": 5.0809, "step": 14550 }, { "epoch": 0.02, "learning_rate": 9.830038258918736e-05, "loss": 5.1048, "step": 14600 }, { "epoch": 0.02, "learning_rate": 9.8294122949342e-05, "loss": 5.0196, "step": 14650 }, { "epoch": 0.02, "learning_rate": 9.828786330949663e-05, "loss": 5.0792, "step": 14700 }, { "epoch": 0.02, "learning_rate": 9.828160366965127e-05, "loss": 5.0779, "step": 14750 }, { "epoch": 0.02, "learning_rate": 9.827534402980591e-05, "loss": 5.0929, "step": 14800 }, { "epoch": 0.02, "learning_rate": 9.826908438996055e-05, "loss": 5.0022, "step": 14850 }, { "epoch": 0.02, "learning_rate": 9.826282475011518e-05, "loss": 5.058, "step": 14900 }, { "epoch": 0.02, "learning_rate": 9.825656511026982e-05, "loss": 5.1122, "step": 14950 }, { "epoch": 0.02, "learning_rate": 9.825030547042446e-05, "loss": 4.9881, "step": 15000 }, { "epoch": 0.02, "learning_rate": 9.82440458305791e-05, "loss": 5.1249, "step": 15050 }, { "epoch": 0.02, "learning_rate": 9.823778619073373e-05, "loss": 5.0669, "step": 15100 }, { "epoch": 0.02, "learning_rate": 9.823152655088837e-05, "loss": 5.1448, "step": 15150 }, { "epoch": 0.02, "learning_rate": 9.8225266911043e-05, "loss": 5.0814, "step": 15200 }, { "epoch": 0.02, "learning_rate": 9.821900727119764e-05, "loss": 5.1258, "step": 15250 }, { "epoch": 0.02, "learning_rate": 9.821274763135228e-05, "loss": 5.0981, "step": 15300 }, { "epoch": 0.02, "learning_rate": 9.820648799150692e-05, "loss": 5.0133, "step": 15350 }, { "epoch": 0.02, "learning_rate": 9.820022835166156e-05, "loss": 5.0403, "step": 15400 }, { "epoch": 0.02, "learning_rate": 9.81939687118162e-05, "loss": 5.1317, "step": 15450 }, { "epoch": 0.02, "learning_rate": 9.818770907197083e-05, "loss": 4.9721, "step": 15500 }, { "epoch": 0.02, "learning_rate": 9.818144943212548e-05, "loss": 5.0799, "step": 15550 }, { "epoch": 0.02, "learning_rate": 9.817518979228012e-05, "loss": 5.1427, "step": 15600 }, { "epoch": 0.02, "learning_rate": 9.816893015243476e-05, "loss": 5.0997, "step": 15650 }, { "epoch": 0.02, "learning_rate": 9.81626705125894e-05, "loss": 5.0645, "step": 15700 }, { "epoch": 0.02, "learning_rate": 9.815641087274403e-05, "loss": 5.0573, "step": 15750 }, { "epoch": 0.02, "learning_rate": 9.815015123289867e-05, "loss": 5.0137, "step": 15800 }, { "epoch": 0.02, "learning_rate": 9.81438915930533e-05, "loss": 5.0359, "step": 15850 }, { "epoch": 0.02, "learning_rate": 9.813763195320794e-05, "loss": 4.9925, "step": 15900 }, { "epoch": 0.02, "learning_rate": 9.813137231336258e-05, "loss": 5.0501, "step": 15950 }, { "epoch": 0.02, "learning_rate": 9.812511267351722e-05, "loss": 5.1372, "step": 16000 }, { "epoch": 0.02, "learning_rate": 9.811885303367187e-05, "loss": 5.1117, "step": 16050 }, { "epoch": 0.02, "learning_rate": 9.81125933938265e-05, "loss": 5.0704, "step": 16100 }, { "epoch": 0.02, "learning_rate": 9.810633375398114e-05, "loss": 5.0082, "step": 16150 }, { "epoch": 0.02, "learning_rate": 9.810007411413578e-05, "loss": 5.0676, "step": 16200 }, { "epoch": 0.02, "learning_rate": 9.809381447429042e-05, "loss": 5.0272, "step": 16250 }, { "epoch": 0.02, "learning_rate": 9.808755483444506e-05, "loss": 5.0261, "step": 16300 }, { "epoch": 0.02, "learning_rate": 9.808129519459969e-05, "loss": 5.1074, "step": 16350 }, { "epoch": 0.02, "learning_rate": 9.807503555475433e-05, "loss": 5.1274, "step": 16400 }, { "epoch": 0.02, "learning_rate": 9.806877591490897e-05, "loss": 5.038, "step": 16450 }, { "epoch": 0.02, "learning_rate": 9.80625162750636e-05, "loss": 5.0136, "step": 16500 }, { "epoch": 0.02, "learning_rate": 9.805625663521824e-05, "loss": 5.159, "step": 16550 }, { "epoch": 0.02, "learning_rate": 9.804999699537288e-05, "loss": 5.0331, "step": 16600 }, { "epoch": 0.02, "learning_rate": 9.804373735552752e-05, "loss": 5.0596, "step": 16650 }, { "epoch": 0.02, "learning_rate": 9.803747771568215e-05, "loss": 5.1169, "step": 16700 }, { "epoch": 0.02, "learning_rate": 9.803121807583679e-05, "loss": 5.0433, "step": 16750 }, { "epoch": 0.02, "learning_rate": 9.802495843599143e-05, "loss": 5.032, "step": 16800 }, { "epoch": 0.02, "learning_rate": 9.801869879614607e-05, "loss": 5.0889, "step": 16850 }, { "epoch": 0.02, "learning_rate": 9.80124391563007e-05, "loss": 5.0816, "step": 16900 }, { "epoch": 0.02, "learning_rate": 9.800617951645534e-05, "loss": 5.0503, "step": 16950 }, { "epoch": 0.02, "learning_rate": 9.799991987660998e-05, "loss": 5.1312, "step": 17000 }, { "epoch": 0.02, "learning_rate": 9.799366023676462e-05, "loss": 5.0654, "step": 17050 }, { "epoch": 0.02, "learning_rate": 9.798740059691927e-05, "loss": 5.1187, "step": 17100 }, { "epoch": 0.02, "learning_rate": 9.79811409570739e-05, "loss": 5.0556, "step": 17150 }, { "epoch": 0.02, "learning_rate": 9.797488131722854e-05, "loss": 4.9554, "step": 17200 }, { "epoch": 0.02, "learning_rate": 9.796862167738318e-05, "loss": 4.9638, "step": 17250 }, { "epoch": 0.02, "learning_rate": 9.796236203753782e-05, "loss": 5.0005, "step": 17300 }, { "epoch": 0.02, "learning_rate": 9.795610239769245e-05, "loss": 5.0771, "step": 17350 }, { "epoch": 0.02, "learning_rate": 9.794984275784709e-05, "loss": 5.014, "step": 17400 }, { "epoch": 0.02, "learning_rate": 9.794358311800173e-05, "loss": 4.9919, "step": 17450 }, { "epoch": 0.02, "learning_rate": 9.793732347815636e-05, "loss": 5.0039, "step": 17500 }, { "epoch": 0.02, "learning_rate": 9.7931063838311e-05, "loss": 5.0204, "step": 17550 }, { "epoch": 0.02, "learning_rate": 9.792480419846565e-05, "loss": 5.0553, "step": 17600 }, { "epoch": 0.02, "learning_rate": 9.791854455862029e-05, "loss": 5.0788, "step": 17650 }, { "epoch": 0.02, "learning_rate": 9.791228491877493e-05, "loss": 4.9646, "step": 17700 }, { "epoch": 0.02, "learning_rate": 9.790602527892956e-05, "loss": 4.9587, "step": 17750 }, { "epoch": 0.02, "learning_rate": 9.78997656390842e-05, "loss": 4.9992, "step": 17800 }, { "epoch": 0.02, "learning_rate": 9.789350599923883e-05, "loss": 5.0846, "step": 17850 }, { "epoch": 0.02, "learning_rate": 9.788724635939346e-05, "loss": 5.0743, "step": 17900 }, { "epoch": 0.02, "learning_rate": 9.78809867195481e-05, "loss": 5.0384, "step": 17950 }, { "epoch": 0.02, "learning_rate": 9.787472707970274e-05, "loss": 5.0108, "step": 18000 }, { "epoch": 0.02, "learning_rate": 9.786846743985738e-05, "loss": 5.0642, "step": 18050 }, { "epoch": 0.02, "learning_rate": 9.786220780001201e-05, "loss": 4.998, "step": 18100 }, { "epoch": 0.02, "learning_rate": 9.785594816016666e-05, "loss": 5.146, "step": 18150 }, { "epoch": 0.02, "learning_rate": 9.78496885203213e-05, "loss": 5.0123, "step": 18200 }, { "epoch": 0.02, "learning_rate": 9.784342888047594e-05, "loss": 5.0642, "step": 18250 }, { "epoch": 0.02, "learning_rate": 9.783716924063058e-05, "loss": 4.9705, "step": 18300 }, { "epoch": 0.02, "learning_rate": 9.783090960078521e-05, "loss": 5.0945, "step": 18350 }, { "epoch": 0.02, "learning_rate": 9.782464996093985e-05, "loss": 4.979, "step": 18400 }, { "epoch": 0.02, "learning_rate": 9.781839032109449e-05, "loss": 5.0197, "step": 18450 }, { "epoch": 0.02, "learning_rate": 9.781213068124912e-05, "loss": 4.9305, "step": 18500 }, { "epoch": 0.02, "learning_rate": 9.780587104140376e-05, "loss": 4.9548, "step": 18550 }, { "epoch": 0.02, "learning_rate": 9.77996114015584e-05, "loss": 5.0515, "step": 18600 }, { "epoch": 0.02, "learning_rate": 9.779335176171305e-05, "loss": 5.1018, "step": 18650 }, { "epoch": 0.02, "learning_rate": 9.778709212186769e-05, "loss": 4.9698, "step": 18700 }, { "epoch": 0.02, "learning_rate": 9.778083248202232e-05, "loss": 5.0234, "step": 18750 }, { "epoch": 0.02, "learning_rate": 9.777457284217696e-05, "loss": 4.9905, "step": 18800 }, { "epoch": 0.02, "learning_rate": 9.77683132023316e-05, "loss": 5.0494, "step": 18850 }, { "epoch": 0.02, "learning_rate": 9.776205356248624e-05, "loss": 5.0004, "step": 18900 }, { "epoch": 0.02, "learning_rate": 9.775579392264087e-05, "loss": 4.9839, "step": 18950 }, { "epoch": 0.02, "learning_rate": 9.774953428279551e-05, "loss": 5.0039, "step": 19000 }, { "epoch": 0.02, "learning_rate": 9.774327464295015e-05, "loss": 5.008, "step": 19050 }, { "epoch": 0.02, "learning_rate": 9.773701500310479e-05, "loss": 4.9516, "step": 19100 }, { "epoch": 0.02, "learning_rate": 9.773075536325942e-05, "loss": 5.0539, "step": 19150 }, { "epoch": 0.02, "learning_rate": 9.772449572341406e-05, "loss": 5.0299, "step": 19200 }, { "epoch": 0.02, "learning_rate": 9.77182360835687e-05, "loss": 4.9789, "step": 19250 }, { "epoch": 0.02, "learning_rate": 9.771197644372334e-05, "loss": 4.9953, "step": 19300 }, { "epoch": 0.02, "learning_rate": 9.770571680387797e-05, "loss": 5.0014, "step": 19350 }, { "epoch": 0.02, "learning_rate": 9.769945716403261e-05, "loss": 5.0454, "step": 19400 }, { "epoch": 0.02, "learning_rate": 9.769319752418725e-05, "loss": 4.9653, "step": 19450 }, { "epoch": 0.02, "learning_rate": 9.768693788434188e-05, "loss": 4.9861, "step": 19500 }, { "epoch": 0.02, "learning_rate": 9.768067824449652e-05, "loss": 5.0414, "step": 19550 }, { "epoch": 0.02, "learning_rate": 9.767441860465116e-05, "loss": 5.0262, "step": 19600 }, { "epoch": 0.02, "learning_rate": 9.76681589648058e-05, "loss": 5.0101, "step": 19650 }, { "epoch": 0.02, "learning_rate": 9.766189932496045e-05, "loss": 5.0211, "step": 19700 }, { "epoch": 0.02, "learning_rate": 9.765563968511509e-05, "loss": 5.0693, "step": 19750 }, { "epoch": 0.02, "learning_rate": 9.764938004526972e-05, "loss": 5.0358, "step": 19800 }, { "epoch": 0.02, "learning_rate": 9.764312040542436e-05, "loss": 4.9987, "step": 19850 }, { "epoch": 0.02, "learning_rate": 9.7636860765579e-05, "loss": 5.0134, "step": 19900 }, { "epoch": 0.02, "learning_rate": 9.763060112573363e-05, "loss": 4.9817, "step": 19950 }, { "epoch": 0.03, "learning_rate": 9.762434148588827e-05, "loss": 5.0787, "step": 20000 }, { "epoch": 0.03, "learning_rate": 9.761808184604291e-05, "loss": 5.0112, "step": 20050 }, { "epoch": 0.03, "learning_rate": 9.761182220619755e-05, "loss": 5.0429, "step": 20100 }, { "epoch": 0.03, "learning_rate": 9.760556256635218e-05, "loss": 5.0247, "step": 20150 }, { "epoch": 0.03, "learning_rate": 9.759930292650683e-05, "loss": 5.0893, "step": 20200 }, { "epoch": 0.03, "learning_rate": 9.759304328666147e-05, "loss": 5.1345, "step": 20250 }, { "epoch": 0.03, "learning_rate": 9.758678364681611e-05, "loss": 5.0516, "step": 20300 }, { "epoch": 0.03, "learning_rate": 9.758052400697075e-05, "loss": 5.0024, "step": 20350 }, { "epoch": 0.03, "learning_rate": 9.757426436712538e-05, "loss": 5.044, "step": 20400 }, { "epoch": 0.03, "learning_rate": 9.756800472728002e-05, "loss": 5.0127, "step": 20450 }, { "epoch": 0.03, "learning_rate": 9.756174508743466e-05, "loss": 5.0059, "step": 20500 }, { "epoch": 0.03, "learning_rate": 9.75554854475893e-05, "loss": 5.0712, "step": 20550 }, { "epoch": 0.03, "learning_rate": 9.754922580774393e-05, "loss": 5.1652, "step": 20600 }, { "epoch": 0.03, "learning_rate": 9.754296616789857e-05, "loss": 5.0563, "step": 20650 }, { "epoch": 0.03, "learning_rate": 9.75367065280532e-05, "loss": 5.0582, "step": 20700 }, { "epoch": 0.03, "learning_rate": 9.753044688820785e-05, "loss": 5.0277, "step": 20750 }, { "epoch": 0.03, "learning_rate": 9.752418724836248e-05, "loss": 4.9915, "step": 20800 }, { "epoch": 0.03, "learning_rate": 9.751792760851712e-05, "loss": 5.0201, "step": 20850 }, { "epoch": 0.03, "learning_rate": 9.751166796867176e-05, "loss": 5.0551, "step": 20900 }, { "epoch": 0.03, "learning_rate": 9.75054083288264e-05, "loss": 4.954, "step": 20950 }, { "epoch": 0.03, "learning_rate": 9.749914868898103e-05, "loss": 5.0755, "step": 21000 }, { "epoch": 0.03, "learning_rate": 9.749288904913567e-05, "loss": 4.964, "step": 21050 }, { "epoch": 0.03, "learning_rate": 9.74866294092903e-05, "loss": 5.0213, "step": 21100 }, { "epoch": 0.03, "learning_rate": 9.748036976944494e-05, "loss": 4.9266, "step": 21150 }, { "epoch": 0.03, "learning_rate": 9.747411012959958e-05, "loss": 4.9009, "step": 21200 }, { "epoch": 0.03, "learning_rate": 9.746785048975423e-05, "loss": 5.003, "step": 21250 }, { "epoch": 0.03, "learning_rate": 9.746159084990887e-05, "loss": 4.963, "step": 21300 }, { "epoch": 0.03, "learning_rate": 9.745533121006351e-05, "loss": 4.928, "step": 21350 }, { "epoch": 0.03, "learning_rate": 9.744907157021814e-05, "loss": 5.0024, "step": 21400 }, { "epoch": 0.03, "learning_rate": 9.744281193037278e-05, "loss": 4.9741, "step": 21450 }, { "epoch": 0.03, "learning_rate": 9.743655229052742e-05, "loss": 4.9941, "step": 21500 }, { "epoch": 0.03, "learning_rate": 9.743029265068206e-05, "loss": 5.0514, "step": 21550 }, { "epoch": 0.03, "learning_rate": 9.74240330108367e-05, "loss": 5.0875, "step": 21600 }, { "epoch": 0.03, "learning_rate": 9.741777337099133e-05, "loss": 4.991, "step": 21650 }, { "epoch": 0.03, "learning_rate": 9.741151373114597e-05, "loss": 5.0779, "step": 21700 }, { "epoch": 0.03, "learning_rate": 9.740525409130062e-05, "loss": 5.0252, "step": 21750 }, { "epoch": 0.03, "learning_rate": 9.739899445145526e-05, "loss": 5.0145, "step": 21800 }, { "epoch": 0.03, "learning_rate": 9.73927348116099e-05, "loss": 4.9934, "step": 21850 }, { "epoch": 0.03, "learning_rate": 9.738647517176452e-05, "loss": 5.008, "step": 21900 }, { "epoch": 0.03, "learning_rate": 9.738021553191915e-05, "loss": 5.0035, "step": 21950 }, { "epoch": 0.03, "learning_rate": 9.737395589207379e-05, "loss": 5.0499, "step": 22000 }, { "epoch": 0.03, "learning_rate": 9.736769625222843e-05, "loss": 5.006, "step": 22050 }, { "epoch": 0.03, "learning_rate": 9.736143661238307e-05, "loss": 5.053, "step": 22100 }, { "epoch": 0.03, "learning_rate": 9.73551769725377e-05, "loss": 5.0937, "step": 22150 }, { "epoch": 0.03, "learning_rate": 9.734891733269234e-05, "loss": 5.0601, "step": 22200 }, { "epoch": 0.03, "learning_rate": 9.734265769284698e-05, "loss": 4.9274, "step": 22250 }, { "epoch": 0.03, "learning_rate": 9.733639805300163e-05, "loss": 5.0018, "step": 22300 }, { "epoch": 0.03, "learning_rate": 9.733013841315627e-05, "loss": 5.0515, "step": 22350 }, { "epoch": 0.03, "learning_rate": 9.73238787733109e-05, "loss": 5.062, "step": 22400 }, { "epoch": 0.03, "learning_rate": 9.731761913346554e-05, "loss": 4.9992, "step": 22450 }, { "epoch": 0.03, "learning_rate": 9.731135949362018e-05, "loss": 5.0432, "step": 22500 }, { "epoch": 0.03, "learning_rate": 9.730509985377482e-05, "loss": 4.9944, "step": 22550 }, { "epoch": 0.03, "learning_rate": 9.729884021392945e-05, "loss": 5.0588, "step": 22600 }, { "epoch": 0.03, "learning_rate": 9.729258057408409e-05, "loss": 4.9646, "step": 22650 }, { "epoch": 0.03, "learning_rate": 9.728632093423873e-05, "loss": 4.9767, "step": 22700 }, { "epoch": 0.03, "learning_rate": 9.728006129439337e-05, "loss": 4.8981, "step": 22750 }, { "epoch": 0.03, "learning_rate": 9.727380165454802e-05, "loss": 4.9451, "step": 22800 }, { "epoch": 0.03, "learning_rate": 9.726754201470265e-05, "loss": 4.9775, "step": 22850 }, { "epoch": 0.03, "learning_rate": 9.726128237485729e-05, "loss": 4.9923, "step": 22900 }, { "epoch": 0.03, "learning_rate": 9.725502273501193e-05, "loss": 4.9264, "step": 22950 }, { "epoch": 0.03, "learning_rate": 9.724876309516657e-05, "loss": 4.9555, "step": 23000 }, { "epoch": 0.03, "learning_rate": 9.72425034553212e-05, "loss": 5.051, "step": 23050 }, { "epoch": 0.03, "learning_rate": 9.723624381547584e-05, "loss": 5.0173, "step": 23100 }, { "epoch": 0.03, "learning_rate": 9.722998417563048e-05, "loss": 5.0166, "step": 23150 }, { "epoch": 0.03, "learning_rate": 9.722372453578512e-05, "loss": 4.9683, "step": 23200 }, { "epoch": 0.03, "learning_rate": 9.721746489593975e-05, "loss": 4.8964, "step": 23250 }, { "epoch": 0.03, "learning_rate": 9.721120525609439e-05, "loss": 5.0901, "step": 23300 }, { "epoch": 0.03, "learning_rate": 9.720494561624903e-05, "loss": 5.088, "step": 23350 }, { "epoch": 0.03, "learning_rate": 9.719868597640366e-05, "loss": 4.9527, "step": 23400 }, { "epoch": 0.03, "learning_rate": 9.71924263365583e-05, "loss": 5.0099, "step": 23450 }, { "epoch": 0.03, "learning_rate": 9.718616669671294e-05, "loss": 4.9271, "step": 23500 }, { "epoch": 0.03, "learning_rate": 9.717990705686758e-05, "loss": 4.8979, "step": 23550 }, { "epoch": 0.03, "learning_rate": 9.717364741702221e-05, "loss": 5.0187, "step": 23600 }, { "epoch": 0.03, "learning_rate": 9.716738777717685e-05, "loss": 4.9638, "step": 23650 }, { "epoch": 0.03, "learning_rate": 9.716112813733149e-05, "loss": 5.0316, "step": 23700 }, { "epoch": 0.03, "learning_rate": 9.715486849748613e-05, "loss": 5.0148, "step": 23750 }, { "epoch": 0.03, "learning_rate": 9.714860885764076e-05, "loss": 4.9157, "step": 23800 }, { "epoch": 0.03, "learning_rate": 9.714234921779541e-05, "loss": 4.9498, "step": 23850 }, { "epoch": 0.03, "learning_rate": 9.713608957795005e-05, "loss": 4.9914, "step": 23900 }, { "epoch": 0.03, "learning_rate": 9.712982993810469e-05, "loss": 5.0533, "step": 23950 }, { "epoch": 0.03, "learning_rate": 9.712357029825933e-05, "loss": 4.8966, "step": 24000 }, { "epoch": 0.03, "learning_rate": 9.711731065841396e-05, "loss": 4.9949, "step": 24050 }, { "epoch": 0.03, "learning_rate": 9.71110510185686e-05, "loss": 5.0254, "step": 24100 }, { "epoch": 0.03, "learning_rate": 9.710479137872324e-05, "loss": 4.9671, "step": 24150 }, { "epoch": 0.03, "learning_rate": 9.709853173887788e-05, "loss": 4.9985, "step": 24200 }, { "epoch": 0.03, "learning_rate": 9.709227209903251e-05, "loss": 5.0642, "step": 24250 }, { "epoch": 0.03, "learning_rate": 9.708601245918715e-05, "loss": 4.9919, "step": 24300 }, { "epoch": 0.03, "learning_rate": 9.70797528193418e-05, "loss": 4.952, "step": 24350 }, { "epoch": 0.03, "learning_rate": 9.707349317949644e-05, "loss": 4.9108, "step": 24400 }, { "epoch": 0.03, "learning_rate": 9.706723353965108e-05, "loss": 4.9603, "step": 24450 }, { "epoch": 0.03, "learning_rate": 9.706097389980571e-05, "loss": 4.9385, "step": 24500 }, { "epoch": 0.03, "learning_rate": 9.705471425996035e-05, "loss": 4.8909, "step": 24550 }, { "epoch": 0.03, "learning_rate": 9.704845462011499e-05, "loss": 5.012, "step": 24600 }, { "epoch": 0.03, "learning_rate": 9.704219498026962e-05, "loss": 4.9623, "step": 24650 }, { "epoch": 0.03, "learning_rate": 9.703593534042426e-05, "loss": 5.0044, "step": 24700 }, { "epoch": 0.03, "learning_rate": 9.702967570057889e-05, "loss": 4.9789, "step": 24750 }, { "epoch": 0.03, "learning_rate": 9.702341606073352e-05, "loss": 5.0463, "step": 24800 }, { "epoch": 0.03, "learning_rate": 9.701715642088817e-05, "loss": 4.933, "step": 24850 }, { "epoch": 0.03, "learning_rate": 9.701089678104281e-05, "loss": 4.907, "step": 24900 }, { "epoch": 0.03, "learning_rate": 9.700463714119745e-05, "loss": 4.9755, "step": 24950 }, { "epoch": 0.03, "learning_rate": 9.699837750135209e-05, "loss": 5.0386, "step": 25000 }, { "epoch": 0.03, "learning_rate": 9.699211786150672e-05, "loss": 4.8779, "step": 25050 }, { "epoch": 0.03, "learning_rate": 9.698585822166136e-05, "loss": 5.0635, "step": 25100 }, { "epoch": 0.03, "learning_rate": 9.6979598581816e-05, "loss": 4.9544, "step": 25150 }, { "epoch": 0.03, "learning_rate": 9.697333894197064e-05, "loss": 4.7586, "step": 25200 }, { "epoch": 0.03, "learning_rate": 9.696707930212527e-05, "loss": 4.8179, "step": 25250 }, { "epoch": 0.03, "learning_rate": 9.696081966227991e-05, "loss": 4.9926, "step": 25300 }, { "epoch": 0.03, "learning_rate": 9.695456002243455e-05, "loss": 4.9877, "step": 25350 }, { "epoch": 0.03, "learning_rate": 9.69483003825892e-05, "loss": 5.019, "step": 25400 }, { "epoch": 0.03, "learning_rate": 9.694204074274384e-05, "loss": 4.9265, "step": 25450 }, { "epoch": 0.03, "learning_rate": 9.693578110289847e-05, "loss": 4.9906, "step": 25500 }, { "epoch": 0.03, "learning_rate": 9.692952146305311e-05, "loss": 4.9345, "step": 25550 }, { "epoch": 0.03, "learning_rate": 9.692326182320775e-05, "loss": 4.9798, "step": 25600 }, { "epoch": 0.03, "learning_rate": 9.691700218336238e-05, "loss": 5.0507, "step": 25650 }, { "epoch": 0.03, "learning_rate": 9.691074254351702e-05, "loss": 5.0605, "step": 25700 }, { "epoch": 0.03, "learning_rate": 9.690448290367166e-05, "loss": 5.0018, "step": 25750 }, { "epoch": 0.03, "learning_rate": 9.68982232638263e-05, "loss": 5.0694, "step": 25800 }, { "epoch": 0.03, "learning_rate": 9.689196362398093e-05, "loss": 5.0046, "step": 25850 }, { "epoch": 0.03, "learning_rate": 9.688570398413559e-05, "loss": 5.0854, "step": 25900 }, { "epoch": 0.03, "learning_rate": 9.687944434429021e-05, "loss": 4.9259, "step": 25950 }, { "epoch": 0.03, "learning_rate": 9.687318470444485e-05, "loss": 4.9893, "step": 26000 }, { "epoch": 0.03, "learning_rate": 9.686692506459948e-05, "loss": 5.0459, "step": 26050 }, { "epoch": 0.03, "learning_rate": 9.686066542475412e-05, "loss": 5.0484, "step": 26100 }, { "epoch": 0.03, "learning_rate": 9.685440578490876e-05, "loss": 4.998, "step": 26150 }, { "epoch": 0.03, "learning_rate": 9.68481461450634e-05, "loss": 4.8881, "step": 26200 }, { "epoch": 0.03, "learning_rate": 9.684188650521803e-05, "loss": 4.9884, "step": 26250 }, { "epoch": 0.03, "learning_rate": 9.683562686537267e-05, "loss": 4.9541, "step": 26300 }, { "epoch": 0.03, "learning_rate": 9.682936722552731e-05, "loss": 4.9218, "step": 26350 }, { "epoch": 0.03, "learning_rate": 9.682310758568196e-05, "loss": 4.93, "step": 26400 }, { "epoch": 0.03, "learning_rate": 9.68168479458366e-05, "loss": 4.9518, "step": 26450 }, { "epoch": 0.03, "learning_rate": 9.681058830599123e-05, "loss": 4.996, "step": 26500 }, { "epoch": 0.03, "learning_rate": 9.680432866614587e-05, "loss": 4.9652, "step": 26550 }, { "epoch": 0.03, "learning_rate": 9.679806902630051e-05, "loss": 4.9522, "step": 26600 }, { "epoch": 0.03, "learning_rate": 9.679180938645515e-05, "loss": 4.9844, "step": 26650 }, { "epoch": 0.03, "learning_rate": 9.678554974660978e-05, "loss": 4.9588, "step": 26700 }, { "epoch": 0.03, "learning_rate": 9.677929010676442e-05, "loss": 4.8937, "step": 26750 }, { "epoch": 0.03, "learning_rate": 9.677303046691906e-05, "loss": 4.9631, "step": 26800 }, { "epoch": 0.03, "learning_rate": 9.67667708270737e-05, "loss": 4.8256, "step": 26850 }, { "epoch": 0.03, "learning_rate": 9.676051118722833e-05, "loss": 5.0197, "step": 26900 }, { "epoch": 0.03, "learning_rate": 9.675425154738298e-05, "loss": 4.9843, "step": 26950 }, { "epoch": 0.03, "learning_rate": 9.674799190753762e-05, "loss": 4.9336, "step": 27000 }, { "epoch": 0.03, "learning_rate": 9.674173226769226e-05, "loss": 4.9875, "step": 27050 }, { "epoch": 0.03, "learning_rate": 9.67354726278469e-05, "loss": 4.9077, "step": 27100 }, { "epoch": 0.03, "learning_rate": 9.672921298800153e-05, "loss": 5.0175, "step": 27150 }, { "epoch": 0.03, "learning_rate": 9.672295334815617e-05, "loss": 4.9048, "step": 27200 }, { "epoch": 0.03, "learning_rate": 9.67166937083108e-05, "loss": 4.9995, "step": 27250 }, { "epoch": 0.03, "learning_rate": 9.671043406846544e-05, "loss": 4.9683, "step": 27300 }, { "epoch": 0.03, "learning_rate": 9.670417442862008e-05, "loss": 4.9659, "step": 27350 }, { "epoch": 0.03, "learning_rate": 9.669791478877472e-05, "loss": 5.0109, "step": 27400 }, { "epoch": 0.03, "learning_rate": 9.669165514892936e-05, "loss": 4.9081, "step": 27450 }, { "epoch": 0.03, "learning_rate": 9.6685395509084e-05, "loss": 4.9651, "step": 27500 }, { "epoch": 0.03, "learning_rate": 9.667913586923863e-05, "loss": 5.0429, "step": 27550 }, { "epoch": 0.03, "learning_rate": 9.667287622939327e-05, "loss": 4.9883, "step": 27600 }, { "epoch": 0.03, "learning_rate": 9.66666165895479e-05, "loss": 4.9352, "step": 27650 }, { "epoch": 0.03, "learning_rate": 9.666035694970254e-05, "loss": 4.9341, "step": 27700 }, { "epoch": 0.03, "learning_rate": 9.665409730985718e-05, "loss": 4.9687, "step": 27750 }, { "epoch": 0.03, "learning_rate": 9.664783767001182e-05, "loss": 4.9271, "step": 27800 }, { "epoch": 0.03, "learning_rate": 9.664157803016645e-05, "loss": 4.9193, "step": 27850 }, { "epoch": 0.03, "learning_rate": 9.663531839032109e-05, "loss": 4.9538, "step": 27900 }, { "epoch": 0.03, "learning_rate": 9.662905875047574e-05, "loss": 4.9093, "step": 27950 }, { "epoch": 0.04, "learning_rate": 9.662279911063038e-05, "loss": 4.932, "step": 28000 }, { "epoch": 0.04, "learning_rate": 9.661653947078502e-05, "loss": 4.9525, "step": 28050 }, { "epoch": 0.04, "learning_rate": 9.661027983093965e-05, "loss": 4.9749, "step": 28100 }, { "epoch": 0.04, "learning_rate": 9.660402019109429e-05, "loss": 4.9653, "step": 28150 }, { "epoch": 0.04, "learning_rate": 9.659776055124893e-05, "loss": 4.924, "step": 28200 }, { "epoch": 0.04, "learning_rate": 9.659150091140357e-05, "loss": 4.9608, "step": 28250 }, { "epoch": 0.04, "learning_rate": 9.65852412715582e-05, "loss": 4.9988, "step": 28300 }, { "epoch": 0.04, "learning_rate": 9.657898163171284e-05, "loss": 4.9842, "step": 28350 }, { "epoch": 0.04, "learning_rate": 9.657272199186748e-05, "loss": 4.9969, "step": 28400 }, { "epoch": 0.04, "learning_rate": 9.656646235202212e-05, "loss": 5.0377, "step": 28450 }, { "epoch": 0.04, "learning_rate": 9.656020271217677e-05, "loss": 5.0012, "step": 28500 }, { "epoch": 0.04, "learning_rate": 9.65539430723314e-05, "loss": 4.9756, "step": 28550 }, { "epoch": 0.04, "learning_rate": 9.654768343248604e-05, "loss": 5.0446, "step": 28600 }, { "epoch": 0.04, "learning_rate": 9.654142379264068e-05, "loss": 4.8955, "step": 28650 }, { "epoch": 0.04, "learning_rate": 9.653516415279532e-05, "loss": 4.9771, "step": 28700 }, { "epoch": 0.04, "learning_rate": 9.652890451294995e-05, "loss": 4.9379, "step": 28750 }, { "epoch": 0.04, "learning_rate": 9.652264487310458e-05, "loss": 4.96, "step": 28800 }, { "epoch": 0.04, "learning_rate": 9.651638523325921e-05, "loss": 5.0098, "step": 28850 }, { "epoch": 0.04, "learning_rate": 9.651012559341385e-05, "loss": 5.0001, "step": 28900 }, { "epoch": 0.04, "learning_rate": 9.650386595356849e-05, "loss": 4.9597, "step": 28950 }, { "epoch": 0.04, "learning_rate": 9.649760631372314e-05, "loss": 4.946, "step": 29000 }, { "epoch": 0.04, "learning_rate": 9.649134667387778e-05, "loss": 5.1027, "step": 29050 }, { "epoch": 0.04, "learning_rate": 9.648508703403241e-05, "loss": 4.9982, "step": 29100 }, { "epoch": 0.04, "learning_rate": 9.647882739418705e-05, "loss": 4.9308, "step": 29150 }, { "epoch": 0.04, "learning_rate": 9.647256775434169e-05, "loss": 4.9149, "step": 29200 }, { "epoch": 0.04, "learning_rate": 9.646630811449633e-05, "loss": 4.9285, "step": 29250 }, { "epoch": 0.04, "learning_rate": 9.646004847465096e-05, "loss": 4.9079, "step": 29300 }, { "epoch": 0.04, "learning_rate": 9.64537888348056e-05, "loss": 4.909, "step": 29350 }, { "epoch": 0.04, "learning_rate": 9.644752919496024e-05, "loss": 4.9169, "step": 29400 }, { "epoch": 0.04, "learning_rate": 9.644126955511488e-05, "loss": 4.9185, "step": 29450 }, { "epoch": 0.04, "learning_rate": 9.643500991526953e-05, "loss": 4.9429, "step": 29500 }, { "epoch": 0.04, "learning_rate": 9.642875027542416e-05, "loss": 4.9764, "step": 29550 }, { "epoch": 0.04, "learning_rate": 9.64224906355788e-05, "loss": 4.8426, "step": 29600 }, { "epoch": 0.04, "learning_rate": 9.641623099573344e-05, "loss": 4.9947, "step": 29650 }, { "epoch": 0.04, "learning_rate": 9.640997135588808e-05, "loss": 4.9022, "step": 29700 }, { "epoch": 0.04, "learning_rate": 9.640371171604271e-05, "loss": 4.9893, "step": 29750 }, { "epoch": 0.04, "learning_rate": 9.639745207619735e-05, "loss": 5.0289, "step": 29800 }, { "epoch": 0.04, "learning_rate": 9.639119243635199e-05, "loss": 4.9806, "step": 29850 }, { "epoch": 0.04, "learning_rate": 9.638493279650663e-05, "loss": 4.8427, "step": 29900 }, { "epoch": 0.04, "learning_rate": 9.637867315666126e-05, "loss": 4.9523, "step": 29950 }, { "epoch": 0.04, "learning_rate": 9.63724135168159e-05, "loss": 4.8792, "step": 30000 }, { "epoch": 0.04, "learning_rate": 9.636615387697054e-05, "loss": 4.899, "step": 30050 }, { "epoch": 0.04, "learning_rate": 9.635989423712518e-05, "loss": 4.9358, "step": 30100 }, { "epoch": 0.04, "learning_rate": 9.635363459727981e-05, "loss": 5.0462, "step": 30150 }, { "epoch": 0.04, "learning_rate": 9.634737495743445e-05, "loss": 4.9762, "step": 30200 }, { "epoch": 0.04, "learning_rate": 9.634111531758909e-05, "loss": 4.9145, "step": 30250 }, { "epoch": 0.04, "learning_rate": 9.633485567774372e-05, "loss": 4.9593, "step": 30300 }, { "epoch": 0.04, "learning_rate": 9.632859603789836e-05, "loss": 5.0096, "step": 30350 }, { "epoch": 0.04, "learning_rate": 9.6322336398053e-05, "loss": 4.9238, "step": 30400 }, { "epoch": 0.04, "learning_rate": 9.631607675820764e-05, "loss": 4.936, "step": 30450 }, { "epoch": 0.04, "learning_rate": 9.630981711836227e-05, "loss": 4.9949, "step": 30500 }, { "epoch": 0.04, "learning_rate": 9.630355747851692e-05, "loss": 4.9759, "step": 30550 }, { "epoch": 0.04, "learning_rate": 9.629729783867156e-05, "loss": 4.9781, "step": 30600 }, { "epoch": 0.04, "learning_rate": 9.62910381988262e-05, "loss": 4.8776, "step": 30650 }, { "epoch": 0.04, "learning_rate": 9.628477855898084e-05, "loss": 4.9683, "step": 30700 }, { "epoch": 0.04, "learning_rate": 9.627851891913547e-05, "loss": 4.8879, "step": 30750 }, { "epoch": 0.04, "learning_rate": 9.627225927929011e-05, "loss": 4.9501, "step": 30800 }, { "epoch": 0.04, "learning_rate": 9.626599963944475e-05, "loss": 5.0213, "step": 30850 }, { "epoch": 0.04, "learning_rate": 9.625973999959939e-05, "loss": 4.8904, "step": 30900 }, { "epoch": 0.04, "learning_rate": 9.625348035975402e-05, "loss": 4.9378, "step": 30950 }, { "epoch": 0.04, "learning_rate": 9.624722071990866e-05, "loss": 4.9549, "step": 31000 }, { "epoch": 0.04, "learning_rate": 9.62409610800633e-05, "loss": 4.8818, "step": 31050 }, { "epoch": 0.04, "learning_rate": 9.623470144021795e-05, "loss": 4.9676, "step": 31100 }, { "epoch": 0.04, "learning_rate": 9.622844180037259e-05, "loss": 4.873, "step": 31150 }, { "epoch": 0.04, "learning_rate": 9.622218216052722e-05, "loss": 4.9116, "step": 31200 }, { "epoch": 0.04, "learning_rate": 9.621592252068186e-05, "loss": 4.9553, "step": 31250 }, { "epoch": 0.04, "learning_rate": 9.62096628808365e-05, "loss": 4.9174, "step": 31300 }, { "epoch": 0.04, "learning_rate": 9.620340324099114e-05, "loss": 4.941, "step": 31350 }, { "epoch": 0.04, "learning_rate": 9.619714360114577e-05, "loss": 4.926, "step": 31400 }, { "epoch": 0.04, "learning_rate": 9.619088396130041e-05, "loss": 4.945, "step": 31450 }, { "epoch": 0.04, "learning_rate": 9.618462432145505e-05, "loss": 4.953, "step": 31500 }, { "epoch": 0.04, "learning_rate": 9.617836468160968e-05, "loss": 4.8548, "step": 31550 }, { "epoch": 0.04, "learning_rate": 9.617210504176432e-05, "loss": 4.9591, "step": 31600 }, { "epoch": 0.04, "learning_rate": 9.616584540191896e-05, "loss": 4.8602, "step": 31650 }, { "epoch": 0.04, "learning_rate": 9.61595857620736e-05, "loss": 4.8587, "step": 31700 }, { "epoch": 0.04, "learning_rate": 9.615332612222823e-05, "loss": 4.9136, "step": 31750 }, { "epoch": 0.04, "learning_rate": 9.614706648238287e-05, "loss": 5.0296, "step": 31800 }, { "epoch": 0.04, "learning_rate": 9.614080684253751e-05, "loss": 5.0043, "step": 31850 }, { "epoch": 0.04, "learning_rate": 9.613454720269215e-05, "loss": 4.993, "step": 31900 }, { "epoch": 0.04, "learning_rate": 9.612828756284678e-05, "loss": 4.8789, "step": 31950 }, { "epoch": 0.04, "learning_rate": 9.612202792300142e-05, "loss": 4.987, "step": 32000 }, { "epoch": 0.04, "learning_rate": 9.611576828315606e-05, "loss": 5.0348, "step": 32050 }, { "epoch": 0.04, "learning_rate": 9.610950864331071e-05, "loss": 4.916, "step": 32100 }, { "epoch": 0.04, "learning_rate": 9.610324900346535e-05, "loss": 4.8943, "step": 32150 }, { "epoch": 0.04, "learning_rate": 9.609698936361998e-05, "loss": 4.9347, "step": 32200 }, { "epoch": 0.04, "learning_rate": 9.609072972377462e-05, "loss": 4.9428, "step": 32250 }, { "epoch": 0.04, "learning_rate": 9.608447008392926e-05, "loss": 4.9919, "step": 32300 }, { "epoch": 0.04, "learning_rate": 9.60782104440839e-05, "loss": 4.8804, "step": 32350 }, { "epoch": 0.04, "learning_rate": 9.607195080423853e-05, "loss": 5.0084, "step": 32400 }, { "epoch": 0.04, "learning_rate": 9.606569116439317e-05, "loss": 4.9282, "step": 32450 }, { "epoch": 0.04, "learning_rate": 9.605943152454781e-05, "loss": 4.9022, "step": 32500 }, { "epoch": 0.04, "learning_rate": 9.605317188470244e-05, "loss": 4.9085, "step": 32550 }, { "epoch": 0.04, "learning_rate": 9.604691224485708e-05, "loss": 4.9265, "step": 32600 }, { "epoch": 0.04, "learning_rate": 9.604065260501173e-05, "loss": 4.8853, "step": 32650 }, { "epoch": 0.04, "learning_rate": 9.603439296516637e-05, "loss": 5.0286, "step": 32700 }, { "epoch": 0.04, "learning_rate": 9.602813332532101e-05, "loss": 4.8819, "step": 32750 }, { "epoch": 0.04, "learning_rate": 9.602187368547565e-05, "loss": 4.9279, "step": 32800 }, { "epoch": 0.04, "learning_rate": 9.601561404563027e-05, "loss": 4.9277, "step": 32850 }, { "epoch": 0.04, "learning_rate": 9.60093544057849e-05, "loss": 5.0327, "step": 32900 }, { "epoch": 0.04, "learning_rate": 9.600309476593954e-05, "loss": 4.8926, "step": 32950 }, { "epoch": 0.04, "learning_rate": 9.599683512609418e-05, "loss": 4.906, "step": 33000 }, { "epoch": 0.04, "learning_rate": 9.599057548624882e-05, "loss": 4.8531, "step": 33050 }, { "epoch": 0.04, "learning_rate": 9.598431584640346e-05, "loss": 4.9295, "step": 33100 }, { "epoch": 0.04, "learning_rate": 9.59780562065581e-05, "loss": 4.9341, "step": 33150 }, { "epoch": 0.04, "learning_rate": 9.597179656671274e-05, "loss": 4.9963, "step": 33200 }, { "epoch": 0.04, "learning_rate": 9.596553692686738e-05, "loss": 4.9057, "step": 33250 }, { "epoch": 0.04, "learning_rate": 9.595927728702202e-05, "loss": 4.9403, "step": 33300 }, { "epoch": 0.04, "learning_rate": 9.595301764717666e-05, "loss": 4.9096, "step": 33350 }, { "epoch": 0.04, "learning_rate": 9.594675800733129e-05, "loss": 4.8384, "step": 33400 }, { "epoch": 0.04, "learning_rate": 9.594049836748593e-05, "loss": 4.8878, "step": 33450 }, { "epoch": 0.04, "learning_rate": 9.593423872764057e-05, "loss": 4.8244, "step": 33500 }, { "epoch": 0.04, "learning_rate": 9.59279790877952e-05, "loss": 4.9195, "step": 33550 }, { "epoch": 0.04, "learning_rate": 9.592171944794984e-05, "loss": 4.7685, "step": 33600 }, { "epoch": 0.04, "learning_rate": 9.59154598081045e-05, "loss": 4.8966, "step": 33650 }, { "epoch": 0.04, "learning_rate": 9.590920016825913e-05, "loss": 4.9074, "step": 33700 }, { "epoch": 0.04, "learning_rate": 9.590294052841377e-05, "loss": 4.987, "step": 33750 }, { "epoch": 0.04, "learning_rate": 9.58966808885684e-05, "loss": 4.9245, "step": 33800 }, { "epoch": 0.04, "learning_rate": 9.589042124872304e-05, "loss": 4.8736, "step": 33850 }, { "epoch": 0.04, "learning_rate": 9.588416160887768e-05, "loss": 4.852, "step": 33900 }, { "epoch": 0.04, "learning_rate": 9.587790196903232e-05, "loss": 4.8628, "step": 33950 }, { "epoch": 0.04, "learning_rate": 9.587164232918695e-05, "loss": 4.9368, "step": 34000 }, { "epoch": 0.04, "learning_rate": 9.586538268934159e-05, "loss": 4.8265, "step": 34050 }, { "epoch": 0.04, "learning_rate": 9.585912304949623e-05, "loss": 5.0204, "step": 34100 }, { "epoch": 0.04, "learning_rate": 9.585286340965087e-05, "loss": 4.8846, "step": 34150 }, { "epoch": 0.04, "learning_rate": 9.58466037698055e-05, "loss": 5.0095, "step": 34200 }, { "epoch": 0.04, "learning_rate": 9.584034412996014e-05, "loss": 4.8919, "step": 34250 }, { "epoch": 0.04, "learning_rate": 9.583408449011478e-05, "loss": 4.9342, "step": 34300 }, { "epoch": 0.04, "learning_rate": 9.582782485026942e-05, "loss": 4.9791, "step": 34350 }, { "epoch": 0.04, "learning_rate": 9.582156521042405e-05, "loss": 4.9114, "step": 34400 }, { "epoch": 0.04, "learning_rate": 9.581530557057869e-05, "loss": 4.9314, "step": 34450 }, { "epoch": 0.04, "learning_rate": 9.580904593073333e-05, "loss": 4.8273, "step": 34500 }, { "epoch": 0.04, "learning_rate": 9.580278629088797e-05, "loss": 4.9017, "step": 34550 }, { "epoch": 0.04, "learning_rate": 9.57965266510426e-05, "loss": 4.9036, "step": 34600 }, { "epoch": 0.04, "learning_rate": 9.579026701119724e-05, "loss": 4.8987, "step": 34650 }, { "epoch": 0.04, "learning_rate": 9.578400737135189e-05, "loss": 4.8685, "step": 34700 }, { "epoch": 0.04, "learning_rate": 9.577774773150653e-05, "loss": 4.9544, "step": 34750 }, { "epoch": 0.04, "learning_rate": 9.577148809166117e-05, "loss": 4.9566, "step": 34800 }, { "epoch": 0.04, "learning_rate": 9.57652284518158e-05, "loss": 4.9021, "step": 34850 }, { "epoch": 0.04, "learning_rate": 9.575896881197044e-05, "loss": 4.9509, "step": 34900 }, { "epoch": 0.04, "learning_rate": 9.575270917212508e-05, "loss": 4.8876, "step": 34950 }, { "epoch": 0.04, "learning_rate": 9.574644953227971e-05, "loss": 4.9282, "step": 35000 }, { "epoch": 0.04, "learning_rate": 9.574018989243435e-05, "loss": 4.9116, "step": 35050 }, { "epoch": 0.04, "learning_rate": 9.573393025258899e-05, "loss": 4.8911, "step": 35100 }, { "epoch": 0.04, "learning_rate": 9.572767061274363e-05, "loss": 4.8299, "step": 35150 }, { "epoch": 0.04, "learning_rate": 9.572141097289828e-05, "loss": 4.8908, "step": 35200 }, { "epoch": 0.04, "learning_rate": 9.571515133305292e-05, "loss": 4.9348, "step": 35250 }, { "epoch": 0.04, "learning_rate": 9.570889169320755e-05, "loss": 4.9122, "step": 35300 }, { "epoch": 0.04, "learning_rate": 9.570263205336219e-05, "loss": 4.951, "step": 35350 }, { "epoch": 0.04, "learning_rate": 9.569637241351683e-05, "loss": 4.8927, "step": 35400 }, { "epoch": 0.04, "learning_rate": 9.569011277367146e-05, "loss": 4.9435, "step": 35450 }, { "epoch": 0.04, "learning_rate": 9.56838531338261e-05, "loss": 4.9543, "step": 35500 }, { "epoch": 0.04, "learning_rate": 9.567759349398074e-05, "loss": 4.9717, "step": 35550 }, { "epoch": 0.04, "learning_rate": 9.567133385413538e-05, "loss": 4.9319, "step": 35600 }, { "epoch": 0.04, "learning_rate": 9.566507421429001e-05, "loss": 4.8934, "step": 35650 }, { "epoch": 0.04, "learning_rate": 9.565881457444464e-05, "loss": 4.865, "step": 35700 }, { "epoch": 0.04, "learning_rate": 9.565255493459929e-05, "loss": 4.8861, "step": 35750 }, { "epoch": 0.04, "learning_rate": 9.564629529475393e-05, "loss": 4.931, "step": 35800 }, { "epoch": 0.04, "learning_rate": 9.564003565490856e-05, "loss": 4.9698, "step": 35850 }, { "epoch": 0.04, "learning_rate": 9.56337760150632e-05, "loss": 4.9013, "step": 35900 }, { "epoch": 0.04, "learning_rate": 9.562751637521784e-05, "loss": 4.9143, "step": 35950 }, { "epoch": 0.05, "learning_rate": 9.562125673537248e-05, "loss": 4.8763, "step": 36000 }, { "epoch": 0.05, "learning_rate": 9.561499709552711e-05, "loss": 4.8376, "step": 36050 }, { "epoch": 0.05, "learning_rate": 9.560873745568175e-05, "loss": 4.8385, "step": 36100 }, { "epoch": 0.05, "learning_rate": 9.560247781583639e-05, "loss": 4.8305, "step": 36150 }, { "epoch": 0.05, "learning_rate": 9.559621817599102e-05, "loss": 4.8613, "step": 36200 }, { "epoch": 0.05, "learning_rate": 9.558995853614568e-05, "loss": 4.8079, "step": 36250 }, { "epoch": 0.05, "learning_rate": 9.558369889630031e-05, "loss": 4.864, "step": 36300 }, { "epoch": 0.05, "learning_rate": 9.557743925645495e-05, "loss": 4.95, "step": 36350 }, { "epoch": 0.05, "learning_rate": 9.557117961660959e-05, "loss": 4.9481, "step": 36400 }, { "epoch": 0.05, "learning_rate": 9.556491997676422e-05, "loss": 4.8428, "step": 36450 }, { "epoch": 0.05, "learning_rate": 9.555866033691886e-05, "loss": 4.8048, "step": 36500 }, { "epoch": 0.05, "learning_rate": 9.55524006970735e-05, "loss": 4.9291, "step": 36550 }, { "epoch": 0.05, "learning_rate": 9.554614105722814e-05, "loss": 4.9583, "step": 36600 }, { "epoch": 0.05, "learning_rate": 9.553988141738277e-05, "loss": 4.8893, "step": 36650 }, { "epoch": 0.05, "learning_rate": 9.553362177753741e-05, "loss": 4.8675, "step": 36700 }, { "epoch": 0.05, "learning_rate": 9.552736213769206e-05, "loss": 4.9823, "step": 36750 }, { "epoch": 0.05, "learning_rate": 9.55211024978467e-05, "loss": 4.8646, "step": 36800 }, { "epoch": 0.05, "learning_rate": 9.551484285800132e-05, "loss": 4.9244, "step": 36850 }, { "epoch": 0.05, "learning_rate": 9.550858321815596e-05, "loss": 4.9371, "step": 36900 }, { "epoch": 0.05, "learning_rate": 9.55023235783106e-05, "loss": 4.9954, "step": 36950 }, { "epoch": 0.05, "learning_rate": 9.549606393846524e-05, "loss": 4.8359, "step": 37000 }, { "epoch": 0.05, "learning_rate": 9.548980429861987e-05, "loss": 4.9412, "step": 37050 }, { "epoch": 0.05, "learning_rate": 9.548354465877451e-05, "loss": 4.9906, "step": 37100 }, { "epoch": 0.05, "learning_rate": 9.547728501892915e-05, "loss": 4.929, "step": 37150 }, { "epoch": 0.05, "learning_rate": 9.547102537908378e-05, "loss": 4.9832, "step": 37200 }, { "epoch": 0.05, "learning_rate": 9.546476573923842e-05, "loss": 4.8273, "step": 37250 }, { "epoch": 0.05, "learning_rate": 9.545850609939307e-05, "loss": 4.9437, "step": 37300 }, { "epoch": 0.05, "learning_rate": 9.545224645954771e-05, "loss": 4.8752, "step": 37350 }, { "epoch": 0.05, "learning_rate": 9.544598681970235e-05, "loss": 4.8737, "step": 37400 }, { "epoch": 0.05, "learning_rate": 9.543972717985698e-05, "loss": 4.9611, "step": 37450 }, { "epoch": 0.05, "learning_rate": 9.543346754001162e-05, "loss": 4.8258, "step": 37500 }, { "epoch": 0.05, "learning_rate": 9.542720790016626e-05, "loss": 4.9859, "step": 37550 }, { "epoch": 0.05, "learning_rate": 9.54209482603209e-05, "loss": 4.8322, "step": 37600 }, { "epoch": 0.05, "learning_rate": 9.541468862047553e-05, "loss": 4.9787, "step": 37650 }, { "epoch": 0.05, "learning_rate": 9.540842898063017e-05, "loss": 4.8141, "step": 37700 }, { "epoch": 0.05, "learning_rate": 9.540216934078481e-05, "loss": 4.8832, "step": 37750 }, { "epoch": 0.05, "learning_rate": 9.539590970093946e-05, "loss": 4.9542, "step": 37800 }, { "epoch": 0.05, "learning_rate": 9.53896500610941e-05, "loss": 5.0191, "step": 37850 }, { "epoch": 0.05, "learning_rate": 9.538339042124873e-05, "loss": 4.9279, "step": 37900 }, { "epoch": 0.05, "learning_rate": 9.537713078140337e-05, "loss": 4.9501, "step": 37950 }, { "epoch": 0.05, "learning_rate": 9.537087114155801e-05, "loss": 4.858, "step": 38000 }, { "epoch": 0.05, "learning_rate": 9.536461150171265e-05, "loss": 4.9615, "step": 38050 }, { "epoch": 0.05, "learning_rate": 9.535835186186728e-05, "loss": 4.8703, "step": 38100 }, { "epoch": 0.05, "learning_rate": 9.535209222202192e-05, "loss": 4.8618, "step": 38150 }, { "epoch": 0.05, "learning_rate": 9.534583258217656e-05, "loss": 4.8739, "step": 38200 }, { "epoch": 0.05, "learning_rate": 9.53395729423312e-05, "loss": 4.9114, "step": 38250 }, { "epoch": 0.05, "learning_rate": 9.533331330248583e-05, "loss": 4.9042, "step": 38300 }, { "epoch": 0.05, "learning_rate": 9.532705366264047e-05, "loss": 4.827, "step": 38350 }, { "epoch": 0.05, "learning_rate": 9.532079402279511e-05, "loss": 4.8382, "step": 38400 }, { "epoch": 0.05, "learning_rate": 9.531453438294974e-05, "loss": 4.846, "step": 38450 }, { "epoch": 0.05, "learning_rate": 9.530827474310438e-05, "loss": 4.9417, "step": 38500 }, { "epoch": 0.05, "learning_rate": 9.530201510325902e-05, "loss": 4.8777, "step": 38550 }, { "epoch": 0.05, "learning_rate": 9.529575546341366e-05, "loss": 4.8874, "step": 38600 }, { "epoch": 0.05, "learning_rate": 9.52894958235683e-05, "loss": 4.9413, "step": 38650 }, { "epoch": 0.05, "learning_rate": 9.528323618372293e-05, "loss": 4.7937, "step": 38700 }, { "epoch": 0.05, "learning_rate": 9.527697654387757e-05, "loss": 4.9104, "step": 38750 }, { "epoch": 0.05, "learning_rate": 9.52707169040322e-05, "loss": 4.8675, "step": 38800 }, { "epoch": 0.05, "learning_rate": 9.526445726418686e-05, "loss": 4.8226, "step": 38850 }, { "epoch": 0.05, "learning_rate": 9.52581976243415e-05, "loss": 4.9149, "step": 38900 }, { "epoch": 0.05, "learning_rate": 9.525193798449613e-05, "loss": 4.8805, "step": 38950 }, { "epoch": 0.05, "learning_rate": 9.524567834465077e-05, "loss": 4.9775, "step": 39000 }, { "epoch": 0.05, "learning_rate": 9.52394187048054e-05, "loss": 4.9145, "step": 39050 }, { "epoch": 0.05, "learning_rate": 9.523315906496004e-05, "loss": 4.8407, "step": 39100 }, { "epoch": 0.05, "learning_rate": 9.522689942511468e-05, "loss": 4.9132, "step": 39150 }, { "epoch": 0.05, "learning_rate": 9.522063978526932e-05, "loss": 4.842, "step": 39200 }, { "epoch": 0.05, "learning_rate": 9.521438014542396e-05, "loss": 4.9297, "step": 39250 }, { "epoch": 0.05, "learning_rate": 9.520812050557859e-05, "loss": 4.9036, "step": 39300 }, { "epoch": 0.05, "learning_rate": 9.520186086573324e-05, "loss": 4.9217, "step": 39350 }, { "epoch": 0.05, "learning_rate": 9.519560122588788e-05, "loss": 4.879, "step": 39400 }, { "epoch": 0.05, "learning_rate": 9.518934158604252e-05, "loss": 4.8425, "step": 39450 }, { "epoch": 0.05, "learning_rate": 9.518308194619716e-05, "loss": 4.9346, "step": 39500 }, { "epoch": 0.05, "learning_rate": 9.51768223063518e-05, "loss": 4.8834, "step": 39550 }, { "epoch": 0.05, "learning_rate": 9.517056266650643e-05, "loss": 4.7799, "step": 39600 }, { "epoch": 0.05, "learning_rate": 9.516430302666107e-05, "loss": 4.9226, "step": 39650 }, { "epoch": 0.05, "learning_rate": 9.515804338681569e-05, "loss": 4.8893, "step": 39700 }, { "epoch": 0.05, "learning_rate": 9.515178374697033e-05, "loss": 4.8613, "step": 39750 }, { "epoch": 0.05, "learning_rate": 9.514552410712497e-05, "loss": 4.8944, "step": 39800 }, { "epoch": 0.05, "learning_rate": 9.51392644672796e-05, "loss": 4.8806, "step": 39850 }, { "epoch": 0.05, "learning_rate": 9.513300482743425e-05, "loss": 4.8746, "step": 39900 }, { "epoch": 0.05, "learning_rate": 9.512674518758889e-05, "loss": 4.9389, "step": 39950 }, { "epoch": 0.05, "learning_rate": 9.512048554774353e-05, "loss": 4.8188, "step": 40000 }, { "epoch": 0.05, "learning_rate": 9.511422590789817e-05, "loss": 4.9529, "step": 40050 }, { "epoch": 0.05, "learning_rate": 9.51079662680528e-05, "loss": 4.9178, "step": 40100 }, { "epoch": 0.05, "learning_rate": 9.510170662820744e-05, "loss": 4.7775, "step": 40150 }, { "epoch": 0.05, "learning_rate": 9.509544698836208e-05, "loss": 4.9203, "step": 40200 }, { "epoch": 0.05, "learning_rate": 9.508918734851672e-05, "loss": 4.8925, "step": 40250 }, { "epoch": 0.05, "learning_rate": 9.508292770867135e-05, "loss": 4.9638, "step": 40300 }, { "epoch": 0.05, "learning_rate": 9.507666806882599e-05, "loss": 4.8044, "step": 40350 }, { "epoch": 0.05, "learning_rate": 9.507040842898064e-05, "loss": 4.8619, "step": 40400 }, { "epoch": 0.05, "learning_rate": 9.506414878913528e-05, "loss": 4.8807, "step": 40450 }, { "epoch": 0.05, "learning_rate": 9.505788914928992e-05, "loss": 4.8446, "step": 40500 }, { "epoch": 0.05, "learning_rate": 9.505162950944455e-05, "loss": 4.823, "step": 40550 }, { "epoch": 0.05, "learning_rate": 9.504536986959919e-05, "loss": 4.9156, "step": 40600 }, { "epoch": 0.05, "learning_rate": 9.503911022975383e-05, "loss": 4.8854, "step": 40650 }, { "epoch": 0.05, "learning_rate": 9.503285058990847e-05, "loss": 4.8832, "step": 40700 }, { "epoch": 0.05, "learning_rate": 9.50265909500631e-05, "loss": 4.8582, "step": 40750 }, { "epoch": 0.05, "learning_rate": 9.502033131021774e-05, "loss": 4.8077, "step": 40800 }, { "epoch": 0.05, "learning_rate": 9.501407167037238e-05, "loss": 4.9733, "step": 40850 }, { "epoch": 0.05, "learning_rate": 9.500781203052701e-05, "loss": 4.939, "step": 40900 }, { "epoch": 0.05, "learning_rate": 9.500155239068165e-05, "loss": 4.8049, "step": 40950 }, { "epoch": 0.05, "learning_rate": 9.499529275083629e-05, "loss": 4.9012, "step": 41000 }, { "epoch": 0.05, "learning_rate": 9.498903311099093e-05, "loss": 4.7638, "step": 41050 }, { "epoch": 0.05, "learning_rate": 9.498277347114556e-05, "loss": 4.8456, "step": 41100 }, { "epoch": 0.05, "learning_rate": 9.49765138313002e-05, "loss": 4.917, "step": 41150 }, { "epoch": 0.05, "learning_rate": 9.497025419145484e-05, "loss": 4.7383, "step": 41200 }, { "epoch": 0.05, "learning_rate": 9.496399455160948e-05, "loss": 4.9119, "step": 41250 }, { "epoch": 0.05, "learning_rate": 9.495773491176411e-05, "loss": 4.9026, "step": 41300 }, { "epoch": 0.05, "learning_rate": 9.495147527191875e-05, "loss": 4.9216, "step": 41350 }, { "epoch": 0.05, "learning_rate": 9.494521563207339e-05, "loss": 4.8082, "step": 41400 }, { "epoch": 0.05, "learning_rate": 9.493895599222804e-05, "loss": 4.9085, "step": 41450 }, { "epoch": 0.05, "learning_rate": 9.493269635238268e-05, "loss": 4.9353, "step": 41500 }, { "epoch": 0.05, "learning_rate": 9.492643671253731e-05, "loss": 4.9121, "step": 41550 }, { "epoch": 0.05, "learning_rate": 9.492017707269195e-05, "loss": 4.9035, "step": 41600 }, { "epoch": 0.05, "learning_rate": 9.491391743284659e-05, "loss": 4.8528, "step": 41650 }, { "epoch": 0.05, "learning_rate": 9.490765779300123e-05, "loss": 4.7581, "step": 41700 }, { "epoch": 0.05, "learning_rate": 9.490139815315586e-05, "loss": 4.7886, "step": 41750 }, { "epoch": 0.05, "learning_rate": 9.48951385133105e-05, "loss": 4.853, "step": 41800 }, { "epoch": 0.05, "learning_rate": 9.488887887346514e-05, "loss": 4.8328, "step": 41850 }, { "epoch": 0.05, "learning_rate": 9.488261923361977e-05, "loss": 4.9638, "step": 41900 }, { "epoch": 0.05, "learning_rate": 9.487635959377443e-05, "loss": 4.8679, "step": 41950 }, { "epoch": 0.05, "learning_rate": 9.487009995392906e-05, "loss": 4.8202, "step": 42000 }, { "epoch": 0.05, "learning_rate": 9.48638403140837e-05, "loss": 4.9281, "step": 42050 }, { "epoch": 0.05, "learning_rate": 9.485758067423834e-05, "loss": 4.8553, "step": 42100 }, { "epoch": 0.05, "learning_rate": 9.485132103439298e-05, "loss": 4.8666, "step": 42150 }, { "epoch": 0.05, "learning_rate": 9.484506139454761e-05, "loss": 4.7533, "step": 42200 }, { "epoch": 0.05, "learning_rate": 9.483880175470225e-05, "loss": 4.8444, "step": 42250 }, { "epoch": 0.05, "learning_rate": 9.483254211485689e-05, "loss": 4.7471, "step": 42300 }, { "epoch": 0.05, "learning_rate": 9.482628247501152e-05, "loss": 4.948, "step": 42350 }, { "epoch": 0.05, "learning_rate": 9.482002283516616e-05, "loss": 4.8891, "step": 42400 }, { "epoch": 0.05, "learning_rate": 9.48137631953208e-05, "loss": 4.8327, "step": 42450 }, { "epoch": 0.05, "learning_rate": 9.480750355547544e-05, "loss": 4.8727, "step": 42500 }, { "epoch": 0.05, "learning_rate": 9.480124391563007e-05, "loss": 4.9436, "step": 42550 }, { "epoch": 0.05, "learning_rate": 9.479498427578471e-05, "loss": 4.8762, "step": 42600 }, { "epoch": 0.05, "learning_rate": 9.478872463593935e-05, "loss": 4.8695, "step": 42650 }, { "epoch": 0.05, "learning_rate": 9.478246499609399e-05, "loss": 4.9169, "step": 42700 }, { "epoch": 0.05, "learning_rate": 9.477620535624862e-05, "loss": 4.88, "step": 42750 }, { "epoch": 0.05, "learning_rate": 9.476994571640326e-05, "loss": 4.9302, "step": 42800 }, { "epoch": 0.05, "learning_rate": 9.47636860765579e-05, "loss": 4.7956, "step": 42850 }, { "epoch": 0.05, "learning_rate": 9.475742643671254e-05, "loss": 4.8662, "step": 42900 }, { "epoch": 0.05, "learning_rate": 9.475116679686717e-05, "loss": 4.8943, "step": 42950 }, { "epoch": 0.05, "learning_rate": 9.474490715702182e-05, "loss": 4.9173, "step": 43000 }, { "epoch": 0.05, "learning_rate": 9.473864751717646e-05, "loss": 4.9455, "step": 43050 }, { "epoch": 0.05, "learning_rate": 9.47323878773311e-05, "loss": 4.7996, "step": 43100 }, { "epoch": 0.05, "learning_rate": 9.472612823748574e-05, "loss": 4.8842, "step": 43150 }, { "epoch": 0.05, "learning_rate": 9.471986859764037e-05, "loss": 4.8904, "step": 43200 }, { "epoch": 0.05, "learning_rate": 9.471360895779501e-05, "loss": 4.8688, "step": 43250 }, { "epoch": 0.05, "learning_rate": 9.470734931794965e-05, "loss": 4.9324, "step": 43300 }, { "epoch": 0.05, "learning_rate": 9.470108967810428e-05, "loss": 4.9366, "step": 43350 }, { "epoch": 0.05, "learning_rate": 9.469483003825892e-05, "loss": 4.8679, "step": 43400 }, { "epoch": 0.05, "learning_rate": 9.468857039841356e-05, "loss": 4.845, "step": 43450 }, { "epoch": 0.05, "learning_rate": 9.468231075856821e-05, "loss": 4.8334, "step": 43500 }, { "epoch": 0.05, "learning_rate": 9.467605111872285e-05, "loss": 4.8806, "step": 43550 }, { "epoch": 0.05, "learning_rate": 9.466979147887748e-05, "loss": 4.8584, "step": 43600 }, { "epoch": 0.05, "learning_rate": 9.466353183903212e-05, "loss": 4.8452, "step": 43650 }, { "epoch": 0.05, "learning_rate": 9.465727219918676e-05, "loss": 4.821, "step": 43700 }, { "epoch": 0.05, "learning_rate": 9.465101255934138e-05, "loss": 4.9137, "step": 43750 }, { "epoch": 0.05, "learning_rate": 9.464475291949602e-05, "loss": 4.8857, "step": 43800 }, { "epoch": 0.05, "learning_rate": 9.463849327965066e-05, "loss": 4.7707, "step": 43850 }, { "epoch": 0.05, "learning_rate": 9.46322336398053e-05, "loss": 4.9027, "step": 43900 }, { "epoch": 0.05, "learning_rate": 9.462597399995993e-05, "loss": 4.8451, "step": 43950 }, { "epoch": 0.06, "learning_rate": 9.461971436011458e-05, "loss": 4.8435, "step": 44000 }, { "epoch": 0.06, "learning_rate": 9.461345472026922e-05, "loss": 4.8197, "step": 44050 }, { "epoch": 0.06, "learning_rate": 9.460719508042386e-05, "loss": 4.8404, "step": 44100 }, { "epoch": 0.06, "learning_rate": 9.46009354405785e-05, "loss": 4.8895, "step": 44150 }, { "epoch": 0.06, "learning_rate": 9.459467580073313e-05, "loss": 4.8558, "step": 44200 }, { "epoch": 0.06, "learning_rate": 9.458841616088777e-05, "loss": 4.8344, "step": 44250 }, { "epoch": 0.06, "learning_rate": 9.458215652104241e-05, "loss": 4.912, "step": 44300 }, { "epoch": 0.06, "learning_rate": 9.457589688119704e-05, "loss": 4.8431, "step": 44350 }, { "epoch": 0.06, "learning_rate": 9.456963724135168e-05, "loss": 4.8052, "step": 44400 }, { "epoch": 0.06, "learning_rate": 9.456337760150632e-05, "loss": 4.824, "step": 44450 }, { "epoch": 0.06, "learning_rate": 9.455711796166096e-05, "loss": 4.909, "step": 44500 }, { "epoch": 0.06, "learning_rate": 9.455085832181561e-05, "loss": 4.8579, "step": 44550 }, { "epoch": 0.06, "learning_rate": 9.454459868197024e-05, "loss": 4.8507, "step": 44600 }, { "epoch": 0.06, "learning_rate": 9.453833904212488e-05, "loss": 4.9012, "step": 44650 }, { "epoch": 0.06, "learning_rate": 9.453207940227952e-05, "loss": 4.8429, "step": 44700 }, { "epoch": 0.06, "learning_rate": 9.452581976243416e-05, "loss": 4.9221, "step": 44750 }, { "epoch": 0.06, "learning_rate": 9.45195601225888e-05, "loss": 4.9022, "step": 44800 }, { "epoch": 0.06, "learning_rate": 9.451330048274343e-05, "loss": 4.8352, "step": 44850 }, { "epoch": 0.06, "learning_rate": 9.450704084289807e-05, "loss": 4.7652, "step": 44900 }, { "epoch": 0.06, "learning_rate": 9.45007812030527e-05, "loss": 4.7978, "step": 44950 }, { "epoch": 0.06, "learning_rate": 9.449452156320734e-05, "loss": 4.8511, "step": 45000 }, { "epoch": 0.06, "learning_rate": 9.448826192336198e-05, "loss": 4.8277, "step": 45050 }, { "epoch": 0.06, "learning_rate": 9.448200228351662e-05, "loss": 4.88, "step": 45100 }, { "epoch": 0.06, "learning_rate": 9.447574264367126e-05, "loss": 4.7694, "step": 45150 }, { "epoch": 0.06, "learning_rate": 9.446948300382589e-05, "loss": 4.8222, "step": 45200 }, { "epoch": 0.06, "learning_rate": 9.446322336398053e-05, "loss": 4.7438, "step": 45250 }, { "epoch": 0.06, "learning_rate": 9.445696372413517e-05, "loss": 4.776, "step": 45300 }, { "epoch": 0.06, "learning_rate": 9.44507040842898e-05, "loss": 4.7953, "step": 45350 }, { "epoch": 0.06, "learning_rate": 9.444444444444444e-05, "loss": 4.7831, "step": 45400 }, { "epoch": 0.06, "learning_rate": 9.443818480459908e-05, "loss": 4.8219, "step": 45450 }, { "epoch": 0.06, "learning_rate": 9.443192516475372e-05, "loss": 4.8662, "step": 45500 }, { "epoch": 0.06, "learning_rate": 9.442566552490837e-05, "loss": 4.7885, "step": 45550 }, { "epoch": 0.06, "learning_rate": 9.4419405885063e-05, "loss": 4.9618, "step": 45600 }, { "epoch": 0.06, "learning_rate": 9.441314624521764e-05, "loss": 4.9015, "step": 45650 }, { "epoch": 0.06, "learning_rate": 9.440688660537228e-05, "loss": 4.825, "step": 45700 }, { "epoch": 0.06, "learning_rate": 9.440062696552692e-05, "loss": 4.8321, "step": 45750 }, { "epoch": 0.06, "learning_rate": 9.439436732568155e-05, "loss": 4.8673, "step": 45800 }, { "epoch": 0.06, "learning_rate": 9.438810768583619e-05, "loss": 4.8465, "step": 45850 }, { "epoch": 0.06, "learning_rate": 9.438184804599083e-05, "loss": 4.7656, "step": 45900 }, { "epoch": 0.06, "learning_rate": 9.437558840614547e-05, "loss": 4.7679, "step": 45950 }, { "epoch": 0.06, "learning_rate": 9.43693287663001e-05, "loss": 4.8623, "step": 46000 }, { "epoch": 0.06, "learning_rate": 9.436306912645474e-05, "loss": 4.9471, "step": 46050 }, { "epoch": 0.06, "learning_rate": 9.435680948660939e-05, "loss": 4.829, "step": 46100 }, { "epoch": 0.06, "learning_rate": 9.435054984676403e-05, "loss": 4.783, "step": 46150 }, { "epoch": 0.06, "learning_rate": 9.434429020691867e-05, "loss": 4.91, "step": 46200 }, { "epoch": 0.06, "learning_rate": 9.43380305670733e-05, "loss": 4.8606, "step": 46250 }, { "epoch": 0.06, "learning_rate": 9.433177092722794e-05, "loss": 4.9043, "step": 46300 }, { "epoch": 0.06, "learning_rate": 9.432551128738258e-05, "loss": 4.8101, "step": 46350 }, { "epoch": 0.06, "learning_rate": 9.431925164753722e-05, "loss": 4.8635, "step": 46400 }, { "epoch": 0.06, "learning_rate": 9.431299200769185e-05, "loss": 4.8443, "step": 46450 }, { "epoch": 0.06, "learning_rate": 9.430673236784649e-05, "loss": 4.9156, "step": 46500 }, { "epoch": 0.06, "learning_rate": 9.430047272800113e-05, "loss": 4.8334, "step": 46550 }, { "epoch": 0.06, "learning_rate": 9.429421308815577e-05, "loss": 4.8496, "step": 46600 }, { "epoch": 0.06, "learning_rate": 9.42879534483104e-05, "loss": 4.7832, "step": 46650 }, { "epoch": 0.06, "learning_rate": 9.428169380846504e-05, "loss": 4.8806, "step": 46700 }, { "epoch": 0.06, "learning_rate": 9.427543416861968e-05, "loss": 4.8289, "step": 46750 }, { "epoch": 0.06, "learning_rate": 9.426917452877431e-05, "loss": 4.8799, "step": 46800 }, { "epoch": 0.06, "learning_rate": 9.426291488892895e-05, "loss": 4.7865, "step": 46850 }, { "epoch": 0.06, "learning_rate": 9.425665524908359e-05, "loss": 4.8874, "step": 46900 }, { "epoch": 0.06, "learning_rate": 9.425039560923823e-05, "loss": 4.7974, "step": 46950 }, { "epoch": 0.06, "learning_rate": 9.424413596939286e-05, "loss": 4.8853, "step": 47000 }, { "epoch": 0.06, "learning_rate": 9.42378763295475e-05, "loss": 4.8898, "step": 47050 }, { "epoch": 0.06, "learning_rate": 9.423161668970215e-05, "loss": 4.8087, "step": 47100 }, { "epoch": 0.06, "learning_rate": 9.422535704985679e-05, "loss": 4.7916, "step": 47150 }, { "epoch": 0.06, "learning_rate": 9.421909741001143e-05, "loss": 4.8458, "step": 47200 }, { "epoch": 0.06, "learning_rate": 9.421283777016606e-05, "loss": 4.7961, "step": 47250 }, { "epoch": 0.06, "learning_rate": 9.42065781303207e-05, "loss": 4.7764, "step": 47300 }, { "epoch": 0.06, "learning_rate": 9.420031849047534e-05, "loss": 4.7764, "step": 47350 }, { "epoch": 0.06, "learning_rate": 9.419405885062998e-05, "loss": 4.7961, "step": 47400 }, { "epoch": 0.06, "learning_rate": 9.418779921078461e-05, "loss": 4.905, "step": 47450 }, { "epoch": 0.06, "learning_rate": 9.418153957093925e-05, "loss": 4.9256, "step": 47500 }, { "epoch": 0.06, "learning_rate": 9.417527993109389e-05, "loss": 4.7964, "step": 47550 }, { "epoch": 0.06, "learning_rate": 9.416902029124853e-05, "loss": 4.7838, "step": 47600 }, { "epoch": 0.06, "learning_rate": 9.416276065140318e-05, "loss": 4.8281, "step": 47650 }, { "epoch": 0.06, "learning_rate": 9.415650101155781e-05, "loss": 4.814, "step": 47700 }, { "epoch": 0.06, "learning_rate": 9.415024137171245e-05, "loss": 4.8239, "step": 47750 }, { "epoch": 0.06, "learning_rate": 9.414398173186707e-05, "loss": 4.8236, "step": 47800 }, { "epoch": 0.06, "learning_rate": 9.413772209202171e-05, "loss": 4.7257, "step": 47850 }, { "epoch": 0.06, "learning_rate": 9.413146245217635e-05, "loss": 4.7877, "step": 47900 }, { "epoch": 0.06, "learning_rate": 9.412520281233099e-05, "loss": 4.7699, "step": 47950 }, { "epoch": 0.06, "learning_rate": 9.411894317248562e-05, "loss": 4.7777, "step": 48000 }, { "epoch": 0.06, "learning_rate": 9.411268353264026e-05, "loss": 4.8701, "step": 48050 }, { "epoch": 0.06, "learning_rate": 9.41064238927949e-05, "loss": 4.8745, "step": 48100 }, { "epoch": 0.06, "learning_rate": 9.410016425294955e-05, "loss": 4.7425, "step": 48150 }, { "epoch": 0.06, "learning_rate": 9.409390461310419e-05, "loss": 4.8276, "step": 48200 }, { "epoch": 0.06, "learning_rate": 9.408764497325882e-05, "loss": 4.8549, "step": 48250 }, { "epoch": 0.06, "learning_rate": 9.408138533341346e-05, "loss": 4.8174, "step": 48300 }, { "epoch": 0.06, "learning_rate": 9.40751256935681e-05, "loss": 4.9012, "step": 48350 }, { "epoch": 0.06, "learning_rate": 9.406886605372274e-05, "loss": 4.9074, "step": 48400 }, { "epoch": 0.06, "learning_rate": 9.406260641387737e-05, "loss": 4.8228, "step": 48450 }, { "epoch": 0.06, "learning_rate": 9.405634677403201e-05, "loss": 4.7824, "step": 48500 }, { "epoch": 0.06, "learning_rate": 9.405008713418665e-05, "loss": 4.8327, "step": 48550 }, { "epoch": 0.06, "learning_rate": 9.404382749434129e-05, "loss": 4.812, "step": 48600 }, { "epoch": 0.06, "learning_rate": 9.403756785449594e-05, "loss": 4.8772, "step": 48650 }, { "epoch": 0.06, "learning_rate": 9.403130821465057e-05, "loss": 4.7846, "step": 48700 }, { "epoch": 0.06, "learning_rate": 9.402504857480521e-05, "loss": 4.8698, "step": 48750 }, { "epoch": 0.06, "learning_rate": 9.401878893495985e-05, "loss": 4.877, "step": 48800 }, { "epoch": 0.06, "learning_rate": 9.401252929511449e-05, "loss": 4.9277, "step": 48850 }, { "epoch": 0.06, "learning_rate": 9.400626965526912e-05, "loss": 4.7961, "step": 48900 }, { "epoch": 0.06, "learning_rate": 9.400001001542376e-05, "loss": 4.8658, "step": 48950 }, { "epoch": 0.06, "learning_rate": 9.39937503755784e-05, "loss": 4.9083, "step": 49000 }, { "epoch": 0.06, "learning_rate": 9.398749073573304e-05, "loss": 4.906, "step": 49050 }, { "epoch": 0.06, "learning_rate": 9.398123109588767e-05, "loss": 4.9733, "step": 49100 }, { "epoch": 0.06, "learning_rate": 9.397497145604231e-05, "loss": 4.8797, "step": 49150 }, { "epoch": 0.06, "learning_rate": 9.396871181619695e-05, "loss": 4.7407, "step": 49200 }, { "epoch": 0.06, "learning_rate": 9.396245217635158e-05, "loss": 4.8258, "step": 49250 }, { "epoch": 0.06, "learning_rate": 9.395619253650622e-05, "loss": 4.8159, "step": 49300 }, { "epoch": 0.06, "learning_rate": 9.394993289666086e-05, "loss": 4.8519, "step": 49350 }, { "epoch": 0.06, "learning_rate": 9.39436732568155e-05, "loss": 4.8459, "step": 49400 }, { "epoch": 0.06, "learning_rate": 9.393741361697013e-05, "loss": 4.9222, "step": 49450 }, { "epoch": 0.06, "learning_rate": 9.393115397712477e-05, "loss": 4.7723, "step": 49500 }, { "epoch": 0.06, "learning_rate": 9.392489433727941e-05, "loss": 4.8635, "step": 49550 }, { "epoch": 0.06, "learning_rate": 9.391863469743405e-05, "loss": 4.8247, "step": 49600 }, { "epoch": 0.06, "learning_rate": 9.391237505758868e-05, "loss": 4.843, "step": 49650 }, { "epoch": 0.06, "learning_rate": 9.390611541774333e-05, "loss": 4.8651, "step": 49700 }, { "epoch": 0.06, "learning_rate": 9.389985577789797e-05, "loss": 4.8679, "step": 49750 }, { "epoch": 0.06, "learning_rate": 9.389359613805261e-05, "loss": 4.8541, "step": 49800 }, { "epoch": 0.06, "learning_rate": 9.388733649820725e-05, "loss": 4.8491, "step": 49850 }, { "epoch": 0.06, "learning_rate": 9.388107685836188e-05, "loss": 4.772, "step": 49900 }, { "epoch": 0.06, "learning_rate": 9.387481721851652e-05, "loss": 4.8485, "step": 49950 }, { "epoch": 0.06, "learning_rate": 9.386855757867116e-05, "loss": 4.8142, "step": 50000 }, { "epoch": 0.06, "learning_rate": 9.38622979388258e-05, "loss": 4.8444, "step": 50050 }, { "epoch": 0.06, "learning_rate": 9.385603829898043e-05, "loss": 4.8488, "step": 50100 }, { "epoch": 0.06, "learning_rate": 9.384977865913507e-05, "loss": 4.8745, "step": 50150 }, { "epoch": 0.06, "learning_rate": 9.384351901928971e-05, "loss": 4.9032, "step": 50200 }, { "epoch": 0.06, "learning_rate": 9.383725937944436e-05, "loss": 4.8142, "step": 50250 }, { "epoch": 0.06, "learning_rate": 9.3830999739599e-05, "loss": 4.8774, "step": 50300 }, { "epoch": 0.06, "learning_rate": 9.382474009975363e-05, "loss": 4.8524, "step": 50350 }, { "epoch": 0.06, "learning_rate": 9.381848045990827e-05, "loss": 4.8886, "step": 50400 }, { "epoch": 0.06, "learning_rate": 9.381222082006291e-05, "loss": 4.8967, "step": 50450 }, { "epoch": 0.06, "learning_rate": 9.380596118021754e-05, "loss": 4.8382, "step": 50500 }, { "epoch": 0.06, "learning_rate": 9.379970154037218e-05, "loss": 4.744, "step": 50550 }, { "epoch": 0.06, "learning_rate": 9.379344190052682e-05, "loss": 4.771, "step": 50600 }, { "epoch": 0.06, "learning_rate": 9.378718226068144e-05, "loss": 4.7565, "step": 50650 }, { "epoch": 0.06, "learning_rate": 9.378092262083608e-05, "loss": 4.7757, "step": 50700 }, { "epoch": 0.06, "learning_rate": 9.377466298099073e-05, "loss": 4.9062, "step": 50750 }, { "epoch": 0.06, "learning_rate": 9.376840334114537e-05, "loss": 4.8481, "step": 50800 }, { "epoch": 0.06, "learning_rate": 9.37621437013e-05, "loss": 4.8184, "step": 50850 }, { "epoch": 0.06, "learning_rate": 9.375588406145464e-05, "loss": 4.8065, "step": 50900 }, { "epoch": 0.06, "learning_rate": 9.374962442160928e-05, "loss": 4.8363, "step": 50950 }, { "epoch": 0.06, "learning_rate": 9.374336478176392e-05, "loss": 4.772, "step": 51000 }, { "epoch": 0.06, "learning_rate": 9.373710514191856e-05, "loss": 4.8629, "step": 51050 }, { "epoch": 0.06, "learning_rate": 9.373084550207319e-05, "loss": 4.8761, "step": 51100 }, { "epoch": 0.06, "learning_rate": 9.372458586222783e-05, "loss": 4.7568, "step": 51150 }, { "epoch": 0.06, "learning_rate": 9.371832622238247e-05, "loss": 4.7894, "step": 51200 }, { "epoch": 0.06, "learning_rate": 9.371206658253712e-05, "loss": 4.8007, "step": 51250 }, { "epoch": 0.06, "learning_rate": 9.370580694269176e-05, "loss": 4.9112, "step": 51300 }, { "epoch": 0.06, "learning_rate": 9.369954730284639e-05, "loss": 4.9049, "step": 51350 }, { "epoch": 0.06, "learning_rate": 9.369328766300103e-05, "loss": 4.8902, "step": 51400 }, { "epoch": 0.06, "learning_rate": 9.368702802315567e-05, "loss": 4.8879, "step": 51450 }, { "epoch": 0.06, "learning_rate": 9.36807683833103e-05, "loss": 4.8221, "step": 51500 }, { "epoch": 0.06, "learning_rate": 9.367450874346494e-05, "loss": 4.7562, "step": 51550 }, { "epoch": 0.06, "learning_rate": 9.366824910361958e-05, "loss": 4.7622, "step": 51600 }, { "epoch": 0.06, "learning_rate": 9.366198946377422e-05, "loss": 4.8004, "step": 51650 }, { "epoch": 0.06, "learning_rate": 9.365572982392885e-05, "loss": 4.7418, "step": 51700 }, { "epoch": 0.06, "learning_rate": 9.364947018408349e-05, "loss": 4.8085, "step": 51750 }, { "epoch": 0.06, "learning_rate": 9.364321054423814e-05, "loss": 4.7993, "step": 51800 }, { "epoch": 0.06, "learning_rate": 9.363695090439277e-05, "loss": 4.8312, "step": 51850 }, { "epoch": 0.06, "learning_rate": 9.36306912645474e-05, "loss": 4.8896, "step": 51900 }, { "epoch": 0.06, "learning_rate": 9.362443162470204e-05, "loss": 4.7999, "step": 51950 }, { "epoch": 0.07, "learning_rate": 9.361817198485668e-05, "loss": 4.8556, "step": 52000 }, { "epoch": 0.07, "learning_rate": 9.361191234501132e-05, "loss": 4.7733, "step": 52050 }, { "epoch": 0.07, "learning_rate": 9.360565270516595e-05, "loss": 4.8466, "step": 52100 }, { "epoch": 0.07, "learning_rate": 9.359939306532059e-05, "loss": 4.8646, "step": 52150 }, { "epoch": 0.07, "learning_rate": 9.359313342547523e-05, "loss": 4.8169, "step": 52200 }, { "epoch": 0.07, "learning_rate": 9.358687378562986e-05, "loss": 4.7771, "step": 52250 }, { "epoch": 0.07, "learning_rate": 9.358061414578452e-05, "loss": 4.7843, "step": 52300 }, { "epoch": 0.07, "learning_rate": 9.357435450593915e-05, "loss": 4.8863, "step": 52350 }, { "epoch": 0.07, "learning_rate": 9.356809486609379e-05, "loss": 4.8975, "step": 52400 }, { "epoch": 0.07, "learning_rate": 9.356183522624843e-05, "loss": 4.7513, "step": 52450 }, { "epoch": 0.07, "learning_rate": 9.355557558640307e-05, "loss": 4.7863, "step": 52500 }, { "epoch": 0.07, "learning_rate": 9.35493159465577e-05, "loss": 4.7558, "step": 52550 }, { "epoch": 0.07, "learning_rate": 9.354305630671234e-05, "loss": 4.7227, "step": 52600 }, { "epoch": 0.07, "learning_rate": 9.353679666686698e-05, "loss": 4.817, "step": 52650 }, { "epoch": 0.07, "learning_rate": 9.353053702702161e-05, "loss": 4.7987, "step": 52700 }, { "epoch": 0.07, "learning_rate": 9.352427738717625e-05, "loss": 4.8098, "step": 52750 }, { "epoch": 0.07, "learning_rate": 9.35180177473309e-05, "loss": 4.7916, "step": 52800 }, { "epoch": 0.07, "learning_rate": 9.351175810748554e-05, "loss": 4.7565, "step": 52850 }, { "epoch": 0.07, "learning_rate": 9.350549846764018e-05, "loss": 4.783, "step": 52900 }, { "epoch": 0.07, "learning_rate": 9.349923882779481e-05, "loss": 4.7771, "step": 52950 }, { "epoch": 0.07, "learning_rate": 9.349297918794945e-05, "loss": 4.8035, "step": 53000 }, { "epoch": 0.07, "learning_rate": 9.348671954810409e-05, "loss": 4.8243, "step": 53050 }, { "epoch": 0.07, "learning_rate": 9.348045990825873e-05, "loss": 4.8457, "step": 53100 }, { "epoch": 0.07, "learning_rate": 9.347420026841336e-05, "loss": 4.8277, "step": 53150 }, { "epoch": 0.07, "learning_rate": 9.3467940628568e-05, "loss": 4.7704, "step": 53200 }, { "epoch": 0.07, "learning_rate": 9.346168098872264e-05, "loss": 4.8455, "step": 53250 }, { "epoch": 0.07, "learning_rate": 9.345542134887728e-05, "loss": 4.8563, "step": 53300 }, { "epoch": 0.07, "learning_rate": 9.344916170903191e-05, "loss": 4.715, "step": 53350 }, { "epoch": 0.07, "learning_rate": 9.344290206918655e-05, "loss": 4.7463, "step": 53400 }, { "epoch": 0.07, "learning_rate": 9.343664242934119e-05, "loss": 4.8523, "step": 53450 }, { "epoch": 0.07, "learning_rate": 9.343038278949583e-05, "loss": 4.827, "step": 53500 }, { "epoch": 0.07, "learning_rate": 9.342412314965046e-05, "loss": 4.7495, "step": 53550 }, { "epoch": 0.07, "learning_rate": 9.34178635098051e-05, "loss": 4.8084, "step": 53600 }, { "epoch": 0.07, "learning_rate": 9.341160386995974e-05, "loss": 4.8527, "step": 53650 }, { "epoch": 0.07, "learning_rate": 9.340534423011437e-05, "loss": 4.8554, "step": 53700 }, { "epoch": 0.07, "learning_rate": 9.339908459026901e-05, "loss": 4.8912, "step": 53750 }, { "epoch": 0.07, "learning_rate": 9.339282495042365e-05, "loss": 4.9147, "step": 53800 }, { "epoch": 0.07, "learning_rate": 9.33865653105783e-05, "loss": 4.8065, "step": 53850 }, { "epoch": 0.07, "learning_rate": 9.338030567073294e-05, "loss": 4.6936, "step": 53900 }, { "epoch": 0.07, "learning_rate": 9.337404603088757e-05, "loss": 4.7533, "step": 53950 }, { "epoch": 0.07, "learning_rate": 9.336778639104221e-05, "loss": 4.725, "step": 54000 }, { "epoch": 0.07, "learning_rate": 9.336152675119685e-05, "loss": 4.8228, "step": 54050 }, { "epoch": 0.07, "learning_rate": 9.335526711135149e-05, "loss": 4.9261, "step": 54100 }, { "epoch": 0.07, "learning_rate": 9.334900747150612e-05, "loss": 4.8336, "step": 54150 }, { "epoch": 0.07, "learning_rate": 9.334274783166076e-05, "loss": 4.8313, "step": 54200 }, { "epoch": 0.07, "learning_rate": 9.33364881918154e-05, "loss": 4.8546, "step": 54250 }, { "epoch": 0.07, "learning_rate": 9.333022855197004e-05, "loss": 4.8496, "step": 54300 }, { "epoch": 0.07, "learning_rate": 9.332396891212469e-05, "loss": 4.7476, "step": 54350 }, { "epoch": 0.07, "learning_rate": 9.331770927227932e-05, "loss": 4.7844, "step": 54400 }, { "epoch": 0.07, "learning_rate": 9.331144963243396e-05, "loss": 4.816, "step": 54450 }, { "epoch": 0.07, "learning_rate": 9.33051899925886e-05, "loss": 4.7343, "step": 54500 }, { "epoch": 0.07, "learning_rate": 9.329893035274324e-05, "loss": 4.7946, "step": 54550 }, { "epoch": 0.07, "learning_rate": 9.329267071289787e-05, "loss": 4.8655, "step": 54600 }, { "epoch": 0.07, "learning_rate": 9.328641107305251e-05, "loss": 4.8715, "step": 54650 }, { "epoch": 0.07, "learning_rate": 9.328015143320713e-05, "loss": 4.7694, "step": 54700 }, { "epoch": 0.07, "learning_rate": 9.327389179336177e-05, "loss": 4.8106, "step": 54750 }, { "epoch": 0.07, "learning_rate": 9.326763215351641e-05, "loss": 4.8625, "step": 54800 }, { "epoch": 0.07, "learning_rate": 9.326137251367105e-05, "loss": 4.8186, "step": 54850 }, { "epoch": 0.07, "learning_rate": 9.32551128738257e-05, "loss": 4.7482, "step": 54900 }, { "epoch": 0.07, "learning_rate": 9.324885323398033e-05, "loss": 4.8372, "step": 54950 }, { "epoch": 0.07, "learning_rate": 9.324259359413497e-05, "loss": 4.8122, "step": 55000 }, { "epoch": 0.07, "learning_rate": 9.323633395428961e-05, "loss": 4.8596, "step": 55050 }, { "epoch": 0.07, "learning_rate": 9.323007431444425e-05, "loss": 4.8744, "step": 55100 }, { "epoch": 0.07, "learning_rate": 9.322381467459888e-05, "loss": 4.781, "step": 55150 }, { "epoch": 0.07, "learning_rate": 9.321755503475352e-05, "loss": 4.719, "step": 55200 }, { "epoch": 0.07, "learning_rate": 9.321129539490816e-05, "loss": 4.8216, "step": 55250 }, { "epoch": 0.07, "learning_rate": 9.32050357550628e-05, "loss": 4.8481, "step": 55300 }, { "epoch": 0.07, "learning_rate": 9.319877611521743e-05, "loss": 4.804, "step": 55350 }, { "epoch": 0.07, "learning_rate": 9.319251647537208e-05, "loss": 4.8374, "step": 55400 }, { "epoch": 0.07, "learning_rate": 9.318625683552672e-05, "loss": 4.7305, "step": 55450 }, { "epoch": 0.07, "learning_rate": 9.317999719568136e-05, "loss": 4.7696, "step": 55500 }, { "epoch": 0.07, "learning_rate": 9.3173737555836e-05, "loss": 4.9178, "step": 55550 }, { "epoch": 0.07, "learning_rate": 9.316747791599063e-05, "loss": 4.8276, "step": 55600 }, { "epoch": 0.07, "learning_rate": 9.316121827614527e-05, "loss": 4.8643, "step": 55650 }, { "epoch": 0.07, "learning_rate": 9.315495863629991e-05, "loss": 4.8305, "step": 55700 }, { "epoch": 0.07, "learning_rate": 9.314869899645455e-05, "loss": 4.7809, "step": 55750 }, { "epoch": 0.07, "learning_rate": 9.314243935660918e-05, "loss": 4.8151, "step": 55800 }, { "epoch": 0.07, "learning_rate": 9.313617971676382e-05, "loss": 4.7223, "step": 55850 }, { "epoch": 0.07, "learning_rate": 9.312992007691846e-05, "loss": 4.8051, "step": 55900 }, { "epoch": 0.07, "learning_rate": 9.31236604370731e-05, "loss": 4.771, "step": 55950 }, { "epoch": 0.07, "learning_rate": 9.311740079722773e-05, "loss": 4.8378, "step": 56000 }, { "epoch": 0.07, "learning_rate": 9.311114115738237e-05, "loss": 4.7683, "step": 56050 }, { "epoch": 0.07, "learning_rate": 9.310488151753701e-05, "loss": 4.9132, "step": 56100 }, { "epoch": 0.07, "learning_rate": 9.309862187769164e-05, "loss": 4.7431, "step": 56150 }, { "epoch": 0.07, "learning_rate": 9.309236223784628e-05, "loss": 4.8244, "step": 56200 }, { "epoch": 0.07, "learning_rate": 9.308610259800092e-05, "loss": 4.8158, "step": 56250 }, { "epoch": 0.07, "learning_rate": 9.307984295815556e-05, "loss": 4.8042, "step": 56300 }, { "epoch": 0.07, "learning_rate": 9.30735833183102e-05, "loss": 4.7324, "step": 56350 }, { "epoch": 0.07, "learning_rate": 9.306732367846483e-05, "loss": 4.7638, "step": 56400 }, { "epoch": 0.07, "learning_rate": 9.306106403861948e-05, "loss": 4.7496, "step": 56450 }, { "epoch": 0.07, "learning_rate": 9.305480439877412e-05, "loss": 4.7426, "step": 56500 }, { "epoch": 0.07, "learning_rate": 9.304854475892876e-05, "loss": 4.8491, "step": 56550 }, { "epoch": 0.07, "learning_rate": 9.30422851190834e-05, "loss": 4.8309, "step": 56600 }, { "epoch": 0.07, "learning_rate": 9.303602547923803e-05, "loss": 4.7953, "step": 56650 }, { "epoch": 0.07, "learning_rate": 9.302976583939267e-05, "loss": 4.832, "step": 56700 }, { "epoch": 0.07, "learning_rate": 9.30235061995473e-05, "loss": 4.8866, "step": 56750 }, { "epoch": 0.07, "learning_rate": 9.301724655970194e-05, "loss": 4.7327, "step": 56800 }, { "epoch": 0.07, "learning_rate": 9.301098691985658e-05, "loss": 4.8198, "step": 56850 }, { "epoch": 0.07, "learning_rate": 9.300472728001122e-05, "loss": 4.769, "step": 56900 }, { "epoch": 0.07, "learning_rate": 9.299846764016587e-05, "loss": 4.8043, "step": 56950 }, { "epoch": 0.07, "learning_rate": 9.29922080003205e-05, "loss": 4.7927, "step": 57000 }, { "epoch": 0.07, "learning_rate": 9.298594836047514e-05, "loss": 4.7911, "step": 57050 }, { "epoch": 0.07, "learning_rate": 9.297968872062978e-05, "loss": 4.7492, "step": 57100 }, { "epoch": 0.07, "learning_rate": 9.297342908078442e-05, "loss": 4.862, "step": 57150 }, { "epoch": 0.07, "learning_rate": 9.296716944093906e-05, "loss": 4.8067, "step": 57200 }, { "epoch": 0.07, "learning_rate": 9.296090980109369e-05, "loss": 4.8345, "step": 57250 }, { "epoch": 0.07, "learning_rate": 9.295465016124833e-05, "loss": 4.7816, "step": 57300 }, { "epoch": 0.07, "learning_rate": 9.294839052140297e-05, "loss": 4.778, "step": 57350 }, { "epoch": 0.07, "learning_rate": 9.29421308815576e-05, "loss": 4.869, "step": 57400 }, { "epoch": 0.07, "learning_rate": 9.293587124171224e-05, "loss": 4.8069, "step": 57450 }, { "epoch": 0.07, "learning_rate": 9.292961160186688e-05, "loss": 4.7515, "step": 57500 }, { "epoch": 0.07, "learning_rate": 9.292335196202152e-05, "loss": 4.751, "step": 57550 }, { "epoch": 0.07, "learning_rate": 9.291709232217615e-05, "loss": 4.7856, "step": 57600 }, { "epoch": 0.07, "learning_rate": 9.291083268233079e-05, "loss": 4.7407, "step": 57650 }, { "epoch": 0.07, "learning_rate": 9.290457304248543e-05, "loss": 4.7605, "step": 57700 }, { "epoch": 0.07, "learning_rate": 9.289831340264007e-05, "loss": 4.7723, "step": 57750 }, { "epoch": 0.07, "learning_rate": 9.28920537627947e-05, "loss": 4.7901, "step": 57800 }, { "epoch": 0.07, "learning_rate": 9.288579412294934e-05, "loss": 4.786, "step": 57850 }, { "epoch": 0.07, "learning_rate": 9.287953448310398e-05, "loss": 4.7735, "step": 57900 }, { "epoch": 0.07, "learning_rate": 9.287327484325862e-05, "loss": 4.7273, "step": 57950 }, { "epoch": 0.07, "learning_rate": 9.286701520341327e-05, "loss": 4.9152, "step": 58000 }, { "epoch": 0.07, "learning_rate": 9.28607555635679e-05, "loss": 4.779, "step": 58050 }, { "epoch": 0.07, "learning_rate": 9.285449592372254e-05, "loss": 4.8458, "step": 58100 }, { "epoch": 0.07, "learning_rate": 9.284823628387718e-05, "loss": 4.6567, "step": 58150 }, { "epoch": 0.07, "learning_rate": 9.284197664403182e-05, "loss": 4.7651, "step": 58200 }, { "epoch": 0.07, "learning_rate": 9.283571700418645e-05, "loss": 4.8025, "step": 58250 }, { "epoch": 0.07, "learning_rate": 9.282945736434109e-05, "loss": 4.8119, "step": 58300 }, { "epoch": 0.07, "learning_rate": 9.282319772449573e-05, "loss": 4.8521, "step": 58350 }, { "epoch": 0.07, "learning_rate": 9.281693808465036e-05, "loss": 4.8156, "step": 58400 }, { "epoch": 0.07, "learning_rate": 9.2810678444805e-05, "loss": 4.7752, "step": 58450 }, { "epoch": 0.07, "learning_rate": 9.280441880495965e-05, "loss": 4.7975, "step": 58500 }, { "epoch": 0.07, "learning_rate": 9.279815916511429e-05, "loss": 4.7512, "step": 58550 }, { "epoch": 0.07, "learning_rate": 9.279189952526893e-05, "loss": 4.8402, "step": 58600 }, { "epoch": 0.07, "learning_rate": 9.278563988542357e-05, "loss": 4.7469, "step": 58650 }, { "epoch": 0.07, "learning_rate": 9.27793802455782e-05, "loss": 4.7918, "step": 58700 }, { "epoch": 0.07, "learning_rate": 9.277312060573283e-05, "loss": 4.7682, "step": 58750 }, { "epoch": 0.07, "learning_rate": 9.276686096588746e-05, "loss": 4.7368, "step": 58800 }, { "epoch": 0.07, "learning_rate": 9.27606013260421e-05, "loss": 4.8816, "step": 58850 }, { "epoch": 0.07, "learning_rate": 9.275434168619674e-05, "loss": 4.8218, "step": 58900 }, { "epoch": 0.07, "learning_rate": 9.274808204635138e-05, "loss": 4.8902, "step": 58950 }, { "epoch": 0.07, "learning_rate": 9.274182240650601e-05, "loss": 4.7448, "step": 59000 }, { "epoch": 0.07, "learning_rate": 9.273556276666066e-05, "loss": 4.7472, "step": 59050 }, { "epoch": 0.07, "learning_rate": 9.27293031268153e-05, "loss": 4.8522, "step": 59100 }, { "epoch": 0.07, "learning_rate": 9.272304348696994e-05, "loss": 4.8542, "step": 59150 }, { "epoch": 0.07, "learning_rate": 9.271678384712458e-05, "loss": 4.8097, "step": 59200 }, { "epoch": 0.07, "learning_rate": 9.271052420727921e-05, "loss": 4.7222, "step": 59250 }, { "epoch": 0.07, "learning_rate": 9.270426456743385e-05, "loss": 4.7384, "step": 59300 }, { "epoch": 0.07, "learning_rate": 9.269800492758849e-05, "loss": 4.8622, "step": 59350 }, { "epoch": 0.07, "learning_rate": 9.269174528774313e-05, "loss": 4.8043, "step": 59400 }, { "epoch": 0.07, "learning_rate": 9.268548564789776e-05, "loss": 4.8097, "step": 59450 }, { "epoch": 0.07, "learning_rate": 9.26792260080524e-05, "loss": 4.7535, "step": 59500 }, { "epoch": 0.07, "learning_rate": 9.267296636820705e-05, "loss": 4.7521, "step": 59550 }, { "epoch": 0.07, "learning_rate": 9.266670672836169e-05, "loss": 4.7129, "step": 59600 }, { "epoch": 0.07, "learning_rate": 9.266044708851633e-05, "loss": 4.8227, "step": 59650 }, { "epoch": 0.07, "learning_rate": 9.265418744867096e-05, "loss": 4.8065, "step": 59700 }, { "epoch": 0.07, "learning_rate": 9.26479278088256e-05, "loss": 4.6986, "step": 59750 }, { "epoch": 0.07, "learning_rate": 9.264166816898024e-05, "loss": 4.8258, "step": 59800 }, { "epoch": 0.07, "learning_rate": 9.263540852913487e-05, "loss": 4.6965, "step": 59850 }, { "epoch": 0.07, "learning_rate": 9.262914888928951e-05, "loss": 4.7563, "step": 59900 }, { "epoch": 0.07, "learning_rate": 9.262288924944415e-05, "loss": 4.7658, "step": 59950 }, { "epoch": 0.08, "learning_rate": 9.261662960959879e-05, "loss": 4.8305, "step": 60000 }, { "epoch": 0.08, "learning_rate": 9.261036996975342e-05, "loss": 4.7984, "step": 60050 }, { "epoch": 0.08, "learning_rate": 9.260411032990806e-05, "loss": 4.7045, "step": 60100 }, { "epoch": 0.08, "learning_rate": 9.25978506900627e-05, "loss": 4.6286, "step": 60150 }, { "epoch": 0.08, "learning_rate": 9.259159105021734e-05, "loss": 4.7306, "step": 60200 }, { "epoch": 0.08, "learning_rate": 9.258533141037197e-05, "loss": 4.7517, "step": 60250 }, { "epoch": 0.08, "learning_rate": 9.257907177052661e-05, "loss": 4.8261, "step": 60300 }, { "epoch": 0.08, "learning_rate": 9.257281213068125e-05, "loss": 4.8339, "step": 60350 }, { "epoch": 0.08, "learning_rate": 9.256655249083589e-05, "loss": 4.7464, "step": 60400 }, { "epoch": 0.08, "learning_rate": 9.256029285099052e-05, "loss": 4.7697, "step": 60450 }, { "epoch": 0.08, "learning_rate": 9.255403321114516e-05, "loss": 4.8145, "step": 60500 }, { "epoch": 0.08, "learning_rate": 9.25477735712998e-05, "loss": 4.769, "step": 60550 }, { "epoch": 0.08, "learning_rate": 9.254151393145445e-05, "loss": 4.7717, "step": 60600 }, { "epoch": 0.08, "learning_rate": 9.253525429160909e-05, "loss": 4.7009, "step": 60650 }, { "epoch": 0.08, "learning_rate": 9.252899465176372e-05, "loss": 4.8466, "step": 60700 }, { "epoch": 0.08, "learning_rate": 9.252273501191836e-05, "loss": 4.7625, "step": 60750 }, { "epoch": 0.08, "learning_rate": 9.2516475372073e-05, "loss": 4.7241, "step": 60800 }, { "epoch": 0.08, "learning_rate": 9.251021573222763e-05, "loss": 4.7304, "step": 60850 }, { "epoch": 0.08, "learning_rate": 9.250395609238227e-05, "loss": 4.7728, "step": 60900 }, { "epoch": 0.08, "learning_rate": 9.249769645253691e-05, "loss": 4.7271, "step": 60950 }, { "epoch": 0.08, "learning_rate": 9.249143681269155e-05, "loss": 4.8385, "step": 61000 }, { "epoch": 0.08, "learning_rate": 9.248517717284618e-05, "loss": 4.7094, "step": 61050 }, { "epoch": 0.08, "learning_rate": 9.247891753300083e-05, "loss": 4.7675, "step": 61100 }, { "epoch": 0.08, "learning_rate": 9.247265789315547e-05, "loss": 4.8117, "step": 61150 }, { "epoch": 0.08, "learning_rate": 9.246639825331011e-05, "loss": 4.7748, "step": 61200 }, { "epoch": 0.08, "learning_rate": 9.246013861346475e-05, "loss": 4.7073, "step": 61250 }, { "epoch": 0.08, "learning_rate": 9.245387897361938e-05, "loss": 4.7601, "step": 61300 }, { "epoch": 0.08, "learning_rate": 9.244761933377402e-05, "loss": 4.8325, "step": 61350 }, { "epoch": 0.08, "learning_rate": 9.244135969392866e-05, "loss": 4.7358, "step": 61400 }, { "epoch": 0.08, "learning_rate": 9.24351000540833e-05, "loss": 4.8985, "step": 61450 }, { "epoch": 0.08, "learning_rate": 9.242884041423793e-05, "loss": 4.7647, "step": 61500 }, { "epoch": 0.08, "learning_rate": 9.242258077439257e-05, "loss": 4.834, "step": 61550 }, { "epoch": 0.08, "learning_rate": 9.241632113454721e-05, "loss": 4.8463, "step": 61600 }, { "epoch": 0.08, "learning_rate": 9.241006149470185e-05, "loss": 4.7806, "step": 61650 }, { "epoch": 0.08, "learning_rate": 9.240380185485648e-05, "loss": 4.832, "step": 61700 }, { "epoch": 0.08, "learning_rate": 9.239754221501112e-05, "loss": 4.7172, "step": 61750 }, { "epoch": 0.08, "learning_rate": 9.239128257516576e-05, "loss": 4.6974, "step": 61800 }, { "epoch": 0.08, "learning_rate": 9.23850229353204e-05, "loss": 4.8808, "step": 61850 }, { "epoch": 0.08, "learning_rate": 9.237876329547503e-05, "loss": 4.7467, "step": 61900 }, { "epoch": 0.08, "learning_rate": 9.237250365562967e-05, "loss": 4.7978, "step": 61950 }, { "epoch": 0.08, "learning_rate": 9.236624401578431e-05, "loss": 4.7813, "step": 62000 }, { "epoch": 0.08, "learning_rate": 9.235998437593894e-05, "loss": 4.6861, "step": 62050 }, { "epoch": 0.08, "learning_rate": 9.235372473609358e-05, "loss": 4.8143, "step": 62100 }, { "epoch": 0.08, "learning_rate": 9.234746509624823e-05, "loss": 4.7044, "step": 62150 }, { "epoch": 0.08, "learning_rate": 9.234120545640287e-05, "loss": 4.7484, "step": 62200 }, { "epoch": 0.08, "learning_rate": 9.233494581655751e-05, "loss": 4.8017, "step": 62250 }, { "epoch": 0.08, "learning_rate": 9.232868617671214e-05, "loss": 4.7773, "step": 62300 }, { "epoch": 0.08, "learning_rate": 9.232242653686678e-05, "loss": 4.7718, "step": 62350 }, { "epoch": 0.08, "learning_rate": 9.231616689702142e-05, "loss": 4.782, "step": 62400 }, { "epoch": 0.08, "learning_rate": 9.230990725717606e-05, "loss": 4.8147, "step": 62450 }, { "epoch": 0.08, "learning_rate": 9.23036476173307e-05, "loss": 4.8229, "step": 62500 }, { "epoch": 0.08, "learning_rate": 9.229738797748533e-05, "loss": 4.8218, "step": 62550 }, { "epoch": 0.08, "learning_rate": 9.229112833763997e-05, "loss": 4.7839, "step": 62600 }, { "epoch": 0.08, "learning_rate": 9.228486869779462e-05, "loss": 4.827, "step": 62650 }, { "epoch": 0.08, "learning_rate": 9.227860905794926e-05, "loss": 4.7065, "step": 62700 }, { "epoch": 0.08, "learning_rate": 9.22723494181039e-05, "loss": 4.7529, "step": 62750 }, { "epoch": 0.08, "learning_rate": 9.226608977825852e-05, "loss": 4.7759, "step": 62800 }, { "epoch": 0.08, "learning_rate": 9.225983013841316e-05, "loss": 4.7658, "step": 62850 }, { "epoch": 0.08, "learning_rate": 9.225357049856779e-05, "loss": 4.8145, "step": 62900 }, { "epoch": 0.08, "learning_rate": 9.224731085872243e-05, "loss": 4.7996, "step": 62950 }, { "epoch": 0.08, "learning_rate": 9.224105121887707e-05, "loss": 4.8698, "step": 63000 }, { "epoch": 0.08, "learning_rate": 9.22347915790317e-05, "loss": 4.7504, "step": 63050 }, { "epoch": 0.08, "learning_rate": 9.222853193918634e-05, "loss": 4.7422, "step": 63100 }, { "epoch": 0.08, "learning_rate": 9.222227229934099e-05, "loss": 4.7964, "step": 63150 }, { "epoch": 0.08, "learning_rate": 9.221601265949563e-05, "loss": 4.8424, "step": 63200 }, { "epoch": 0.08, "learning_rate": 9.220975301965027e-05, "loss": 4.8181, "step": 63250 }, { "epoch": 0.08, "learning_rate": 9.22034933798049e-05, "loss": 4.7154, "step": 63300 }, { "epoch": 0.08, "learning_rate": 9.219723373995954e-05, "loss": 4.8233, "step": 63350 }, { "epoch": 0.08, "learning_rate": 9.219097410011418e-05, "loss": 4.8181, "step": 63400 }, { "epoch": 0.08, "learning_rate": 9.218471446026882e-05, "loss": 4.8606, "step": 63450 }, { "epoch": 0.08, "learning_rate": 9.217845482042345e-05, "loss": 4.8427, "step": 63500 }, { "epoch": 0.08, "learning_rate": 9.217219518057809e-05, "loss": 4.7252, "step": 63550 }, { "epoch": 0.08, "learning_rate": 9.216593554073273e-05, "loss": 4.7106, "step": 63600 }, { "epoch": 0.08, "learning_rate": 9.215967590088737e-05, "loss": 4.7977, "step": 63650 }, { "epoch": 0.08, "learning_rate": 9.215341626104202e-05, "loss": 4.7493, "step": 63700 }, { "epoch": 0.08, "learning_rate": 9.214715662119665e-05, "loss": 4.8408, "step": 63750 }, { "epoch": 0.08, "learning_rate": 9.214089698135129e-05, "loss": 4.7561, "step": 63800 }, { "epoch": 0.08, "learning_rate": 9.213463734150593e-05, "loss": 4.7419, "step": 63850 }, { "epoch": 0.08, "learning_rate": 9.212837770166057e-05, "loss": 4.7731, "step": 63900 }, { "epoch": 0.08, "learning_rate": 9.21221180618152e-05, "loss": 4.7927, "step": 63950 }, { "epoch": 0.08, "learning_rate": 9.211585842196984e-05, "loss": 4.7341, "step": 64000 }, { "epoch": 0.08, "learning_rate": 9.210959878212448e-05, "loss": 4.817, "step": 64050 }, { "epoch": 0.08, "learning_rate": 9.210333914227912e-05, "loss": 4.9103, "step": 64100 }, { "epoch": 0.08, "learning_rate": 9.209707950243375e-05, "loss": 4.8086, "step": 64150 }, { "epoch": 0.08, "learning_rate": 9.209081986258839e-05, "loss": 4.7746, "step": 64200 }, { "epoch": 0.08, "learning_rate": 9.208456022274303e-05, "loss": 4.7008, "step": 64250 }, { "epoch": 0.08, "learning_rate": 9.207830058289766e-05, "loss": 4.7932, "step": 64300 }, { "epoch": 0.08, "learning_rate": 9.20720409430523e-05, "loss": 4.8196, "step": 64350 }, { "epoch": 0.08, "learning_rate": 9.206578130320694e-05, "loss": 4.69, "step": 64400 }, { "epoch": 0.08, "learning_rate": 9.205952166336158e-05, "loss": 4.8035, "step": 64450 }, { "epoch": 0.08, "learning_rate": 9.205326202351621e-05, "loss": 4.7838, "step": 64500 }, { "epoch": 0.08, "learning_rate": 9.204700238367085e-05, "loss": 4.7711, "step": 64550 }, { "epoch": 0.08, "learning_rate": 9.204074274382549e-05, "loss": 4.739, "step": 64600 }, { "epoch": 0.08, "learning_rate": 9.203448310398013e-05, "loss": 4.7423, "step": 64650 }, { "epoch": 0.08, "learning_rate": 9.202822346413478e-05, "loss": 4.8614, "step": 64700 }, { "epoch": 0.08, "learning_rate": 9.202196382428941e-05, "loss": 4.7336, "step": 64750 }, { "epoch": 0.08, "learning_rate": 9.201570418444405e-05, "loss": 4.7757, "step": 64800 }, { "epoch": 0.08, "learning_rate": 9.200944454459869e-05, "loss": 4.7281, "step": 64850 }, { "epoch": 0.08, "learning_rate": 9.200318490475333e-05, "loss": 4.7941, "step": 64900 }, { "epoch": 0.08, "learning_rate": 9.199692526490796e-05, "loss": 4.7066, "step": 64950 }, { "epoch": 0.08, "learning_rate": 9.19906656250626e-05, "loss": 4.7872, "step": 65000 }, { "epoch": 0.08, "learning_rate": 9.198440598521724e-05, "loss": 4.8306, "step": 65050 }, { "epoch": 0.08, "learning_rate": 9.197814634537188e-05, "loss": 4.7317, "step": 65100 }, { "epoch": 0.08, "learning_rate": 9.197188670552651e-05, "loss": 4.7549, "step": 65150 }, { "epoch": 0.08, "learning_rate": 9.196562706568115e-05, "loss": 4.7782, "step": 65200 }, { "epoch": 0.08, "learning_rate": 9.19593674258358e-05, "loss": 4.8401, "step": 65250 }, { "epoch": 0.08, "learning_rate": 9.195310778599044e-05, "loss": 4.8024, "step": 65300 }, { "epoch": 0.08, "learning_rate": 9.194684814614508e-05, "loss": 4.7082, "step": 65350 }, { "epoch": 0.08, "learning_rate": 9.194058850629971e-05, "loss": 4.7479, "step": 65400 }, { "epoch": 0.08, "learning_rate": 9.193432886645435e-05, "loss": 4.7879, "step": 65450 }, { "epoch": 0.08, "learning_rate": 9.192806922660899e-05, "loss": 4.683, "step": 65500 }, { "epoch": 0.08, "learning_rate": 9.192180958676363e-05, "loss": 4.6895, "step": 65550 }, { "epoch": 0.08, "learning_rate": 9.191554994691826e-05, "loss": 4.7518, "step": 65600 }, { "epoch": 0.08, "learning_rate": 9.190929030707289e-05, "loss": 4.8073, "step": 65650 }, { "epoch": 0.08, "learning_rate": 9.190303066722752e-05, "loss": 4.8379, "step": 65700 }, { "epoch": 0.08, "learning_rate": 9.189677102738217e-05, "loss": 4.6354, "step": 65750 }, { "epoch": 0.08, "learning_rate": 9.189051138753681e-05, "loss": 4.7375, "step": 65800 }, { "epoch": 0.08, "learning_rate": 9.188425174769145e-05, "loss": 4.8924, "step": 65850 }, { "epoch": 0.08, "learning_rate": 9.187799210784609e-05, "loss": 4.8017, "step": 65900 }, { "epoch": 0.08, "learning_rate": 9.187173246800072e-05, "loss": 4.7871, "step": 65950 }, { "epoch": 0.08, "learning_rate": 9.186547282815536e-05, "loss": 4.722, "step": 66000 }, { "epoch": 0.08, "learning_rate": 9.185921318831e-05, "loss": 4.7636, "step": 66050 }, { "epoch": 0.08, "learning_rate": 9.185295354846464e-05, "loss": 4.7255, "step": 66100 }, { "epoch": 0.08, "learning_rate": 9.184669390861927e-05, "loss": 4.7975, "step": 66150 }, { "epoch": 0.08, "learning_rate": 9.184043426877391e-05, "loss": 4.6684, "step": 66200 }, { "epoch": 0.08, "learning_rate": 9.183417462892856e-05, "loss": 4.8151, "step": 66250 }, { "epoch": 0.08, "learning_rate": 9.18279149890832e-05, "loss": 4.752, "step": 66300 }, { "epoch": 0.08, "learning_rate": 9.182165534923784e-05, "loss": 4.7446, "step": 66350 }, { "epoch": 0.08, "learning_rate": 9.181539570939247e-05, "loss": 4.7123, "step": 66400 }, { "epoch": 0.08, "learning_rate": 9.180913606954711e-05, "loss": 4.7892, "step": 66450 }, { "epoch": 0.08, "learning_rate": 9.180287642970175e-05, "loss": 4.7465, "step": 66500 }, { "epoch": 0.08, "learning_rate": 9.179661678985639e-05, "loss": 4.6745, "step": 66550 }, { "epoch": 0.08, "learning_rate": 9.179035715001102e-05, "loss": 4.7361, "step": 66600 }, { "epoch": 0.08, "learning_rate": 9.178409751016566e-05, "loss": 4.7312, "step": 66650 }, { "epoch": 0.08, "learning_rate": 9.17778378703203e-05, "loss": 4.7995, "step": 66700 }, { "epoch": 0.08, "learning_rate": 9.177157823047493e-05, "loss": 4.7226, "step": 66750 }, { "epoch": 0.08, "learning_rate": 9.176531859062957e-05, "loss": 4.7107, "step": 66800 }, { "epoch": 0.08, "learning_rate": 9.175905895078421e-05, "loss": 4.7735, "step": 66850 }, { "epoch": 0.08, "learning_rate": 9.175279931093885e-05, "loss": 4.8097, "step": 66900 }, { "epoch": 0.08, "learning_rate": 9.174653967109348e-05, "loss": 4.7446, "step": 66950 }, { "epoch": 0.08, "learning_rate": 9.174028003124812e-05, "loss": 4.7501, "step": 67000 }, { "epoch": 0.08, "learning_rate": 9.173402039140276e-05, "loss": 4.626, "step": 67050 }, { "epoch": 0.08, "learning_rate": 9.17277607515574e-05, "loss": 4.6823, "step": 67100 }, { "epoch": 0.08, "learning_rate": 9.172150111171203e-05, "loss": 4.8039, "step": 67150 }, { "epoch": 0.08, "learning_rate": 9.171524147186667e-05, "loss": 4.7606, "step": 67200 }, { "epoch": 0.08, "learning_rate": 9.170898183202131e-05, "loss": 4.7107, "step": 67250 }, { "epoch": 0.08, "learning_rate": 9.170272219217596e-05, "loss": 4.6456, "step": 67300 }, { "epoch": 0.08, "learning_rate": 9.16964625523306e-05, "loss": 4.8464, "step": 67350 }, { "epoch": 0.08, "learning_rate": 9.169020291248523e-05, "loss": 4.8265, "step": 67400 }, { "epoch": 0.08, "learning_rate": 9.168394327263987e-05, "loss": 4.7749, "step": 67450 }, { "epoch": 0.08, "learning_rate": 9.167768363279451e-05, "loss": 4.7295, "step": 67500 }, { "epoch": 0.08, "learning_rate": 9.167142399294915e-05, "loss": 4.8133, "step": 67550 }, { "epoch": 0.08, "learning_rate": 9.166516435310378e-05, "loss": 4.7395, "step": 67600 }, { "epoch": 0.08, "learning_rate": 9.165890471325842e-05, "loss": 4.781, "step": 67650 }, { "epoch": 0.08, "learning_rate": 9.165264507341306e-05, "loss": 4.7836, "step": 67700 }, { "epoch": 0.08, "learning_rate": 9.16463854335677e-05, "loss": 4.7698, "step": 67750 }, { "epoch": 0.08, "learning_rate": 9.164012579372235e-05, "loss": 4.7829, "step": 67800 }, { "epoch": 0.08, "learning_rate": 9.163386615387698e-05, "loss": 4.7152, "step": 67850 }, { "epoch": 0.08, "learning_rate": 9.162760651403162e-05, "loss": 4.7543, "step": 67900 }, { "epoch": 0.08, "learning_rate": 9.162134687418626e-05, "loss": 4.7766, "step": 67950 }, { "epoch": 0.09, "learning_rate": 9.16150872343409e-05, "loss": 4.7515, "step": 68000 }, { "epoch": 0.09, "learning_rate": 9.160882759449553e-05, "loss": 4.7085, "step": 68050 }, { "epoch": 0.09, "learning_rate": 9.160256795465017e-05, "loss": 4.7828, "step": 68100 }, { "epoch": 0.09, "learning_rate": 9.159630831480481e-05, "loss": 4.6939, "step": 68150 }, { "epoch": 0.09, "learning_rate": 9.159004867495944e-05, "loss": 4.7543, "step": 68200 }, { "epoch": 0.09, "learning_rate": 9.158378903511408e-05, "loss": 4.7559, "step": 68250 }, { "epoch": 0.09, "learning_rate": 9.157752939526872e-05, "loss": 4.7316, "step": 68300 }, { "epoch": 0.09, "learning_rate": 9.157126975542336e-05, "loss": 4.7757, "step": 68350 }, { "epoch": 0.09, "learning_rate": 9.1565010115578e-05, "loss": 4.8704, "step": 68400 }, { "epoch": 0.09, "learning_rate": 9.155875047573263e-05, "loss": 4.7982, "step": 68450 }, { "epoch": 0.09, "learning_rate": 9.155249083588727e-05, "loss": 4.6861, "step": 68500 }, { "epoch": 0.09, "learning_rate": 9.15462311960419e-05, "loss": 4.7399, "step": 68550 }, { "epoch": 0.09, "learning_rate": 9.153997155619654e-05, "loss": 4.8311, "step": 68600 }, { "epoch": 0.09, "learning_rate": 9.153371191635118e-05, "loss": 4.7744, "step": 68650 }, { "epoch": 0.09, "learning_rate": 9.152745227650582e-05, "loss": 4.7378, "step": 68700 }, { "epoch": 0.09, "learning_rate": 9.152119263666045e-05, "loss": 4.7257, "step": 68750 }, { "epoch": 0.09, "learning_rate": 9.151493299681509e-05, "loss": 4.724, "step": 68800 }, { "epoch": 0.09, "learning_rate": 9.150867335696974e-05, "loss": 4.7639, "step": 68850 }, { "epoch": 0.09, "learning_rate": 9.150241371712438e-05, "loss": 4.836, "step": 68900 }, { "epoch": 0.09, "learning_rate": 9.149615407727902e-05, "loss": 4.7625, "step": 68950 }, { "epoch": 0.09, "learning_rate": 9.148989443743366e-05, "loss": 4.7762, "step": 69000 }, { "epoch": 0.09, "learning_rate": 9.148363479758829e-05, "loss": 4.7188, "step": 69050 }, { "epoch": 0.09, "learning_rate": 9.147737515774293e-05, "loss": 4.7564, "step": 69100 }, { "epoch": 0.09, "learning_rate": 9.147111551789757e-05, "loss": 4.6955, "step": 69150 }, { "epoch": 0.09, "learning_rate": 9.14648558780522e-05, "loss": 4.7995, "step": 69200 }, { "epoch": 0.09, "learning_rate": 9.145859623820684e-05, "loss": 4.695, "step": 69250 }, { "epoch": 0.09, "learning_rate": 9.145233659836148e-05, "loss": 4.7098, "step": 69300 }, { "epoch": 0.09, "learning_rate": 9.144607695851612e-05, "loss": 4.7645, "step": 69350 }, { "epoch": 0.09, "learning_rate": 9.143981731867077e-05, "loss": 4.7536, "step": 69400 }, { "epoch": 0.09, "learning_rate": 9.14335576788254e-05, "loss": 4.7429, "step": 69450 }, { "epoch": 0.09, "learning_rate": 9.142729803898004e-05, "loss": 4.7717, "step": 69500 }, { "epoch": 0.09, "learning_rate": 9.142103839913468e-05, "loss": 4.7892, "step": 69550 }, { "epoch": 0.09, "learning_rate": 9.141477875928932e-05, "loss": 4.7121, "step": 69600 }, { "epoch": 0.09, "learning_rate": 9.140851911944394e-05, "loss": 4.696, "step": 69650 }, { "epoch": 0.09, "learning_rate": 9.140225947959858e-05, "loss": 4.7978, "step": 69700 }, { "epoch": 0.09, "learning_rate": 9.139599983975322e-05, "loss": 4.69, "step": 69750 }, { "epoch": 0.09, "learning_rate": 9.138974019990785e-05, "loss": 4.7406, "step": 69800 }, { "epoch": 0.09, "learning_rate": 9.138348056006249e-05, "loss": 4.7523, "step": 69850 }, { "epoch": 0.09, "learning_rate": 9.137722092021714e-05, "loss": 4.797, "step": 69900 }, { "epoch": 0.09, "learning_rate": 9.137096128037178e-05, "loss": 4.725, "step": 69950 }, { "epoch": 0.09, "learning_rate": 9.136470164052642e-05, "loss": 4.7629, "step": 70000 }, { "epoch": 0.09, "learning_rate": 9.135844200068105e-05, "loss": 4.6369, "step": 70050 }, { "epoch": 0.09, "learning_rate": 9.135218236083569e-05, "loss": 4.7584, "step": 70100 }, { "epoch": 0.09, "learning_rate": 9.134592272099033e-05, "loss": 4.7135, "step": 70150 }, { "epoch": 0.09, "learning_rate": 9.133966308114496e-05, "loss": 4.6053, "step": 70200 }, { "epoch": 0.09, "learning_rate": 9.13334034412996e-05, "loss": 4.7127, "step": 70250 }, { "epoch": 0.09, "learning_rate": 9.132714380145424e-05, "loss": 4.7942, "step": 70300 }, { "epoch": 0.09, "learning_rate": 9.132088416160888e-05, "loss": 4.7682, "step": 70350 }, { "epoch": 0.09, "learning_rate": 9.131462452176353e-05, "loss": 4.7616, "step": 70400 }, { "epoch": 0.09, "learning_rate": 9.130836488191816e-05, "loss": 4.7866, "step": 70450 }, { "epoch": 0.09, "learning_rate": 9.13021052420728e-05, "loss": 4.7297, "step": 70500 }, { "epoch": 0.09, "learning_rate": 9.129584560222744e-05, "loss": 4.774, "step": 70550 }, { "epoch": 0.09, "learning_rate": 9.128958596238208e-05, "loss": 4.7306, "step": 70600 }, { "epoch": 0.09, "learning_rate": 9.128332632253671e-05, "loss": 4.7907, "step": 70650 }, { "epoch": 0.09, "learning_rate": 9.127706668269135e-05, "loss": 4.7095, "step": 70700 }, { "epoch": 0.09, "learning_rate": 9.127080704284599e-05, "loss": 4.7573, "step": 70750 }, { "epoch": 0.09, "learning_rate": 9.126454740300063e-05, "loss": 4.7543, "step": 70800 }, { "epoch": 0.09, "learning_rate": 9.125828776315526e-05, "loss": 4.7874, "step": 70850 }, { "epoch": 0.09, "learning_rate": 9.12520281233099e-05, "loss": 4.7725, "step": 70900 }, { "epoch": 0.09, "learning_rate": 9.124576848346454e-05, "loss": 4.6267, "step": 70950 }, { "epoch": 0.09, "learning_rate": 9.123950884361918e-05, "loss": 4.8825, "step": 71000 }, { "epoch": 0.09, "learning_rate": 9.123324920377381e-05, "loss": 4.7166, "step": 71050 }, { "epoch": 0.09, "learning_rate": 9.122698956392845e-05, "loss": 4.8406, "step": 71100 }, { "epoch": 0.09, "learning_rate": 9.122072992408309e-05, "loss": 4.7241, "step": 71150 }, { "epoch": 0.09, "learning_rate": 9.121447028423772e-05, "loss": 4.7581, "step": 71200 }, { "epoch": 0.09, "learning_rate": 9.120821064439236e-05, "loss": 4.8553, "step": 71250 }, { "epoch": 0.09, "learning_rate": 9.1201951004547e-05, "loss": 4.7083, "step": 71300 }, { "epoch": 0.09, "learning_rate": 9.119569136470164e-05, "loss": 4.8527, "step": 71350 }, { "epoch": 0.09, "learning_rate": 9.118943172485627e-05, "loss": 4.6636, "step": 71400 }, { "epoch": 0.09, "learning_rate": 9.118317208501093e-05, "loss": 4.582, "step": 71450 }, { "epoch": 0.09, "learning_rate": 9.117691244516556e-05, "loss": 4.7628, "step": 71500 }, { "epoch": 0.09, "learning_rate": 9.11706528053202e-05, "loss": 4.7849, "step": 71550 }, { "epoch": 0.09, "learning_rate": 9.116439316547484e-05, "loss": 4.7323, "step": 71600 }, { "epoch": 0.09, "learning_rate": 9.115813352562947e-05, "loss": 4.7723, "step": 71650 }, { "epoch": 0.09, "learning_rate": 9.115187388578411e-05, "loss": 4.8704, "step": 71700 }, { "epoch": 0.09, "learning_rate": 9.114561424593875e-05, "loss": 4.7759, "step": 71750 }, { "epoch": 0.09, "learning_rate": 9.113935460609339e-05, "loss": 4.7629, "step": 71800 }, { "epoch": 0.09, "learning_rate": 9.113309496624802e-05, "loss": 4.8201, "step": 71850 }, { "epoch": 0.09, "learning_rate": 9.112683532640266e-05, "loss": 4.7626, "step": 71900 }, { "epoch": 0.09, "learning_rate": 9.112057568655731e-05, "loss": 4.7213, "step": 71950 }, { "epoch": 0.09, "learning_rate": 9.111431604671195e-05, "loss": 4.7333, "step": 72000 }, { "epoch": 0.09, "learning_rate": 9.110805640686659e-05, "loss": 4.6664, "step": 72050 }, { "epoch": 0.09, "learning_rate": 9.110179676702122e-05, "loss": 4.763, "step": 72100 }, { "epoch": 0.09, "learning_rate": 9.109553712717586e-05, "loss": 4.8328, "step": 72150 }, { "epoch": 0.09, "learning_rate": 9.10892774873305e-05, "loss": 4.6907, "step": 72200 }, { "epoch": 0.09, "learning_rate": 9.108301784748514e-05, "loss": 4.7491, "step": 72250 }, { "epoch": 0.09, "learning_rate": 9.107675820763977e-05, "loss": 4.7699, "step": 72300 }, { "epoch": 0.09, "learning_rate": 9.107049856779441e-05, "loss": 4.7386, "step": 72350 }, { "epoch": 0.09, "learning_rate": 9.106423892794905e-05, "loss": 4.8148, "step": 72400 }, { "epoch": 0.09, "learning_rate": 9.105797928810369e-05, "loss": 4.7842, "step": 72450 }, { "epoch": 0.09, "learning_rate": 9.105171964825832e-05, "loss": 4.7783, "step": 72500 }, { "epoch": 0.09, "learning_rate": 9.104546000841296e-05, "loss": 4.7482, "step": 72550 }, { "epoch": 0.09, "learning_rate": 9.10392003685676e-05, "loss": 4.6679, "step": 72600 }, { "epoch": 0.09, "learning_rate": 9.103294072872223e-05, "loss": 4.7649, "step": 72650 }, { "epoch": 0.09, "learning_rate": 9.102668108887687e-05, "loss": 4.7007, "step": 72700 }, { "epoch": 0.09, "learning_rate": 9.102042144903151e-05, "loss": 4.7162, "step": 72750 }, { "epoch": 0.09, "learning_rate": 9.101416180918615e-05, "loss": 4.7021, "step": 72800 }, { "epoch": 0.09, "learning_rate": 9.100790216934078e-05, "loss": 4.7517, "step": 72850 }, { "epoch": 0.09, "learning_rate": 9.100164252949542e-05, "loss": 4.7645, "step": 72900 }, { "epoch": 0.09, "learning_rate": 9.099538288965006e-05, "loss": 4.6773, "step": 72950 }, { "epoch": 0.09, "learning_rate": 9.098912324980471e-05, "loss": 4.6825, "step": 73000 }, { "epoch": 0.09, "learning_rate": 9.098286360995935e-05, "loss": 4.7241, "step": 73050 }, { "epoch": 0.09, "learning_rate": 9.097660397011398e-05, "loss": 4.7236, "step": 73100 }, { "epoch": 0.09, "learning_rate": 9.097034433026862e-05, "loss": 4.7003, "step": 73150 }, { "epoch": 0.09, "learning_rate": 9.096408469042326e-05, "loss": 4.6381, "step": 73200 }, { "epoch": 0.09, "learning_rate": 9.09578250505779e-05, "loss": 4.7898, "step": 73250 }, { "epoch": 0.09, "learning_rate": 9.095156541073253e-05, "loss": 4.7564, "step": 73300 }, { "epoch": 0.09, "learning_rate": 9.094530577088717e-05, "loss": 4.7501, "step": 73350 }, { "epoch": 0.09, "learning_rate": 9.093904613104181e-05, "loss": 4.7363, "step": 73400 }, { "epoch": 0.09, "learning_rate": 9.093278649119645e-05, "loss": 4.7505, "step": 73450 }, { "epoch": 0.09, "learning_rate": 9.09265268513511e-05, "loss": 4.7221, "step": 73500 }, { "epoch": 0.09, "learning_rate": 9.092026721150573e-05, "loss": 4.8041, "step": 73550 }, { "epoch": 0.09, "learning_rate": 9.091400757166037e-05, "loss": 4.846, "step": 73600 }, { "epoch": 0.09, "learning_rate": 9.090774793181501e-05, "loss": 4.7542, "step": 73650 }, { "epoch": 0.09, "learning_rate": 9.090148829196963e-05, "loss": 4.6919, "step": 73700 }, { "epoch": 0.09, "learning_rate": 9.089522865212427e-05, "loss": 4.7224, "step": 73750 }, { "epoch": 0.09, "learning_rate": 9.08889690122789e-05, "loss": 4.7332, "step": 73800 }, { "epoch": 0.09, "learning_rate": 9.088270937243354e-05, "loss": 4.755, "step": 73850 }, { "epoch": 0.09, "learning_rate": 9.087644973258818e-05, "loss": 4.7373, "step": 73900 }, { "epoch": 0.09, "learning_rate": 9.087019009274282e-05, "loss": 4.7199, "step": 73950 }, { "epoch": 0.09, "learning_rate": 9.086393045289746e-05, "loss": 4.7748, "step": 74000 }, { "epoch": 0.09, "learning_rate": 9.085767081305211e-05, "loss": 4.7455, "step": 74050 }, { "epoch": 0.09, "learning_rate": 9.085141117320674e-05, "loss": 4.7283, "step": 74100 }, { "epoch": 0.09, "learning_rate": 9.084515153336138e-05, "loss": 4.7279, "step": 74150 }, { "epoch": 0.09, "learning_rate": 9.083889189351602e-05, "loss": 4.792, "step": 74200 }, { "epoch": 0.09, "learning_rate": 9.083263225367066e-05, "loss": 4.7471, "step": 74250 }, { "epoch": 0.09, "learning_rate": 9.08263726138253e-05, "loss": 4.6707, "step": 74300 }, { "epoch": 0.09, "learning_rate": 9.082011297397993e-05, "loss": 4.709, "step": 74350 }, { "epoch": 0.09, "learning_rate": 9.081385333413457e-05, "loss": 4.6234, "step": 74400 }, { "epoch": 0.09, "learning_rate": 9.08075936942892e-05, "loss": 4.7119, "step": 74450 }, { "epoch": 0.09, "learning_rate": 9.080133405444384e-05, "loss": 4.7284, "step": 74500 }, { "epoch": 0.09, "learning_rate": 9.07950744145985e-05, "loss": 4.6634, "step": 74550 }, { "epoch": 0.09, "learning_rate": 9.078881477475313e-05, "loss": 4.8128, "step": 74600 }, { "epoch": 0.09, "learning_rate": 9.078255513490777e-05, "loss": 4.6925, "step": 74650 }, { "epoch": 0.09, "learning_rate": 9.07762954950624e-05, "loss": 4.7179, "step": 74700 }, { "epoch": 0.09, "learning_rate": 9.077003585521704e-05, "loss": 4.703, "step": 74750 }, { "epoch": 0.09, "learning_rate": 9.076377621537168e-05, "loss": 4.7611, "step": 74800 }, { "epoch": 0.09, "learning_rate": 9.075751657552632e-05, "loss": 4.7785, "step": 74850 }, { "epoch": 0.09, "learning_rate": 9.075125693568096e-05, "loss": 4.7907, "step": 74900 }, { "epoch": 0.09, "learning_rate": 9.074499729583559e-05, "loss": 4.6883, "step": 74950 }, { "epoch": 0.09, "learning_rate": 9.073873765599023e-05, "loss": 4.7875, "step": 75000 }, { "epoch": 0.09, "learning_rate": 9.073247801614487e-05, "loss": 4.81, "step": 75050 }, { "epoch": 0.09, "learning_rate": 9.07262183762995e-05, "loss": 4.7826, "step": 75100 }, { "epoch": 0.09, "learning_rate": 9.071995873645414e-05, "loss": 4.6997, "step": 75150 }, { "epoch": 0.09, "learning_rate": 9.071369909660878e-05, "loss": 4.7331, "step": 75200 }, { "epoch": 0.09, "learning_rate": 9.070743945676342e-05, "loss": 4.8255, "step": 75250 }, { "epoch": 0.09, "learning_rate": 9.070117981691805e-05, "loss": 4.6665, "step": 75300 }, { "epoch": 0.09, "learning_rate": 9.069492017707269e-05, "loss": 4.6821, "step": 75350 }, { "epoch": 0.09, "learning_rate": 9.068866053722733e-05, "loss": 4.837, "step": 75400 }, { "epoch": 0.09, "learning_rate": 9.068240089738197e-05, "loss": 4.7725, "step": 75450 }, { "epoch": 0.09, "learning_rate": 9.06761412575366e-05, "loss": 4.7529, "step": 75500 }, { "epoch": 0.09, "learning_rate": 9.066988161769124e-05, "loss": 4.7562, "step": 75550 }, { "epoch": 0.09, "learning_rate": 9.066362197784589e-05, "loss": 4.7919, "step": 75600 }, { "epoch": 0.09, "learning_rate": 9.065736233800053e-05, "loss": 4.7784, "step": 75650 }, { "epoch": 0.09, "learning_rate": 9.065110269815517e-05, "loss": 4.6653, "step": 75700 }, { "epoch": 0.09, "learning_rate": 9.06448430583098e-05, "loss": 4.7321, "step": 75750 }, { "epoch": 0.09, "learning_rate": 9.063858341846444e-05, "loss": 4.7004, "step": 75800 }, { "epoch": 0.09, "learning_rate": 9.063232377861908e-05, "loss": 4.7461, "step": 75850 }, { "epoch": 0.09, "learning_rate": 9.062606413877372e-05, "loss": 4.716, "step": 75900 }, { "epoch": 0.09, "learning_rate": 9.061980449892835e-05, "loss": 4.6109, "step": 75950 }, { "epoch": 0.1, "learning_rate": 9.061354485908299e-05, "loss": 4.7743, "step": 76000 }, { "epoch": 0.1, "learning_rate": 9.060728521923763e-05, "loss": 4.7611, "step": 76050 }, { "epoch": 0.1, "learning_rate": 9.060102557939228e-05, "loss": 4.725, "step": 76100 }, { "epoch": 0.1, "learning_rate": 9.059476593954692e-05, "loss": 4.7579, "step": 76150 }, { "epoch": 0.1, "learning_rate": 9.058850629970155e-05, "loss": 4.707, "step": 76200 }, { "epoch": 0.1, "learning_rate": 9.058224665985619e-05, "loss": 4.7007, "step": 76250 }, { "epoch": 0.1, "learning_rate": 9.057598702001083e-05, "loss": 4.6851, "step": 76300 }, { "epoch": 0.1, "learning_rate": 9.056972738016546e-05, "loss": 4.6699, "step": 76350 }, { "epoch": 0.1, "learning_rate": 9.05634677403201e-05, "loss": 4.6545, "step": 76400 }, { "epoch": 0.1, "learning_rate": 9.055720810047474e-05, "loss": 4.7926, "step": 76450 }, { "epoch": 0.1, "learning_rate": 9.055094846062938e-05, "loss": 4.8275, "step": 76500 }, { "epoch": 0.1, "learning_rate": 9.0544688820784e-05, "loss": 4.7783, "step": 76550 }, { "epoch": 0.1, "learning_rate": 9.053842918093865e-05, "loss": 4.6668, "step": 76600 }, { "epoch": 0.1, "learning_rate": 9.053216954109329e-05, "loss": 4.694, "step": 76650 }, { "epoch": 0.1, "learning_rate": 9.052590990124793e-05, "loss": 4.7165, "step": 76700 }, { "epoch": 0.1, "learning_rate": 9.051965026140256e-05, "loss": 4.7805, "step": 76750 }, { "epoch": 0.1, "learning_rate": 9.05133906215572e-05, "loss": 4.7588, "step": 76800 }, { "epoch": 0.1, "learning_rate": 9.050713098171184e-05, "loss": 4.7014, "step": 76850 }, { "epoch": 0.1, "learning_rate": 9.050087134186648e-05, "loss": 4.7845, "step": 76900 }, { "epoch": 0.1, "learning_rate": 9.049461170202111e-05, "loss": 4.7561, "step": 76950 }, { "epoch": 0.1, "learning_rate": 9.048835206217575e-05, "loss": 4.7725, "step": 77000 }, { "epoch": 0.1, "learning_rate": 9.048209242233039e-05, "loss": 4.6629, "step": 77050 }, { "epoch": 0.1, "learning_rate": 9.047583278248502e-05, "loss": 4.6253, "step": 77100 }, { "epoch": 0.1, "learning_rate": 9.046957314263968e-05, "loss": 4.6605, "step": 77150 }, { "epoch": 0.1, "learning_rate": 9.046331350279431e-05, "loss": 4.7287, "step": 77200 }, { "epoch": 0.1, "learning_rate": 9.045705386294895e-05, "loss": 4.7559, "step": 77250 }, { "epoch": 0.1, "learning_rate": 9.045079422310359e-05, "loss": 4.7661, "step": 77300 }, { "epoch": 0.1, "learning_rate": 9.044453458325822e-05, "loss": 4.7301, "step": 77350 }, { "epoch": 0.1, "learning_rate": 9.043827494341286e-05, "loss": 4.775, "step": 77400 }, { "epoch": 0.1, "learning_rate": 9.04320153035675e-05, "loss": 4.6566, "step": 77450 }, { "epoch": 0.1, "learning_rate": 9.042575566372214e-05, "loss": 4.8482, "step": 77500 }, { "epoch": 0.1, "learning_rate": 9.041949602387677e-05, "loss": 4.785, "step": 77550 }, { "epoch": 0.1, "learning_rate": 9.041323638403141e-05, "loss": 4.6871, "step": 77600 }, { "epoch": 0.1, "learning_rate": 9.040697674418606e-05, "loss": 4.6298, "step": 77650 }, { "epoch": 0.1, "learning_rate": 9.04007171043407e-05, "loss": 4.6765, "step": 77700 }, { "epoch": 0.1, "learning_rate": 9.039445746449532e-05, "loss": 4.7137, "step": 77750 }, { "epoch": 0.1, "learning_rate": 9.038819782464996e-05, "loss": 4.6975, "step": 77800 }, { "epoch": 0.1, "learning_rate": 9.03819381848046e-05, "loss": 4.6529, "step": 77850 }, { "epoch": 0.1, "learning_rate": 9.037567854495924e-05, "loss": 4.7126, "step": 77900 }, { "epoch": 0.1, "learning_rate": 9.036941890511387e-05, "loss": 4.8285, "step": 77950 }, { "epoch": 0.1, "learning_rate": 9.036315926526851e-05, "loss": 4.7203, "step": 78000 }, { "epoch": 0.1, "learning_rate": 9.035689962542315e-05, "loss": 4.7446, "step": 78050 }, { "epoch": 0.1, "learning_rate": 9.035063998557778e-05, "loss": 4.8649, "step": 78100 }, { "epoch": 0.1, "learning_rate": 9.034438034573244e-05, "loss": 4.7708, "step": 78150 }, { "epoch": 0.1, "learning_rate": 9.033812070588707e-05, "loss": 4.6653, "step": 78200 }, { "epoch": 0.1, "learning_rate": 9.033186106604171e-05, "loss": 4.8369, "step": 78250 }, { "epoch": 0.1, "learning_rate": 9.032560142619635e-05, "loss": 4.8279, "step": 78300 }, { "epoch": 0.1, "learning_rate": 9.031934178635099e-05, "loss": 4.722, "step": 78350 }, { "epoch": 0.1, "learning_rate": 9.031308214650562e-05, "loss": 4.8048, "step": 78400 }, { "epoch": 0.1, "learning_rate": 9.030682250666026e-05, "loss": 4.7429, "step": 78450 }, { "epoch": 0.1, "learning_rate": 9.03005628668149e-05, "loss": 4.6829, "step": 78500 }, { "epoch": 0.1, "learning_rate": 9.029430322696953e-05, "loss": 4.6444, "step": 78550 }, { "epoch": 0.1, "learning_rate": 9.028804358712417e-05, "loss": 4.7206, "step": 78600 }, { "epoch": 0.1, "learning_rate": 9.028178394727881e-05, "loss": 4.6379, "step": 78650 }, { "epoch": 0.1, "learning_rate": 9.027552430743346e-05, "loss": 4.7466, "step": 78700 }, { "epoch": 0.1, "learning_rate": 9.02692646675881e-05, "loss": 4.8062, "step": 78750 }, { "epoch": 0.1, "learning_rate": 9.026300502774273e-05, "loss": 4.6829, "step": 78800 }, { "epoch": 0.1, "learning_rate": 9.025674538789737e-05, "loss": 4.6888, "step": 78850 }, { "epoch": 0.1, "learning_rate": 9.025048574805201e-05, "loss": 4.736, "step": 78900 }, { "epoch": 0.1, "learning_rate": 9.024422610820665e-05, "loss": 4.7818, "step": 78950 }, { "epoch": 0.1, "learning_rate": 9.023796646836128e-05, "loss": 4.7175, "step": 79000 }, { "epoch": 0.1, "learning_rate": 9.023170682851592e-05, "loss": 4.7639, "step": 79050 }, { "epoch": 0.1, "learning_rate": 9.022544718867056e-05, "loss": 4.7575, "step": 79100 }, { "epoch": 0.1, "learning_rate": 9.02191875488252e-05, "loss": 4.8011, "step": 79150 }, { "epoch": 0.1, "learning_rate": 9.021292790897983e-05, "loss": 4.7577, "step": 79200 }, { "epoch": 0.1, "learning_rate": 9.020666826913447e-05, "loss": 4.7216, "step": 79250 }, { "epoch": 0.1, "learning_rate": 9.020040862928911e-05, "loss": 4.6773, "step": 79300 }, { "epoch": 0.1, "learning_rate": 9.019414898944375e-05, "loss": 4.6969, "step": 79350 }, { "epoch": 0.1, "learning_rate": 9.018788934959838e-05, "loss": 4.7627, "step": 79400 }, { "epoch": 0.1, "learning_rate": 9.018162970975302e-05, "loss": 4.7581, "step": 79450 }, { "epoch": 0.1, "learning_rate": 9.017537006990766e-05, "loss": 4.7632, "step": 79500 }, { "epoch": 0.1, "learning_rate": 9.01691104300623e-05, "loss": 4.6903, "step": 79550 }, { "epoch": 0.1, "learning_rate": 9.016285079021693e-05, "loss": 4.6487, "step": 79600 }, { "epoch": 0.1, "learning_rate": 9.015659115037157e-05, "loss": 4.7398, "step": 79650 }, { "epoch": 0.1, "learning_rate": 9.01503315105262e-05, "loss": 4.7801, "step": 79700 }, { "epoch": 0.1, "learning_rate": 9.014407187068086e-05, "loss": 4.6876, "step": 79750 }, { "epoch": 0.1, "learning_rate": 9.01378122308355e-05, "loss": 4.8514, "step": 79800 }, { "epoch": 0.1, "learning_rate": 9.013155259099013e-05, "loss": 4.7327, "step": 79850 }, { "epoch": 0.1, "learning_rate": 9.012529295114477e-05, "loss": 4.6236, "step": 79900 }, { "epoch": 0.1, "learning_rate": 9.01190333112994e-05, "loss": 4.6869, "step": 79950 }, { "epoch": 0.1, "learning_rate": 9.011277367145404e-05, "loss": 4.8552, "step": 80000 }, { "epoch": 0.1, "learning_rate": 9.010651403160868e-05, "loss": 4.6957, "step": 80050 }, { "epoch": 0.1, "learning_rate": 9.010025439176332e-05, "loss": 4.7635, "step": 80100 }, { "epoch": 0.1, "learning_rate": 9.009399475191796e-05, "loss": 4.6495, "step": 80150 }, { "epoch": 0.1, "learning_rate": 9.00877351120726e-05, "loss": 4.6948, "step": 80200 }, { "epoch": 0.1, "learning_rate": 9.008147547222724e-05, "loss": 4.734, "step": 80250 }, { "epoch": 0.1, "learning_rate": 9.007521583238188e-05, "loss": 4.778, "step": 80300 }, { "epoch": 0.1, "learning_rate": 9.006895619253652e-05, "loss": 4.7328, "step": 80350 }, { "epoch": 0.1, "learning_rate": 9.006269655269116e-05, "loss": 4.7534, "step": 80400 }, { "epoch": 0.1, "learning_rate": 9.00564369128458e-05, "loss": 4.5871, "step": 80450 }, { "epoch": 0.1, "learning_rate": 9.005017727300043e-05, "loss": 4.7049, "step": 80500 }, { "epoch": 0.1, "learning_rate": 9.004391763315507e-05, "loss": 4.7501, "step": 80550 }, { "epoch": 0.1, "learning_rate": 9.003765799330969e-05, "loss": 4.6838, "step": 80600 }, { "epoch": 0.1, "learning_rate": 9.003139835346433e-05, "loss": 4.7151, "step": 80650 }, { "epoch": 0.1, "learning_rate": 9.002513871361897e-05, "loss": 4.759, "step": 80700 }, { "epoch": 0.1, "learning_rate": 9.001887907377362e-05, "loss": 4.7252, "step": 80750 }, { "epoch": 0.1, "learning_rate": 9.001261943392825e-05, "loss": 4.7114, "step": 80800 }, { "epoch": 0.1, "learning_rate": 9.000635979408289e-05, "loss": 4.6958, "step": 80850 }, { "epoch": 0.1, "learning_rate": 9.000010015423753e-05, "loss": 4.8173, "step": 80900 }, { "epoch": 0.1, "learning_rate": 8.999384051439217e-05, "loss": 4.8072, "step": 80950 }, { "epoch": 0.1, "learning_rate": 8.99875808745468e-05, "loss": 4.6883, "step": 81000 }, { "epoch": 0.1, "learning_rate": 8.998132123470144e-05, "loss": 4.6742, "step": 81050 }, { "epoch": 0.1, "learning_rate": 8.997506159485608e-05, "loss": 4.7918, "step": 81100 }, { "epoch": 0.1, "learning_rate": 8.996880195501072e-05, "loss": 4.7142, "step": 81150 }, { "epoch": 0.1, "learning_rate": 8.996254231516535e-05, "loss": 4.7942, "step": 81200 }, { "epoch": 0.1, "learning_rate": 8.995628267531999e-05, "loss": 4.6289, "step": 81250 }, { "epoch": 0.1, "learning_rate": 8.995002303547464e-05, "loss": 4.6941, "step": 81300 }, { "epoch": 0.1, "learning_rate": 8.994376339562928e-05, "loss": 4.6284, "step": 81350 }, { "epoch": 0.1, "learning_rate": 8.993750375578392e-05, "loss": 4.713, "step": 81400 }, { "epoch": 0.1, "learning_rate": 8.993124411593855e-05, "loss": 4.8105, "step": 81450 }, { "epoch": 0.1, "learning_rate": 8.992498447609319e-05, "loss": 4.6716, "step": 81500 }, { "epoch": 0.1, "learning_rate": 8.991872483624783e-05, "loss": 4.75, "step": 81550 }, { "epoch": 0.1, "learning_rate": 8.991246519640247e-05, "loss": 4.6475, "step": 81600 }, { "epoch": 0.1, "learning_rate": 8.99062055565571e-05, "loss": 4.7382, "step": 81650 }, { "epoch": 0.1, "learning_rate": 8.989994591671174e-05, "loss": 4.6386, "step": 81700 }, { "epoch": 0.1, "learning_rate": 8.989368627686638e-05, "loss": 4.7418, "step": 81750 }, { "epoch": 0.1, "learning_rate": 8.988742663702102e-05, "loss": 4.6303, "step": 81800 }, { "epoch": 0.1, "learning_rate": 8.988116699717565e-05, "loss": 4.6674, "step": 81850 }, { "epoch": 0.1, "learning_rate": 8.987490735733029e-05, "loss": 4.7436, "step": 81900 }, { "epoch": 0.1, "learning_rate": 8.986864771748493e-05, "loss": 4.84, "step": 81950 }, { "epoch": 0.1, "learning_rate": 8.986238807763956e-05, "loss": 4.7075, "step": 82000 }, { "epoch": 0.1, "learning_rate": 8.98561284377942e-05, "loss": 4.7547, "step": 82050 }, { "epoch": 0.1, "learning_rate": 8.984986879794884e-05, "loss": 4.7744, "step": 82100 }, { "epoch": 0.1, "learning_rate": 8.984360915810348e-05, "loss": 4.6357, "step": 82150 }, { "epoch": 0.1, "learning_rate": 8.983734951825811e-05, "loss": 4.7392, "step": 82200 }, { "epoch": 0.1, "learning_rate": 8.983108987841275e-05, "loss": 4.7492, "step": 82250 }, { "epoch": 0.1, "learning_rate": 8.98248302385674e-05, "loss": 4.703, "step": 82300 }, { "epoch": 0.1, "learning_rate": 8.981857059872204e-05, "loss": 4.7259, "step": 82350 }, { "epoch": 0.1, "learning_rate": 8.981231095887668e-05, "loss": 4.7062, "step": 82400 }, { "epoch": 0.1, "learning_rate": 8.980605131903131e-05, "loss": 4.8032, "step": 82450 }, { "epoch": 0.1, "learning_rate": 8.979979167918595e-05, "loss": 4.6319, "step": 82500 }, { "epoch": 0.1, "learning_rate": 8.979353203934059e-05, "loss": 4.7324, "step": 82550 }, { "epoch": 0.1, "learning_rate": 8.978727239949523e-05, "loss": 4.598, "step": 82600 }, { "epoch": 0.1, "learning_rate": 8.978101275964986e-05, "loss": 4.7033, "step": 82650 }, { "epoch": 0.1, "learning_rate": 8.97747531198045e-05, "loss": 4.6597, "step": 82700 }, { "epoch": 0.1, "learning_rate": 8.976849347995914e-05, "loss": 4.7717, "step": 82750 }, { "epoch": 0.1, "learning_rate": 8.976223384011378e-05, "loss": 4.7862, "step": 82800 }, { "epoch": 0.1, "learning_rate": 8.975597420026843e-05, "loss": 4.5935, "step": 82850 }, { "epoch": 0.1, "learning_rate": 8.974971456042306e-05, "loss": 4.621, "step": 82900 }, { "epoch": 0.1, "learning_rate": 8.97434549205777e-05, "loss": 4.7514, "step": 82950 }, { "epoch": 0.1, "learning_rate": 8.973719528073234e-05, "loss": 4.7414, "step": 83000 }, { "epoch": 0.1, "learning_rate": 8.973093564088698e-05, "loss": 4.6449, "step": 83050 }, { "epoch": 0.1, "learning_rate": 8.972467600104161e-05, "loss": 4.838, "step": 83100 }, { "epoch": 0.1, "learning_rate": 8.971841636119625e-05, "loss": 4.6922, "step": 83150 }, { "epoch": 0.1, "learning_rate": 8.971215672135089e-05, "loss": 4.6945, "step": 83200 }, { "epoch": 0.1, "learning_rate": 8.970589708150552e-05, "loss": 4.6316, "step": 83250 }, { "epoch": 0.1, "learning_rate": 8.969963744166016e-05, "loss": 4.8084, "step": 83300 }, { "epoch": 0.1, "learning_rate": 8.96933778018148e-05, "loss": 4.7665, "step": 83350 }, { "epoch": 0.1, "learning_rate": 8.968711816196944e-05, "loss": 4.7076, "step": 83400 }, { "epoch": 0.1, "learning_rate": 8.968085852212407e-05, "loss": 4.692, "step": 83450 }, { "epoch": 0.1, "learning_rate": 8.967459888227871e-05, "loss": 4.6776, "step": 83500 }, { "epoch": 0.1, "learning_rate": 8.966833924243335e-05, "loss": 4.685, "step": 83550 }, { "epoch": 0.1, "learning_rate": 8.966207960258799e-05, "loss": 4.6418, "step": 83600 }, { "epoch": 0.1, "learning_rate": 8.965581996274262e-05, "loss": 4.6879, "step": 83650 }, { "epoch": 0.1, "learning_rate": 8.964956032289726e-05, "loss": 4.7135, "step": 83700 }, { "epoch": 0.1, "learning_rate": 8.96433006830519e-05, "loss": 4.6616, "step": 83750 }, { "epoch": 0.1, "learning_rate": 8.963704104320654e-05, "loss": 4.719, "step": 83800 }, { "epoch": 0.1, "learning_rate": 8.963078140336119e-05, "loss": 4.6849, "step": 83850 }, { "epoch": 0.1, "learning_rate": 8.962452176351582e-05, "loss": 4.688, "step": 83900 }, { "epoch": 0.1, "learning_rate": 8.961826212367046e-05, "loss": 4.6398, "step": 83950 }, { "epoch": 0.11, "learning_rate": 8.96120024838251e-05, "loss": 4.7129, "step": 84000 }, { "epoch": 0.11, "learning_rate": 8.960574284397974e-05, "loss": 4.74, "step": 84050 }, { "epoch": 0.11, "learning_rate": 8.959948320413437e-05, "loss": 4.7627, "step": 84100 }, { "epoch": 0.11, "learning_rate": 8.959322356428901e-05, "loss": 4.6485, "step": 84150 }, { "epoch": 0.11, "learning_rate": 8.958696392444365e-05, "loss": 4.6159, "step": 84200 }, { "epoch": 0.11, "learning_rate": 8.958070428459828e-05, "loss": 4.6232, "step": 84250 }, { "epoch": 0.11, "learning_rate": 8.957444464475292e-05, "loss": 4.6875, "step": 84300 }, { "epoch": 0.11, "learning_rate": 8.956818500490756e-05, "loss": 4.7255, "step": 84350 }, { "epoch": 0.11, "learning_rate": 8.956192536506221e-05, "loss": 4.6539, "step": 84400 }, { "epoch": 0.11, "learning_rate": 8.955566572521685e-05, "loss": 4.7278, "step": 84450 }, { "epoch": 0.11, "learning_rate": 8.954940608537149e-05, "loss": 4.6535, "step": 84500 }, { "epoch": 0.11, "learning_rate": 8.954314644552612e-05, "loss": 4.7339, "step": 84550 }, { "epoch": 0.11, "learning_rate": 8.953688680568076e-05, "loss": 4.6904, "step": 84600 }, { "epoch": 0.11, "learning_rate": 8.953062716583538e-05, "loss": 4.6689, "step": 84650 }, { "epoch": 0.11, "learning_rate": 8.952436752599002e-05, "loss": 4.7067, "step": 84700 }, { "epoch": 0.11, "learning_rate": 8.951810788614466e-05, "loss": 4.6996, "step": 84750 }, { "epoch": 0.11, "learning_rate": 8.95118482462993e-05, "loss": 4.7374, "step": 84800 }, { "epoch": 0.11, "learning_rate": 8.950558860645393e-05, "loss": 4.6367, "step": 84850 }, { "epoch": 0.11, "learning_rate": 8.949932896660858e-05, "loss": 4.6414, "step": 84900 }, { "epoch": 0.11, "learning_rate": 8.949306932676322e-05, "loss": 4.6264, "step": 84950 }, { "epoch": 0.11, "learning_rate": 8.948680968691786e-05, "loss": 4.6692, "step": 85000 }, { "epoch": 0.11, "learning_rate": 8.94805500470725e-05, "loss": 4.7165, "step": 85050 }, { "epoch": 0.11, "learning_rate": 8.947429040722713e-05, "loss": 4.6282, "step": 85100 }, { "epoch": 0.11, "learning_rate": 8.946803076738177e-05, "loss": 4.7505, "step": 85150 }, { "epoch": 0.11, "learning_rate": 8.946177112753641e-05, "loss": 4.7126, "step": 85200 }, { "epoch": 0.11, "learning_rate": 8.945551148769105e-05, "loss": 4.7042, "step": 85250 }, { "epoch": 0.11, "learning_rate": 8.944925184784568e-05, "loss": 4.8019, "step": 85300 }, { "epoch": 0.11, "learning_rate": 8.944299220800032e-05, "loss": 4.6479, "step": 85350 }, { "epoch": 0.11, "learning_rate": 8.943673256815497e-05, "loss": 4.7677, "step": 85400 }, { "epoch": 0.11, "learning_rate": 8.943047292830961e-05, "loss": 4.6612, "step": 85450 }, { "epoch": 0.11, "learning_rate": 8.942421328846425e-05, "loss": 4.7324, "step": 85500 }, { "epoch": 0.11, "learning_rate": 8.941795364861888e-05, "loss": 4.6774, "step": 85550 }, { "epoch": 0.11, "learning_rate": 8.941169400877352e-05, "loss": 4.7765, "step": 85600 }, { "epoch": 0.11, "learning_rate": 8.940543436892816e-05, "loss": 4.7098, "step": 85650 }, { "epoch": 0.11, "learning_rate": 8.93991747290828e-05, "loss": 4.7673, "step": 85700 }, { "epoch": 0.11, "learning_rate": 8.939291508923743e-05, "loss": 4.5591, "step": 85750 }, { "epoch": 0.11, "learning_rate": 8.938665544939207e-05, "loss": 4.7893, "step": 85800 }, { "epoch": 0.11, "learning_rate": 8.93803958095467e-05, "loss": 4.7224, "step": 85850 }, { "epoch": 0.11, "learning_rate": 8.937413616970134e-05, "loss": 4.5436, "step": 85900 }, { "epoch": 0.11, "learning_rate": 8.936787652985598e-05, "loss": 4.6915, "step": 85950 }, { "epoch": 0.11, "learning_rate": 8.936161689001062e-05, "loss": 4.651, "step": 86000 }, { "epoch": 0.11, "learning_rate": 8.935535725016526e-05, "loss": 4.6926, "step": 86050 }, { "epoch": 0.11, "learning_rate": 8.934909761031989e-05, "loss": 4.8272, "step": 86100 }, { "epoch": 0.11, "learning_rate": 8.934283797047453e-05, "loss": 4.7195, "step": 86150 }, { "epoch": 0.11, "learning_rate": 8.933657833062917e-05, "loss": 4.6559, "step": 86200 }, { "epoch": 0.11, "learning_rate": 8.93303186907838e-05, "loss": 4.6624, "step": 86250 }, { "epoch": 0.11, "learning_rate": 8.932405905093844e-05, "loss": 4.7579, "step": 86300 }, { "epoch": 0.11, "learning_rate": 8.931779941109308e-05, "loss": 4.6872, "step": 86350 }, { "epoch": 0.11, "learning_rate": 8.931153977124772e-05, "loss": 4.7191, "step": 86400 }, { "epoch": 0.11, "learning_rate": 8.930528013140237e-05, "loss": 4.7165, "step": 86450 }, { "epoch": 0.11, "learning_rate": 8.9299020491557e-05, "loss": 4.6001, "step": 86500 }, { "epoch": 0.11, "learning_rate": 8.929276085171164e-05, "loss": 4.7492, "step": 86550 }, { "epoch": 0.11, "learning_rate": 8.928650121186628e-05, "loss": 4.6381, "step": 86600 }, { "epoch": 0.11, "learning_rate": 8.928024157202092e-05, "loss": 4.7482, "step": 86650 }, { "epoch": 0.11, "learning_rate": 8.927398193217555e-05, "loss": 4.6256, "step": 86700 }, { "epoch": 0.11, "learning_rate": 8.926772229233019e-05, "loss": 4.6443, "step": 86750 }, { "epoch": 0.11, "learning_rate": 8.926146265248483e-05, "loss": 4.7506, "step": 86800 }, { "epoch": 0.11, "learning_rate": 8.925520301263947e-05, "loss": 4.7162, "step": 86850 }, { "epoch": 0.11, "learning_rate": 8.92489433727941e-05, "loss": 4.6777, "step": 86900 }, { "epoch": 0.11, "learning_rate": 8.924268373294875e-05, "loss": 4.7167, "step": 86950 }, { "epoch": 0.11, "learning_rate": 8.923642409310339e-05, "loss": 4.7128, "step": 87000 }, { "epoch": 0.11, "learning_rate": 8.923016445325803e-05, "loss": 4.7406, "step": 87050 }, { "epoch": 0.11, "learning_rate": 8.922390481341267e-05, "loss": 4.6017, "step": 87100 }, { "epoch": 0.11, "learning_rate": 8.92176451735673e-05, "loss": 4.6576, "step": 87150 }, { "epoch": 0.11, "learning_rate": 8.921138553372194e-05, "loss": 4.7292, "step": 87200 }, { "epoch": 0.11, "learning_rate": 8.920512589387658e-05, "loss": 4.6713, "step": 87250 }, { "epoch": 0.11, "learning_rate": 8.919886625403122e-05, "loss": 4.7073, "step": 87300 }, { "epoch": 0.11, "learning_rate": 8.919260661418585e-05, "loss": 4.7133, "step": 87350 }, { "epoch": 0.11, "learning_rate": 8.918634697434049e-05, "loss": 4.6872, "step": 87400 }, { "epoch": 0.11, "learning_rate": 8.918008733449513e-05, "loss": 4.7702, "step": 87450 }, { "epoch": 0.11, "learning_rate": 8.917382769464977e-05, "loss": 4.7467, "step": 87500 }, { "epoch": 0.11, "learning_rate": 8.91675680548044e-05, "loss": 4.7264, "step": 87550 }, { "epoch": 0.11, "learning_rate": 8.916130841495904e-05, "loss": 4.7174, "step": 87600 }, { "epoch": 0.11, "learning_rate": 8.915504877511368e-05, "loss": 4.7321, "step": 87650 }, { "epoch": 0.11, "learning_rate": 8.914878913526831e-05, "loss": 4.6891, "step": 87700 }, { "epoch": 0.11, "learning_rate": 8.914252949542295e-05, "loss": 4.7175, "step": 87750 }, { "epoch": 0.11, "learning_rate": 8.913626985557759e-05, "loss": 4.6804, "step": 87800 }, { "epoch": 0.11, "learning_rate": 8.913001021573223e-05, "loss": 4.6703, "step": 87850 }, { "epoch": 0.11, "learning_rate": 8.912375057588686e-05, "loss": 4.6683, "step": 87900 }, { "epoch": 0.11, "learning_rate": 8.91174909360415e-05, "loss": 4.7089, "step": 87950 }, { "epoch": 0.11, "learning_rate": 8.911123129619615e-05, "loss": 4.7045, "step": 88000 }, { "epoch": 0.11, "learning_rate": 8.910497165635079e-05, "loss": 4.7137, "step": 88050 }, { "epoch": 0.11, "learning_rate": 8.909871201650543e-05, "loss": 4.6977, "step": 88100 }, { "epoch": 0.11, "learning_rate": 8.909245237666006e-05, "loss": 4.6403, "step": 88150 }, { "epoch": 0.11, "learning_rate": 8.90861927368147e-05, "loss": 4.6497, "step": 88200 }, { "epoch": 0.11, "learning_rate": 8.907993309696934e-05, "loss": 4.6451, "step": 88250 }, { "epoch": 0.11, "learning_rate": 8.907367345712398e-05, "loss": 4.6798, "step": 88300 }, { "epoch": 0.11, "learning_rate": 8.906741381727861e-05, "loss": 4.6356, "step": 88350 }, { "epoch": 0.11, "learning_rate": 8.906115417743325e-05, "loss": 4.6465, "step": 88400 }, { "epoch": 0.11, "learning_rate": 8.905489453758789e-05, "loss": 4.6968, "step": 88450 }, { "epoch": 0.11, "learning_rate": 8.904863489774253e-05, "loss": 4.6841, "step": 88500 }, { "epoch": 0.11, "learning_rate": 8.904237525789718e-05, "loss": 4.7728, "step": 88550 }, { "epoch": 0.11, "learning_rate": 8.903611561805181e-05, "loss": 4.7661, "step": 88600 }, { "epoch": 0.11, "learning_rate": 8.902985597820645e-05, "loss": 4.7059, "step": 88650 }, { "epoch": 0.11, "learning_rate": 8.902359633836108e-05, "loss": 4.6902, "step": 88700 }, { "epoch": 0.11, "learning_rate": 8.901733669851571e-05, "loss": 4.7605, "step": 88750 }, { "epoch": 0.11, "learning_rate": 8.901107705867035e-05, "loss": 4.6893, "step": 88800 }, { "epoch": 0.11, "learning_rate": 8.900481741882499e-05, "loss": 4.7198, "step": 88850 }, { "epoch": 0.11, "learning_rate": 8.899855777897962e-05, "loss": 4.6918, "step": 88900 }, { "epoch": 0.11, "learning_rate": 8.899229813913426e-05, "loss": 4.7043, "step": 88950 }, { "epoch": 0.11, "learning_rate": 8.89860384992889e-05, "loss": 4.7212, "step": 89000 }, { "epoch": 0.11, "learning_rate": 8.897977885944355e-05, "loss": 4.7059, "step": 89050 }, { "epoch": 0.11, "learning_rate": 8.897351921959819e-05, "loss": 4.6373, "step": 89100 }, { "epoch": 0.11, "learning_rate": 8.896725957975282e-05, "loss": 4.6572, "step": 89150 }, { "epoch": 0.11, "learning_rate": 8.896099993990746e-05, "loss": 4.6589, "step": 89200 }, { "epoch": 0.11, "learning_rate": 8.89547403000621e-05, "loss": 4.6769, "step": 89250 }, { "epoch": 0.11, "learning_rate": 8.894848066021674e-05, "loss": 4.6736, "step": 89300 }, { "epoch": 0.11, "learning_rate": 8.894222102037137e-05, "loss": 4.7671, "step": 89350 }, { "epoch": 0.11, "learning_rate": 8.893596138052601e-05, "loss": 4.7329, "step": 89400 }, { "epoch": 0.11, "learning_rate": 8.892970174068065e-05, "loss": 4.5699, "step": 89450 }, { "epoch": 0.11, "learning_rate": 8.892344210083529e-05, "loss": 4.7475, "step": 89500 }, { "epoch": 0.11, "learning_rate": 8.891718246098994e-05, "loss": 4.6723, "step": 89550 }, { "epoch": 0.11, "learning_rate": 8.891092282114457e-05, "loss": 4.7, "step": 89600 }, { "epoch": 0.11, "learning_rate": 8.890466318129921e-05, "loss": 4.5611, "step": 89650 }, { "epoch": 0.11, "learning_rate": 8.889840354145385e-05, "loss": 4.6122, "step": 89700 }, { "epoch": 0.11, "learning_rate": 8.889214390160849e-05, "loss": 4.7558, "step": 89750 }, { "epoch": 0.11, "learning_rate": 8.888588426176312e-05, "loss": 4.6573, "step": 89800 }, { "epoch": 0.11, "learning_rate": 8.887962462191776e-05, "loss": 4.6997, "step": 89850 }, { "epoch": 0.11, "learning_rate": 8.88733649820724e-05, "loss": 4.6501, "step": 89900 }, { "epoch": 0.11, "learning_rate": 8.886710534222704e-05, "loss": 4.6486, "step": 89950 }, { "epoch": 0.11, "learning_rate": 8.886084570238167e-05, "loss": 4.6515, "step": 90000 }, { "epoch": 0.11, "learning_rate": 8.885458606253631e-05, "loss": 4.716, "step": 90050 }, { "epoch": 0.11, "learning_rate": 8.884832642269095e-05, "loss": 4.5199, "step": 90100 }, { "epoch": 0.11, "learning_rate": 8.884206678284558e-05, "loss": 4.6829, "step": 90150 }, { "epoch": 0.11, "learning_rate": 8.883580714300022e-05, "loss": 4.6129, "step": 90200 }, { "epoch": 0.11, "learning_rate": 8.882954750315486e-05, "loss": 4.6495, "step": 90250 }, { "epoch": 0.11, "learning_rate": 8.88232878633095e-05, "loss": 4.7283, "step": 90300 }, { "epoch": 0.11, "learning_rate": 8.881702822346413e-05, "loss": 4.6765, "step": 90350 }, { "epoch": 0.11, "learning_rate": 8.881076858361877e-05, "loss": 4.6333, "step": 90400 }, { "epoch": 0.11, "learning_rate": 8.880450894377341e-05, "loss": 4.5628, "step": 90450 }, { "epoch": 0.11, "learning_rate": 8.879824930392805e-05, "loss": 4.7373, "step": 90500 }, { "epoch": 0.11, "learning_rate": 8.879198966408268e-05, "loss": 4.7486, "step": 90550 }, { "epoch": 0.11, "learning_rate": 8.878573002423733e-05, "loss": 4.699, "step": 90600 }, { "epoch": 0.11, "learning_rate": 8.877947038439197e-05, "loss": 4.5657, "step": 90650 }, { "epoch": 0.11, "learning_rate": 8.877321074454661e-05, "loss": 4.6785, "step": 90700 }, { "epoch": 0.11, "learning_rate": 8.876695110470125e-05, "loss": 4.5411, "step": 90750 }, { "epoch": 0.11, "learning_rate": 8.876069146485588e-05, "loss": 4.6253, "step": 90800 }, { "epoch": 0.11, "learning_rate": 8.875443182501052e-05, "loss": 4.5771, "step": 90850 }, { "epoch": 0.11, "learning_rate": 8.874817218516516e-05, "loss": 4.7469, "step": 90900 }, { "epoch": 0.11, "learning_rate": 8.87419125453198e-05, "loss": 4.7254, "step": 90950 }, { "epoch": 0.11, "learning_rate": 8.873565290547443e-05, "loss": 4.7084, "step": 91000 }, { "epoch": 0.11, "learning_rate": 8.872939326562907e-05, "loss": 4.7575, "step": 91050 }, { "epoch": 0.11, "learning_rate": 8.872313362578372e-05, "loss": 4.6385, "step": 91100 }, { "epoch": 0.11, "learning_rate": 8.871687398593836e-05, "loss": 4.6479, "step": 91150 }, { "epoch": 0.11, "learning_rate": 8.8710614346093e-05, "loss": 4.7165, "step": 91200 }, { "epoch": 0.11, "learning_rate": 8.870435470624763e-05, "loss": 4.7795, "step": 91250 }, { "epoch": 0.11, "learning_rate": 8.869809506640227e-05, "loss": 4.7282, "step": 91300 }, { "epoch": 0.11, "learning_rate": 8.869183542655691e-05, "loss": 4.6759, "step": 91350 }, { "epoch": 0.11, "learning_rate": 8.868557578671155e-05, "loss": 4.6861, "step": 91400 }, { "epoch": 0.11, "learning_rate": 8.867931614686618e-05, "loss": 4.6574, "step": 91450 }, { "epoch": 0.11, "learning_rate": 8.867305650702082e-05, "loss": 4.5996, "step": 91500 }, { "epoch": 0.11, "learning_rate": 8.866679686717544e-05, "loss": 4.7207, "step": 91550 }, { "epoch": 0.11, "learning_rate": 8.866053722733008e-05, "loss": 4.5964, "step": 91600 }, { "epoch": 0.11, "learning_rate": 8.865427758748473e-05, "loss": 4.826, "step": 91650 }, { "epoch": 0.11, "learning_rate": 8.864801794763937e-05, "loss": 4.6873, "step": 91700 }, { "epoch": 0.11, "learning_rate": 8.8641758307794e-05, "loss": 4.5026, "step": 91750 }, { "epoch": 0.11, "learning_rate": 8.863549866794864e-05, "loss": 4.7417, "step": 91800 }, { "epoch": 0.11, "learning_rate": 8.862923902810328e-05, "loss": 4.5605, "step": 91850 }, { "epoch": 0.11, "learning_rate": 8.862297938825792e-05, "loss": 4.7093, "step": 91900 }, { "epoch": 0.11, "learning_rate": 8.861671974841256e-05, "loss": 4.7035, "step": 91950 }, { "epoch": 0.12, "learning_rate": 8.861046010856719e-05, "loss": 4.6482, "step": 92000 }, { "epoch": 0.12, "learning_rate": 8.860420046872183e-05, "loss": 4.8264, "step": 92050 }, { "epoch": 0.12, "learning_rate": 8.859794082887647e-05, "loss": 4.6644, "step": 92100 }, { "epoch": 0.12, "learning_rate": 8.859168118903112e-05, "loss": 4.701, "step": 92150 }, { "epoch": 0.12, "learning_rate": 8.858542154918576e-05, "loss": 4.7562, "step": 92200 }, { "epoch": 0.12, "learning_rate": 8.85791619093404e-05, "loss": 4.6847, "step": 92250 }, { "epoch": 0.12, "learning_rate": 8.857290226949503e-05, "loss": 4.763, "step": 92300 }, { "epoch": 0.12, "learning_rate": 8.856664262964967e-05, "loss": 4.6447, "step": 92350 }, { "epoch": 0.12, "learning_rate": 8.85603829898043e-05, "loss": 4.616, "step": 92400 }, { "epoch": 0.12, "learning_rate": 8.855412334995894e-05, "loss": 4.7529, "step": 92450 }, { "epoch": 0.12, "learning_rate": 8.854786371011358e-05, "loss": 4.6804, "step": 92500 }, { "epoch": 0.12, "learning_rate": 8.854160407026822e-05, "loss": 4.7588, "step": 92550 }, { "epoch": 0.12, "learning_rate": 8.853534443042285e-05, "loss": 4.6295, "step": 92600 }, { "epoch": 0.12, "learning_rate": 8.85290847905775e-05, "loss": 4.6445, "step": 92650 }, { "epoch": 0.12, "learning_rate": 8.852282515073214e-05, "loss": 4.64, "step": 92700 }, { "epoch": 0.12, "learning_rate": 8.851656551088677e-05, "loss": 4.7319, "step": 92750 }, { "epoch": 0.12, "learning_rate": 8.85103058710414e-05, "loss": 4.5632, "step": 92800 }, { "epoch": 0.12, "learning_rate": 8.850404623119604e-05, "loss": 4.6853, "step": 92850 }, { "epoch": 0.12, "learning_rate": 8.849778659135068e-05, "loss": 4.7247, "step": 92900 }, { "epoch": 0.12, "learning_rate": 8.849152695150532e-05, "loss": 4.6646, "step": 92950 }, { "epoch": 0.12, "learning_rate": 8.848526731165995e-05, "loss": 4.7556, "step": 93000 }, { "epoch": 0.12, "learning_rate": 8.847900767181459e-05, "loss": 4.6369, "step": 93050 }, { "epoch": 0.12, "learning_rate": 8.847274803196923e-05, "loss": 4.7038, "step": 93100 }, { "epoch": 0.12, "learning_rate": 8.846648839212387e-05, "loss": 4.6655, "step": 93150 }, { "epoch": 0.12, "learning_rate": 8.846022875227852e-05, "loss": 4.6411, "step": 93200 }, { "epoch": 0.12, "learning_rate": 8.845396911243315e-05, "loss": 4.6884, "step": 93250 }, { "epoch": 0.12, "learning_rate": 8.844770947258779e-05, "loss": 4.6653, "step": 93300 }, { "epoch": 0.12, "learning_rate": 8.844144983274243e-05, "loss": 4.6892, "step": 93350 }, { "epoch": 0.12, "learning_rate": 8.843519019289707e-05, "loss": 4.6872, "step": 93400 }, { "epoch": 0.12, "learning_rate": 8.84289305530517e-05, "loss": 4.6548, "step": 93450 }, { "epoch": 0.12, "learning_rate": 8.842267091320634e-05, "loss": 4.6105, "step": 93500 }, { "epoch": 0.12, "learning_rate": 8.841641127336098e-05, "loss": 4.8093, "step": 93550 }, { "epoch": 0.12, "learning_rate": 8.841015163351561e-05, "loss": 4.6211, "step": 93600 }, { "epoch": 0.12, "learning_rate": 8.840389199367025e-05, "loss": 4.7199, "step": 93650 }, { "epoch": 0.12, "learning_rate": 8.83976323538249e-05, "loss": 4.756, "step": 93700 }, { "epoch": 0.12, "learning_rate": 8.839137271397954e-05, "loss": 4.6766, "step": 93750 }, { "epoch": 0.12, "learning_rate": 8.838511307413418e-05, "loss": 4.6744, "step": 93800 }, { "epoch": 0.12, "learning_rate": 8.837885343428881e-05, "loss": 4.7808, "step": 93850 }, { "epoch": 0.12, "learning_rate": 8.837259379444345e-05, "loss": 4.6389, "step": 93900 }, { "epoch": 0.12, "learning_rate": 8.836633415459809e-05, "loss": 4.55, "step": 93950 }, { "epoch": 0.12, "learning_rate": 8.836007451475273e-05, "loss": 4.6295, "step": 94000 }, { "epoch": 0.12, "learning_rate": 8.835381487490736e-05, "loss": 4.7568, "step": 94050 }, { "epoch": 0.12, "learning_rate": 8.8347555235062e-05, "loss": 4.7114, "step": 94100 }, { "epoch": 0.12, "learning_rate": 8.834129559521664e-05, "loss": 4.714, "step": 94150 }, { "epoch": 0.12, "learning_rate": 8.833503595537128e-05, "loss": 4.7107, "step": 94200 }, { "epoch": 0.12, "learning_rate": 8.832877631552591e-05, "loss": 4.6742, "step": 94250 }, { "epoch": 0.12, "learning_rate": 8.832251667568055e-05, "loss": 4.6579, "step": 94300 }, { "epoch": 0.12, "learning_rate": 8.831625703583519e-05, "loss": 4.7947, "step": 94350 }, { "epoch": 0.12, "learning_rate": 8.830999739598983e-05, "loss": 4.7256, "step": 94400 }, { "epoch": 0.12, "learning_rate": 8.830373775614446e-05, "loss": 4.6812, "step": 94450 }, { "epoch": 0.12, "learning_rate": 8.82974781162991e-05, "loss": 4.7476, "step": 94500 }, { "epoch": 0.12, "learning_rate": 8.829121847645374e-05, "loss": 4.5652, "step": 94550 }, { "epoch": 0.12, "learning_rate": 8.828495883660837e-05, "loss": 4.5838, "step": 94600 }, { "epoch": 0.12, "learning_rate": 8.827869919676301e-05, "loss": 4.5822, "step": 94650 }, { "epoch": 0.12, "learning_rate": 8.827243955691765e-05, "loss": 4.665, "step": 94700 }, { "epoch": 0.12, "learning_rate": 8.82661799170723e-05, "loss": 4.5589, "step": 94750 }, { "epoch": 0.12, "learning_rate": 8.825992027722694e-05, "loss": 4.7264, "step": 94800 }, { "epoch": 0.12, "learning_rate": 8.825366063738158e-05, "loss": 4.7185, "step": 94850 }, { "epoch": 0.12, "learning_rate": 8.824740099753621e-05, "loss": 4.6762, "step": 94900 }, { "epoch": 0.12, "learning_rate": 8.824114135769085e-05, "loss": 4.8204, "step": 94950 }, { "epoch": 0.12, "learning_rate": 8.823488171784549e-05, "loss": 4.6318, "step": 95000 }, { "epoch": 0.12, "learning_rate": 8.822862207800012e-05, "loss": 4.6202, "step": 95050 }, { "epoch": 0.12, "learning_rate": 8.822236243815476e-05, "loss": 4.7147, "step": 95100 }, { "epoch": 0.12, "learning_rate": 8.82161027983094e-05, "loss": 4.6291, "step": 95150 }, { "epoch": 0.12, "learning_rate": 8.820984315846404e-05, "loss": 4.6102, "step": 95200 }, { "epoch": 0.12, "learning_rate": 8.820358351861869e-05, "loss": 4.6207, "step": 95250 }, { "epoch": 0.12, "learning_rate": 8.819732387877332e-05, "loss": 4.6017, "step": 95300 }, { "epoch": 0.12, "learning_rate": 8.819106423892796e-05, "loss": 4.598, "step": 95350 }, { "epoch": 0.12, "learning_rate": 8.81848045990826e-05, "loss": 4.5163, "step": 95400 }, { "epoch": 0.12, "learning_rate": 8.817854495923724e-05, "loss": 4.7225, "step": 95450 }, { "epoch": 0.12, "learning_rate": 8.817228531939187e-05, "loss": 4.749, "step": 95500 }, { "epoch": 0.12, "learning_rate": 8.816602567954651e-05, "loss": 4.63, "step": 95550 }, { "epoch": 0.12, "learning_rate": 8.815976603970114e-05, "loss": 4.763, "step": 95600 }, { "epoch": 0.12, "learning_rate": 8.815350639985577e-05, "loss": 4.5727, "step": 95650 }, { "epoch": 0.12, "learning_rate": 8.814724676001041e-05, "loss": 4.643, "step": 95700 }, { "epoch": 0.12, "learning_rate": 8.814098712016506e-05, "loss": 4.6483, "step": 95750 }, { "epoch": 0.12, "learning_rate": 8.81347274803197e-05, "loss": 4.6398, "step": 95800 }, { "epoch": 0.12, "learning_rate": 8.812846784047434e-05, "loss": 4.584, "step": 95850 }, { "epoch": 0.12, "learning_rate": 8.812220820062897e-05, "loss": 4.596, "step": 95900 }, { "epoch": 0.12, "learning_rate": 8.811594856078361e-05, "loss": 4.6372, "step": 95950 }, { "epoch": 0.12, "learning_rate": 8.810968892093825e-05, "loss": 4.6496, "step": 96000 }, { "epoch": 0.12, "learning_rate": 8.810342928109288e-05, "loss": 4.5875, "step": 96050 }, { "epoch": 0.12, "learning_rate": 8.809716964124752e-05, "loss": 4.5572, "step": 96100 }, { "epoch": 0.12, "learning_rate": 8.809091000140216e-05, "loss": 4.5819, "step": 96150 }, { "epoch": 0.12, "learning_rate": 8.80846503615568e-05, "loss": 4.7162, "step": 96200 }, { "epoch": 0.12, "learning_rate": 8.807839072171143e-05, "loss": 4.8047, "step": 96250 }, { "epoch": 0.12, "learning_rate": 8.807213108186608e-05, "loss": 4.7054, "step": 96300 }, { "epoch": 0.12, "learning_rate": 8.806587144202072e-05, "loss": 4.6995, "step": 96350 }, { "epoch": 0.12, "learning_rate": 8.805961180217536e-05, "loss": 4.5668, "step": 96400 }, { "epoch": 0.12, "learning_rate": 8.805335216233e-05, "loss": 4.647, "step": 96450 }, { "epoch": 0.12, "learning_rate": 8.804709252248463e-05, "loss": 4.7144, "step": 96500 }, { "epoch": 0.12, "learning_rate": 8.804083288263927e-05, "loss": 4.5999, "step": 96550 }, { "epoch": 0.12, "learning_rate": 8.803457324279391e-05, "loss": 4.6368, "step": 96600 }, { "epoch": 0.12, "learning_rate": 8.802831360294855e-05, "loss": 4.6746, "step": 96650 }, { "epoch": 0.12, "learning_rate": 8.802205396310318e-05, "loss": 4.7281, "step": 96700 }, { "epoch": 0.12, "learning_rate": 8.801579432325782e-05, "loss": 4.6258, "step": 96750 }, { "epoch": 0.12, "learning_rate": 8.800953468341246e-05, "loss": 4.5893, "step": 96800 }, { "epoch": 0.12, "learning_rate": 8.80032750435671e-05, "loss": 4.6316, "step": 96850 }, { "epoch": 0.12, "learning_rate": 8.799701540372173e-05, "loss": 4.6904, "step": 96900 }, { "epoch": 0.12, "learning_rate": 8.799075576387637e-05, "loss": 4.6363, "step": 96950 }, { "epoch": 0.12, "learning_rate": 8.798449612403101e-05, "loss": 4.7321, "step": 97000 }, { "epoch": 0.12, "learning_rate": 8.797823648418564e-05, "loss": 4.6951, "step": 97050 }, { "epoch": 0.12, "learning_rate": 8.797197684434028e-05, "loss": 4.6147, "step": 97100 }, { "epoch": 0.12, "learning_rate": 8.796571720449492e-05, "loss": 4.6088, "step": 97150 }, { "epoch": 0.12, "learning_rate": 8.795945756464956e-05, "loss": 4.6459, "step": 97200 }, { "epoch": 0.12, "learning_rate": 8.79531979248042e-05, "loss": 4.6814, "step": 97250 }, { "epoch": 0.12, "learning_rate": 8.794693828495884e-05, "loss": 4.5962, "step": 97300 }, { "epoch": 0.12, "learning_rate": 8.794067864511348e-05, "loss": 4.6478, "step": 97350 }, { "epoch": 0.12, "learning_rate": 8.793441900526812e-05, "loss": 4.642, "step": 97400 }, { "epoch": 0.12, "learning_rate": 8.792815936542276e-05, "loss": 4.5963, "step": 97450 }, { "epoch": 0.12, "learning_rate": 8.79218997255774e-05, "loss": 4.6257, "step": 97500 }, { "epoch": 0.12, "learning_rate": 8.791564008573203e-05, "loss": 4.7128, "step": 97550 }, { "epoch": 0.12, "learning_rate": 8.790938044588667e-05, "loss": 4.6916, "step": 97600 }, { "epoch": 0.12, "learning_rate": 8.79031208060413e-05, "loss": 4.6241, "step": 97650 }, { "epoch": 0.12, "learning_rate": 8.789686116619594e-05, "loss": 4.6504, "step": 97700 }, { "epoch": 0.12, "learning_rate": 8.789060152635058e-05, "loss": 4.5793, "step": 97750 }, { "epoch": 0.12, "learning_rate": 8.788434188650522e-05, "loss": 4.6792, "step": 97800 }, { "epoch": 0.12, "learning_rate": 8.787808224665987e-05, "loss": 4.6924, "step": 97850 }, { "epoch": 0.12, "learning_rate": 8.78718226068145e-05, "loss": 4.6772, "step": 97900 }, { "epoch": 0.12, "learning_rate": 8.786556296696914e-05, "loss": 4.693, "step": 97950 }, { "epoch": 0.12, "learning_rate": 8.785930332712378e-05, "loss": 4.7405, "step": 98000 }, { "epoch": 0.12, "learning_rate": 8.785304368727842e-05, "loss": 4.7116, "step": 98050 }, { "epoch": 0.12, "learning_rate": 8.784678404743306e-05, "loss": 4.6298, "step": 98100 }, { "epoch": 0.12, "learning_rate": 8.784052440758769e-05, "loss": 4.589, "step": 98150 }, { "epoch": 0.12, "learning_rate": 8.783426476774233e-05, "loss": 4.6586, "step": 98200 }, { "epoch": 0.12, "learning_rate": 8.782800512789697e-05, "loss": 4.6333, "step": 98250 }, { "epoch": 0.12, "learning_rate": 8.78217454880516e-05, "loss": 4.6836, "step": 98300 }, { "epoch": 0.12, "learning_rate": 8.781548584820624e-05, "loss": 4.7564, "step": 98350 }, { "epoch": 0.12, "learning_rate": 8.780922620836088e-05, "loss": 4.5994, "step": 98400 }, { "epoch": 0.12, "learning_rate": 8.780296656851552e-05, "loss": 4.5449, "step": 98450 }, { "epoch": 0.12, "learning_rate": 8.779670692867015e-05, "loss": 4.606, "step": 98500 }, { "epoch": 0.12, "learning_rate": 8.779044728882479e-05, "loss": 4.7761, "step": 98550 }, { "epoch": 0.12, "learning_rate": 8.778418764897943e-05, "loss": 4.5777, "step": 98600 }, { "epoch": 0.12, "learning_rate": 8.777792800913407e-05, "loss": 4.6968, "step": 98650 }, { "epoch": 0.12, "learning_rate": 8.77716683692887e-05, "loss": 4.5946, "step": 98700 }, { "epoch": 0.12, "learning_rate": 8.776540872944334e-05, "loss": 4.7041, "step": 98750 }, { "epoch": 0.12, "learning_rate": 8.775914908959798e-05, "loss": 4.6633, "step": 98800 }, { "epoch": 0.12, "learning_rate": 8.775288944975262e-05, "loss": 4.6359, "step": 98850 }, { "epoch": 0.12, "learning_rate": 8.774662980990727e-05, "loss": 4.6043, "step": 98900 }, { "epoch": 0.12, "learning_rate": 8.77403701700619e-05, "loss": 4.7214, "step": 98950 }, { "epoch": 0.12, "learning_rate": 8.773411053021654e-05, "loss": 4.606, "step": 99000 }, { "epoch": 0.12, "learning_rate": 8.772785089037118e-05, "loss": 4.6624, "step": 99050 }, { "epoch": 0.12, "learning_rate": 8.772159125052582e-05, "loss": 4.6475, "step": 99100 }, { "epoch": 0.12, "learning_rate": 8.771533161068045e-05, "loss": 4.6306, "step": 99150 }, { "epoch": 0.12, "learning_rate": 8.770907197083509e-05, "loss": 4.6203, "step": 99200 }, { "epoch": 0.12, "learning_rate": 8.770281233098973e-05, "loss": 4.6718, "step": 99250 }, { "epoch": 0.12, "learning_rate": 8.769655269114437e-05, "loss": 4.6379, "step": 99300 }, { "epoch": 0.12, "learning_rate": 8.7690293051299e-05, "loss": 4.6614, "step": 99350 }, { "epoch": 0.12, "learning_rate": 8.768403341145365e-05, "loss": 4.7055, "step": 99400 }, { "epoch": 0.12, "learning_rate": 8.767777377160829e-05, "loss": 4.6282, "step": 99450 }, { "epoch": 0.12, "learning_rate": 8.767151413176293e-05, "loss": 4.6638, "step": 99500 }, { "epoch": 0.12, "learning_rate": 8.766525449191757e-05, "loss": 4.6528, "step": 99550 }, { "epoch": 0.12, "learning_rate": 8.765899485207219e-05, "loss": 4.6601, "step": 99600 }, { "epoch": 0.12, "learning_rate": 8.765273521222683e-05, "loss": 4.6606, "step": 99650 }, { "epoch": 0.12, "learning_rate": 8.764647557238146e-05, "loss": 4.6089, "step": 99700 }, { "epoch": 0.12, "learning_rate": 8.76402159325361e-05, "loss": 4.5494, "step": 99750 }, { "epoch": 0.12, "learning_rate": 8.763395629269074e-05, "loss": 4.6833, "step": 99800 }, { "epoch": 0.12, "learning_rate": 8.762769665284538e-05, "loss": 4.5772, "step": 99850 }, { "epoch": 0.12, "learning_rate": 8.762143701300003e-05, "loss": 4.662, "step": 99900 }, { "epoch": 0.12, "learning_rate": 8.761517737315466e-05, "loss": 4.5357, "step": 99950 }, { "epoch": 0.13, "learning_rate": 8.76089177333093e-05, "loss": 4.73, "step": 100000 }, { "epoch": 0.13, "learning_rate": 8.760265809346394e-05, "loss": 4.7277, "step": 100050 }, { "epoch": 0.13, "learning_rate": 8.759639845361858e-05, "loss": 4.6497, "step": 100100 }, { "epoch": 0.13, "learning_rate": 8.759013881377321e-05, "loss": 4.7537, "step": 100150 }, { "epoch": 0.13, "learning_rate": 8.758387917392785e-05, "loss": 4.5982, "step": 100200 }, { "epoch": 0.13, "learning_rate": 8.757761953408249e-05, "loss": 4.7034, "step": 100250 }, { "epoch": 0.13, "learning_rate": 8.757135989423713e-05, "loss": 4.6436, "step": 100300 }, { "epoch": 0.13, "learning_rate": 8.756510025439176e-05, "loss": 4.6378, "step": 100350 }, { "epoch": 0.13, "learning_rate": 8.75588406145464e-05, "loss": 4.6245, "step": 100400 }, { "epoch": 0.13, "learning_rate": 8.755258097470105e-05, "loss": 4.6132, "step": 100450 }, { "epoch": 0.13, "learning_rate": 8.754632133485569e-05, "loss": 4.6805, "step": 100500 }, { "epoch": 0.13, "learning_rate": 8.754006169501033e-05, "loss": 4.7593, "step": 100550 }, { "epoch": 0.13, "learning_rate": 8.753380205516496e-05, "loss": 4.6413, "step": 100600 }, { "epoch": 0.13, "learning_rate": 8.75275424153196e-05, "loss": 4.6947, "step": 100650 }, { "epoch": 0.13, "learning_rate": 8.752128277547424e-05, "loss": 4.5964, "step": 100700 }, { "epoch": 0.13, "learning_rate": 8.751502313562887e-05, "loss": 4.7478, "step": 100750 }, { "epoch": 0.13, "learning_rate": 8.750876349578351e-05, "loss": 4.5746, "step": 100800 }, { "epoch": 0.13, "learning_rate": 8.750250385593815e-05, "loss": 4.7298, "step": 100850 }, { "epoch": 0.13, "learning_rate": 8.749624421609279e-05, "loss": 4.5872, "step": 100900 }, { "epoch": 0.13, "learning_rate": 8.748998457624742e-05, "loss": 4.6753, "step": 100950 }, { "epoch": 0.13, "learning_rate": 8.748372493640206e-05, "loss": 4.5943, "step": 101000 }, { "epoch": 0.13, "learning_rate": 8.74774652965567e-05, "loss": 4.661, "step": 101050 }, { "epoch": 0.13, "learning_rate": 8.747120565671134e-05, "loss": 4.7462, "step": 101100 }, { "epoch": 0.13, "learning_rate": 8.746494601686597e-05, "loss": 4.594, "step": 101150 }, { "epoch": 0.13, "learning_rate": 8.745868637702061e-05, "loss": 4.6316, "step": 101200 }, { "epoch": 0.13, "learning_rate": 8.745242673717525e-05, "loss": 4.6892, "step": 101250 }, { "epoch": 0.13, "learning_rate": 8.744616709732989e-05, "loss": 4.5811, "step": 101300 }, { "epoch": 0.13, "learning_rate": 8.743990745748452e-05, "loss": 4.6746, "step": 101350 }, { "epoch": 0.13, "learning_rate": 8.743364781763916e-05, "loss": 4.6084, "step": 101400 }, { "epoch": 0.13, "learning_rate": 8.742738817779381e-05, "loss": 4.5918, "step": 101450 }, { "epoch": 0.13, "learning_rate": 8.742112853794845e-05, "loss": 4.7085, "step": 101500 }, { "epoch": 0.13, "learning_rate": 8.741486889810309e-05, "loss": 4.6357, "step": 101550 }, { "epoch": 0.13, "learning_rate": 8.740860925825772e-05, "loss": 4.6585, "step": 101600 }, { "epoch": 0.13, "learning_rate": 8.740234961841236e-05, "loss": 4.7711, "step": 101650 }, { "epoch": 0.13, "learning_rate": 8.7396089978567e-05, "loss": 4.6273, "step": 101700 }, { "epoch": 0.13, "learning_rate": 8.738983033872164e-05, "loss": 4.704, "step": 101750 }, { "epoch": 0.13, "learning_rate": 8.738357069887627e-05, "loss": 4.6335, "step": 101800 }, { "epoch": 0.13, "learning_rate": 8.737731105903091e-05, "loss": 4.6834, "step": 101850 }, { "epoch": 0.13, "learning_rate": 8.737105141918555e-05, "loss": 4.6077, "step": 101900 }, { "epoch": 0.13, "learning_rate": 8.736479177934018e-05, "loss": 4.7102, "step": 101950 }, { "epoch": 0.13, "learning_rate": 8.735853213949484e-05, "loss": 4.647, "step": 102000 }, { "epoch": 0.13, "learning_rate": 8.735227249964947e-05, "loss": 4.5672, "step": 102050 }, { "epoch": 0.13, "learning_rate": 8.734601285980411e-05, "loss": 4.6348, "step": 102100 }, { "epoch": 0.13, "learning_rate": 8.733975321995875e-05, "loss": 4.6442, "step": 102150 }, { "epoch": 0.13, "learning_rate": 8.733349358011338e-05, "loss": 4.7328, "step": 102200 }, { "epoch": 0.13, "learning_rate": 8.732723394026802e-05, "loss": 4.6444, "step": 102250 }, { "epoch": 0.13, "learning_rate": 8.732097430042266e-05, "loss": 4.7245, "step": 102300 }, { "epoch": 0.13, "learning_rate": 8.73147146605773e-05, "loss": 4.7639, "step": 102350 }, { "epoch": 0.13, "learning_rate": 8.730845502073193e-05, "loss": 4.6425, "step": 102400 }, { "epoch": 0.13, "learning_rate": 8.730219538088656e-05, "loss": 4.5069, "step": 102450 }, { "epoch": 0.13, "learning_rate": 8.729593574104121e-05, "loss": 4.6772, "step": 102500 }, { "epoch": 0.13, "learning_rate": 8.728967610119585e-05, "loss": 4.7159, "step": 102550 }, { "epoch": 0.13, "learning_rate": 8.728341646135048e-05, "loss": 4.7009, "step": 102600 }, { "epoch": 0.13, "learning_rate": 8.727715682150512e-05, "loss": 4.5837, "step": 102650 }, { "epoch": 0.13, "learning_rate": 8.727089718165976e-05, "loss": 4.6362, "step": 102700 }, { "epoch": 0.13, "learning_rate": 8.72646375418144e-05, "loss": 4.6928, "step": 102750 }, { "epoch": 0.13, "learning_rate": 8.725837790196903e-05, "loss": 4.7469, "step": 102800 }, { "epoch": 0.13, "learning_rate": 8.725211826212367e-05, "loss": 4.5269, "step": 102850 }, { "epoch": 0.13, "learning_rate": 8.724585862227831e-05, "loss": 4.6167, "step": 102900 }, { "epoch": 0.13, "learning_rate": 8.723959898243294e-05, "loss": 4.6474, "step": 102950 }, { "epoch": 0.13, "learning_rate": 8.72333393425876e-05, "loss": 4.616, "step": 103000 }, { "epoch": 0.13, "learning_rate": 8.722707970274223e-05, "loss": 4.5639, "step": 103050 }, { "epoch": 0.13, "learning_rate": 8.722082006289687e-05, "loss": 4.6344, "step": 103100 }, { "epoch": 0.13, "learning_rate": 8.721456042305151e-05, "loss": 4.5888, "step": 103150 }, { "epoch": 0.13, "learning_rate": 8.720830078320614e-05, "loss": 4.6103, "step": 103200 }, { "epoch": 0.13, "learning_rate": 8.720204114336078e-05, "loss": 4.574, "step": 103250 }, { "epoch": 0.13, "learning_rate": 8.719578150351542e-05, "loss": 4.6956, "step": 103300 }, { "epoch": 0.13, "learning_rate": 8.718952186367006e-05, "loss": 4.6352, "step": 103350 }, { "epoch": 0.13, "learning_rate": 8.71832622238247e-05, "loss": 4.6735, "step": 103400 }, { "epoch": 0.13, "learning_rate": 8.717700258397933e-05, "loss": 4.6204, "step": 103450 }, { "epoch": 0.13, "learning_rate": 8.717074294413397e-05, "loss": 4.6001, "step": 103500 }, { "epoch": 0.13, "learning_rate": 8.716448330428862e-05, "loss": 4.7022, "step": 103550 }, { "epoch": 0.13, "learning_rate": 8.715822366444326e-05, "loss": 4.6126, "step": 103600 }, { "epoch": 0.13, "learning_rate": 8.715196402459788e-05, "loss": 4.6652, "step": 103650 }, { "epoch": 0.13, "learning_rate": 8.714570438475252e-05, "loss": 4.5414, "step": 103700 }, { "epoch": 0.13, "learning_rate": 8.713944474490716e-05, "loss": 4.6486, "step": 103750 }, { "epoch": 0.13, "learning_rate": 8.713318510506179e-05, "loss": 4.6902, "step": 103800 }, { "epoch": 0.13, "learning_rate": 8.712692546521643e-05, "loss": 4.6465, "step": 103850 }, { "epoch": 0.13, "learning_rate": 8.712066582537107e-05, "loss": 4.6041, "step": 103900 }, { "epoch": 0.13, "learning_rate": 8.71144061855257e-05, "loss": 4.595, "step": 103950 }, { "epoch": 0.13, "learning_rate": 8.710814654568034e-05, "loss": 4.6429, "step": 104000 }, { "epoch": 0.13, "learning_rate": 8.710188690583499e-05, "loss": 4.6613, "step": 104050 }, { "epoch": 0.13, "learning_rate": 8.709562726598963e-05, "loss": 4.6644, "step": 104100 }, { "epoch": 0.13, "learning_rate": 8.708936762614427e-05, "loss": 4.4133, "step": 104150 }, { "epoch": 0.13, "learning_rate": 8.70831079862989e-05, "loss": 4.6648, "step": 104200 }, { "epoch": 0.13, "learning_rate": 8.707684834645354e-05, "loss": 4.6289, "step": 104250 }, { "epoch": 0.13, "learning_rate": 8.707058870660818e-05, "loss": 4.6019, "step": 104300 }, { "epoch": 0.13, "learning_rate": 8.706432906676282e-05, "loss": 4.6553, "step": 104350 }, { "epoch": 0.13, "learning_rate": 8.705806942691745e-05, "loss": 4.664, "step": 104400 }, { "epoch": 0.13, "learning_rate": 8.705180978707209e-05, "loss": 4.5264, "step": 104450 }, { "epoch": 0.13, "learning_rate": 8.704555014722673e-05, "loss": 4.5772, "step": 104500 }, { "epoch": 0.13, "learning_rate": 8.703929050738138e-05, "loss": 4.6172, "step": 104550 }, { "epoch": 0.13, "learning_rate": 8.703303086753602e-05, "loss": 4.6091, "step": 104600 }, { "epoch": 0.13, "learning_rate": 8.702677122769065e-05, "loss": 4.6118, "step": 104650 }, { "epoch": 0.13, "learning_rate": 8.702051158784529e-05, "loss": 4.6182, "step": 104700 }, { "epoch": 0.13, "learning_rate": 8.701425194799993e-05, "loss": 4.6188, "step": 104750 }, { "epoch": 0.13, "learning_rate": 8.700799230815457e-05, "loss": 4.7028, "step": 104800 }, { "epoch": 0.13, "learning_rate": 8.70017326683092e-05, "loss": 4.6302, "step": 104850 }, { "epoch": 0.13, "learning_rate": 8.699547302846384e-05, "loss": 4.6688, "step": 104900 }, { "epoch": 0.13, "learning_rate": 8.698921338861848e-05, "loss": 4.6391, "step": 104950 }, { "epoch": 0.13, "learning_rate": 8.698295374877312e-05, "loss": 4.658, "step": 105000 }, { "epoch": 0.13, "learning_rate": 8.697669410892775e-05, "loss": 4.6636, "step": 105050 }, { "epoch": 0.13, "learning_rate": 8.697043446908239e-05, "loss": 4.5558, "step": 105100 }, { "epoch": 0.13, "learning_rate": 8.696417482923703e-05, "loss": 4.7438, "step": 105150 }, { "epoch": 0.13, "learning_rate": 8.695791518939167e-05, "loss": 4.6059, "step": 105200 }, { "epoch": 0.13, "learning_rate": 8.69516555495463e-05, "loss": 4.7373, "step": 105250 }, { "epoch": 0.13, "learning_rate": 8.694539590970094e-05, "loss": 4.6419, "step": 105300 }, { "epoch": 0.13, "learning_rate": 8.693913626985558e-05, "loss": 4.6541, "step": 105350 }, { "epoch": 0.13, "learning_rate": 8.693287663001021e-05, "loss": 4.552, "step": 105400 }, { "epoch": 0.13, "learning_rate": 8.692661699016485e-05, "loss": 4.577, "step": 105450 }, { "epoch": 0.13, "learning_rate": 8.692035735031949e-05, "loss": 4.604, "step": 105500 }, { "epoch": 0.13, "learning_rate": 8.691409771047413e-05, "loss": 4.6828, "step": 105550 }, { "epoch": 0.13, "learning_rate": 8.690783807062878e-05, "loss": 4.6014, "step": 105600 }, { "epoch": 0.13, "learning_rate": 8.690157843078341e-05, "loss": 4.649, "step": 105650 }, { "epoch": 0.13, "learning_rate": 8.689531879093805e-05, "loss": 4.5771, "step": 105700 }, { "epoch": 0.13, "learning_rate": 8.688905915109269e-05, "loss": 4.7137, "step": 105750 }, { "epoch": 0.13, "learning_rate": 8.688279951124733e-05, "loss": 4.7217, "step": 105800 }, { "epoch": 0.13, "learning_rate": 8.687653987140196e-05, "loss": 4.6131, "step": 105850 }, { "epoch": 0.13, "learning_rate": 8.68702802315566e-05, "loss": 4.6333, "step": 105900 }, { "epoch": 0.13, "learning_rate": 8.686402059171124e-05, "loss": 4.6667, "step": 105950 }, { "epoch": 0.13, "learning_rate": 8.685776095186588e-05, "loss": 4.6643, "step": 106000 }, { "epoch": 0.13, "learning_rate": 8.685150131202051e-05, "loss": 4.6164, "step": 106050 }, { "epoch": 0.13, "learning_rate": 8.684524167217516e-05, "loss": 4.6001, "step": 106100 }, { "epoch": 0.13, "learning_rate": 8.68389820323298e-05, "loss": 4.5997, "step": 106150 }, { "epoch": 0.13, "learning_rate": 8.683272239248444e-05, "loss": 4.6822, "step": 106200 }, { "epoch": 0.13, "learning_rate": 8.682646275263908e-05, "loss": 4.7182, "step": 106250 }, { "epoch": 0.13, "learning_rate": 8.682020311279371e-05, "loss": 4.5646, "step": 106300 }, { "epoch": 0.13, "learning_rate": 8.681394347294835e-05, "loss": 4.6211, "step": 106350 }, { "epoch": 0.13, "learning_rate": 8.680768383310299e-05, "loss": 4.6918, "step": 106400 }, { "epoch": 0.13, "learning_rate": 8.680142419325763e-05, "loss": 4.5538, "step": 106450 }, { "epoch": 0.13, "learning_rate": 8.679516455341225e-05, "loss": 4.6555, "step": 106500 }, { "epoch": 0.13, "learning_rate": 8.678890491356689e-05, "loss": 4.6778, "step": 106550 }, { "epoch": 0.13, "learning_rate": 8.678264527372152e-05, "loss": 4.612, "step": 106600 }, { "epoch": 0.13, "learning_rate": 8.677638563387617e-05, "loss": 4.6192, "step": 106650 }, { "epoch": 0.13, "learning_rate": 8.677012599403081e-05, "loss": 4.6342, "step": 106700 }, { "epoch": 0.13, "learning_rate": 8.676386635418545e-05, "loss": 4.741, "step": 106750 }, { "epoch": 0.13, "learning_rate": 8.675760671434009e-05, "loss": 4.5402, "step": 106800 }, { "epoch": 0.13, "learning_rate": 8.675134707449472e-05, "loss": 4.6812, "step": 106850 }, { "epoch": 0.13, "learning_rate": 8.674508743464936e-05, "loss": 4.5787, "step": 106900 }, { "epoch": 0.13, "learning_rate": 8.6738827794804e-05, "loss": 4.5697, "step": 106950 }, { "epoch": 0.13, "learning_rate": 8.673256815495864e-05, "loss": 4.5097, "step": 107000 }, { "epoch": 0.13, "learning_rate": 8.672630851511327e-05, "loss": 4.5828, "step": 107050 }, { "epoch": 0.13, "learning_rate": 8.672004887526791e-05, "loss": 4.5333, "step": 107100 }, { "epoch": 0.13, "learning_rate": 8.671378923542256e-05, "loss": 4.5815, "step": 107150 }, { "epoch": 0.13, "learning_rate": 8.67075295955772e-05, "loss": 4.638, "step": 107200 }, { "epoch": 0.13, "learning_rate": 8.670126995573184e-05, "loss": 4.626, "step": 107250 }, { "epoch": 0.13, "learning_rate": 8.669501031588647e-05, "loss": 4.5386, "step": 107300 }, { "epoch": 0.13, "learning_rate": 8.668875067604111e-05, "loss": 4.6048, "step": 107350 }, { "epoch": 0.13, "learning_rate": 8.668249103619575e-05, "loss": 4.6906, "step": 107400 }, { "epoch": 0.13, "learning_rate": 8.667623139635039e-05, "loss": 4.5375, "step": 107450 }, { "epoch": 0.13, "learning_rate": 8.666997175650502e-05, "loss": 4.6963, "step": 107500 }, { "epoch": 0.13, "learning_rate": 8.666371211665966e-05, "loss": 4.7485, "step": 107550 }, { "epoch": 0.13, "learning_rate": 8.66574524768143e-05, "loss": 4.6562, "step": 107600 }, { "epoch": 0.13, "learning_rate": 8.665119283696895e-05, "loss": 4.6621, "step": 107650 }, { "epoch": 0.13, "learning_rate": 8.664493319712357e-05, "loss": 4.5565, "step": 107700 }, { "epoch": 0.13, "learning_rate": 8.663867355727821e-05, "loss": 4.6115, "step": 107750 }, { "epoch": 0.13, "learning_rate": 8.663241391743285e-05, "loss": 4.4692, "step": 107800 }, { "epoch": 0.13, "learning_rate": 8.662615427758748e-05, "loss": 4.5996, "step": 107850 }, { "epoch": 0.13, "learning_rate": 8.661989463774212e-05, "loss": 4.6222, "step": 107900 }, { "epoch": 0.13, "learning_rate": 8.661363499789676e-05, "loss": 4.6772, "step": 107950 }, { "epoch": 0.14, "learning_rate": 8.66073753580514e-05, "loss": 4.4892, "step": 108000 }, { "epoch": 0.14, "learning_rate": 8.660111571820603e-05, "loss": 4.5857, "step": 108050 }, { "epoch": 0.14, "learning_rate": 8.659485607836067e-05, "loss": 4.6099, "step": 108100 }, { "epoch": 0.14, "learning_rate": 8.658859643851531e-05, "loss": 4.6226, "step": 108150 }, { "epoch": 0.14, "learning_rate": 8.658233679866996e-05, "loss": 4.5481, "step": 108200 }, { "epoch": 0.14, "learning_rate": 8.65760771588246e-05, "loss": 4.7489, "step": 108250 }, { "epoch": 0.14, "learning_rate": 8.656981751897923e-05, "loss": 4.6319, "step": 108300 }, { "epoch": 0.14, "learning_rate": 8.656355787913387e-05, "loss": 4.6761, "step": 108350 }, { "epoch": 0.14, "learning_rate": 8.655729823928851e-05, "loss": 4.7393, "step": 108400 }, { "epoch": 0.14, "learning_rate": 8.655103859944315e-05, "loss": 4.518, "step": 108450 }, { "epoch": 0.14, "learning_rate": 8.654477895959778e-05, "loss": 4.6002, "step": 108500 }, { "epoch": 0.14, "learning_rate": 8.653851931975242e-05, "loss": 4.5674, "step": 108550 }, { "epoch": 0.14, "learning_rate": 8.653225967990706e-05, "loss": 4.5522, "step": 108600 }, { "epoch": 0.14, "learning_rate": 8.65260000400617e-05, "loss": 4.6201, "step": 108650 }, { "epoch": 0.14, "learning_rate": 8.651974040021635e-05, "loss": 4.5763, "step": 108700 }, { "epoch": 0.14, "learning_rate": 8.651348076037098e-05, "loss": 4.4725, "step": 108750 }, { "epoch": 0.14, "learning_rate": 8.650722112052562e-05, "loss": 4.5951, "step": 108800 }, { "epoch": 0.14, "learning_rate": 8.650096148068026e-05, "loss": 4.57, "step": 108850 }, { "epoch": 0.14, "learning_rate": 8.64947018408349e-05, "loss": 4.6326, "step": 108900 }, { "epoch": 0.14, "learning_rate": 8.648844220098953e-05, "loss": 4.6737, "step": 108950 }, { "epoch": 0.14, "learning_rate": 8.648218256114417e-05, "loss": 4.6565, "step": 109000 }, { "epoch": 0.14, "learning_rate": 8.647592292129881e-05, "loss": 4.6351, "step": 109050 }, { "epoch": 0.14, "learning_rate": 8.646966328145344e-05, "loss": 4.6408, "step": 109100 }, { "epoch": 0.14, "learning_rate": 8.646340364160808e-05, "loss": 4.6526, "step": 109150 }, { "epoch": 0.14, "learning_rate": 8.645714400176272e-05, "loss": 4.7316, "step": 109200 }, { "epoch": 0.14, "learning_rate": 8.645088436191736e-05, "loss": 4.6178, "step": 109250 }, { "epoch": 0.14, "learning_rate": 8.6444624722072e-05, "loss": 4.506, "step": 109300 }, { "epoch": 0.14, "learning_rate": 8.643836508222663e-05, "loss": 4.5724, "step": 109350 }, { "epoch": 0.14, "learning_rate": 8.643210544238127e-05, "loss": 4.651, "step": 109400 }, { "epoch": 0.14, "learning_rate": 8.64258458025359e-05, "loss": 4.6449, "step": 109450 }, { "epoch": 0.14, "learning_rate": 8.641958616269054e-05, "loss": 4.5922, "step": 109500 }, { "epoch": 0.14, "learning_rate": 8.641332652284518e-05, "loss": 4.5794, "step": 109550 }, { "epoch": 0.14, "learning_rate": 8.640706688299982e-05, "loss": 4.6392, "step": 109600 }, { "epoch": 0.14, "learning_rate": 8.640080724315446e-05, "loss": 4.605, "step": 109650 }, { "epoch": 0.14, "learning_rate": 8.639454760330909e-05, "loss": 4.6948, "step": 109700 }, { "epoch": 0.14, "learning_rate": 8.638828796346374e-05, "loss": 4.6179, "step": 109750 }, { "epoch": 0.14, "learning_rate": 8.638202832361838e-05, "loss": 4.6462, "step": 109800 }, { "epoch": 0.14, "learning_rate": 8.637576868377302e-05, "loss": 4.6481, "step": 109850 }, { "epoch": 0.14, "learning_rate": 8.636950904392766e-05, "loss": 4.62, "step": 109900 }, { "epoch": 0.14, "learning_rate": 8.636324940408229e-05, "loss": 4.5041, "step": 109950 }, { "epoch": 0.14, "learning_rate": 8.635698976423693e-05, "loss": 4.7147, "step": 110000 }, { "epoch": 0.14, "learning_rate": 8.635073012439157e-05, "loss": 4.5862, "step": 110050 }, { "epoch": 0.14, "learning_rate": 8.63444704845462e-05, "loss": 4.6399, "step": 110100 }, { "epoch": 0.14, "learning_rate": 8.633821084470084e-05, "loss": 4.6378, "step": 110150 }, { "epoch": 0.14, "learning_rate": 8.633195120485548e-05, "loss": 4.7177, "step": 110200 }, { "epoch": 0.14, "learning_rate": 8.632569156501013e-05, "loss": 4.6172, "step": 110250 }, { "epoch": 0.14, "learning_rate": 8.631943192516477e-05, "loss": 4.5959, "step": 110300 }, { "epoch": 0.14, "learning_rate": 8.63131722853194e-05, "loss": 4.6571, "step": 110350 }, { "epoch": 0.14, "learning_rate": 8.630691264547404e-05, "loss": 4.5999, "step": 110400 }, { "epoch": 0.14, "learning_rate": 8.630065300562868e-05, "loss": 4.5691, "step": 110450 }, { "epoch": 0.14, "learning_rate": 8.629439336578332e-05, "loss": 4.6227, "step": 110500 }, { "epoch": 0.14, "learning_rate": 8.628813372593794e-05, "loss": 4.6406, "step": 110550 }, { "epoch": 0.14, "learning_rate": 8.628187408609258e-05, "loss": 4.6652, "step": 110600 }, { "epoch": 0.14, "learning_rate": 8.627561444624722e-05, "loss": 4.701, "step": 110650 }, { "epoch": 0.14, "learning_rate": 8.626935480640185e-05, "loss": 4.7019, "step": 110700 }, { "epoch": 0.14, "learning_rate": 8.626309516655649e-05, "loss": 4.5734, "step": 110750 }, { "epoch": 0.14, "learning_rate": 8.625683552671114e-05, "loss": 4.5688, "step": 110800 }, { "epoch": 0.14, "learning_rate": 8.625057588686578e-05, "loss": 4.6532, "step": 110850 }, { "epoch": 0.14, "learning_rate": 8.624431624702042e-05, "loss": 4.7014, "step": 110900 }, { "epoch": 0.14, "learning_rate": 8.623805660717505e-05, "loss": 4.5822, "step": 110950 }, { "epoch": 0.14, "learning_rate": 8.623179696732969e-05, "loss": 4.5862, "step": 111000 }, { "epoch": 0.14, "learning_rate": 8.622553732748433e-05, "loss": 4.6067, "step": 111050 }, { "epoch": 0.14, "learning_rate": 8.621927768763896e-05, "loss": 4.5717, "step": 111100 }, { "epoch": 0.14, "learning_rate": 8.62130180477936e-05, "loss": 4.6667, "step": 111150 }, { "epoch": 0.14, "learning_rate": 8.620675840794824e-05, "loss": 4.7206, "step": 111200 }, { "epoch": 0.14, "learning_rate": 8.620049876810288e-05, "loss": 4.5919, "step": 111250 }, { "epoch": 0.14, "learning_rate": 8.619423912825753e-05, "loss": 4.6903, "step": 111300 }, { "epoch": 0.14, "learning_rate": 8.618797948841217e-05, "loss": 4.5963, "step": 111350 }, { "epoch": 0.14, "learning_rate": 8.61817198485668e-05, "loss": 4.5647, "step": 111400 }, { "epoch": 0.14, "learning_rate": 8.617546020872144e-05, "loss": 4.5635, "step": 111450 }, { "epoch": 0.14, "learning_rate": 8.616920056887608e-05, "loss": 4.72, "step": 111500 }, { "epoch": 0.14, "learning_rate": 8.616294092903071e-05, "loss": 4.6807, "step": 111550 }, { "epoch": 0.14, "learning_rate": 8.615668128918535e-05, "loss": 4.7225, "step": 111600 }, { "epoch": 0.14, "learning_rate": 8.615042164933999e-05, "loss": 4.6068, "step": 111650 }, { "epoch": 0.14, "learning_rate": 8.614416200949463e-05, "loss": 4.6446, "step": 111700 }, { "epoch": 0.14, "learning_rate": 8.613790236964926e-05, "loss": 4.5783, "step": 111750 }, { "epoch": 0.14, "learning_rate": 8.61316427298039e-05, "loss": 4.6415, "step": 111800 }, { "epoch": 0.14, "learning_rate": 8.612538308995854e-05, "loss": 4.5253, "step": 111850 }, { "epoch": 0.14, "learning_rate": 8.611912345011318e-05, "loss": 4.6933, "step": 111900 }, { "epoch": 0.14, "learning_rate": 8.611286381026781e-05, "loss": 4.6241, "step": 111950 }, { "epoch": 0.14, "learning_rate": 8.610660417042245e-05, "loss": 4.5858, "step": 112000 }, { "epoch": 0.14, "learning_rate": 8.610034453057709e-05, "loss": 4.6476, "step": 112050 }, { "epoch": 0.14, "learning_rate": 8.609408489073173e-05, "loss": 4.6741, "step": 112100 }, { "epoch": 0.14, "learning_rate": 8.608782525088636e-05, "loss": 4.5437, "step": 112150 }, { "epoch": 0.14, "learning_rate": 8.6081565611041e-05, "loss": 4.5972, "step": 112200 }, { "epoch": 0.14, "learning_rate": 8.607530597119564e-05, "loss": 4.6371, "step": 112250 }, { "epoch": 0.14, "learning_rate": 8.606904633135027e-05, "loss": 4.7025, "step": 112300 }, { "epoch": 0.14, "learning_rate": 8.606278669150493e-05, "loss": 4.5561, "step": 112350 }, { "epoch": 0.14, "learning_rate": 8.605652705165956e-05, "loss": 4.5676, "step": 112400 }, { "epoch": 0.14, "learning_rate": 8.60502674118142e-05, "loss": 4.5434, "step": 112450 }, { "epoch": 0.14, "learning_rate": 8.604400777196884e-05, "loss": 4.7121, "step": 112500 }, { "epoch": 0.14, "learning_rate": 8.603774813212347e-05, "loss": 4.703, "step": 112550 }, { "epoch": 0.14, "learning_rate": 8.603148849227811e-05, "loss": 4.5932, "step": 112600 }, { "epoch": 0.14, "learning_rate": 8.602522885243275e-05, "loss": 4.5927, "step": 112650 }, { "epoch": 0.14, "learning_rate": 8.601896921258739e-05, "loss": 4.5809, "step": 112700 }, { "epoch": 0.14, "learning_rate": 8.601270957274202e-05, "loss": 4.5636, "step": 112750 }, { "epoch": 0.14, "learning_rate": 8.600644993289666e-05, "loss": 4.647, "step": 112800 }, { "epoch": 0.14, "learning_rate": 8.600019029305131e-05, "loss": 4.5662, "step": 112850 }, { "epoch": 0.14, "learning_rate": 8.599393065320595e-05, "loss": 4.6477, "step": 112900 }, { "epoch": 0.14, "learning_rate": 8.598767101336059e-05, "loss": 4.6109, "step": 112950 }, { "epoch": 0.14, "learning_rate": 8.598141137351522e-05, "loss": 4.5663, "step": 113000 }, { "epoch": 0.14, "learning_rate": 8.597515173366986e-05, "loss": 4.5771, "step": 113050 }, { "epoch": 0.14, "learning_rate": 8.59688920938245e-05, "loss": 4.6443, "step": 113100 }, { "epoch": 0.14, "learning_rate": 8.596263245397914e-05, "loss": 4.6244, "step": 113150 }, { "epoch": 0.14, "learning_rate": 8.595637281413377e-05, "loss": 4.6202, "step": 113200 }, { "epoch": 0.14, "learning_rate": 8.595011317428841e-05, "loss": 4.5988, "step": 113250 }, { "epoch": 0.14, "learning_rate": 8.594385353444305e-05, "loss": 4.6224, "step": 113300 }, { "epoch": 0.14, "learning_rate": 8.593759389459769e-05, "loss": 4.6215, "step": 113350 }, { "epoch": 0.14, "learning_rate": 8.593133425475232e-05, "loss": 4.5812, "step": 113400 }, { "epoch": 0.14, "learning_rate": 8.592507461490696e-05, "loss": 4.6249, "step": 113450 }, { "epoch": 0.14, "learning_rate": 8.59188149750616e-05, "loss": 4.5777, "step": 113500 }, { "epoch": 0.14, "learning_rate": 8.591255533521623e-05, "loss": 4.5683, "step": 113550 }, { "epoch": 0.14, "learning_rate": 8.590629569537087e-05, "loss": 4.6373, "step": 113600 }, { "epoch": 0.14, "learning_rate": 8.590003605552551e-05, "loss": 4.5935, "step": 113650 }, { "epoch": 0.14, "learning_rate": 8.589377641568015e-05, "loss": 4.5525, "step": 113700 }, { "epoch": 0.14, "learning_rate": 8.588751677583478e-05, "loss": 4.6128, "step": 113750 }, { "epoch": 0.14, "learning_rate": 8.588125713598942e-05, "loss": 4.5949, "step": 113800 }, { "epoch": 0.14, "learning_rate": 8.587499749614406e-05, "loss": 4.5913, "step": 113850 }, { "epoch": 0.14, "learning_rate": 8.586873785629871e-05, "loss": 4.6106, "step": 113900 }, { "epoch": 0.14, "learning_rate": 8.586247821645335e-05, "loss": 4.5782, "step": 113950 }, { "epoch": 0.14, "learning_rate": 8.585621857660798e-05, "loss": 4.6377, "step": 114000 }, { "epoch": 0.14, "learning_rate": 8.584995893676262e-05, "loss": 4.5233, "step": 114050 }, { "epoch": 0.14, "learning_rate": 8.584369929691726e-05, "loss": 4.6276, "step": 114100 }, { "epoch": 0.14, "learning_rate": 8.58374396570719e-05, "loss": 4.6506, "step": 114150 }, { "epoch": 0.14, "learning_rate": 8.583118001722653e-05, "loss": 4.4356, "step": 114200 }, { "epoch": 0.14, "learning_rate": 8.582492037738117e-05, "loss": 4.5724, "step": 114250 }, { "epoch": 0.14, "learning_rate": 8.581866073753581e-05, "loss": 4.6787, "step": 114300 }, { "epoch": 0.14, "learning_rate": 8.581240109769045e-05, "loss": 4.535, "step": 114350 }, { "epoch": 0.14, "learning_rate": 8.58061414578451e-05, "loss": 4.563, "step": 114400 }, { "epoch": 0.14, "learning_rate": 8.579988181799973e-05, "loss": 4.7351, "step": 114450 }, { "epoch": 0.14, "learning_rate": 8.579362217815437e-05, "loss": 4.568, "step": 114500 }, { "epoch": 0.14, "learning_rate": 8.578736253830901e-05, "loss": 4.6686, "step": 114550 }, { "epoch": 0.14, "learning_rate": 8.578110289846363e-05, "loss": 4.6947, "step": 114600 }, { "epoch": 0.14, "learning_rate": 8.577484325861827e-05, "loss": 4.5654, "step": 114650 }, { "epoch": 0.14, "learning_rate": 8.576858361877291e-05, "loss": 4.6272, "step": 114700 }, { "epoch": 0.14, "learning_rate": 8.576232397892754e-05, "loss": 4.5824, "step": 114750 }, { "epoch": 0.14, "learning_rate": 8.575606433908218e-05, "loss": 4.5091, "step": 114800 }, { "epoch": 0.14, "learning_rate": 8.574980469923682e-05, "loss": 4.5266, "step": 114850 }, { "epoch": 0.14, "learning_rate": 8.574354505939147e-05, "loss": 4.6417, "step": 114900 }, { "epoch": 0.14, "learning_rate": 8.573728541954611e-05, "loss": 4.5943, "step": 114950 }, { "epoch": 0.14, "learning_rate": 8.573102577970074e-05, "loss": 4.6515, "step": 115000 }, { "epoch": 0.14, "learning_rate": 8.572476613985538e-05, "loss": 4.5814, "step": 115050 }, { "epoch": 0.14, "learning_rate": 8.571850650001002e-05, "loss": 4.6448, "step": 115100 }, { "epoch": 0.14, "learning_rate": 8.571224686016466e-05, "loss": 4.6247, "step": 115150 }, { "epoch": 0.14, "learning_rate": 8.57059872203193e-05, "loss": 4.6018, "step": 115200 }, { "epoch": 0.14, "learning_rate": 8.569972758047393e-05, "loss": 4.6344, "step": 115250 }, { "epoch": 0.14, "learning_rate": 8.569346794062857e-05, "loss": 4.6044, "step": 115300 }, { "epoch": 0.14, "learning_rate": 8.56872083007832e-05, "loss": 4.6844, "step": 115350 }, { "epoch": 0.14, "learning_rate": 8.568094866093784e-05, "loss": 4.6016, "step": 115400 }, { "epoch": 0.14, "learning_rate": 8.56746890210925e-05, "loss": 4.5257, "step": 115450 }, { "epoch": 0.14, "learning_rate": 8.566842938124713e-05, "loss": 4.6115, "step": 115500 }, { "epoch": 0.14, "learning_rate": 8.566216974140177e-05, "loss": 4.6413, "step": 115550 }, { "epoch": 0.14, "learning_rate": 8.56559101015564e-05, "loss": 4.652, "step": 115600 }, { "epoch": 0.14, "learning_rate": 8.564965046171104e-05, "loss": 4.5666, "step": 115650 }, { "epoch": 0.14, "learning_rate": 8.564339082186568e-05, "loss": 4.6543, "step": 115700 }, { "epoch": 0.14, "learning_rate": 8.563713118202032e-05, "loss": 4.622, "step": 115750 }, { "epoch": 0.14, "learning_rate": 8.563087154217496e-05, "loss": 4.6598, "step": 115800 }, { "epoch": 0.14, "learning_rate": 8.562461190232959e-05, "loss": 4.5543, "step": 115850 }, { "epoch": 0.14, "learning_rate": 8.561835226248423e-05, "loss": 4.726, "step": 115900 }, { "epoch": 0.14, "learning_rate": 8.561209262263887e-05, "loss": 4.5922, "step": 115950 }, { "epoch": 0.15, "learning_rate": 8.56058329827935e-05, "loss": 4.6411, "step": 116000 }, { "epoch": 0.15, "learning_rate": 8.559957334294814e-05, "loss": 4.6538, "step": 116050 }, { "epoch": 0.15, "learning_rate": 8.559331370310278e-05, "loss": 4.5857, "step": 116100 }, { "epoch": 0.15, "learning_rate": 8.558705406325742e-05, "loss": 4.5156, "step": 116150 }, { "epoch": 0.15, "learning_rate": 8.558079442341205e-05, "loss": 4.5395, "step": 116200 }, { "epoch": 0.15, "learning_rate": 8.557453478356669e-05, "loss": 4.5087, "step": 116250 }, { "epoch": 0.15, "learning_rate": 8.556827514372133e-05, "loss": 4.611, "step": 116300 }, { "epoch": 0.15, "learning_rate": 8.556201550387597e-05, "loss": 4.5037, "step": 116350 }, { "epoch": 0.15, "learning_rate": 8.55557558640306e-05, "loss": 4.596, "step": 116400 }, { "epoch": 0.15, "learning_rate": 8.554949622418525e-05, "loss": 4.5389, "step": 116450 }, { "epoch": 0.15, "learning_rate": 8.554323658433989e-05, "loss": 4.653, "step": 116500 }, { "epoch": 0.15, "learning_rate": 8.553697694449453e-05, "loss": 4.5716, "step": 116550 }, { "epoch": 0.15, "learning_rate": 8.553071730464917e-05, "loss": 4.5892, "step": 116600 }, { "epoch": 0.15, "learning_rate": 8.55244576648038e-05, "loss": 4.6325, "step": 116650 }, { "epoch": 0.15, "learning_rate": 8.551819802495844e-05, "loss": 4.6291, "step": 116700 }, { "epoch": 0.15, "learning_rate": 8.551193838511308e-05, "loss": 4.6059, "step": 116750 }, { "epoch": 0.15, "learning_rate": 8.550567874526772e-05, "loss": 4.741, "step": 116800 }, { "epoch": 0.15, "learning_rate": 8.549941910542235e-05, "loss": 4.6126, "step": 116850 }, { "epoch": 0.15, "learning_rate": 8.549315946557699e-05, "loss": 4.5773, "step": 116900 }, { "epoch": 0.15, "learning_rate": 8.548689982573163e-05, "loss": 4.7138, "step": 116950 }, { "epoch": 0.15, "learning_rate": 8.548064018588628e-05, "loss": 4.5766, "step": 117000 }, { "epoch": 0.15, "learning_rate": 8.547438054604092e-05, "loss": 4.6058, "step": 117050 }, { "epoch": 0.15, "learning_rate": 8.546812090619555e-05, "loss": 4.5959, "step": 117100 }, { "epoch": 0.15, "learning_rate": 8.546186126635019e-05, "loss": 4.6305, "step": 117150 }, { "epoch": 0.15, "learning_rate": 8.545560162650483e-05, "loss": 4.5949, "step": 117200 }, { "epoch": 0.15, "learning_rate": 8.544934198665947e-05, "loss": 4.5151, "step": 117250 }, { "epoch": 0.15, "learning_rate": 8.54430823468141e-05, "loss": 4.5726, "step": 117300 }, { "epoch": 0.15, "learning_rate": 8.543682270696874e-05, "loss": 4.7263, "step": 117350 }, { "epoch": 0.15, "learning_rate": 8.543056306712338e-05, "loss": 4.5462, "step": 117400 }, { "epoch": 0.15, "learning_rate": 8.5424303427278e-05, "loss": 4.6203, "step": 117450 }, { "epoch": 0.15, "learning_rate": 8.541804378743265e-05, "loss": 4.5882, "step": 117500 }, { "epoch": 0.15, "learning_rate": 8.541178414758729e-05, "loss": 4.6105, "step": 117550 }, { "epoch": 0.15, "learning_rate": 8.540552450774193e-05, "loss": 4.6148, "step": 117600 }, { "epoch": 0.15, "learning_rate": 8.539926486789656e-05, "loss": 4.6319, "step": 117650 }, { "epoch": 0.15, "learning_rate": 8.53930052280512e-05, "loss": 4.5073, "step": 117700 }, { "epoch": 0.15, "learning_rate": 8.538674558820584e-05, "loss": 4.677, "step": 117750 }, { "epoch": 0.15, "learning_rate": 8.538048594836048e-05, "loss": 4.6717, "step": 117800 }, { "epoch": 0.15, "learning_rate": 8.537422630851511e-05, "loss": 4.6109, "step": 117850 }, { "epoch": 0.15, "learning_rate": 8.536796666866975e-05, "loss": 4.6306, "step": 117900 }, { "epoch": 0.15, "learning_rate": 8.536170702882439e-05, "loss": 4.6696, "step": 117950 }, { "epoch": 0.15, "learning_rate": 8.535544738897902e-05, "loss": 4.661, "step": 118000 }, { "epoch": 0.15, "learning_rate": 8.534918774913368e-05, "loss": 4.5158, "step": 118050 }, { "epoch": 0.15, "learning_rate": 8.534292810928831e-05, "loss": 4.6041, "step": 118100 }, { "epoch": 0.15, "learning_rate": 8.533666846944295e-05, "loss": 4.5498, "step": 118150 }, { "epoch": 0.15, "learning_rate": 8.533040882959759e-05, "loss": 4.5715, "step": 118200 }, { "epoch": 0.15, "learning_rate": 8.532414918975223e-05, "loss": 4.5988, "step": 118250 }, { "epoch": 0.15, "learning_rate": 8.531788954990686e-05, "loss": 4.5688, "step": 118300 }, { "epoch": 0.15, "learning_rate": 8.53116299100615e-05, "loss": 4.5644, "step": 118350 }, { "epoch": 0.15, "learning_rate": 8.530537027021614e-05, "loss": 4.5768, "step": 118400 }, { "epoch": 0.15, "learning_rate": 8.529911063037077e-05, "loss": 4.6084, "step": 118450 }, { "epoch": 0.15, "learning_rate": 8.529285099052541e-05, "loss": 4.5729, "step": 118500 } ], "logging_steps": 50, "max_steps": 799792, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.0382987584821658e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }