{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999471809686243, "eval_steps": 500, "global_step": 1016500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9990164053747546e-05, "loss": 1.4256, "step": 500 }, { "epoch": 0.0, "learning_rate": 1.998032810749509e-05, "loss": 1.4033, "step": 1000 }, { "epoch": 0.0, "learning_rate": 1.9970492161242635e-05, "loss": 1.3732, "step": 1500 }, { "epoch": 0.01, "learning_rate": 1.996065621499018e-05, "loss": 1.3641, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.9950820268737727e-05, "loss": 1.3819, "step": 2500 }, { "epoch": 0.01, "learning_rate": 1.994098432248527e-05, "loss": 1.3614, "step": 3000 }, { "epoch": 0.01, "learning_rate": 1.9931148376232815e-05, "loss": 1.3657, "step": 3500 }, { "epoch": 0.01, "learning_rate": 1.992131242998036e-05, "loss": 1.3703, "step": 4000 }, { "epoch": 0.01, "learning_rate": 1.9911476483727904e-05, "loss": 1.3459, "step": 4500 }, { "epoch": 0.01, "learning_rate": 1.9901640537475448e-05, "loss": 1.351, "step": 5000 }, { "epoch": 0.02, "learning_rate": 1.9891804591222993e-05, "loss": 1.3489, "step": 5500 }, { "epoch": 0.02, "learning_rate": 1.9881968644970537e-05, "loss": 1.3667, "step": 6000 }, { "epoch": 0.02, "learning_rate": 1.9872132698718085e-05, "loss": 1.3659, "step": 6500 }, { "epoch": 0.02, "learning_rate": 1.986229675246563e-05, "loss": 1.3433, "step": 7000 }, { "epoch": 0.02, "learning_rate": 1.9852460806213173e-05, "loss": 1.3518, "step": 7500 }, { "epoch": 0.02, "learning_rate": 1.9842624859960718e-05, "loss": 1.3454, "step": 8000 }, { "epoch": 0.03, "learning_rate": 1.9832788913708262e-05, "loss": 1.3461, "step": 8500 }, { "epoch": 0.03, "learning_rate": 1.9822952967455806e-05, "loss": 1.3391, "step": 9000 }, { "epoch": 0.03, "learning_rate": 1.981311702120335e-05, "loss": 1.3562, "step": 9500 }, { "epoch": 0.03, "learning_rate": 1.9803281074950895e-05, "loss": 1.3505, "step": 10000 }, { "epoch": 0.03, "learning_rate": 1.9793445128698443e-05, "loss": 1.3564, "step": 10500 }, { "epoch": 0.03, "learning_rate": 1.9783609182445983e-05, "loss": 1.3211, "step": 11000 }, { "epoch": 0.03, "learning_rate": 1.977377323619353e-05, "loss": 1.3481, "step": 11500 }, { "epoch": 0.04, "learning_rate": 1.9763937289941075e-05, "loss": 1.333, "step": 12000 }, { "epoch": 0.04, "learning_rate": 1.975410134368862e-05, "loss": 1.3272, "step": 12500 }, { "epoch": 0.04, "learning_rate": 1.9744265397436164e-05, "loss": 1.3339, "step": 13000 }, { "epoch": 0.04, "learning_rate": 1.973442945118371e-05, "loss": 1.3428, "step": 13500 }, { "epoch": 0.04, "learning_rate": 1.9724593504931253e-05, "loss": 1.3576, "step": 14000 }, { "epoch": 0.04, "learning_rate": 1.9714757558678797e-05, "loss": 1.3451, "step": 14500 }, { "epoch": 0.04, "learning_rate": 1.970492161242634e-05, "loss": 1.3258, "step": 15000 }, { "epoch": 0.05, "learning_rate": 1.969508566617389e-05, "loss": 1.3592, "step": 15500 }, { "epoch": 0.05, "learning_rate": 1.968524971992143e-05, "loss": 1.3407, "step": 16000 }, { "epoch": 0.05, "learning_rate": 1.9675413773668978e-05, "loss": 1.3294, "step": 16500 }, { "epoch": 0.05, "learning_rate": 1.9665577827416522e-05, "loss": 1.3305, "step": 17000 }, { "epoch": 0.05, "learning_rate": 1.9655741881164066e-05, "loss": 1.3375, "step": 17500 }, { "epoch": 0.05, "learning_rate": 1.964590593491161e-05, "loss": 1.3343, "step": 18000 }, { "epoch": 0.05, "learning_rate": 1.9636069988659155e-05, "loss": 1.3257, "step": 18500 }, { "epoch": 0.06, "learning_rate": 1.96262340424067e-05, "loss": 1.3228, "step": 19000 }, { "epoch": 0.06, "learning_rate": 1.9616398096154247e-05, "loss": 1.3442, "step": 19500 }, { "epoch": 0.06, "learning_rate": 1.9606562149901788e-05, "loss": 1.3442, "step": 20000 }, { "epoch": 0.06, "learning_rate": 1.9596726203649336e-05, "loss": 1.3304, "step": 20500 }, { "epoch": 0.06, "learning_rate": 1.958689025739688e-05, "loss": 1.3379, "step": 21000 }, { "epoch": 0.06, "learning_rate": 1.9577054311144424e-05, "loss": 1.3416, "step": 21500 }, { "epoch": 0.06, "learning_rate": 1.956721836489197e-05, "loss": 1.3261, "step": 22000 }, { "epoch": 0.07, "learning_rate": 1.9557382418639513e-05, "loss": 1.3294, "step": 22500 }, { "epoch": 0.07, "learning_rate": 1.9547546472387057e-05, "loss": 1.3284, "step": 23000 }, { "epoch": 0.07, "learning_rate": 1.9537710526134605e-05, "loss": 1.3261, "step": 23500 }, { "epoch": 0.07, "learning_rate": 1.9527874579882146e-05, "loss": 1.3364, "step": 24000 }, { "epoch": 0.07, "learning_rate": 1.9518038633629694e-05, "loss": 1.3196, "step": 24500 }, { "epoch": 0.07, "learning_rate": 1.9508202687377235e-05, "loss": 1.3112, "step": 25000 }, { "epoch": 0.08, "learning_rate": 1.9498366741124782e-05, "loss": 1.3149, "step": 25500 }, { "epoch": 0.08, "learning_rate": 1.9488530794872327e-05, "loss": 1.3089, "step": 26000 }, { "epoch": 0.08, "learning_rate": 1.947869484861987e-05, "loss": 1.3272, "step": 26500 }, { "epoch": 0.08, "learning_rate": 1.9468858902367415e-05, "loss": 1.2981, "step": 27000 }, { "epoch": 0.08, "learning_rate": 1.9459022956114963e-05, "loss": 1.3099, "step": 27500 }, { "epoch": 0.08, "learning_rate": 1.9449187009862504e-05, "loss": 1.2879, "step": 28000 }, { "epoch": 0.08, "learning_rate": 1.943935106361005e-05, "loss": 1.3178, "step": 28500 }, { "epoch": 0.09, "learning_rate": 1.9429515117357592e-05, "loss": 1.3391, "step": 29000 }, { "epoch": 0.09, "learning_rate": 1.941967917110514e-05, "loss": 1.3275, "step": 29500 }, { "epoch": 0.09, "learning_rate": 1.9409843224852685e-05, "loss": 1.3246, "step": 30000 }, { "epoch": 0.09, "learning_rate": 1.940000727860023e-05, "loss": 1.288, "step": 30500 }, { "epoch": 0.09, "learning_rate": 1.9390171332347773e-05, "loss": 1.3016, "step": 31000 }, { "epoch": 0.09, "learning_rate": 1.9380335386095317e-05, "loss": 1.3071, "step": 31500 }, { "epoch": 0.09, "learning_rate": 1.9370499439842862e-05, "loss": 1.3164, "step": 32000 }, { "epoch": 0.1, "learning_rate": 1.936066349359041e-05, "loss": 1.3009, "step": 32500 }, { "epoch": 0.1, "learning_rate": 1.935082754733795e-05, "loss": 1.32, "step": 33000 }, { "epoch": 0.1, "learning_rate": 1.9340991601085498e-05, "loss": 1.3179, "step": 33500 }, { "epoch": 0.1, "learning_rate": 1.933115565483304e-05, "loss": 1.3206, "step": 34000 }, { "epoch": 0.1, "learning_rate": 1.9321319708580587e-05, "loss": 1.3166, "step": 34500 }, { "epoch": 0.1, "learning_rate": 1.931148376232813e-05, "loss": 1.328, "step": 35000 }, { "epoch": 0.1, "learning_rate": 1.9301647816075675e-05, "loss": 1.2958, "step": 35500 }, { "epoch": 0.11, "learning_rate": 1.929181186982322e-05, "loss": 1.3128, "step": 36000 }, { "epoch": 0.11, "learning_rate": 1.9281975923570764e-05, "loss": 1.3111, "step": 36500 }, { "epoch": 0.11, "learning_rate": 1.927213997731831e-05, "loss": 1.3231, "step": 37000 }, { "epoch": 0.11, "learning_rate": 1.9262304031065856e-05, "loss": 1.3208, "step": 37500 }, { "epoch": 0.11, "learning_rate": 1.9252468084813397e-05, "loss": 1.3095, "step": 38000 }, { "epoch": 0.11, "learning_rate": 1.9242632138560945e-05, "loss": 1.3195, "step": 38500 }, { "epoch": 0.12, "learning_rate": 1.923279619230849e-05, "loss": 1.3479, "step": 39000 }, { "epoch": 0.12, "learning_rate": 1.9222960246056033e-05, "loss": 1.3256, "step": 39500 }, { "epoch": 0.12, "learning_rate": 1.9213124299803578e-05, "loss": 1.3129, "step": 40000 }, { "epoch": 0.12, "learning_rate": 1.9203288353551122e-05, "loss": 1.3071, "step": 40500 }, { "epoch": 0.12, "learning_rate": 1.9193452407298666e-05, "loss": 1.3246, "step": 41000 }, { "epoch": 0.12, "learning_rate": 1.9183616461046214e-05, "loss": 1.3135, "step": 41500 }, { "epoch": 0.12, "learning_rate": 1.9173780514793755e-05, "loss": 1.3236, "step": 42000 }, { "epoch": 0.13, "learning_rate": 1.9163944568541303e-05, "loss": 1.3108, "step": 42500 }, { "epoch": 0.13, "learning_rate": 1.9154108622288844e-05, "loss": 1.2978, "step": 43000 }, { "epoch": 0.13, "learning_rate": 1.914427267603639e-05, "loss": 1.2937, "step": 43500 }, { "epoch": 0.13, "learning_rate": 1.9134436729783936e-05, "loss": 1.3012, "step": 44000 }, { "epoch": 0.13, "learning_rate": 1.912460078353148e-05, "loss": 1.3, "step": 44500 }, { "epoch": 0.13, "learning_rate": 1.9114764837279024e-05, "loss": 1.3167, "step": 45000 }, { "epoch": 0.13, "learning_rate": 1.910492889102657e-05, "loss": 1.3121, "step": 45500 }, { "epoch": 0.14, "learning_rate": 1.9095092944774113e-05, "loss": 1.297, "step": 46000 }, { "epoch": 0.14, "learning_rate": 1.908525699852166e-05, "loss": 1.3065, "step": 46500 }, { "epoch": 0.14, "learning_rate": 1.90754210522692e-05, "loss": 1.2947, "step": 47000 }, { "epoch": 0.14, "learning_rate": 1.906558510601675e-05, "loss": 1.3247, "step": 47500 }, { "epoch": 0.14, "learning_rate": 1.905574915976429e-05, "loss": 1.3235, "step": 48000 }, { "epoch": 0.14, "learning_rate": 1.9045913213511838e-05, "loss": 1.299, "step": 48500 }, { "epoch": 0.14, "learning_rate": 1.9036077267259382e-05, "loss": 1.299, "step": 49000 }, { "epoch": 0.15, "learning_rate": 1.9026241321006927e-05, "loss": 1.3089, "step": 49500 }, { "epoch": 0.15, "learning_rate": 1.901640537475447e-05, "loss": 1.2939, "step": 50000 }, { "epoch": 0.15, "learning_rate": 1.900656942850202e-05, "loss": 1.3031, "step": 50500 }, { "epoch": 0.15, "learning_rate": 1.899673348224956e-05, "loss": 1.3049, "step": 51000 }, { "epoch": 0.15, "learning_rate": 1.8986897535997107e-05, "loss": 1.3039, "step": 51500 }, { "epoch": 0.15, "learning_rate": 1.8977061589744648e-05, "loss": 1.2925, "step": 52000 }, { "epoch": 0.15, "learning_rate": 1.8967225643492196e-05, "loss": 1.3242, "step": 52500 }, { "epoch": 0.16, "learning_rate": 1.895738969723974e-05, "loss": 1.3, "step": 53000 }, { "epoch": 0.16, "learning_rate": 1.8947553750987284e-05, "loss": 1.2995, "step": 53500 }, { "epoch": 0.16, "learning_rate": 1.893771780473483e-05, "loss": 1.3063, "step": 54000 }, { "epoch": 0.16, "learning_rate": 1.8927881858482373e-05, "loss": 1.2911, "step": 54500 }, { "epoch": 0.16, "learning_rate": 1.8918045912229917e-05, "loss": 1.3194, "step": 55000 }, { "epoch": 0.16, "learning_rate": 1.8908209965977465e-05, "loss": 1.3141, "step": 55500 }, { "epoch": 0.17, "learning_rate": 1.8898374019725006e-05, "loss": 1.3038, "step": 56000 }, { "epoch": 0.17, "learning_rate": 1.8888538073472554e-05, "loss": 1.3206, "step": 56500 }, { "epoch": 0.17, "learning_rate": 1.8878702127220098e-05, "loss": 1.2879, "step": 57000 }, { "epoch": 0.17, "learning_rate": 1.8868866180967642e-05, "loss": 1.311, "step": 57500 }, { "epoch": 0.17, "learning_rate": 1.8859030234715187e-05, "loss": 1.3032, "step": 58000 }, { "epoch": 0.17, "learning_rate": 1.884919428846273e-05, "loss": 1.3258, "step": 58500 }, { "epoch": 0.17, "learning_rate": 1.8839358342210275e-05, "loss": 1.2989, "step": 59000 }, { "epoch": 0.18, "learning_rate": 1.8829522395957823e-05, "loss": 1.3052, "step": 59500 }, { "epoch": 0.18, "learning_rate": 1.8819686449705364e-05, "loss": 1.3301, "step": 60000 }, { "epoch": 0.18, "learning_rate": 1.8809850503452912e-05, "loss": 1.3068, "step": 60500 }, { "epoch": 0.18, "learning_rate": 1.8800014557200456e-05, "loss": 1.2967, "step": 61000 }, { "epoch": 0.18, "learning_rate": 1.8790178610948e-05, "loss": 1.2913, "step": 61500 }, { "epoch": 0.18, "learning_rate": 1.8780342664695545e-05, "loss": 1.2898, "step": 62000 }, { "epoch": 0.18, "learning_rate": 1.877050671844309e-05, "loss": 1.3135, "step": 62500 }, { "epoch": 0.19, "learning_rate": 1.8760670772190633e-05, "loss": 1.3011, "step": 63000 }, { "epoch": 0.19, "learning_rate": 1.8750834825938178e-05, "loss": 1.292, "step": 63500 }, { "epoch": 0.19, "learning_rate": 1.8740998879685722e-05, "loss": 1.3054, "step": 64000 }, { "epoch": 0.19, "learning_rate": 1.873116293343327e-05, "loss": 1.307, "step": 64500 }, { "epoch": 0.19, "learning_rate": 1.8721326987180814e-05, "loss": 1.2997, "step": 65000 }, { "epoch": 0.19, "learning_rate": 1.8711491040928358e-05, "loss": 1.297, "step": 65500 }, { "epoch": 0.19, "learning_rate": 1.8701655094675903e-05, "loss": 1.2856, "step": 66000 }, { "epoch": 0.2, "learning_rate": 1.8691819148423447e-05, "loss": 1.2979, "step": 66500 }, { "epoch": 0.2, "learning_rate": 1.868198320217099e-05, "loss": 1.2846, "step": 67000 }, { "epoch": 0.2, "learning_rate": 1.8672147255918536e-05, "loss": 1.2962, "step": 67500 }, { "epoch": 0.2, "learning_rate": 1.866231130966608e-05, "loss": 1.2852, "step": 68000 }, { "epoch": 0.2, "learning_rate": 1.8652475363413624e-05, "loss": 1.3157, "step": 68500 }, { "epoch": 0.2, "learning_rate": 1.864263941716117e-05, "loss": 1.2959, "step": 69000 }, { "epoch": 0.21, "learning_rate": 1.8632803470908716e-05, "loss": 1.3149, "step": 69500 }, { "epoch": 0.21, "learning_rate": 1.862296752465626e-05, "loss": 1.302, "step": 70000 }, { "epoch": 0.21, "learning_rate": 1.8613131578403805e-05, "loss": 1.296, "step": 70500 }, { "epoch": 0.21, "learning_rate": 1.860329563215135e-05, "loss": 1.297, "step": 71000 }, { "epoch": 0.21, "learning_rate": 1.8593459685898894e-05, "loss": 1.3029, "step": 71500 }, { "epoch": 0.21, "learning_rate": 1.8583623739646438e-05, "loss": 1.2749, "step": 72000 }, { "epoch": 0.21, "learning_rate": 1.8573787793393982e-05, "loss": 1.2809, "step": 72500 }, { "epoch": 0.22, "learning_rate": 1.8563951847141526e-05, "loss": 1.3008, "step": 73000 }, { "epoch": 0.22, "learning_rate": 1.8554115900889074e-05, "loss": 1.3083, "step": 73500 }, { "epoch": 0.22, "learning_rate": 1.854427995463662e-05, "loss": 1.3037, "step": 74000 }, { "epoch": 0.22, "learning_rate": 1.8534444008384163e-05, "loss": 1.2926, "step": 74500 }, { "epoch": 0.22, "learning_rate": 1.8524608062131707e-05, "loss": 1.2977, "step": 75000 }, { "epoch": 0.22, "learning_rate": 1.851477211587925e-05, "loss": 1.2659, "step": 75500 }, { "epoch": 0.22, "learning_rate": 1.8504936169626796e-05, "loss": 1.2816, "step": 76000 }, { "epoch": 0.23, "learning_rate": 1.849510022337434e-05, "loss": 1.2886, "step": 76500 }, { "epoch": 0.23, "learning_rate": 1.8485264277121884e-05, "loss": 1.3132, "step": 77000 }, { "epoch": 0.23, "learning_rate": 1.847542833086943e-05, "loss": 1.285, "step": 77500 }, { "epoch": 0.23, "learning_rate": 1.8465592384616976e-05, "loss": 1.3076, "step": 78000 }, { "epoch": 0.23, "learning_rate": 1.845575643836452e-05, "loss": 1.2872, "step": 78500 }, { "epoch": 0.23, "learning_rate": 1.8445920492112065e-05, "loss": 1.308, "step": 79000 }, { "epoch": 0.23, "learning_rate": 1.843608454585961e-05, "loss": 1.2938, "step": 79500 }, { "epoch": 0.24, "learning_rate": 1.8426248599607154e-05, "loss": 1.3007, "step": 80000 }, { "epoch": 0.24, "learning_rate": 1.8416412653354698e-05, "loss": 1.2918, "step": 80500 }, { "epoch": 0.24, "learning_rate": 1.8406576707102242e-05, "loss": 1.2914, "step": 81000 }, { "epoch": 0.24, "learning_rate": 1.8396740760849787e-05, "loss": 1.2919, "step": 81500 }, { "epoch": 0.24, "learning_rate": 1.8386904814597334e-05, "loss": 1.2816, "step": 82000 }, { "epoch": 0.24, "learning_rate": 1.837706886834488e-05, "loss": 1.3166, "step": 82500 }, { "epoch": 0.24, "learning_rate": 1.8367232922092423e-05, "loss": 1.2769, "step": 83000 }, { "epoch": 0.25, "learning_rate": 1.8357396975839967e-05, "loss": 1.2907, "step": 83500 }, { "epoch": 0.25, "learning_rate": 1.834756102958751e-05, "loss": 1.2902, "step": 84000 }, { "epoch": 0.25, "learning_rate": 1.8337725083335056e-05, "loss": 1.3032, "step": 84500 }, { "epoch": 0.25, "learning_rate": 1.83278891370826e-05, "loss": 1.3012, "step": 85000 }, { "epoch": 0.25, "learning_rate": 1.8318053190830145e-05, "loss": 1.2869, "step": 85500 }, { "epoch": 0.25, "learning_rate": 1.8308217244577692e-05, "loss": 1.2898, "step": 86000 }, { "epoch": 0.26, "learning_rate": 1.8298381298325233e-05, "loss": 1.2806, "step": 86500 }, { "epoch": 0.26, "learning_rate": 1.828854535207278e-05, "loss": 1.2866, "step": 87000 }, { "epoch": 0.26, "learning_rate": 1.8278709405820325e-05, "loss": 1.2933, "step": 87500 }, { "epoch": 0.26, "learning_rate": 1.826887345956787e-05, "loss": 1.2923, "step": 88000 }, { "epoch": 0.26, "learning_rate": 1.8259037513315414e-05, "loss": 1.2877, "step": 88500 }, { "epoch": 0.26, "learning_rate": 1.8249201567062958e-05, "loss": 1.2977, "step": 89000 }, { "epoch": 0.26, "learning_rate": 1.8239365620810503e-05, "loss": 1.2887, "step": 89500 }, { "epoch": 0.27, "learning_rate": 1.8229529674558047e-05, "loss": 1.2818, "step": 90000 }, { "epoch": 0.27, "learning_rate": 1.821969372830559e-05, "loss": 1.2931, "step": 90500 }, { "epoch": 0.27, "learning_rate": 1.820985778205314e-05, "loss": 1.2965, "step": 91000 }, { "epoch": 0.27, "learning_rate": 1.8200021835800683e-05, "loss": 1.3018, "step": 91500 }, { "epoch": 0.27, "learning_rate": 1.8190185889548228e-05, "loss": 1.3002, "step": 92000 }, { "epoch": 0.27, "learning_rate": 1.8180349943295772e-05, "loss": 1.3027, "step": 92500 }, { "epoch": 0.27, "learning_rate": 1.8170513997043316e-05, "loss": 1.3097, "step": 93000 }, { "epoch": 0.28, "learning_rate": 1.816067805079086e-05, "loss": 1.3081, "step": 93500 }, { "epoch": 0.28, "learning_rate": 1.8150842104538405e-05, "loss": 1.2974, "step": 94000 }, { "epoch": 0.28, "learning_rate": 1.814100615828595e-05, "loss": 1.2778, "step": 94500 }, { "epoch": 0.28, "learning_rate": 1.8131170212033497e-05, "loss": 1.2873, "step": 95000 }, { "epoch": 0.28, "learning_rate": 1.8121334265781038e-05, "loss": 1.2849, "step": 95500 }, { "epoch": 0.28, "learning_rate": 1.8111498319528585e-05, "loss": 1.2943, "step": 96000 }, { "epoch": 0.28, "learning_rate": 1.810166237327613e-05, "loss": 1.3183, "step": 96500 }, { "epoch": 0.29, "learning_rate": 1.8091826427023674e-05, "loss": 1.2825, "step": 97000 }, { "epoch": 0.29, "learning_rate": 1.808199048077122e-05, "loss": 1.2769, "step": 97500 }, { "epoch": 0.29, "learning_rate": 1.8072154534518763e-05, "loss": 1.2804, "step": 98000 }, { "epoch": 0.29, "learning_rate": 1.8062318588266307e-05, "loss": 1.2918, "step": 98500 }, { "epoch": 0.29, "learning_rate": 1.8052482642013855e-05, "loss": 1.2929, "step": 99000 }, { "epoch": 0.29, "learning_rate": 1.8042646695761396e-05, "loss": 1.2917, "step": 99500 }, { "epoch": 0.3, "learning_rate": 1.8032810749508943e-05, "loss": 1.2858, "step": 100000 }, { "epoch": 0.3, "learning_rate": 1.8022974803256484e-05, "loss": 1.2801, "step": 100500 }, { "epoch": 0.3, "learning_rate": 1.8013138857004032e-05, "loss": 1.2915, "step": 101000 }, { "epoch": 0.3, "learning_rate": 1.8003302910751576e-05, "loss": 1.2866, "step": 101500 }, { "epoch": 0.3, "learning_rate": 1.799346696449912e-05, "loss": 1.3089, "step": 102000 }, { "epoch": 0.3, "learning_rate": 1.7983631018246665e-05, "loss": 1.2853, "step": 102500 }, { "epoch": 0.3, "learning_rate": 1.7973795071994213e-05, "loss": 1.2879, "step": 103000 }, { "epoch": 0.31, "learning_rate": 1.7963959125741754e-05, "loss": 1.2757, "step": 103500 }, { "epoch": 0.31, "learning_rate": 1.79541231794893e-05, "loss": 1.2893, "step": 104000 }, { "epoch": 0.31, "learning_rate": 1.7944287233236842e-05, "loss": 1.2907, "step": 104500 }, { "epoch": 0.31, "learning_rate": 1.793445128698439e-05, "loss": 1.2922, "step": 105000 }, { "epoch": 0.31, "learning_rate": 1.7924615340731934e-05, "loss": 1.2844, "step": 105500 }, { "epoch": 0.31, "learning_rate": 1.791477939447948e-05, "loss": 1.29, "step": 106000 }, { "epoch": 0.31, "learning_rate": 1.7904943448227023e-05, "loss": 1.3004, "step": 106500 }, { "epoch": 0.32, "learning_rate": 1.7895107501974567e-05, "loss": 1.2816, "step": 107000 }, { "epoch": 0.32, "learning_rate": 1.788527155572211e-05, "loss": 1.283, "step": 107500 }, { "epoch": 0.32, "learning_rate": 1.787543560946966e-05, "loss": 1.2924, "step": 108000 }, { "epoch": 0.32, "learning_rate": 1.78655996632172e-05, "loss": 1.2906, "step": 108500 }, { "epoch": 0.32, "learning_rate": 1.7855763716964748e-05, "loss": 1.2721, "step": 109000 }, { "epoch": 0.32, "learning_rate": 1.784592777071229e-05, "loss": 1.2887, "step": 109500 }, { "epoch": 0.32, "learning_rate": 1.7836091824459837e-05, "loss": 1.2651, "step": 110000 }, { "epoch": 0.33, "learning_rate": 1.782625587820738e-05, "loss": 1.2902, "step": 110500 }, { "epoch": 0.33, "learning_rate": 1.7816419931954925e-05, "loss": 1.2984, "step": 111000 }, { "epoch": 0.33, "learning_rate": 1.780658398570247e-05, "loss": 1.255, "step": 111500 }, { "epoch": 0.33, "learning_rate": 1.7796748039450014e-05, "loss": 1.2652, "step": 112000 }, { "epoch": 0.33, "learning_rate": 1.7786912093197558e-05, "loss": 1.2956, "step": 112500 }, { "epoch": 0.33, "learning_rate": 1.7777076146945106e-05, "loss": 1.2747, "step": 113000 }, { "epoch": 0.33, "learning_rate": 1.7767240200692647e-05, "loss": 1.3074, "step": 113500 }, { "epoch": 0.34, "learning_rate": 1.7757404254440195e-05, "loss": 1.2823, "step": 114000 }, { "epoch": 0.34, "learning_rate": 1.774756830818774e-05, "loss": 1.2615, "step": 114500 }, { "epoch": 0.34, "learning_rate": 1.7737732361935283e-05, "loss": 1.3011, "step": 115000 }, { "epoch": 0.34, "learning_rate": 1.7727896415682828e-05, "loss": 1.2829, "step": 115500 }, { "epoch": 0.34, "learning_rate": 1.7718060469430372e-05, "loss": 1.2864, "step": 116000 }, { "epoch": 0.34, "learning_rate": 1.7708224523177916e-05, "loss": 1.2653, "step": 116500 }, { "epoch": 0.35, "learning_rate": 1.7698388576925464e-05, "loss": 1.3048, "step": 117000 }, { "epoch": 0.35, "learning_rate": 1.7688552630673005e-05, "loss": 1.2915, "step": 117500 }, { "epoch": 0.35, "learning_rate": 1.7678716684420552e-05, "loss": 1.2874, "step": 118000 }, { "epoch": 0.35, "learning_rate": 1.7668880738168093e-05, "loss": 1.2794, "step": 118500 }, { "epoch": 0.35, "learning_rate": 1.765904479191564e-05, "loss": 1.2749, "step": 119000 }, { "epoch": 0.35, "learning_rate": 1.7649208845663185e-05, "loss": 1.2945, "step": 119500 }, { "epoch": 0.35, "learning_rate": 1.763937289941073e-05, "loss": 1.2911, "step": 120000 }, { "epoch": 0.36, "learning_rate": 1.7629536953158274e-05, "loss": 1.2454, "step": 120500 }, { "epoch": 0.36, "learning_rate": 1.761970100690582e-05, "loss": 1.2733, "step": 121000 }, { "epoch": 0.36, "learning_rate": 1.7609865060653363e-05, "loss": 1.2882, "step": 121500 }, { "epoch": 0.36, "learning_rate": 1.760002911440091e-05, "loss": 1.282, "step": 122000 }, { "epoch": 0.36, "learning_rate": 1.759019316814845e-05, "loss": 1.2944, "step": 122500 }, { "epoch": 0.36, "learning_rate": 1.7580357221896e-05, "loss": 1.2946, "step": 123000 }, { "epoch": 0.36, "learning_rate": 1.7570521275643543e-05, "loss": 1.2684, "step": 123500 }, { "epoch": 0.37, "learning_rate": 1.7560685329391088e-05, "loss": 1.255, "step": 124000 }, { "epoch": 0.37, "learning_rate": 1.7550849383138632e-05, "loss": 1.2967, "step": 124500 }, { "epoch": 0.37, "learning_rate": 1.7541013436886176e-05, "loss": 1.287, "step": 125000 }, { "epoch": 0.37, "learning_rate": 1.753117749063372e-05, "loss": 1.2863, "step": 125500 }, { "epoch": 0.37, "learning_rate": 1.752134154438127e-05, "loss": 1.2994, "step": 126000 }, { "epoch": 0.37, "learning_rate": 1.751150559812881e-05, "loss": 1.3017, "step": 126500 }, { "epoch": 0.37, "learning_rate": 1.7501669651876357e-05, "loss": 1.2793, "step": 127000 }, { "epoch": 0.38, "learning_rate": 1.7491833705623898e-05, "loss": 1.2813, "step": 127500 }, { "epoch": 0.38, "learning_rate": 1.7481997759371446e-05, "loss": 1.2807, "step": 128000 }, { "epoch": 0.38, "learning_rate": 1.747216181311899e-05, "loss": 1.2765, "step": 128500 }, { "epoch": 0.38, "learning_rate": 1.7462325866866534e-05, "loss": 1.2817, "step": 129000 }, { "epoch": 0.38, "learning_rate": 1.745248992061408e-05, "loss": 1.2763, "step": 129500 }, { "epoch": 0.38, "learning_rate": 1.7442653974361623e-05, "loss": 1.2854, "step": 130000 }, { "epoch": 0.39, "learning_rate": 1.7432818028109167e-05, "loss": 1.2889, "step": 130500 }, { "epoch": 0.39, "learning_rate": 1.7422982081856715e-05, "loss": 1.283, "step": 131000 }, { "epoch": 0.39, "learning_rate": 1.7413146135604256e-05, "loss": 1.2789, "step": 131500 }, { "epoch": 0.39, "learning_rate": 1.7403310189351804e-05, "loss": 1.2773, "step": 132000 }, { "epoch": 0.39, "learning_rate": 1.7393474243099348e-05, "loss": 1.2831, "step": 132500 }, { "epoch": 0.39, "learning_rate": 1.7383638296846892e-05, "loss": 1.2805, "step": 133000 }, { "epoch": 0.39, "learning_rate": 1.7373802350594437e-05, "loss": 1.2683, "step": 133500 }, { "epoch": 0.4, "learning_rate": 1.736396640434198e-05, "loss": 1.2814, "step": 134000 }, { "epoch": 0.4, "learning_rate": 1.7354130458089525e-05, "loss": 1.2844, "step": 134500 }, { "epoch": 0.4, "learning_rate": 1.7344294511837073e-05, "loss": 1.3064, "step": 135000 }, { "epoch": 0.4, "learning_rate": 1.7334458565584614e-05, "loss": 1.2754, "step": 135500 }, { "epoch": 0.4, "learning_rate": 1.732462261933216e-05, "loss": 1.289, "step": 136000 }, { "epoch": 0.4, "learning_rate": 1.7314786673079706e-05, "loss": 1.2716, "step": 136500 }, { "epoch": 0.4, "learning_rate": 1.730495072682725e-05, "loss": 1.2655, "step": 137000 }, { "epoch": 0.41, "learning_rate": 1.7295114780574794e-05, "loss": 1.264, "step": 137500 }, { "epoch": 0.41, "learning_rate": 1.728527883432234e-05, "loss": 1.2898, "step": 138000 }, { "epoch": 0.41, "learning_rate": 1.7275442888069883e-05, "loss": 1.2873, "step": 138500 }, { "epoch": 0.41, "learning_rate": 1.7265606941817427e-05, "loss": 1.2727, "step": 139000 }, { "epoch": 0.41, "learning_rate": 1.7255770995564972e-05, "loss": 1.2901, "step": 139500 }, { "epoch": 0.41, "learning_rate": 1.724593504931252e-05, "loss": 1.2724, "step": 140000 }, { "epoch": 0.41, "learning_rate": 1.7236099103060064e-05, "loss": 1.2653, "step": 140500 }, { "epoch": 0.42, "learning_rate": 1.7226263156807608e-05, "loss": 1.2736, "step": 141000 }, { "epoch": 0.42, "learning_rate": 1.7216427210555152e-05, "loss": 1.2664, "step": 141500 }, { "epoch": 0.42, "learning_rate": 1.7206591264302697e-05, "loss": 1.2845, "step": 142000 }, { "epoch": 0.42, "learning_rate": 1.719675531805024e-05, "loss": 1.2758, "step": 142500 }, { "epoch": 0.42, "learning_rate": 1.7186919371797785e-05, "loss": 1.2827, "step": 143000 }, { "epoch": 0.42, "learning_rate": 1.717708342554533e-05, "loss": 1.2787, "step": 143500 }, { "epoch": 0.42, "learning_rate": 1.7167247479292874e-05, "loss": 1.2775, "step": 144000 }, { "epoch": 0.43, "learning_rate": 1.7157411533040422e-05, "loss": 1.2824, "step": 144500 }, { "epoch": 0.43, "learning_rate": 1.7147575586787966e-05, "loss": 1.291, "step": 145000 }, { "epoch": 0.43, "learning_rate": 1.713773964053551e-05, "loss": 1.2955, "step": 145500 }, { "epoch": 0.43, "learning_rate": 1.7127903694283055e-05, "loss": 1.2702, "step": 146000 }, { "epoch": 0.43, "learning_rate": 1.71180677480306e-05, "loss": 1.2666, "step": 146500 }, { "epoch": 0.43, "learning_rate": 1.7108231801778143e-05, "loss": 1.2785, "step": 147000 }, { "epoch": 0.44, "learning_rate": 1.7098395855525688e-05, "loss": 1.2669, "step": 147500 }, { "epoch": 0.44, "learning_rate": 1.7088559909273232e-05, "loss": 1.2694, "step": 148000 }, { "epoch": 0.44, "learning_rate": 1.7078723963020776e-05, "loss": 1.2817, "step": 148500 }, { "epoch": 0.44, "learning_rate": 1.7068888016768324e-05, "loss": 1.2698, "step": 149000 }, { "epoch": 0.44, "learning_rate": 1.705905207051587e-05, "loss": 1.2757, "step": 149500 }, { "epoch": 0.44, "learning_rate": 1.7049216124263413e-05, "loss": 1.2731, "step": 150000 }, { "epoch": 0.44, "learning_rate": 1.7039380178010957e-05, "loss": 1.276, "step": 150500 }, { "epoch": 0.45, "learning_rate": 1.70295442317585e-05, "loss": 1.2694, "step": 151000 }, { "epoch": 0.45, "learning_rate": 1.7019708285506046e-05, "loss": 1.2775, "step": 151500 }, { "epoch": 0.45, "learning_rate": 1.700987233925359e-05, "loss": 1.2745, "step": 152000 }, { "epoch": 0.45, "learning_rate": 1.7000036393001134e-05, "loss": 1.2637, "step": 152500 }, { "epoch": 0.45, "learning_rate": 1.699020044674868e-05, "loss": 1.3018, "step": 153000 }, { "epoch": 0.45, "learning_rate": 1.6980364500496226e-05, "loss": 1.2738, "step": 153500 }, { "epoch": 0.45, "learning_rate": 1.697052855424377e-05, "loss": 1.2582, "step": 154000 }, { "epoch": 0.46, "learning_rate": 1.6960692607991315e-05, "loss": 1.2724, "step": 154500 }, { "epoch": 0.46, "learning_rate": 1.695085666173886e-05, "loss": 1.2796, "step": 155000 }, { "epoch": 0.46, "learning_rate": 1.6941020715486404e-05, "loss": 1.2933, "step": 155500 }, { "epoch": 0.46, "learning_rate": 1.6931184769233948e-05, "loss": 1.2813, "step": 156000 }, { "epoch": 0.46, "learning_rate": 1.6921348822981492e-05, "loss": 1.2507, "step": 156500 }, { "epoch": 0.46, "learning_rate": 1.6911512876729037e-05, "loss": 1.2833, "step": 157000 }, { "epoch": 0.46, "learning_rate": 1.6901676930476584e-05, "loss": 1.275, "step": 157500 }, { "epoch": 0.47, "learning_rate": 1.689184098422413e-05, "loss": 1.2827, "step": 158000 }, { "epoch": 0.47, "learning_rate": 1.6882005037971673e-05, "loss": 1.2722, "step": 158500 }, { "epoch": 0.47, "learning_rate": 1.6872169091719217e-05, "loss": 1.2778, "step": 159000 }, { "epoch": 0.47, "learning_rate": 1.686233314546676e-05, "loss": 1.2903, "step": 159500 }, { "epoch": 0.47, "learning_rate": 1.6852497199214306e-05, "loss": 1.2909, "step": 160000 }, { "epoch": 0.47, "learning_rate": 1.684266125296185e-05, "loss": 1.2637, "step": 160500 }, { "epoch": 0.48, "learning_rate": 1.6832825306709394e-05, "loss": 1.2776, "step": 161000 }, { "epoch": 0.48, "learning_rate": 1.6822989360456942e-05, "loss": 1.2655, "step": 161500 }, { "epoch": 0.48, "learning_rate": 1.6813153414204483e-05, "loss": 1.2599, "step": 162000 }, { "epoch": 0.48, "learning_rate": 1.680331746795203e-05, "loss": 1.2639, "step": 162500 }, { "epoch": 0.48, "learning_rate": 1.6793481521699575e-05, "loss": 1.2694, "step": 163000 }, { "epoch": 0.48, "learning_rate": 1.678364557544712e-05, "loss": 1.2486, "step": 163500 }, { "epoch": 0.48, "learning_rate": 1.6773809629194664e-05, "loss": 1.2819, "step": 164000 }, { "epoch": 0.49, "learning_rate": 1.6763973682942208e-05, "loss": 1.2799, "step": 164500 }, { "epoch": 0.49, "learning_rate": 1.6754137736689752e-05, "loss": 1.265, "step": 165000 }, { "epoch": 0.49, "learning_rate": 1.67443017904373e-05, "loss": 1.2776, "step": 165500 }, { "epoch": 0.49, "learning_rate": 1.673446584418484e-05, "loss": 1.2704, "step": 166000 }, { "epoch": 0.49, "learning_rate": 1.672462989793239e-05, "loss": 1.2777, "step": 166500 }, { "epoch": 0.49, "learning_rate": 1.6714793951679933e-05, "loss": 1.2737, "step": 167000 }, { "epoch": 0.49, "learning_rate": 1.6704958005427477e-05, "loss": 1.2736, "step": 167500 }, { "epoch": 0.5, "learning_rate": 1.669512205917502e-05, "loss": 1.2535, "step": 168000 }, { "epoch": 0.5, "learning_rate": 1.6685286112922566e-05, "loss": 1.2815, "step": 168500 }, { "epoch": 0.5, "learning_rate": 1.667545016667011e-05, "loss": 1.2726, "step": 169000 }, { "epoch": 0.5, "learning_rate": 1.6665614220417655e-05, "loss": 1.2536, "step": 169500 }, { "epoch": 0.5, "learning_rate": 1.66557782741652e-05, "loss": 1.272, "step": 170000 }, { "epoch": 0.5, "learning_rate": 1.6645942327912747e-05, "loss": 1.2742, "step": 170500 }, { "epoch": 0.5, "learning_rate": 1.6636106381660288e-05, "loss": 1.2732, "step": 171000 }, { "epoch": 0.51, "learning_rate": 1.6626270435407835e-05, "loss": 1.2706, "step": 171500 }, { "epoch": 0.51, "learning_rate": 1.661643448915538e-05, "loss": 1.2682, "step": 172000 }, { "epoch": 0.51, "learning_rate": 1.6606598542902924e-05, "loss": 1.2461, "step": 172500 }, { "epoch": 0.51, "learning_rate": 1.6596762596650468e-05, "loss": 1.2652, "step": 173000 }, { "epoch": 0.51, "learning_rate": 1.6586926650398013e-05, "loss": 1.2587, "step": 173500 }, { "epoch": 0.51, "learning_rate": 1.6577090704145557e-05, "loss": 1.259, "step": 174000 }, { "epoch": 0.51, "learning_rate": 1.6567254757893105e-05, "loss": 1.2817, "step": 174500 }, { "epoch": 0.52, "learning_rate": 1.6557418811640646e-05, "loss": 1.2655, "step": 175000 }, { "epoch": 0.52, "learning_rate": 1.6547582865388193e-05, "loss": 1.2712, "step": 175500 }, { "epoch": 0.52, "learning_rate": 1.6537746919135734e-05, "loss": 1.2783, "step": 176000 }, { "epoch": 0.52, "learning_rate": 1.6527910972883282e-05, "loss": 1.2652, "step": 176500 }, { "epoch": 0.52, "learning_rate": 1.6518075026630826e-05, "loss": 1.2795, "step": 177000 }, { "epoch": 0.52, "learning_rate": 1.650823908037837e-05, "loss": 1.2888, "step": 177500 }, { "epoch": 0.53, "learning_rate": 1.6498403134125915e-05, "loss": 1.2649, "step": 178000 }, { "epoch": 0.53, "learning_rate": 1.6488567187873463e-05, "loss": 1.269, "step": 178500 }, { "epoch": 0.53, "learning_rate": 1.6478731241621003e-05, "loss": 1.2616, "step": 179000 }, { "epoch": 0.53, "learning_rate": 1.646889529536855e-05, "loss": 1.2872, "step": 179500 }, { "epoch": 0.53, "learning_rate": 1.6459059349116092e-05, "loss": 1.2662, "step": 180000 }, { "epoch": 0.53, "learning_rate": 1.644922340286364e-05, "loss": 1.2629, "step": 180500 }, { "epoch": 0.53, "learning_rate": 1.6439387456611184e-05, "loss": 1.2743, "step": 181000 }, { "epoch": 0.54, "learning_rate": 1.642955151035873e-05, "loss": 1.2672, "step": 181500 }, { "epoch": 0.54, "learning_rate": 1.6419715564106273e-05, "loss": 1.2726, "step": 182000 }, { "epoch": 0.54, "learning_rate": 1.6409879617853817e-05, "loss": 1.2705, "step": 182500 }, { "epoch": 0.54, "learning_rate": 1.640004367160136e-05, "loss": 1.273, "step": 183000 }, { "epoch": 0.54, "learning_rate": 1.639020772534891e-05, "loss": 1.259, "step": 183500 }, { "epoch": 0.54, "learning_rate": 1.638037177909645e-05, "loss": 1.2653, "step": 184000 }, { "epoch": 0.54, "learning_rate": 1.6370535832843998e-05, "loss": 1.2656, "step": 184500 }, { "epoch": 0.55, "learning_rate": 1.636069988659154e-05, "loss": 1.2701, "step": 185000 }, { "epoch": 0.55, "learning_rate": 1.6350863940339086e-05, "loss": 1.2726, "step": 185500 }, { "epoch": 0.55, "learning_rate": 1.634102799408663e-05, "loss": 1.2633, "step": 186000 }, { "epoch": 0.55, "learning_rate": 1.6331192047834175e-05, "loss": 1.2522, "step": 186500 }, { "epoch": 0.55, "learning_rate": 1.632135610158172e-05, "loss": 1.2592, "step": 187000 }, { "epoch": 0.55, "learning_rate": 1.6311520155329264e-05, "loss": 1.2903, "step": 187500 }, { "epoch": 0.55, "learning_rate": 1.6301684209076808e-05, "loss": 1.2526, "step": 188000 }, { "epoch": 0.56, "learning_rate": 1.6291848262824356e-05, "loss": 1.2561, "step": 188500 }, { "epoch": 0.56, "learning_rate": 1.6282012316571897e-05, "loss": 1.2741, "step": 189000 }, { "epoch": 0.56, "learning_rate": 1.6272176370319444e-05, "loss": 1.2676, "step": 189500 }, { "epoch": 0.56, "learning_rate": 1.626234042406699e-05, "loss": 1.2967, "step": 190000 }, { "epoch": 0.56, "learning_rate": 1.6252504477814533e-05, "loss": 1.2631, "step": 190500 }, { "epoch": 0.56, "learning_rate": 1.6242668531562077e-05, "loss": 1.277, "step": 191000 }, { "epoch": 0.57, "learning_rate": 1.623283258530962e-05, "loss": 1.2693, "step": 191500 }, { "epoch": 0.57, "learning_rate": 1.6222996639057166e-05, "loss": 1.2652, "step": 192000 }, { "epoch": 0.57, "learning_rate": 1.6213160692804714e-05, "loss": 1.2666, "step": 192500 }, { "epoch": 0.57, "learning_rate": 1.6203324746552255e-05, "loss": 1.2474, "step": 193000 }, { "epoch": 0.57, "learning_rate": 1.6193488800299802e-05, "loss": 1.2998, "step": 193500 }, { "epoch": 0.57, "learning_rate": 1.6183652854047343e-05, "loss": 1.2544, "step": 194000 }, { "epoch": 0.57, "learning_rate": 1.617381690779489e-05, "loss": 1.2626, "step": 194500 }, { "epoch": 0.58, "learning_rate": 1.6163980961542435e-05, "loss": 1.27, "step": 195000 }, { "epoch": 0.58, "learning_rate": 1.615414501528998e-05, "loss": 1.239, "step": 195500 }, { "epoch": 0.58, "learning_rate": 1.6144309069037524e-05, "loss": 1.2578, "step": 196000 }, { "epoch": 0.58, "learning_rate": 1.6134473122785068e-05, "loss": 1.2456, "step": 196500 }, { "epoch": 0.58, "learning_rate": 1.6124637176532613e-05, "loss": 1.2719, "step": 197000 }, { "epoch": 0.58, "learning_rate": 1.611480123028016e-05, "loss": 1.2616, "step": 197500 }, { "epoch": 0.58, "learning_rate": 1.61049652840277e-05, "loss": 1.2622, "step": 198000 }, { "epoch": 0.59, "learning_rate": 1.609512933777525e-05, "loss": 1.2449, "step": 198500 }, { "epoch": 0.59, "learning_rate": 1.6085293391522793e-05, "loss": 1.2644, "step": 199000 }, { "epoch": 0.59, "learning_rate": 1.6075457445270338e-05, "loss": 1.2713, "step": 199500 }, { "epoch": 0.59, "learning_rate": 1.6065621499017882e-05, "loss": 1.2391, "step": 200000 }, { "epoch": 0.59, "learning_rate": 1.6055785552765426e-05, "loss": 1.2586, "step": 200500 }, { "epoch": 0.59, "learning_rate": 1.604594960651297e-05, "loss": 1.2743, "step": 201000 }, { "epoch": 0.59, "learning_rate": 1.6036113660260518e-05, "loss": 1.2511, "step": 201500 }, { "epoch": 0.6, "learning_rate": 1.602627771400806e-05, "loss": 1.2531, "step": 202000 }, { "epoch": 0.6, "learning_rate": 1.6016441767755607e-05, "loss": 1.2526, "step": 202500 }, { "epoch": 0.6, "learning_rate": 1.6006605821503148e-05, "loss": 1.2617, "step": 203000 }, { "epoch": 0.6, "learning_rate": 1.5996769875250695e-05, "loss": 1.2749, "step": 203500 }, { "epoch": 0.6, "learning_rate": 1.598693392899824e-05, "loss": 1.2644, "step": 204000 }, { "epoch": 0.6, "learning_rate": 1.5977097982745784e-05, "loss": 1.2608, "step": 204500 }, { "epoch": 0.6, "learning_rate": 1.596726203649333e-05, "loss": 1.2679, "step": 205000 }, { "epoch": 0.61, "learning_rate": 1.5957426090240873e-05, "loss": 1.2442, "step": 205500 }, { "epoch": 0.61, "learning_rate": 1.5947590143988417e-05, "loss": 1.2619, "step": 206000 }, { "epoch": 0.61, "learning_rate": 1.5937754197735965e-05, "loss": 1.2664, "step": 206500 }, { "epoch": 0.61, "learning_rate": 1.5927918251483506e-05, "loss": 1.2719, "step": 207000 }, { "epoch": 0.61, "learning_rate": 1.5918082305231053e-05, "loss": 1.2517, "step": 207500 }, { "epoch": 0.61, "learning_rate": 1.5908246358978598e-05, "loss": 1.2799, "step": 208000 }, { "epoch": 0.62, "learning_rate": 1.5898410412726142e-05, "loss": 1.248, "step": 208500 }, { "epoch": 0.62, "learning_rate": 1.5888574466473686e-05, "loss": 1.264, "step": 209000 }, { "epoch": 0.62, "learning_rate": 1.587873852022123e-05, "loss": 1.2459, "step": 209500 }, { "epoch": 0.62, "learning_rate": 1.5868902573968775e-05, "loss": 1.252, "step": 210000 }, { "epoch": 0.62, "learning_rate": 1.5859066627716323e-05, "loss": 1.2596, "step": 210500 }, { "epoch": 0.62, "learning_rate": 1.5849230681463864e-05, "loss": 1.2484, "step": 211000 }, { "epoch": 0.62, "learning_rate": 1.583939473521141e-05, "loss": 1.2533, "step": 211500 }, { "epoch": 0.63, "learning_rate": 1.5829558788958956e-05, "loss": 1.2747, "step": 212000 }, { "epoch": 0.63, "learning_rate": 1.58197228427065e-05, "loss": 1.2512, "step": 212500 }, { "epoch": 0.63, "learning_rate": 1.5809886896454044e-05, "loss": 1.2637, "step": 213000 }, { "epoch": 0.63, "learning_rate": 1.580005095020159e-05, "loss": 1.2481, "step": 213500 }, { "epoch": 0.63, "learning_rate": 1.5790215003949133e-05, "loss": 1.2556, "step": 214000 }, { "epoch": 0.63, "learning_rate": 1.5780379057696677e-05, "loss": 1.2591, "step": 214500 }, { "epoch": 0.63, "learning_rate": 1.577054311144422e-05, "loss": 1.2536, "step": 215000 }, { "epoch": 0.64, "learning_rate": 1.576070716519177e-05, "loss": 1.2584, "step": 215500 }, { "epoch": 0.64, "learning_rate": 1.5750871218939314e-05, "loss": 1.2749, "step": 216000 }, { "epoch": 0.64, "learning_rate": 1.5741035272686858e-05, "loss": 1.2459, "step": 216500 }, { "epoch": 0.64, "learning_rate": 1.5731199326434402e-05, "loss": 1.2554, "step": 217000 }, { "epoch": 0.64, "learning_rate": 1.5721363380181947e-05, "loss": 1.2489, "step": 217500 }, { "epoch": 0.64, "learning_rate": 1.571152743392949e-05, "loss": 1.2557, "step": 218000 }, { "epoch": 0.64, "learning_rate": 1.5701691487677035e-05, "loss": 1.264, "step": 218500 }, { "epoch": 0.65, "learning_rate": 1.569185554142458e-05, "loss": 1.2569, "step": 219000 }, { "epoch": 0.65, "learning_rate": 1.5682019595172124e-05, "loss": 1.2752, "step": 219500 }, { "epoch": 0.65, "learning_rate": 1.567218364891967e-05, "loss": 1.265, "step": 220000 }, { "epoch": 0.65, "learning_rate": 1.5662347702667216e-05, "loss": 1.2569, "step": 220500 }, { "epoch": 0.65, "learning_rate": 1.565251175641476e-05, "loss": 1.2612, "step": 221000 }, { "epoch": 0.65, "learning_rate": 1.5642675810162305e-05, "loss": 1.2799, "step": 221500 }, { "epoch": 0.66, "learning_rate": 1.563283986390985e-05, "loss": 1.2498, "step": 222000 }, { "epoch": 0.66, "learning_rate": 1.5623003917657393e-05, "loss": 1.2401, "step": 222500 }, { "epoch": 0.66, "learning_rate": 1.5613167971404937e-05, "loss": 1.2744, "step": 223000 }, { "epoch": 0.66, "learning_rate": 1.5603332025152482e-05, "loss": 1.246, "step": 223500 }, { "epoch": 0.66, "learning_rate": 1.5593496078900026e-05, "loss": 1.2725, "step": 224000 }, { "epoch": 0.66, "learning_rate": 1.5583660132647574e-05, "loss": 1.2489, "step": 224500 }, { "epoch": 0.66, "learning_rate": 1.5573824186395118e-05, "loss": 1.255, "step": 225000 }, { "epoch": 0.67, "learning_rate": 1.5563988240142662e-05, "loss": 1.263, "step": 225500 }, { "epoch": 0.67, "learning_rate": 1.5554152293890207e-05, "loss": 1.2629, "step": 226000 }, { "epoch": 0.67, "learning_rate": 1.554431634763775e-05, "loss": 1.2524, "step": 226500 }, { "epoch": 0.67, "learning_rate": 1.5534480401385295e-05, "loss": 1.2655, "step": 227000 }, { "epoch": 0.67, "learning_rate": 1.552464445513284e-05, "loss": 1.2491, "step": 227500 }, { "epoch": 0.67, "learning_rate": 1.5514808508880384e-05, "loss": 1.2457, "step": 228000 }, { "epoch": 0.67, "learning_rate": 1.550497256262793e-05, "loss": 1.2314, "step": 228500 }, { "epoch": 0.68, "learning_rate": 1.5495136616375476e-05, "loss": 1.2724, "step": 229000 }, { "epoch": 0.68, "learning_rate": 1.548530067012302e-05, "loss": 1.2353, "step": 229500 }, { "epoch": 0.68, "learning_rate": 1.5475464723870565e-05, "loss": 1.2421, "step": 230000 }, { "epoch": 0.68, "learning_rate": 1.546562877761811e-05, "loss": 1.2546, "step": 230500 }, { "epoch": 0.68, "learning_rate": 1.5455792831365653e-05, "loss": 1.2498, "step": 231000 }, { "epoch": 0.68, "learning_rate": 1.5445956885113198e-05, "loss": 1.2472, "step": 231500 }, { "epoch": 0.68, "learning_rate": 1.5436120938860742e-05, "loss": 1.2603, "step": 232000 }, { "epoch": 0.69, "learning_rate": 1.5426284992608286e-05, "loss": 1.2679, "step": 232500 }, { "epoch": 0.69, "learning_rate": 1.5416449046355834e-05, "loss": 1.2814, "step": 233000 }, { "epoch": 0.69, "learning_rate": 1.540661310010338e-05, "loss": 1.2633, "step": 233500 }, { "epoch": 0.69, "learning_rate": 1.5396777153850923e-05, "loss": 1.2636, "step": 234000 }, { "epoch": 0.69, "learning_rate": 1.5386941207598467e-05, "loss": 1.2531, "step": 234500 }, { "epoch": 0.69, "learning_rate": 1.537710526134601e-05, "loss": 1.2622, "step": 235000 }, { "epoch": 0.69, "learning_rate": 1.5367269315093556e-05, "loss": 1.2436, "step": 235500 }, { "epoch": 0.7, "learning_rate": 1.53574333688411e-05, "loss": 1.2499, "step": 236000 }, { "epoch": 0.7, "learning_rate": 1.5347597422588644e-05, "loss": 1.269, "step": 236500 }, { "epoch": 0.7, "learning_rate": 1.5337761476336192e-05, "loss": 1.2689, "step": 237000 }, { "epoch": 0.7, "learning_rate": 1.5327925530083733e-05, "loss": 1.2596, "step": 237500 }, { "epoch": 0.7, "learning_rate": 1.531808958383128e-05, "loss": 1.2513, "step": 238000 }, { "epoch": 0.7, "learning_rate": 1.5308253637578825e-05, "loss": 1.2369, "step": 238500 }, { "epoch": 0.71, "learning_rate": 1.529841769132637e-05, "loss": 1.2498, "step": 239000 }, { "epoch": 0.71, "learning_rate": 1.5288581745073914e-05, "loss": 1.2435, "step": 239500 }, { "epoch": 0.71, "learning_rate": 1.5278745798821458e-05, "loss": 1.2435, "step": 240000 }, { "epoch": 0.71, "learning_rate": 1.5268909852569002e-05, "loss": 1.2446, "step": 240500 }, { "epoch": 0.71, "learning_rate": 1.525907390631655e-05, "loss": 1.2372, "step": 241000 }, { "epoch": 0.71, "learning_rate": 1.5249237960064091e-05, "loss": 1.2533, "step": 241500 }, { "epoch": 0.71, "learning_rate": 1.5239402013811637e-05, "loss": 1.2619, "step": 242000 }, { "epoch": 0.72, "learning_rate": 1.5229566067559183e-05, "loss": 1.2434, "step": 242500 }, { "epoch": 0.72, "learning_rate": 1.5219730121306726e-05, "loss": 1.2754, "step": 243000 }, { "epoch": 0.72, "learning_rate": 1.5209894175054272e-05, "loss": 1.2779, "step": 243500 }, { "epoch": 0.72, "learning_rate": 1.5200058228801816e-05, "loss": 1.2279, "step": 244000 }, { "epoch": 0.72, "learning_rate": 1.519022228254936e-05, "loss": 1.2633, "step": 244500 }, { "epoch": 0.72, "learning_rate": 1.5180386336296906e-05, "loss": 1.2464, "step": 245000 }, { "epoch": 0.72, "learning_rate": 1.5170550390044449e-05, "loss": 1.2565, "step": 245500 }, { "epoch": 0.73, "learning_rate": 1.5160714443791995e-05, "loss": 1.26, "step": 246000 }, { "epoch": 0.73, "learning_rate": 1.5150878497539539e-05, "loss": 1.2631, "step": 246500 }, { "epoch": 0.73, "learning_rate": 1.5141042551287083e-05, "loss": 1.2462, "step": 247000 }, { "epoch": 0.73, "learning_rate": 1.513120660503463e-05, "loss": 1.2387, "step": 247500 }, { "epoch": 0.73, "learning_rate": 1.5121370658782172e-05, "loss": 1.2528, "step": 248000 }, { "epoch": 0.73, "learning_rate": 1.5111534712529718e-05, "loss": 1.2507, "step": 248500 }, { "epoch": 0.73, "learning_rate": 1.5101698766277262e-05, "loss": 1.26, "step": 249000 }, { "epoch": 0.74, "learning_rate": 1.5091862820024807e-05, "loss": 1.2711, "step": 249500 }, { "epoch": 0.74, "learning_rate": 1.5082026873772353e-05, "loss": 1.2493, "step": 250000 }, { "epoch": 0.74, "learning_rate": 1.5072190927519897e-05, "loss": 1.2464, "step": 250500 }, { "epoch": 0.74, "learning_rate": 1.5062354981267441e-05, "loss": 1.2663, "step": 251000 }, { "epoch": 0.74, "learning_rate": 1.5052519035014986e-05, "loss": 1.25, "step": 251500 }, { "epoch": 0.74, "learning_rate": 1.504268308876253e-05, "loss": 1.2644, "step": 252000 }, { "epoch": 0.75, "learning_rate": 1.5032847142510076e-05, "loss": 1.2575, "step": 252500 }, { "epoch": 0.75, "learning_rate": 1.502301119625762e-05, "loss": 1.2687, "step": 253000 }, { "epoch": 0.75, "learning_rate": 1.5013175250005165e-05, "loss": 1.2522, "step": 253500 }, { "epoch": 0.75, "learning_rate": 1.500333930375271e-05, "loss": 1.2493, "step": 254000 }, { "epoch": 0.75, "learning_rate": 1.4993503357500255e-05, "loss": 1.2599, "step": 254500 }, { "epoch": 0.75, "learning_rate": 1.49836674112478e-05, "loss": 1.243, "step": 255000 }, { "epoch": 0.75, "learning_rate": 1.4973831464995344e-05, "loss": 1.2451, "step": 255500 }, { "epoch": 0.76, "learning_rate": 1.4963995518742888e-05, "loss": 1.2269, "step": 256000 }, { "epoch": 0.76, "learning_rate": 1.4954159572490434e-05, "loss": 1.2432, "step": 256500 }, { "epoch": 0.76, "learning_rate": 1.4944323626237978e-05, "loss": 1.238, "step": 257000 }, { "epoch": 0.76, "learning_rate": 1.4934487679985523e-05, "loss": 1.2555, "step": 257500 }, { "epoch": 0.76, "learning_rate": 1.4924651733733067e-05, "loss": 1.2543, "step": 258000 }, { "epoch": 0.76, "learning_rate": 1.4914815787480611e-05, "loss": 1.2574, "step": 258500 }, { "epoch": 0.76, "learning_rate": 1.4904979841228157e-05, "loss": 1.2517, "step": 259000 }, { "epoch": 0.77, "learning_rate": 1.4895143894975702e-05, "loss": 1.256, "step": 259500 }, { "epoch": 0.77, "learning_rate": 1.4885307948723246e-05, "loss": 1.2497, "step": 260000 }, { "epoch": 0.77, "learning_rate": 1.487547200247079e-05, "loss": 1.2357, "step": 260500 }, { "epoch": 0.77, "learning_rate": 1.4865636056218336e-05, "loss": 1.2475, "step": 261000 }, { "epoch": 0.77, "learning_rate": 1.485580010996588e-05, "loss": 1.2598, "step": 261500 }, { "epoch": 0.77, "learning_rate": 1.4845964163713425e-05, "loss": 1.2668, "step": 262000 }, { "epoch": 0.77, "learning_rate": 1.483612821746097e-05, "loss": 1.257, "step": 262500 }, { "epoch": 0.78, "learning_rate": 1.4826292271208514e-05, "loss": 1.2199, "step": 263000 }, { "epoch": 0.78, "learning_rate": 1.481645632495606e-05, "loss": 1.271, "step": 263500 }, { "epoch": 0.78, "learning_rate": 1.4806620378703604e-05, "loss": 1.234, "step": 264000 }, { "epoch": 0.78, "learning_rate": 1.4796784432451148e-05, "loss": 1.2621, "step": 264500 }, { "epoch": 0.78, "learning_rate": 1.4786948486198694e-05, "loss": 1.2504, "step": 265000 }, { "epoch": 0.78, "learning_rate": 1.4777112539946239e-05, "loss": 1.2433, "step": 265500 }, { "epoch": 0.78, "learning_rate": 1.4767276593693783e-05, "loss": 1.2542, "step": 266000 }, { "epoch": 0.79, "learning_rate": 1.4757440647441327e-05, "loss": 1.251, "step": 266500 }, { "epoch": 0.79, "learning_rate": 1.4747604701188871e-05, "loss": 1.2421, "step": 267000 }, { "epoch": 0.79, "learning_rate": 1.4737768754936417e-05, "loss": 1.2496, "step": 267500 }, { "epoch": 0.79, "learning_rate": 1.4727932808683962e-05, "loss": 1.2573, "step": 268000 }, { "epoch": 0.79, "learning_rate": 1.4718096862431506e-05, "loss": 1.2486, "step": 268500 }, { "epoch": 0.79, "learning_rate": 1.470826091617905e-05, "loss": 1.2197, "step": 269000 }, { "epoch": 0.8, "learning_rate": 1.4698424969926595e-05, "loss": 1.2544, "step": 269500 }, { "epoch": 0.8, "learning_rate": 1.468858902367414e-05, "loss": 1.2473, "step": 270000 }, { "epoch": 0.8, "learning_rate": 1.4678753077421685e-05, "loss": 1.2292, "step": 270500 }, { "epoch": 0.8, "learning_rate": 1.466891713116923e-05, "loss": 1.2666, "step": 271000 }, { "epoch": 0.8, "learning_rate": 1.4659081184916775e-05, "loss": 1.2295, "step": 271500 }, { "epoch": 0.8, "learning_rate": 1.4649245238664318e-05, "loss": 1.2416, "step": 272000 }, { "epoch": 0.8, "learning_rate": 1.4639409292411864e-05, "loss": 1.2556, "step": 272500 }, { "epoch": 0.81, "learning_rate": 1.4629573346159408e-05, "loss": 1.2479, "step": 273000 }, { "epoch": 0.81, "learning_rate": 1.4619737399906953e-05, "loss": 1.2331, "step": 273500 }, { "epoch": 0.81, "learning_rate": 1.4609901453654499e-05, "loss": 1.2521, "step": 274000 }, { "epoch": 0.81, "learning_rate": 1.4600065507402043e-05, "loss": 1.2412, "step": 274500 }, { "epoch": 0.81, "learning_rate": 1.4590229561149587e-05, "loss": 1.2433, "step": 275000 }, { "epoch": 0.81, "learning_rate": 1.4580393614897133e-05, "loss": 1.2536, "step": 275500 }, { "epoch": 0.81, "learning_rate": 1.4570557668644676e-05, "loss": 1.2469, "step": 276000 }, { "epoch": 0.82, "learning_rate": 1.4560721722392222e-05, "loss": 1.2255, "step": 276500 }, { "epoch": 0.82, "learning_rate": 1.4550885776139766e-05, "loss": 1.2355, "step": 277000 }, { "epoch": 0.82, "learning_rate": 1.454104982988731e-05, "loss": 1.2505, "step": 277500 }, { "epoch": 0.82, "learning_rate": 1.4531213883634857e-05, "loss": 1.2468, "step": 278000 }, { "epoch": 0.82, "learning_rate": 1.45213779373824e-05, "loss": 1.2475, "step": 278500 }, { "epoch": 0.82, "learning_rate": 1.4511541991129945e-05, "loss": 1.2469, "step": 279000 }, { "epoch": 0.82, "learning_rate": 1.450170604487749e-05, "loss": 1.2362, "step": 279500 }, { "epoch": 0.83, "learning_rate": 1.4491870098625034e-05, "loss": 1.2484, "step": 280000 }, { "epoch": 0.83, "learning_rate": 1.448203415237258e-05, "loss": 1.2311, "step": 280500 }, { "epoch": 0.83, "learning_rate": 1.4472198206120123e-05, "loss": 1.2485, "step": 281000 }, { "epoch": 0.83, "learning_rate": 1.4462362259867669e-05, "loss": 1.2246, "step": 281500 }, { "epoch": 0.83, "learning_rate": 1.4452526313615215e-05, "loss": 1.228, "step": 282000 }, { "epoch": 0.83, "learning_rate": 1.4442690367362757e-05, "loss": 1.2429, "step": 282500 }, { "epoch": 0.84, "learning_rate": 1.4432854421110303e-05, "loss": 1.2335, "step": 283000 }, { "epoch": 0.84, "learning_rate": 1.4423018474857846e-05, "loss": 1.2373, "step": 283500 }, { "epoch": 0.84, "learning_rate": 1.4413182528605392e-05, "loss": 1.2355, "step": 284000 }, { "epoch": 0.84, "learning_rate": 1.4403346582352938e-05, "loss": 1.2575, "step": 284500 }, { "epoch": 0.84, "learning_rate": 1.439351063610048e-05, "loss": 1.2745, "step": 285000 }, { "epoch": 0.84, "learning_rate": 1.4383674689848027e-05, "loss": 1.2455, "step": 285500 }, { "epoch": 0.84, "learning_rate": 1.4373838743595573e-05, "loss": 1.243, "step": 286000 }, { "epoch": 0.85, "learning_rate": 1.4364002797343115e-05, "loss": 1.2297, "step": 286500 }, { "epoch": 0.85, "learning_rate": 1.4354166851090661e-05, "loss": 1.2312, "step": 287000 }, { "epoch": 0.85, "learning_rate": 1.4344330904838204e-05, "loss": 1.2534, "step": 287500 }, { "epoch": 0.85, "learning_rate": 1.433449495858575e-05, "loss": 1.2487, "step": 288000 }, { "epoch": 0.85, "learning_rate": 1.4324659012333296e-05, "loss": 1.2582, "step": 288500 }, { "epoch": 0.85, "learning_rate": 1.4314823066080838e-05, "loss": 1.239, "step": 289000 }, { "epoch": 0.85, "learning_rate": 1.4304987119828384e-05, "loss": 1.2532, "step": 289500 }, { "epoch": 0.86, "learning_rate": 1.4295151173575927e-05, "loss": 1.2375, "step": 290000 }, { "epoch": 0.86, "learning_rate": 1.4285315227323473e-05, "loss": 1.2517, "step": 290500 }, { "epoch": 0.86, "learning_rate": 1.4275479281071019e-05, "loss": 1.2348, "step": 291000 }, { "epoch": 0.86, "learning_rate": 1.4265643334818562e-05, "loss": 1.2334, "step": 291500 }, { "epoch": 0.86, "learning_rate": 1.4255807388566108e-05, "loss": 1.2467, "step": 292000 }, { "epoch": 0.86, "learning_rate": 1.424597144231365e-05, "loss": 1.251, "step": 292500 }, { "epoch": 0.86, "learning_rate": 1.4236135496061196e-05, "loss": 1.2342, "step": 293000 }, { "epoch": 0.87, "learning_rate": 1.4226299549808742e-05, "loss": 1.2522, "step": 293500 }, { "epoch": 0.87, "learning_rate": 1.4216463603556285e-05, "loss": 1.2348, "step": 294000 }, { "epoch": 0.87, "learning_rate": 1.4206627657303831e-05, "loss": 1.2525, "step": 294500 }, { "epoch": 0.87, "learning_rate": 1.4196791711051374e-05, "loss": 1.2465, "step": 295000 }, { "epoch": 0.87, "learning_rate": 1.418695576479892e-05, "loss": 1.2419, "step": 295500 }, { "epoch": 0.87, "learning_rate": 1.4177119818546466e-05, "loss": 1.2452, "step": 296000 }, { "epoch": 0.87, "learning_rate": 1.4167283872294008e-05, "loss": 1.2346, "step": 296500 }, { "epoch": 0.88, "learning_rate": 1.4157447926041554e-05, "loss": 1.2334, "step": 297000 }, { "epoch": 0.88, "learning_rate": 1.41476119797891e-05, "loss": 1.2535, "step": 297500 }, { "epoch": 0.88, "learning_rate": 1.4137776033536643e-05, "loss": 1.2439, "step": 298000 }, { "epoch": 0.88, "learning_rate": 1.4127940087284189e-05, "loss": 1.2282, "step": 298500 }, { "epoch": 0.88, "learning_rate": 1.4118104141031732e-05, "loss": 1.2233, "step": 299000 }, { "epoch": 0.88, "learning_rate": 1.4108268194779278e-05, "loss": 1.2499, "step": 299500 }, { "epoch": 0.89, "learning_rate": 1.4098432248526824e-05, "loss": 1.2695, "step": 300000 }, { "epoch": 0.89, "learning_rate": 1.4088596302274366e-05, "loss": 1.2485, "step": 300500 }, { "epoch": 0.89, "learning_rate": 1.4078760356021912e-05, "loss": 1.2308, "step": 301000 }, { "epoch": 0.89, "learning_rate": 1.4068924409769455e-05, "loss": 1.2181, "step": 301500 }, { "epoch": 0.89, "learning_rate": 1.4059088463517001e-05, "loss": 1.2476, "step": 302000 }, { "epoch": 0.89, "learning_rate": 1.4049252517264547e-05, "loss": 1.2446, "step": 302500 }, { "epoch": 0.89, "learning_rate": 1.403941657101209e-05, "loss": 1.2432, "step": 303000 }, { "epoch": 0.9, "learning_rate": 1.4029580624759636e-05, "loss": 1.2306, "step": 303500 }, { "epoch": 0.9, "learning_rate": 1.4019744678507178e-05, "loss": 1.2565, "step": 304000 }, { "epoch": 0.9, "learning_rate": 1.4009908732254724e-05, "loss": 1.2502, "step": 304500 }, { "epoch": 0.9, "learning_rate": 1.400007278600227e-05, "loss": 1.2301, "step": 305000 }, { "epoch": 0.9, "learning_rate": 1.3990236839749813e-05, "loss": 1.2445, "step": 305500 }, { "epoch": 0.9, "learning_rate": 1.3980400893497359e-05, "loss": 1.2574, "step": 306000 }, { "epoch": 0.9, "learning_rate": 1.3970564947244901e-05, "loss": 1.2414, "step": 306500 }, { "epoch": 0.91, "learning_rate": 1.3960729000992448e-05, "loss": 1.2431, "step": 307000 }, { "epoch": 0.91, "learning_rate": 1.3950893054739994e-05, "loss": 1.239, "step": 307500 }, { "epoch": 0.91, "learning_rate": 1.3941057108487536e-05, "loss": 1.2272, "step": 308000 }, { "epoch": 0.91, "learning_rate": 1.3931221162235082e-05, "loss": 1.2437, "step": 308500 }, { "epoch": 0.91, "learning_rate": 1.3921385215982628e-05, "loss": 1.2109, "step": 309000 }, { "epoch": 0.91, "learning_rate": 1.391154926973017e-05, "loss": 1.2431, "step": 309500 }, { "epoch": 0.91, "learning_rate": 1.3901713323477717e-05, "loss": 1.252, "step": 310000 }, { "epoch": 0.92, "learning_rate": 1.389187737722526e-05, "loss": 1.2414, "step": 310500 }, { "epoch": 0.92, "learning_rate": 1.3882041430972805e-05, "loss": 1.2568, "step": 311000 }, { "epoch": 0.92, "learning_rate": 1.3872205484720351e-05, "loss": 1.2266, "step": 311500 }, { "epoch": 0.92, "learning_rate": 1.3862369538467894e-05, "loss": 1.244, "step": 312000 }, { "epoch": 0.92, "learning_rate": 1.385253359221544e-05, "loss": 1.2382, "step": 312500 }, { "epoch": 0.92, "learning_rate": 1.3842697645962984e-05, "loss": 1.2589, "step": 313000 }, { "epoch": 0.93, "learning_rate": 1.3832861699710529e-05, "loss": 1.2349, "step": 313500 }, { "epoch": 0.93, "learning_rate": 1.3823025753458075e-05, "loss": 1.2403, "step": 314000 }, { "epoch": 0.93, "learning_rate": 1.3813189807205617e-05, "loss": 1.2298, "step": 314500 }, { "epoch": 0.93, "learning_rate": 1.3803353860953163e-05, "loss": 1.2362, "step": 315000 }, { "epoch": 0.93, "learning_rate": 1.3793517914700708e-05, "loss": 1.2459, "step": 315500 }, { "epoch": 0.93, "learning_rate": 1.3783681968448252e-05, "loss": 1.2391, "step": 316000 }, { "epoch": 0.93, "learning_rate": 1.3773846022195798e-05, "loss": 1.2645, "step": 316500 }, { "epoch": 0.94, "learning_rate": 1.376401007594334e-05, "loss": 1.248, "step": 317000 }, { "epoch": 0.94, "learning_rate": 1.3754174129690887e-05, "loss": 1.2386, "step": 317500 }, { "epoch": 0.94, "learning_rate": 1.3744338183438433e-05, "loss": 1.2263, "step": 318000 }, { "epoch": 0.94, "learning_rate": 1.3734502237185975e-05, "loss": 1.2357, "step": 318500 }, { "epoch": 0.94, "learning_rate": 1.3724666290933521e-05, "loss": 1.2446, "step": 319000 }, { "epoch": 0.94, "learning_rate": 1.3714830344681066e-05, "loss": 1.2369, "step": 319500 }, { "epoch": 0.94, "learning_rate": 1.370499439842861e-05, "loss": 1.2214, "step": 320000 }, { "epoch": 0.95, "learning_rate": 1.3695158452176156e-05, "loss": 1.2376, "step": 320500 }, { "epoch": 0.95, "learning_rate": 1.3685322505923699e-05, "loss": 1.2156, "step": 321000 }, { "epoch": 0.95, "learning_rate": 1.3675486559671245e-05, "loss": 1.2295, "step": 321500 }, { "epoch": 0.95, "learning_rate": 1.3665650613418789e-05, "loss": 1.2458, "step": 322000 }, { "epoch": 0.95, "learning_rate": 1.3655814667166333e-05, "loss": 1.2243, "step": 322500 }, { "epoch": 0.95, "learning_rate": 1.364597872091388e-05, "loss": 1.2414, "step": 323000 }, { "epoch": 0.95, "learning_rate": 1.3636142774661424e-05, "loss": 1.2497, "step": 323500 }, { "epoch": 0.96, "learning_rate": 1.3626306828408968e-05, "loss": 1.2366, "step": 324000 }, { "epoch": 0.96, "learning_rate": 1.3616470882156512e-05, "loss": 1.2448, "step": 324500 }, { "epoch": 0.96, "learning_rate": 1.3606634935904057e-05, "loss": 1.2386, "step": 325000 }, { "epoch": 0.96, "learning_rate": 1.3596798989651603e-05, "loss": 1.2223, "step": 325500 }, { "epoch": 0.96, "learning_rate": 1.3586963043399147e-05, "loss": 1.249, "step": 326000 }, { "epoch": 0.96, "learning_rate": 1.3577127097146691e-05, "loss": 1.2225, "step": 326500 }, { "epoch": 0.96, "learning_rate": 1.3567291150894236e-05, "loss": 1.2302, "step": 327000 }, { "epoch": 0.97, "learning_rate": 1.355745520464178e-05, "loss": 1.2524, "step": 327500 }, { "epoch": 0.97, "learning_rate": 1.3547619258389326e-05, "loss": 1.2417, "step": 328000 }, { "epoch": 0.97, "learning_rate": 1.353778331213687e-05, "loss": 1.233, "step": 328500 }, { "epoch": 0.97, "learning_rate": 1.3527947365884415e-05, "loss": 1.2499, "step": 329000 }, { "epoch": 0.97, "learning_rate": 1.351811141963196e-05, "loss": 1.2304, "step": 329500 }, { "epoch": 0.97, "learning_rate": 1.3508275473379505e-05, "loss": 1.2491, "step": 330000 }, { "epoch": 0.98, "learning_rate": 1.3498439527127049e-05, "loss": 1.2437, "step": 330500 }, { "epoch": 0.98, "learning_rate": 1.3488603580874593e-05, "loss": 1.2237, "step": 331000 }, { "epoch": 0.98, "learning_rate": 1.3478767634622138e-05, "loss": 1.2305, "step": 331500 }, { "epoch": 0.98, "learning_rate": 1.3468931688369684e-05, "loss": 1.2534, "step": 332000 }, { "epoch": 0.98, "learning_rate": 1.3459095742117228e-05, "loss": 1.2296, "step": 332500 }, { "epoch": 0.98, "learning_rate": 1.3449259795864772e-05, "loss": 1.2293, "step": 333000 }, { "epoch": 0.98, "learning_rate": 1.3439423849612317e-05, "loss": 1.2255, "step": 333500 }, { "epoch": 0.99, "learning_rate": 1.3429587903359863e-05, "loss": 1.2444, "step": 334000 }, { "epoch": 0.99, "learning_rate": 1.3419751957107407e-05, "loss": 1.2444, "step": 334500 }, { "epoch": 0.99, "learning_rate": 1.3409916010854951e-05, "loss": 1.2465, "step": 335000 }, { "epoch": 0.99, "learning_rate": 1.3400080064602496e-05, "loss": 1.2527, "step": 335500 }, { "epoch": 0.99, "learning_rate": 1.339024411835004e-05, "loss": 1.233, "step": 336000 }, { "epoch": 0.99, "learning_rate": 1.3380408172097586e-05, "loss": 1.2359, "step": 336500 }, { "epoch": 0.99, "learning_rate": 1.337057222584513e-05, "loss": 1.252, "step": 337000 }, { "epoch": 1.0, "learning_rate": 1.3360736279592675e-05, "loss": 1.2197, "step": 337500 }, { "epoch": 1.0, "learning_rate": 1.3350900333340219e-05, "loss": 1.2147, "step": 338000 }, { "epoch": 1.0, "learning_rate": 1.3341064387087763e-05, "loss": 1.2461, "step": 338500 }, { "epoch": 1.0, "eval_bleu": 41.7587, "eval_gen_len": 17.8683, "eval_loss": 1.1849511861801147, "eval_runtime": 940.2074, "eval_samples_per_second": 58.254, "eval_steps_per_second": 3.642, "step": 338893 }, { "epoch": 1.0, "learning_rate": 1.333122844083531e-05, "loss": 1.2142, "step": 339000 }, { "epoch": 1.0, "learning_rate": 1.3321392494582854e-05, "loss": 1.1509, "step": 339500 }, { "epoch": 1.0, "learning_rate": 1.3311556548330398e-05, "loss": 1.1624, "step": 340000 }, { "epoch": 1.0, "learning_rate": 1.3301720602077944e-05, "loss": 1.161, "step": 340500 }, { "epoch": 1.01, "learning_rate": 1.3291884655825488e-05, "loss": 1.1554, "step": 341000 }, { "epoch": 1.01, "learning_rate": 1.3282048709573033e-05, "loss": 1.1563, "step": 341500 }, { "epoch": 1.01, "learning_rate": 1.3272212763320577e-05, "loss": 1.1638, "step": 342000 }, { "epoch": 1.01, "learning_rate": 1.3262376817068121e-05, "loss": 1.1567, "step": 342500 }, { "epoch": 1.01, "learning_rate": 1.3252540870815667e-05, "loss": 1.1689, "step": 343000 }, { "epoch": 1.01, "learning_rate": 1.3242704924563212e-05, "loss": 1.1721, "step": 343500 }, { "epoch": 1.02, "learning_rate": 1.3232868978310756e-05, "loss": 1.1746, "step": 344000 }, { "epoch": 1.02, "learning_rate": 1.3223033032058302e-05, "loss": 1.1689, "step": 344500 }, { "epoch": 1.02, "learning_rate": 1.3213197085805845e-05, "loss": 1.1557, "step": 345000 }, { "epoch": 1.02, "learning_rate": 1.320336113955339e-05, "loss": 1.16, "step": 345500 }, { "epoch": 1.02, "learning_rate": 1.3193525193300935e-05, "loss": 1.1772, "step": 346000 }, { "epoch": 1.02, "learning_rate": 1.318368924704848e-05, "loss": 1.1863, "step": 346500 }, { "epoch": 1.02, "learning_rate": 1.3173853300796025e-05, "loss": 1.1549, "step": 347000 }, { "epoch": 1.03, "learning_rate": 1.3164017354543568e-05, "loss": 1.1524, "step": 347500 }, { "epoch": 1.03, "learning_rate": 1.3154181408291114e-05, "loss": 1.1715, "step": 348000 }, { "epoch": 1.03, "learning_rate": 1.3144345462038658e-05, "loss": 1.1835, "step": 348500 }, { "epoch": 1.03, "learning_rate": 1.3134509515786203e-05, "loss": 1.1663, "step": 349000 }, { "epoch": 1.03, "learning_rate": 1.3124673569533749e-05, "loss": 1.1563, "step": 349500 }, { "epoch": 1.03, "learning_rate": 1.3114837623281293e-05, "loss": 1.1627, "step": 350000 }, { "epoch": 1.03, "learning_rate": 1.3105001677028837e-05, "loss": 1.1627, "step": 350500 }, { "epoch": 1.04, "learning_rate": 1.3095165730776383e-05, "loss": 1.1584, "step": 351000 }, { "epoch": 1.04, "learning_rate": 1.3085329784523926e-05, "loss": 1.1698, "step": 351500 }, { "epoch": 1.04, "learning_rate": 1.3075493838271472e-05, "loss": 1.1529, "step": 352000 }, { "epoch": 1.04, "learning_rate": 1.3065657892019016e-05, "loss": 1.1636, "step": 352500 }, { "epoch": 1.04, "learning_rate": 1.305582194576656e-05, "loss": 1.1672, "step": 353000 }, { "epoch": 1.04, "learning_rate": 1.3045985999514106e-05, "loss": 1.1669, "step": 353500 }, { "epoch": 1.04, "learning_rate": 1.3036150053261649e-05, "loss": 1.1701, "step": 354000 }, { "epoch": 1.05, "learning_rate": 1.3026314107009195e-05, "loss": 1.1652, "step": 354500 }, { "epoch": 1.05, "learning_rate": 1.3016478160756741e-05, "loss": 1.1614, "step": 355000 }, { "epoch": 1.05, "learning_rate": 1.3006642214504284e-05, "loss": 1.152, "step": 355500 }, { "epoch": 1.05, "learning_rate": 1.299680626825183e-05, "loss": 1.1669, "step": 356000 }, { "epoch": 1.05, "learning_rate": 1.2986970321999372e-05, "loss": 1.1557, "step": 356500 }, { "epoch": 1.05, "learning_rate": 1.2977134375746918e-05, "loss": 1.1678, "step": 357000 }, { "epoch": 1.05, "learning_rate": 1.2967298429494464e-05, "loss": 1.1809, "step": 357500 }, { "epoch": 1.06, "learning_rate": 1.2957462483242007e-05, "loss": 1.149, "step": 358000 }, { "epoch": 1.06, "learning_rate": 1.2947626536989553e-05, "loss": 1.1483, "step": 358500 }, { "epoch": 1.06, "learning_rate": 1.2937790590737096e-05, "loss": 1.1851, "step": 359000 }, { "epoch": 1.06, "learning_rate": 1.2927954644484642e-05, "loss": 1.1602, "step": 359500 }, { "epoch": 1.06, "learning_rate": 1.2918118698232188e-05, "loss": 1.1571, "step": 360000 }, { "epoch": 1.06, "learning_rate": 1.290828275197973e-05, "loss": 1.1695, "step": 360500 }, { "epoch": 1.07, "learning_rate": 1.2898446805727276e-05, "loss": 1.175, "step": 361000 }, { "epoch": 1.07, "learning_rate": 1.2888610859474822e-05, "loss": 1.1789, "step": 361500 }, { "epoch": 1.07, "learning_rate": 1.2878774913222365e-05, "loss": 1.1413, "step": 362000 }, { "epoch": 1.07, "learning_rate": 1.2868938966969911e-05, "loss": 1.1697, "step": 362500 }, { "epoch": 1.07, "learning_rate": 1.2859103020717454e-05, "loss": 1.1816, "step": 363000 }, { "epoch": 1.07, "learning_rate": 1.2849267074465e-05, "loss": 1.1584, "step": 363500 }, { "epoch": 1.07, "learning_rate": 1.2839431128212546e-05, "loss": 1.1594, "step": 364000 }, { "epoch": 1.08, "learning_rate": 1.2829595181960088e-05, "loss": 1.1766, "step": 364500 }, { "epoch": 1.08, "learning_rate": 1.2819759235707634e-05, "loss": 1.1863, "step": 365000 }, { "epoch": 1.08, "learning_rate": 1.2809923289455177e-05, "loss": 1.1621, "step": 365500 }, { "epoch": 1.08, "learning_rate": 1.2800087343202723e-05, "loss": 1.1382, "step": 366000 }, { "epoch": 1.08, "learning_rate": 1.2790251396950269e-05, "loss": 1.1748, "step": 366500 }, { "epoch": 1.08, "learning_rate": 1.2780415450697812e-05, "loss": 1.1621, "step": 367000 }, { "epoch": 1.08, "learning_rate": 1.2770579504445358e-05, "loss": 1.1666, "step": 367500 }, { "epoch": 1.09, "learning_rate": 1.27607435581929e-05, "loss": 1.1652, "step": 368000 }, { "epoch": 1.09, "learning_rate": 1.2750907611940446e-05, "loss": 1.1769, "step": 368500 }, { "epoch": 1.09, "learning_rate": 1.2741071665687992e-05, "loss": 1.1576, "step": 369000 }, { "epoch": 1.09, "learning_rate": 1.2731235719435535e-05, "loss": 1.1668, "step": 369500 }, { "epoch": 1.09, "learning_rate": 1.2721399773183081e-05, "loss": 1.1583, "step": 370000 }, { "epoch": 1.09, "learning_rate": 1.2711563826930624e-05, "loss": 1.171, "step": 370500 }, { "epoch": 1.09, "learning_rate": 1.270172788067817e-05, "loss": 1.1497, "step": 371000 }, { "epoch": 1.1, "learning_rate": 1.2691891934425716e-05, "loss": 1.1588, "step": 371500 }, { "epoch": 1.1, "learning_rate": 1.2682055988173258e-05, "loss": 1.1529, "step": 372000 }, { "epoch": 1.1, "learning_rate": 1.2672220041920804e-05, "loss": 1.1537, "step": 372500 }, { "epoch": 1.1, "learning_rate": 1.266238409566835e-05, "loss": 1.1736, "step": 373000 }, { "epoch": 1.1, "learning_rate": 1.2652548149415893e-05, "loss": 1.1531, "step": 373500 }, { "epoch": 1.1, "learning_rate": 1.2642712203163439e-05, "loss": 1.1882, "step": 374000 }, { "epoch": 1.11, "learning_rate": 1.2632876256910981e-05, "loss": 1.167, "step": 374500 }, { "epoch": 1.11, "learning_rate": 1.2623040310658527e-05, "loss": 1.1592, "step": 375000 }, { "epoch": 1.11, "learning_rate": 1.2613204364406073e-05, "loss": 1.1612, "step": 375500 }, { "epoch": 1.11, "learning_rate": 1.2603368418153616e-05, "loss": 1.1613, "step": 376000 }, { "epoch": 1.11, "learning_rate": 1.2593532471901162e-05, "loss": 1.1553, "step": 376500 }, { "epoch": 1.11, "learning_rate": 1.2583696525648705e-05, "loss": 1.1823, "step": 377000 }, { "epoch": 1.11, "learning_rate": 1.257386057939625e-05, "loss": 1.166, "step": 377500 }, { "epoch": 1.12, "learning_rate": 1.2564024633143797e-05, "loss": 1.1855, "step": 378000 }, { "epoch": 1.12, "learning_rate": 1.255418868689134e-05, "loss": 1.171, "step": 378500 }, { "epoch": 1.12, "learning_rate": 1.2544352740638885e-05, "loss": 1.1659, "step": 379000 }, { "epoch": 1.12, "learning_rate": 1.2534516794386428e-05, "loss": 1.1614, "step": 379500 }, { "epoch": 1.12, "learning_rate": 1.2524680848133974e-05, "loss": 1.1571, "step": 380000 }, { "epoch": 1.12, "learning_rate": 1.251484490188152e-05, "loss": 1.175, "step": 380500 }, { "epoch": 1.12, "learning_rate": 1.2505008955629063e-05, "loss": 1.1857, "step": 381000 }, { "epoch": 1.13, "learning_rate": 1.2495173009376609e-05, "loss": 1.1705, "step": 381500 }, { "epoch": 1.13, "learning_rate": 1.2485337063124155e-05, "loss": 1.1689, "step": 382000 }, { "epoch": 1.13, "learning_rate": 1.2475501116871697e-05, "loss": 1.1632, "step": 382500 }, { "epoch": 1.13, "learning_rate": 1.2465665170619243e-05, "loss": 1.181, "step": 383000 }, { "epoch": 1.13, "learning_rate": 1.2455829224366786e-05, "loss": 1.202, "step": 383500 }, { "epoch": 1.13, "learning_rate": 1.2445993278114332e-05, "loss": 1.164, "step": 384000 }, { "epoch": 1.13, "learning_rate": 1.2436157331861878e-05, "loss": 1.1589, "step": 384500 }, { "epoch": 1.14, "learning_rate": 1.242632138560942e-05, "loss": 1.1587, "step": 385000 }, { "epoch": 1.14, "learning_rate": 1.2416485439356967e-05, "loss": 1.1774, "step": 385500 }, { "epoch": 1.14, "learning_rate": 1.240664949310451e-05, "loss": 1.1614, "step": 386000 }, { "epoch": 1.14, "learning_rate": 1.2396813546852055e-05, "loss": 1.1882, "step": 386500 }, { "epoch": 1.14, "learning_rate": 1.2386977600599601e-05, "loss": 1.1725, "step": 387000 }, { "epoch": 1.14, "learning_rate": 1.2377141654347144e-05, "loss": 1.1617, "step": 387500 }, { "epoch": 1.14, "learning_rate": 1.236730570809469e-05, "loss": 1.1651, "step": 388000 }, { "epoch": 1.15, "learning_rate": 1.2357469761842234e-05, "loss": 1.1787, "step": 388500 }, { "epoch": 1.15, "learning_rate": 1.2347633815589779e-05, "loss": 1.1894, "step": 389000 }, { "epoch": 1.15, "learning_rate": 1.2337797869337325e-05, "loss": 1.1567, "step": 389500 }, { "epoch": 1.15, "learning_rate": 1.2327961923084867e-05, "loss": 1.1645, "step": 390000 }, { "epoch": 1.15, "learning_rate": 1.2318125976832413e-05, "loss": 1.1714, "step": 390500 }, { "epoch": 1.15, "learning_rate": 1.2308290030579958e-05, "loss": 1.168, "step": 391000 }, { "epoch": 1.16, "learning_rate": 1.2298454084327502e-05, "loss": 1.1442, "step": 391500 }, { "epoch": 1.16, "learning_rate": 1.2288618138075048e-05, "loss": 1.1823, "step": 392000 }, { "epoch": 1.16, "learning_rate": 1.2278782191822592e-05, "loss": 1.1717, "step": 392500 }, { "epoch": 1.16, "learning_rate": 1.2268946245570137e-05, "loss": 1.1749, "step": 393000 }, { "epoch": 1.16, "learning_rate": 1.2259110299317683e-05, "loss": 1.1818, "step": 393500 }, { "epoch": 1.16, "learning_rate": 1.2249274353065225e-05, "loss": 1.1605, "step": 394000 }, { "epoch": 1.16, "learning_rate": 1.2239438406812771e-05, "loss": 1.1568, "step": 394500 }, { "epoch": 1.17, "learning_rate": 1.2229602460560315e-05, "loss": 1.167, "step": 395000 }, { "epoch": 1.17, "learning_rate": 1.221976651430786e-05, "loss": 1.1675, "step": 395500 }, { "epoch": 1.17, "learning_rate": 1.2209930568055406e-05, "loss": 1.1702, "step": 396000 }, { "epoch": 1.17, "learning_rate": 1.2200094621802948e-05, "loss": 1.1714, "step": 396500 }, { "epoch": 1.17, "learning_rate": 1.2190258675550494e-05, "loss": 1.1743, "step": 397000 }, { "epoch": 1.17, "learning_rate": 1.2180422729298039e-05, "loss": 1.1499, "step": 397500 }, { "epoch": 1.17, "learning_rate": 1.2170586783045583e-05, "loss": 1.1679, "step": 398000 }, { "epoch": 1.18, "learning_rate": 1.2160750836793129e-05, "loss": 1.1718, "step": 398500 }, { "epoch": 1.18, "learning_rate": 1.2150914890540673e-05, "loss": 1.1649, "step": 399000 }, { "epoch": 1.18, "learning_rate": 1.2141078944288218e-05, "loss": 1.1495, "step": 399500 }, { "epoch": 1.18, "learning_rate": 1.2131242998035762e-05, "loss": 1.1706, "step": 400000 }, { "epoch": 1.18, "learning_rate": 1.2121407051783306e-05, "loss": 1.183, "step": 400500 }, { "epoch": 1.18, "learning_rate": 1.2111571105530852e-05, "loss": 1.1501, "step": 401000 }, { "epoch": 1.18, "learning_rate": 1.2101735159278397e-05, "loss": 1.1663, "step": 401500 }, { "epoch": 1.19, "learning_rate": 1.2091899213025941e-05, "loss": 1.1722, "step": 402000 }, { "epoch": 1.19, "learning_rate": 1.2082063266773485e-05, "loss": 1.1781, "step": 402500 }, { "epoch": 1.19, "learning_rate": 1.2072227320521031e-05, "loss": 1.1579, "step": 403000 }, { "epoch": 1.19, "learning_rate": 1.2062391374268576e-05, "loss": 1.1676, "step": 403500 }, { "epoch": 1.19, "learning_rate": 1.205255542801612e-05, "loss": 1.1518, "step": 404000 }, { "epoch": 1.19, "learning_rate": 1.2042719481763664e-05, "loss": 1.16, "step": 404500 }, { "epoch": 1.2, "learning_rate": 1.203288353551121e-05, "loss": 1.1536, "step": 405000 }, { "epoch": 1.2, "learning_rate": 1.2023047589258755e-05, "loss": 1.1737, "step": 405500 }, { "epoch": 1.2, "learning_rate": 1.2013211643006299e-05, "loss": 1.1614, "step": 406000 }, { "epoch": 1.2, "learning_rate": 1.2003375696753843e-05, "loss": 1.1693, "step": 406500 }, { "epoch": 1.2, "learning_rate": 1.1993539750501388e-05, "loss": 1.1578, "step": 407000 }, { "epoch": 1.2, "learning_rate": 1.1983703804248934e-05, "loss": 1.1832, "step": 407500 }, { "epoch": 1.2, "learning_rate": 1.1973867857996478e-05, "loss": 1.1426, "step": 408000 }, { "epoch": 1.21, "learning_rate": 1.1964031911744022e-05, "loss": 1.158, "step": 408500 }, { "epoch": 1.21, "learning_rate": 1.1954195965491567e-05, "loss": 1.1559, "step": 409000 }, { "epoch": 1.21, "learning_rate": 1.1944360019239113e-05, "loss": 1.1603, "step": 409500 }, { "epoch": 1.21, "learning_rate": 1.1934524072986657e-05, "loss": 1.1785, "step": 410000 }, { "epoch": 1.21, "learning_rate": 1.1924688126734201e-05, "loss": 1.1506, "step": 410500 }, { "epoch": 1.21, "learning_rate": 1.1914852180481746e-05, "loss": 1.1769, "step": 411000 }, { "epoch": 1.21, "learning_rate": 1.190501623422929e-05, "loss": 1.1607, "step": 411500 }, { "epoch": 1.22, "learning_rate": 1.1895180287976836e-05, "loss": 1.168, "step": 412000 }, { "epoch": 1.22, "learning_rate": 1.188534434172438e-05, "loss": 1.1544, "step": 412500 }, { "epoch": 1.22, "learning_rate": 1.1875508395471925e-05, "loss": 1.1576, "step": 413000 }, { "epoch": 1.22, "learning_rate": 1.186567244921947e-05, "loss": 1.1769, "step": 413500 }, { "epoch": 1.22, "learning_rate": 1.1855836502967013e-05, "loss": 1.163, "step": 414000 }, { "epoch": 1.22, "learning_rate": 1.184600055671456e-05, "loss": 1.1328, "step": 414500 }, { "epoch": 1.22, "learning_rate": 1.1836164610462103e-05, "loss": 1.169, "step": 415000 }, { "epoch": 1.23, "learning_rate": 1.1826328664209648e-05, "loss": 1.1798, "step": 415500 }, { "epoch": 1.23, "learning_rate": 1.1816492717957194e-05, "loss": 1.1771, "step": 416000 }, { "epoch": 1.23, "learning_rate": 1.1806656771704738e-05, "loss": 1.178, "step": 416500 }, { "epoch": 1.23, "learning_rate": 1.1796820825452282e-05, "loss": 1.1732, "step": 417000 }, { "epoch": 1.23, "learning_rate": 1.1786984879199827e-05, "loss": 1.1842, "step": 417500 }, { "epoch": 1.23, "learning_rate": 1.1777148932947371e-05, "loss": 1.1563, "step": 418000 }, { "epoch": 1.23, "learning_rate": 1.1767312986694917e-05, "loss": 1.1819, "step": 418500 }, { "epoch": 1.24, "learning_rate": 1.1757477040442461e-05, "loss": 1.1475, "step": 419000 }, { "epoch": 1.24, "learning_rate": 1.1747641094190006e-05, "loss": 1.189, "step": 419500 }, { "epoch": 1.24, "learning_rate": 1.1737805147937552e-05, "loss": 1.174, "step": 420000 }, { "epoch": 1.24, "learning_rate": 1.1727969201685094e-05, "loss": 1.1499, "step": 420500 }, { "epoch": 1.24, "learning_rate": 1.171813325543264e-05, "loss": 1.1565, "step": 421000 }, { "epoch": 1.24, "learning_rate": 1.1708297309180185e-05, "loss": 1.1685, "step": 421500 }, { "epoch": 1.25, "learning_rate": 1.1698461362927729e-05, "loss": 1.1713, "step": 422000 }, { "epoch": 1.25, "learning_rate": 1.1688625416675275e-05, "loss": 1.1876, "step": 422500 }, { "epoch": 1.25, "learning_rate": 1.1678789470422818e-05, "loss": 1.1682, "step": 423000 }, { "epoch": 1.25, "learning_rate": 1.1668953524170364e-05, "loss": 1.1463, "step": 423500 }, { "epoch": 1.25, "learning_rate": 1.165911757791791e-05, "loss": 1.1537, "step": 424000 }, { "epoch": 1.25, "learning_rate": 1.1649281631665452e-05, "loss": 1.1525, "step": 424500 }, { "epoch": 1.25, "learning_rate": 1.1639445685412998e-05, "loss": 1.1536, "step": 425000 }, { "epoch": 1.26, "learning_rate": 1.1629609739160543e-05, "loss": 1.17, "step": 425500 }, { "epoch": 1.26, "learning_rate": 1.1619773792908087e-05, "loss": 1.169, "step": 426000 }, { "epoch": 1.26, "learning_rate": 1.1609937846655633e-05, "loss": 1.1608, "step": 426500 }, { "epoch": 1.26, "learning_rate": 1.1600101900403176e-05, "loss": 1.173, "step": 427000 }, { "epoch": 1.26, "learning_rate": 1.1590265954150722e-05, "loss": 1.1712, "step": 427500 }, { "epoch": 1.26, "learning_rate": 1.1580430007898266e-05, "loss": 1.1576, "step": 428000 }, { "epoch": 1.26, "learning_rate": 1.157059406164581e-05, "loss": 1.1808, "step": 428500 }, { "epoch": 1.27, "learning_rate": 1.1560758115393356e-05, "loss": 1.1519, "step": 429000 }, { "epoch": 1.27, "learning_rate": 1.1550922169140899e-05, "loss": 1.1528, "step": 429500 }, { "epoch": 1.27, "learning_rate": 1.1541086222888445e-05, "loss": 1.1391, "step": 430000 }, { "epoch": 1.27, "learning_rate": 1.1531250276635991e-05, "loss": 1.1703, "step": 430500 }, { "epoch": 1.27, "learning_rate": 1.1521414330383534e-05, "loss": 1.145, "step": 431000 }, { "epoch": 1.27, "learning_rate": 1.151157838413108e-05, "loss": 1.193, "step": 431500 }, { "epoch": 1.27, "learning_rate": 1.1501742437878622e-05, "loss": 1.1627, "step": 432000 }, { "epoch": 1.28, "learning_rate": 1.1491906491626168e-05, "loss": 1.165, "step": 432500 }, { "epoch": 1.28, "learning_rate": 1.1482070545373714e-05, "loss": 1.1556, "step": 433000 }, { "epoch": 1.28, "learning_rate": 1.1472234599121257e-05, "loss": 1.1537, "step": 433500 }, { "epoch": 1.28, "learning_rate": 1.1462398652868803e-05, "loss": 1.1818, "step": 434000 }, { "epoch": 1.28, "learning_rate": 1.1452562706616346e-05, "loss": 1.1622, "step": 434500 }, { "epoch": 1.28, "learning_rate": 1.1442726760363892e-05, "loss": 1.1625, "step": 435000 }, { "epoch": 1.29, "learning_rate": 1.1432890814111438e-05, "loss": 1.1634, "step": 435500 }, { "epoch": 1.29, "learning_rate": 1.142305486785898e-05, "loss": 1.1541, "step": 436000 }, { "epoch": 1.29, "learning_rate": 1.1413218921606526e-05, "loss": 1.1821, "step": 436500 }, { "epoch": 1.29, "learning_rate": 1.1403382975354072e-05, "loss": 1.179, "step": 437000 }, { "epoch": 1.29, "learning_rate": 1.1393547029101615e-05, "loss": 1.1696, "step": 437500 }, { "epoch": 1.29, "learning_rate": 1.138371108284916e-05, "loss": 1.1757, "step": 438000 }, { "epoch": 1.29, "learning_rate": 1.1373875136596703e-05, "loss": 1.1816, "step": 438500 }, { "epoch": 1.3, "learning_rate": 1.136403919034425e-05, "loss": 1.157, "step": 439000 }, { "epoch": 1.3, "learning_rate": 1.1354203244091795e-05, "loss": 1.1681, "step": 439500 }, { "epoch": 1.3, "learning_rate": 1.1344367297839338e-05, "loss": 1.1639, "step": 440000 }, { "epoch": 1.3, "learning_rate": 1.1334531351586884e-05, "loss": 1.1711, "step": 440500 }, { "epoch": 1.3, "learning_rate": 1.1324695405334427e-05, "loss": 1.1726, "step": 441000 }, { "epoch": 1.3, "learning_rate": 1.1314859459081973e-05, "loss": 1.1904, "step": 441500 }, { "epoch": 1.3, "learning_rate": 1.1305023512829519e-05, "loss": 1.1634, "step": 442000 }, { "epoch": 1.31, "learning_rate": 1.1295187566577061e-05, "loss": 1.1693, "step": 442500 }, { "epoch": 1.31, "learning_rate": 1.1285351620324607e-05, "loss": 1.1747, "step": 443000 }, { "epoch": 1.31, "learning_rate": 1.127551567407215e-05, "loss": 1.1648, "step": 443500 }, { "epoch": 1.31, "learning_rate": 1.1265679727819696e-05, "loss": 1.1484, "step": 444000 }, { "epoch": 1.31, "learning_rate": 1.1255843781567242e-05, "loss": 1.1591, "step": 444500 }, { "epoch": 1.31, "learning_rate": 1.1246007835314785e-05, "loss": 1.1857, "step": 445000 }, { "epoch": 1.31, "learning_rate": 1.123617188906233e-05, "loss": 1.158, "step": 445500 }, { "epoch": 1.32, "learning_rate": 1.1226335942809873e-05, "loss": 1.1705, "step": 446000 }, { "epoch": 1.32, "learning_rate": 1.121649999655742e-05, "loss": 1.1519, "step": 446500 }, { "epoch": 1.32, "learning_rate": 1.1206664050304965e-05, "loss": 1.174, "step": 447000 }, { "epoch": 1.32, "learning_rate": 1.1196828104052508e-05, "loss": 1.1454, "step": 447500 }, { "epoch": 1.32, "learning_rate": 1.1186992157800054e-05, "loss": 1.155, "step": 448000 }, { "epoch": 1.32, "learning_rate": 1.11771562115476e-05, "loss": 1.1775, "step": 448500 }, { "epoch": 1.32, "learning_rate": 1.1167320265295143e-05, "loss": 1.1584, "step": 449000 }, { "epoch": 1.33, "learning_rate": 1.1157484319042689e-05, "loss": 1.1866, "step": 449500 }, { "epoch": 1.33, "learning_rate": 1.1147648372790231e-05, "loss": 1.1648, "step": 450000 }, { "epoch": 1.33, "learning_rate": 1.1137812426537777e-05, "loss": 1.1699, "step": 450500 }, { "epoch": 1.33, "learning_rate": 1.1127976480285323e-05, "loss": 1.149, "step": 451000 }, { "epoch": 1.33, "learning_rate": 1.1118140534032866e-05, "loss": 1.1716, "step": 451500 }, { "epoch": 1.33, "learning_rate": 1.1108304587780412e-05, "loss": 1.1583, "step": 452000 }, { "epoch": 1.34, "learning_rate": 1.1098468641527955e-05, "loss": 1.1479, "step": 452500 }, { "epoch": 1.34, "learning_rate": 1.10886326952755e-05, "loss": 1.1813, "step": 453000 }, { "epoch": 1.34, "learning_rate": 1.1078796749023047e-05, "loss": 1.1674, "step": 453500 }, { "epoch": 1.34, "learning_rate": 1.106896080277059e-05, "loss": 1.1546, "step": 454000 }, { "epoch": 1.34, "learning_rate": 1.1059124856518135e-05, "loss": 1.1569, "step": 454500 }, { "epoch": 1.34, "learning_rate": 1.1049288910265678e-05, "loss": 1.1501, "step": 455000 }, { "epoch": 1.34, "learning_rate": 1.1039452964013224e-05, "loss": 1.1542, "step": 455500 }, { "epoch": 1.35, "learning_rate": 1.102961701776077e-05, "loss": 1.1647, "step": 456000 }, { "epoch": 1.35, "learning_rate": 1.1019781071508313e-05, "loss": 1.162, "step": 456500 }, { "epoch": 1.35, "learning_rate": 1.1009945125255859e-05, "loss": 1.1616, "step": 457000 }, { "epoch": 1.35, "learning_rate": 1.1000109179003405e-05, "loss": 1.1626, "step": 457500 }, { "epoch": 1.35, "learning_rate": 1.0990273232750947e-05, "loss": 1.1677, "step": 458000 }, { "epoch": 1.35, "learning_rate": 1.0980437286498493e-05, "loss": 1.1631, "step": 458500 }, { "epoch": 1.35, "learning_rate": 1.0970601340246036e-05, "loss": 1.1585, "step": 459000 }, { "epoch": 1.36, "learning_rate": 1.0960765393993582e-05, "loss": 1.1722, "step": 459500 }, { "epoch": 1.36, "learning_rate": 1.0950929447741128e-05, "loss": 1.183, "step": 460000 }, { "epoch": 1.36, "learning_rate": 1.094109350148867e-05, "loss": 1.1612, "step": 460500 }, { "epoch": 1.36, "learning_rate": 1.0931257555236216e-05, "loss": 1.1544, "step": 461000 }, { "epoch": 1.36, "learning_rate": 1.0921421608983759e-05, "loss": 1.1708, "step": 461500 }, { "epoch": 1.36, "learning_rate": 1.0911585662731305e-05, "loss": 1.1568, "step": 462000 }, { "epoch": 1.36, "learning_rate": 1.0901749716478851e-05, "loss": 1.1512, "step": 462500 }, { "epoch": 1.37, "learning_rate": 1.0891913770226394e-05, "loss": 1.1759, "step": 463000 }, { "epoch": 1.37, "learning_rate": 1.088207782397394e-05, "loss": 1.1662, "step": 463500 }, { "epoch": 1.37, "learning_rate": 1.0872241877721484e-05, "loss": 1.1346, "step": 464000 }, { "epoch": 1.37, "learning_rate": 1.0862405931469028e-05, "loss": 1.1601, "step": 464500 }, { "epoch": 1.37, "learning_rate": 1.0852569985216574e-05, "loss": 1.1861, "step": 465000 }, { "epoch": 1.37, "learning_rate": 1.0842734038964117e-05, "loss": 1.1585, "step": 465500 }, { "epoch": 1.38, "learning_rate": 1.0832898092711663e-05, "loss": 1.1777, "step": 466000 }, { "epoch": 1.38, "learning_rate": 1.0823062146459207e-05, "loss": 1.1637, "step": 466500 }, { "epoch": 1.38, "learning_rate": 1.0813226200206752e-05, "loss": 1.1648, "step": 467000 }, { "epoch": 1.38, "learning_rate": 1.0803390253954298e-05, "loss": 1.172, "step": 467500 }, { "epoch": 1.38, "learning_rate": 1.0793554307701842e-05, "loss": 1.1575, "step": 468000 }, { "epoch": 1.38, "learning_rate": 1.0783718361449386e-05, "loss": 1.1497, "step": 468500 }, { "epoch": 1.38, "learning_rate": 1.0773882415196932e-05, "loss": 1.1414, "step": 469000 }, { "epoch": 1.39, "learning_rate": 1.0764046468944475e-05, "loss": 1.1627, "step": 469500 }, { "epoch": 1.39, "learning_rate": 1.0754210522692021e-05, "loss": 1.1726, "step": 470000 }, { "epoch": 1.39, "learning_rate": 1.0744374576439565e-05, "loss": 1.1634, "step": 470500 }, { "epoch": 1.39, "learning_rate": 1.073453863018711e-05, "loss": 1.1531, "step": 471000 }, { "epoch": 1.39, "learning_rate": 1.0724702683934656e-05, "loss": 1.1705, "step": 471500 }, { "epoch": 1.39, "learning_rate": 1.0714866737682198e-05, "loss": 1.1546, "step": 472000 }, { "epoch": 1.39, "learning_rate": 1.0705030791429744e-05, "loss": 1.1485, "step": 472500 }, { "epoch": 1.4, "learning_rate": 1.0695194845177289e-05, "loss": 1.1588, "step": 473000 }, { "epoch": 1.4, "learning_rate": 1.0685358898924833e-05, "loss": 1.1528, "step": 473500 }, { "epoch": 1.4, "learning_rate": 1.0675522952672379e-05, "loss": 1.1673, "step": 474000 }, { "epoch": 1.4, "learning_rate": 1.0665687006419923e-05, "loss": 1.1596, "step": 474500 }, { "epoch": 1.4, "learning_rate": 1.0655851060167468e-05, "loss": 1.154, "step": 475000 }, { "epoch": 1.4, "learning_rate": 1.0646015113915012e-05, "loss": 1.1615, "step": 475500 }, { "epoch": 1.4, "learning_rate": 1.0636179167662556e-05, "loss": 1.1711, "step": 476000 }, { "epoch": 1.41, "learning_rate": 1.0626343221410102e-05, "loss": 1.1518, "step": 476500 }, { "epoch": 1.41, "learning_rate": 1.0616507275157647e-05, "loss": 1.1638, "step": 477000 }, { "epoch": 1.41, "learning_rate": 1.0606671328905191e-05, "loss": 1.1388, "step": 477500 }, { "epoch": 1.41, "learning_rate": 1.0596835382652735e-05, "loss": 1.1595, "step": 478000 }, { "epoch": 1.41, "learning_rate": 1.0586999436400281e-05, "loss": 1.1633, "step": 478500 }, { "epoch": 1.41, "learning_rate": 1.0577163490147826e-05, "loss": 1.1548, "step": 479000 }, { "epoch": 1.41, "learning_rate": 1.056732754389537e-05, "loss": 1.1498, "step": 479500 }, { "epoch": 1.42, "learning_rate": 1.0557491597642914e-05, "loss": 1.1585, "step": 480000 }, { "epoch": 1.42, "learning_rate": 1.054765565139046e-05, "loss": 1.1504, "step": 480500 }, { "epoch": 1.42, "learning_rate": 1.0537819705138004e-05, "loss": 1.146, "step": 481000 }, { "epoch": 1.42, "learning_rate": 1.0527983758885549e-05, "loss": 1.1599, "step": 481500 }, { "epoch": 1.42, "learning_rate": 1.0518147812633093e-05, "loss": 1.1729, "step": 482000 }, { "epoch": 1.42, "learning_rate": 1.0508311866380637e-05, "loss": 1.1592, "step": 482500 }, { "epoch": 1.43, "learning_rate": 1.0498475920128183e-05, "loss": 1.1694, "step": 483000 }, { "epoch": 1.43, "learning_rate": 1.0488639973875728e-05, "loss": 1.1637, "step": 483500 }, { "epoch": 1.43, "learning_rate": 1.0478804027623272e-05, "loss": 1.185, "step": 484000 }, { "epoch": 1.43, "learning_rate": 1.0468968081370816e-05, "loss": 1.1691, "step": 484500 }, { "epoch": 1.43, "learning_rate": 1.0459132135118362e-05, "loss": 1.1705, "step": 485000 }, { "epoch": 1.43, "learning_rate": 1.0449296188865907e-05, "loss": 1.165, "step": 485500 }, { "epoch": 1.43, "learning_rate": 1.0439460242613451e-05, "loss": 1.1527, "step": 486000 }, { "epoch": 1.44, "learning_rate": 1.0429624296360995e-05, "loss": 1.1538, "step": 486500 }, { "epoch": 1.44, "learning_rate": 1.041978835010854e-05, "loss": 1.1727, "step": 487000 }, { "epoch": 1.44, "learning_rate": 1.0409952403856086e-05, "loss": 1.1639, "step": 487500 }, { "epoch": 1.44, "learning_rate": 1.040011645760363e-05, "loss": 1.1616, "step": 488000 }, { "epoch": 1.44, "learning_rate": 1.0390280511351174e-05, "loss": 1.1644, "step": 488500 }, { "epoch": 1.44, "learning_rate": 1.038044456509872e-05, "loss": 1.1514, "step": 489000 }, { "epoch": 1.44, "learning_rate": 1.0370608618846265e-05, "loss": 1.1568, "step": 489500 }, { "epoch": 1.45, "learning_rate": 1.0360772672593809e-05, "loss": 1.1723, "step": 490000 }, { "epoch": 1.45, "learning_rate": 1.0350936726341353e-05, "loss": 1.1479, "step": 490500 }, { "epoch": 1.45, "learning_rate": 1.0341100780088898e-05, "loss": 1.1494, "step": 491000 }, { "epoch": 1.45, "learning_rate": 1.0331264833836444e-05, "loss": 1.1527, "step": 491500 }, { "epoch": 1.45, "learning_rate": 1.0321428887583988e-05, "loss": 1.1602, "step": 492000 }, { "epoch": 1.45, "learning_rate": 1.0311592941331532e-05, "loss": 1.1525, "step": 492500 }, { "epoch": 1.45, "learning_rate": 1.0301756995079077e-05, "loss": 1.1828, "step": 493000 }, { "epoch": 1.46, "learning_rate": 1.0291921048826621e-05, "loss": 1.1517, "step": 493500 }, { "epoch": 1.46, "learning_rate": 1.0282085102574167e-05, "loss": 1.1565, "step": 494000 }, { "epoch": 1.46, "learning_rate": 1.0272249156321711e-05, "loss": 1.1612, "step": 494500 }, { "epoch": 1.46, "learning_rate": 1.0262413210069256e-05, "loss": 1.182, "step": 495000 }, { "epoch": 1.46, "learning_rate": 1.0252577263816802e-05, "loss": 1.1619, "step": 495500 }, { "epoch": 1.46, "learning_rate": 1.0242741317564344e-05, "loss": 1.1521, "step": 496000 }, { "epoch": 1.47, "learning_rate": 1.023290537131189e-05, "loss": 1.1692, "step": 496500 }, { "epoch": 1.47, "learning_rate": 1.0223069425059435e-05, "loss": 1.1601, "step": 497000 }, { "epoch": 1.47, "learning_rate": 1.0213233478806979e-05, "loss": 1.1678, "step": 497500 }, { "epoch": 1.47, "learning_rate": 1.0203397532554525e-05, "loss": 1.16, "step": 498000 }, { "epoch": 1.47, "learning_rate": 1.0193561586302068e-05, "loss": 1.1495, "step": 498500 }, { "epoch": 1.47, "learning_rate": 1.0183725640049614e-05, "loss": 1.1446, "step": 499000 }, { "epoch": 1.47, "learning_rate": 1.017388969379716e-05, "loss": 1.1687, "step": 499500 }, { "epoch": 1.48, "learning_rate": 1.0164053747544702e-05, "loss": 1.1505, "step": 500000 }, { "epoch": 1.48, "learning_rate": 1.0154217801292248e-05, "loss": 1.1602, "step": 500500 }, { "epoch": 1.48, "learning_rate": 1.0144381855039792e-05, "loss": 1.1758, "step": 501000 }, { "epoch": 1.48, "learning_rate": 1.0134545908787337e-05, "loss": 1.1586, "step": 501500 }, { "epoch": 1.48, "learning_rate": 1.0124709962534883e-05, "loss": 1.1524, "step": 502000 }, { "epoch": 1.48, "learning_rate": 1.0114874016282425e-05, "loss": 1.1672, "step": 502500 }, { "epoch": 1.48, "learning_rate": 1.0105038070029971e-05, "loss": 1.159, "step": 503000 }, { "epoch": 1.49, "learning_rate": 1.0095202123777516e-05, "loss": 1.1641, "step": 503500 }, { "epoch": 1.49, "learning_rate": 1.008536617752506e-05, "loss": 1.1529, "step": 504000 }, { "epoch": 1.49, "learning_rate": 1.0075530231272606e-05, "loss": 1.1489, "step": 504500 }, { "epoch": 1.49, "learning_rate": 1.0065694285020149e-05, "loss": 1.1663, "step": 505000 }, { "epoch": 1.49, "learning_rate": 1.0055858338767695e-05, "loss": 1.1567, "step": 505500 }, { "epoch": 1.49, "learning_rate": 1.004602239251524e-05, "loss": 1.1433, "step": 506000 }, { "epoch": 1.49, "learning_rate": 1.0036186446262783e-05, "loss": 1.1558, "step": 506500 }, { "epoch": 1.5, "learning_rate": 1.002635050001033e-05, "loss": 1.1493, "step": 507000 }, { "epoch": 1.5, "learning_rate": 1.0016514553757872e-05, "loss": 1.1574, "step": 507500 }, { "epoch": 1.5, "learning_rate": 1.0006678607505418e-05, "loss": 1.1596, "step": 508000 }, { "epoch": 1.5, "learning_rate": 9.996842661252962e-06, "loss": 1.1591, "step": 508500 }, { "epoch": 1.5, "learning_rate": 9.987006715000508e-06, "loss": 1.1592, "step": 509000 }, { "epoch": 1.5, "learning_rate": 9.977170768748053e-06, "loss": 1.1676, "step": 509500 }, { "epoch": 1.5, "learning_rate": 9.967334822495597e-06, "loss": 1.1729, "step": 510000 }, { "epoch": 1.51, "learning_rate": 9.957498876243141e-06, "loss": 1.1425, "step": 510500 }, { "epoch": 1.51, "learning_rate": 9.947662929990686e-06, "loss": 1.1478, "step": 511000 }, { "epoch": 1.51, "learning_rate": 9.937826983738232e-06, "loss": 1.1723, "step": 511500 }, { "epoch": 1.51, "learning_rate": 9.927991037485776e-06, "loss": 1.1627, "step": 512000 }, { "epoch": 1.51, "learning_rate": 9.91815509123332e-06, "loss": 1.158, "step": 512500 }, { "epoch": 1.51, "learning_rate": 9.908319144980865e-06, "loss": 1.1495, "step": 513000 }, { "epoch": 1.52, "learning_rate": 9.898483198728409e-06, "loss": 1.1373, "step": 513500 }, { "epoch": 1.52, "learning_rate": 9.888647252475955e-06, "loss": 1.1637, "step": 514000 }, { "epoch": 1.52, "learning_rate": 9.8788113062235e-06, "loss": 1.1445, "step": 514500 }, { "epoch": 1.52, "learning_rate": 9.868975359971044e-06, "loss": 1.1527, "step": 515000 }, { "epoch": 1.52, "learning_rate": 9.859139413718588e-06, "loss": 1.142, "step": 515500 }, { "epoch": 1.52, "learning_rate": 9.849303467466134e-06, "loss": 1.1553, "step": 516000 }, { "epoch": 1.52, "learning_rate": 9.839467521213678e-06, "loss": 1.1498, "step": 516500 }, { "epoch": 1.53, "learning_rate": 9.829631574961223e-06, "loss": 1.1485, "step": 517000 }, { "epoch": 1.53, "learning_rate": 9.819795628708767e-06, "loss": 1.1574, "step": 517500 }, { "epoch": 1.53, "learning_rate": 9.809959682456311e-06, "loss": 1.159, "step": 518000 }, { "epoch": 1.53, "learning_rate": 9.800123736203857e-06, "loss": 1.1564, "step": 518500 }, { "epoch": 1.53, "learning_rate": 9.790287789951402e-06, "loss": 1.1682, "step": 519000 }, { "epoch": 1.53, "learning_rate": 9.780451843698946e-06, "loss": 1.1645, "step": 519500 }, { "epoch": 1.53, "learning_rate": 9.77061589744649e-06, "loss": 1.1303, "step": 520000 }, { "epoch": 1.54, "learning_rate": 9.760779951194036e-06, "loss": 1.1705, "step": 520500 }, { "epoch": 1.54, "learning_rate": 9.75094400494158e-06, "loss": 1.1727, "step": 521000 }, { "epoch": 1.54, "learning_rate": 9.741108058689125e-06, "loss": 1.1428, "step": 521500 }, { "epoch": 1.54, "learning_rate": 9.73127211243667e-06, "loss": 1.1502, "step": 522000 }, { "epoch": 1.54, "learning_rate": 9.721436166184213e-06, "loss": 1.165, "step": 522500 }, { "epoch": 1.54, "learning_rate": 9.71160021993176e-06, "loss": 1.148, "step": 523000 }, { "epoch": 1.54, "learning_rate": 9.701764273679304e-06, "loss": 1.1505, "step": 523500 }, { "epoch": 1.55, "learning_rate": 9.691928327426848e-06, "loss": 1.1513, "step": 524000 }, { "epoch": 1.55, "learning_rate": 9.682092381174392e-06, "loss": 1.1619, "step": 524500 }, { "epoch": 1.55, "learning_rate": 9.672256434921938e-06, "loss": 1.1541, "step": 525000 }, { "epoch": 1.55, "learning_rate": 9.662420488669483e-06, "loss": 1.1557, "step": 525500 }, { "epoch": 1.55, "learning_rate": 9.652584542417027e-06, "loss": 1.1478, "step": 526000 }, { "epoch": 1.55, "learning_rate": 9.642748596164571e-06, "loss": 1.1656, "step": 526500 }, { "epoch": 1.56, "learning_rate": 9.632912649912116e-06, "loss": 1.1636, "step": 527000 }, { "epoch": 1.56, "learning_rate": 9.623076703659662e-06, "loss": 1.1453, "step": 527500 }, { "epoch": 1.56, "learning_rate": 9.613240757407206e-06, "loss": 1.1541, "step": 528000 }, { "epoch": 1.56, "learning_rate": 9.60340481115475e-06, "loss": 1.1634, "step": 528500 }, { "epoch": 1.56, "learning_rate": 9.593568864902295e-06, "loss": 1.1653, "step": 529000 }, { "epoch": 1.56, "learning_rate": 9.583732918649839e-06, "loss": 1.1756, "step": 529500 }, { "epoch": 1.56, "learning_rate": 9.573896972397385e-06, "loss": 1.1414, "step": 530000 }, { "epoch": 1.57, "learning_rate": 9.56406102614493e-06, "loss": 1.1796, "step": 530500 }, { "epoch": 1.57, "learning_rate": 9.554225079892474e-06, "loss": 1.1593, "step": 531000 }, { "epoch": 1.57, "learning_rate": 9.544389133640018e-06, "loss": 1.1633, "step": 531500 }, { "epoch": 1.57, "learning_rate": 9.534553187387564e-06, "loss": 1.1792, "step": 532000 }, { "epoch": 1.57, "learning_rate": 9.524717241135108e-06, "loss": 1.1575, "step": 532500 }, { "epoch": 1.57, "learning_rate": 9.514881294882653e-06, "loss": 1.1451, "step": 533000 }, { "epoch": 1.57, "learning_rate": 9.505045348630197e-06, "loss": 1.1622, "step": 533500 }, { "epoch": 1.58, "learning_rate": 9.495209402377741e-06, "loss": 1.1462, "step": 534000 }, { "epoch": 1.58, "learning_rate": 9.485373456125287e-06, "loss": 1.1536, "step": 534500 }, { "epoch": 1.58, "learning_rate": 9.475537509872832e-06, "loss": 1.1605, "step": 535000 }, { "epoch": 1.58, "learning_rate": 9.465701563620376e-06, "loss": 1.1597, "step": 535500 }, { "epoch": 1.58, "learning_rate": 9.45586561736792e-06, "loss": 1.1608, "step": 536000 }, { "epoch": 1.58, "learning_rate": 9.446029671115466e-06, "loss": 1.1416, "step": 536500 }, { "epoch": 1.58, "learning_rate": 9.43619372486301e-06, "loss": 1.1431, "step": 537000 }, { "epoch": 1.59, "learning_rate": 9.426357778610555e-06, "loss": 1.1541, "step": 537500 }, { "epoch": 1.59, "learning_rate": 9.4165218323581e-06, "loss": 1.1601, "step": 538000 }, { "epoch": 1.59, "learning_rate": 9.406685886105644e-06, "loss": 1.1705, "step": 538500 }, { "epoch": 1.59, "learning_rate": 9.39684993985319e-06, "loss": 1.1433, "step": 539000 }, { "epoch": 1.59, "learning_rate": 9.387013993600734e-06, "loss": 1.1612, "step": 539500 }, { "epoch": 1.59, "learning_rate": 9.377178047348278e-06, "loss": 1.1632, "step": 540000 }, { "epoch": 1.59, "learning_rate": 9.367342101095823e-06, "loss": 1.1488, "step": 540500 }, { "epoch": 1.6, "learning_rate": 9.357506154843369e-06, "loss": 1.149, "step": 541000 }, { "epoch": 1.6, "learning_rate": 9.347670208590913e-06, "loss": 1.1435, "step": 541500 }, { "epoch": 1.6, "learning_rate": 9.337834262338457e-06, "loss": 1.1633, "step": 542000 }, { "epoch": 1.6, "learning_rate": 9.327998316086001e-06, "loss": 1.1458, "step": 542500 }, { "epoch": 1.6, "learning_rate": 9.318162369833546e-06, "loss": 1.17, "step": 543000 }, { "epoch": 1.6, "learning_rate": 9.308326423581092e-06, "loss": 1.1575, "step": 543500 }, { "epoch": 1.61, "learning_rate": 9.298490477328636e-06, "loss": 1.1667, "step": 544000 }, { "epoch": 1.61, "learning_rate": 9.28865453107618e-06, "loss": 1.1607, "step": 544500 }, { "epoch": 1.61, "learning_rate": 9.278818584823725e-06, "loss": 1.1484, "step": 545000 }, { "epoch": 1.61, "learning_rate": 9.26898263857127e-06, "loss": 1.1548, "step": 545500 }, { "epoch": 1.61, "learning_rate": 9.259146692318815e-06, "loss": 1.1748, "step": 546000 }, { "epoch": 1.61, "learning_rate": 9.24931074606636e-06, "loss": 1.1526, "step": 546500 }, { "epoch": 1.61, "learning_rate": 9.239474799813904e-06, "loss": 1.1506, "step": 547000 }, { "epoch": 1.62, "learning_rate": 9.22963885356145e-06, "loss": 1.1398, "step": 547500 }, { "epoch": 1.62, "learning_rate": 9.219802907308994e-06, "loss": 1.1532, "step": 548000 }, { "epoch": 1.62, "learning_rate": 9.209966961056538e-06, "loss": 1.1513, "step": 548500 }, { "epoch": 1.62, "learning_rate": 9.200131014804083e-06, "loss": 1.1508, "step": 549000 }, { "epoch": 1.62, "learning_rate": 9.190295068551627e-06, "loss": 1.1392, "step": 549500 }, { "epoch": 1.62, "learning_rate": 9.180459122299173e-06, "loss": 1.1661, "step": 550000 }, { "epoch": 1.62, "learning_rate": 9.170623176046717e-06, "loss": 1.1464, "step": 550500 }, { "epoch": 1.63, "learning_rate": 9.160787229794262e-06, "loss": 1.1572, "step": 551000 }, { "epoch": 1.63, "learning_rate": 9.150951283541806e-06, "loss": 1.1528, "step": 551500 }, { "epoch": 1.63, "learning_rate": 9.141115337289352e-06, "loss": 1.1606, "step": 552000 }, { "epoch": 1.63, "learning_rate": 9.131279391036896e-06, "loss": 1.1433, "step": 552500 }, { "epoch": 1.63, "learning_rate": 9.12144344478444e-06, "loss": 1.1624, "step": 553000 }, { "epoch": 1.63, "learning_rate": 9.111607498531985e-06, "loss": 1.1541, "step": 553500 }, { "epoch": 1.63, "learning_rate": 9.101771552279531e-06, "loss": 1.1637, "step": 554000 }, { "epoch": 1.64, "learning_rate": 9.091935606027075e-06, "loss": 1.1533, "step": 554500 }, { "epoch": 1.64, "learning_rate": 9.08209965977462e-06, "loss": 1.1472, "step": 555000 }, { "epoch": 1.64, "learning_rate": 9.072263713522164e-06, "loss": 1.1689, "step": 555500 }, { "epoch": 1.64, "learning_rate": 9.06242776726971e-06, "loss": 1.1512, "step": 556000 }, { "epoch": 1.64, "learning_rate": 9.052591821017254e-06, "loss": 1.1863, "step": 556500 }, { "epoch": 1.64, "learning_rate": 9.042755874764799e-06, "loss": 1.1646, "step": 557000 }, { "epoch": 1.65, "learning_rate": 9.032919928512343e-06, "loss": 1.1543, "step": 557500 }, { "epoch": 1.65, "learning_rate": 9.023083982259889e-06, "loss": 1.1811, "step": 558000 }, { "epoch": 1.65, "learning_rate": 9.013248036007433e-06, "loss": 1.1567, "step": 558500 }, { "epoch": 1.65, "learning_rate": 9.003412089754978e-06, "loss": 1.16, "step": 559000 }, { "epoch": 1.65, "learning_rate": 8.993576143502522e-06, "loss": 1.1449, "step": 559500 }, { "epoch": 1.65, "learning_rate": 8.983740197250066e-06, "loss": 1.1503, "step": 560000 }, { "epoch": 1.65, "learning_rate": 8.973904250997612e-06, "loss": 1.1586, "step": 560500 }, { "epoch": 1.66, "learning_rate": 8.964068304745157e-06, "loss": 1.1514, "step": 561000 }, { "epoch": 1.66, "learning_rate": 8.954232358492701e-06, "loss": 1.149, "step": 561500 }, { "epoch": 1.66, "learning_rate": 8.944396412240245e-06, "loss": 1.1525, "step": 562000 }, { "epoch": 1.66, "learning_rate": 8.934560465987791e-06, "loss": 1.1598, "step": 562500 }, { "epoch": 1.66, "learning_rate": 8.924724519735336e-06, "loss": 1.1607, "step": 563000 }, { "epoch": 1.66, "learning_rate": 8.91488857348288e-06, "loss": 1.1547, "step": 563500 }, { "epoch": 1.66, "learning_rate": 8.905052627230424e-06, "loss": 1.157, "step": 564000 }, { "epoch": 1.67, "learning_rate": 8.89521668097797e-06, "loss": 1.151, "step": 564500 }, { "epoch": 1.67, "learning_rate": 8.885380734725515e-06, "loss": 1.1607, "step": 565000 }, { "epoch": 1.67, "learning_rate": 8.875544788473059e-06, "loss": 1.1461, "step": 565500 }, { "epoch": 1.67, "learning_rate": 8.865708842220603e-06, "loss": 1.1541, "step": 566000 }, { "epoch": 1.67, "learning_rate": 8.85587289596815e-06, "loss": 1.1632, "step": 566500 }, { "epoch": 1.67, "learning_rate": 8.846036949715693e-06, "loss": 1.1391, "step": 567000 }, { "epoch": 1.67, "learning_rate": 8.836201003463238e-06, "loss": 1.14, "step": 567500 }, { "epoch": 1.68, "learning_rate": 8.826365057210782e-06, "loss": 1.149, "step": 568000 }, { "epoch": 1.68, "learning_rate": 8.816529110958328e-06, "loss": 1.149, "step": 568500 }, { "epoch": 1.68, "learning_rate": 8.806693164705872e-06, "loss": 1.1552, "step": 569000 }, { "epoch": 1.68, "learning_rate": 8.796857218453417e-06, "loss": 1.1624, "step": 569500 }, { "epoch": 1.68, "learning_rate": 8.787021272200961e-06, "loss": 1.1711, "step": 570000 }, { "epoch": 1.68, "learning_rate": 8.777185325948505e-06, "loss": 1.1513, "step": 570500 }, { "epoch": 1.68, "learning_rate": 8.767349379696051e-06, "loss": 1.1672, "step": 571000 }, { "epoch": 1.69, "learning_rate": 8.757513433443596e-06, "loss": 1.146, "step": 571500 }, { "epoch": 1.69, "learning_rate": 8.74767748719114e-06, "loss": 1.1479, "step": 572000 }, { "epoch": 1.69, "learning_rate": 8.737841540938684e-06, "loss": 1.1656, "step": 572500 }, { "epoch": 1.69, "learning_rate": 8.72800559468623e-06, "loss": 1.1525, "step": 573000 }, { "epoch": 1.69, "learning_rate": 8.718169648433775e-06, "loss": 1.1546, "step": 573500 }, { "epoch": 1.69, "learning_rate": 8.708333702181319e-06, "loss": 1.138, "step": 574000 }, { "epoch": 1.7, "learning_rate": 8.698497755928863e-06, "loss": 1.1427, "step": 574500 }, { "epoch": 1.7, "learning_rate": 8.688661809676408e-06, "loss": 1.1455, "step": 575000 }, { "epoch": 1.7, "learning_rate": 8.678825863423954e-06, "loss": 1.1548, "step": 575500 }, { "epoch": 1.7, "learning_rate": 8.668989917171498e-06, "loss": 1.1606, "step": 576000 }, { "epoch": 1.7, "learning_rate": 8.659153970919042e-06, "loss": 1.1476, "step": 576500 }, { "epoch": 1.7, "learning_rate": 8.649318024666587e-06, "loss": 1.1399, "step": 577000 }, { "epoch": 1.7, "learning_rate": 8.639482078414131e-06, "loss": 1.149, "step": 577500 }, { "epoch": 1.71, "learning_rate": 8.629646132161677e-06, "loss": 1.1359, "step": 578000 }, { "epoch": 1.71, "learning_rate": 8.619810185909221e-06, "loss": 1.1748, "step": 578500 }, { "epoch": 1.71, "learning_rate": 8.609974239656766e-06, "loss": 1.1543, "step": 579000 }, { "epoch": 1.71, "learning_rate": 8.60013829340431e-06, "loss": 1.1139, "step": 579500 }, { "epoch": 1.71, "learning_rate": 8.590302347151856e-06, "loss": 1.1547, "step": 580000 }, { "epoch": 1.71, "learning_rate": 8.5804664008994e-06, "loss": 1.1447, "step": 580500 }, { "epoch": 1.71, "learning_rate": 8.570630454646945e-06, "loss": 1.1611, "step": 581000 }, { "epoch": 1.72, "learning_rate": 8.560794508394489e-06, "loss": 1.1442, "step": 581500 }, { "epoch": 1.72, "learning_rate": 8.550958562142033e-06, "loss": 1.1546, "step": 582000 }, { "epoch": 1.72, "learning_rate": 8.54112261588958e-06, "loss": 1.1644, "step": 582500 }, { "epoch": 1.72, "learning_rate": 8.531286669637124e-06, "loss": 1.1725, "step": 583000 }, { "epoch": 1.72, "learning_rate": 8.521450723384668e-06, "loss": 1.1389, "step": 583500 }, { "epoch": 1.72, "learning_rate": 8.511614777132212e-06, "loss": 1.1406, "step": 584000 }, { "epoch": 1.72, "learning_rate": 8.501778830879758e-06, "loss": 1.1651, "step": 584500 }, { "epoch": 1.73, "learning_rate": 8.491942884627303e-06, "loss": 1.1304, "step": 585000 }, { "epoch": 1.73, "learning_rate": 8.482106938374847e-06, "loss": 1.1513, "step": 585500 }, { "epoch": 1.73, "learning_rate": 8.472270992122391e-06, "loss": 1.116, "step": 586000 }, { "epoch": 1.73, "learning_rate": 8.462435045869935e-06, "loss": 1.16, "step": 586500 }, { "epoch": 1.73, "learning_rate": 8.452599099617481e-06, "loss": 1.1641, "step": 587000 }, { "epoch": 1.73, "learning_rate": 8.442763153365026e-06, "loss": 1.1504, "step": 587500 }, { "epoch": 1.74, "learning_rate": 8.43292720711257e-06, "loss": 1.1443, "step": 588000 }, { "epoch": 1.74, "learning_rate": 8.423091260860114e-06, "loss": 1.1365, "step": 588500 }, { "epoch": 1.74, "learning_rate": 8.41325531460766e-06, "loss": 1.1666, "step": 589000 }, { "epoch": 1.74, "learning_rate": 8.403419368355205e-06, "loss": 1.1463, "step": 589500 }, { "epoch": 1.74, "learning_rate": 8.393583422102749e-06, "loss": 1.138, "step": 590000 }, { "epoch": 1.74, "learning_rate": 8.383747475850293e-06, "loss": 1.1663, "step": 590500 }, { "epoch": 1.74, "learning_rate": 8.373911529597838e-06, "loss": 1.16, "step": 591000 }, { "epoch": 1.75, "learning_rate": 8.364075583345384e-06, "loss": 1.1657, "step": 591500 }, { "epoch": 1.75, "learning_rate": 8.354239637092928e-06, "loss": 1.153, "step": 592000 }, { "epoch": 1.75, "learning_rate": 8.344403690840472e-06, "loss": 1.1559, "step": 592500 }, { "epoch": 1.75, "learning_rate": 8.334567744588017e-06, "loss": 1.1219, "step": 593000 }, { "epoch": 1.75, "learning_rate": 8.324731798335561e-06, "loss": 1.15, "step": 593500 }, { "epoch": 1.75, "learning_rate": 8.314895852083107e-06, "loss": 1.139, "step": 594000 }, { "epoch": 1.75, "learning_rate": 8.305059905830651e-06, "loss": 1.1559, "step": 594500 }, { "epoch": 1.76, "learning_rate": 8.295223959578196e-06, "loss": 1.1539, "step": 595000 }, { "epoch": 1.76, "learning_rate": 8.28538801332574e-06, "loss": 1.149, "step": 595500 }, { "epoch": 1.76, "learning_rate": 8.275552067073286e-06, "loss": 1.1509, "step": 596000 }, { "epoch": 1.76, "learning_rate": 8.26571612082083e-06, "loss": 1.1549, "step": 596500 }, { "epoch": 1.76, "learning_rate": 8.255880174568375e-06, "loss": 1.1645, "step": 597000 }, { "epoch": 1.76, "learning_rate": 8.246044228315919e-06, "loss": 1.1499, "step": 597500 }, { "epoch": 1.76, "learning_rate": 8.236208282063463e-06, "loss": 1.1479, "step": 598000 }, { "epoch": 1.77, "learning_rate": 8.22637233581101e-06, "loss": 1.1416, "step": 598500 }, { "epoch": 1.77, "learning_rate": 8.216536389558554e-06, "loss": 1.1473, "step": 599000 }, { "epoch": 1.77, "learning_rate": 8.206700443306098e-06, "loss": 1.1551, "step": 599500 }, { "epoch": 1.77, "learning_rate": 8.196864497053642e-06, "loss": 1.1469, "step": 600000 }, { "epoch": 1.77, "learning_rate": 8.187028550801188e-06, "loss": 1.1299, "step": 600500 }, { "epoch": 1.77, "learning_rate": 8.177192604548733e-06, "loss": 1.1697, "step": 601000 }, { "epoch": 1.77, "learning_rate": 8.167356658296277e-06, "loss": 1.1453, "step": 601500 }, { "epoch": 1.78, "learning_rate": 8.157520712043821e-06, "loss": 1.1599, "step": 602000 }, { "epoch": 1.78, "learning_rate": 8.147684765791366e-06, "loss": 1.156, "step": 602500 }, { "epoch": 1.78, "learning_rate": 8.137848819538912e-06, "loss": 1.1636, "step": 603000 }, { "epoch": 1.78, "learning_rate": 8.128012873286456e-06, "loss": 1.1393, "step": 603500 }, { "epoch": 1.78, "learning_rate": 8.118176927034e-06, "loss": 1.1393, "step": 604000 }, { "epoch": 1.78, "learning_rate": 8.108340980781545e-06, "loss": 1.1552, "step": 604500 }, { "epoch": 1.79, "learning_rate": 8.09850503452909e-06, "loss": 1.1405, "step": 605000 }, { "epoch": 1.79, "learning_rate": 8.088669088276635e-06, "loss": 1.1601, "step": 605500 }, { "epoch": 1.79, "learning_rate": 8.07883314202418e-06, "loss": 1.1496, "step": 606000 }, { "epoch": 1.79, "learning_rate": 8.068997195771724e-06, "loss": 1.1541, "step": 606500 }, { "epoch": 1.79, "learning_rate": 8.059161249519268e-06, "loss": 1.1611, "step": 607000 }, { "epoch": 1.79, "learning_rate": 8.049325303266814e-06, "loss": 1.152, "step": 607500 }, { "epoch": 1.79, "learning_rate": 8.039489357014358e-06, "loss": 1.156, "step": 608000 }, { "epoch": 1.8, "learning_rate": 8.029653410761902e-06, "loss": 1.1441, "step": 608500 }, { "epoch": 1.8, "learning_rate": 8.019817464509447e-06, "loss": 1.145, "step": 609000 }, { "epoch": 1.8, "learning_rate": 8.009981518256991e-06, "loss": 1.1433, "step": 609500 }, { "epoch": 1.8, "learning_rate": 8.000145572004537e-06, "loss": 1.1484, "step": 610000 }, { "epoch": 1.8, "learning_rate": 7.990309625752081e-06, "loss": 1.1358, "step": 610500 }, { "epoch": 1.8, "learning_rate": 7.980473679499626e-06, "loss": 1.1526, "step": 611000 }, { "epoch": 1.8, "learning_rate": 7.97063773324717e-06, "loss": 1.1471, "step": 611500 }, { "epoch": 1.81, "learning_rate": 7.960801786994716e-06, "loss": 1.1585, "step": 612000 }, { "epoch": 1.81, "learning_rate": 7.95096584074226e-06, "loss": 1.1536, "step": 612500 }, { "epoch": 1.81, "learning_rate": 7.941129894489805e-06, "loss": 1.1629, "step": 613000 }, { "epoch": 1.81, "learning_rate": 7.931293948237349e-06, "loss": 1.1508, "step": 613500 }, { "epoch": 1.81, "learning_rate": 7.921458001984893e-06, "loss": 1.156, "step": 614000 }, { "epoch": 1.81, "learning_rate": 7.91162205573244e-06, "loss": 1.1441, "step": 614500 }, { "epoch": 1.81, "learning_rate": 7.901786109479984e-06, "loss": 1.1621, "step": 615000 }, { "epoch": 1.82, "learning_rate": 7.891950163227528e-06, "loss": 1.1568, "step": 615500 }, { "epoch": 1.82, "learning_rate": 7.882114216975072e-06, "loss": 1.1676, "step": 616000 }, { "epoch": 1.82, "learning_rate": 7.872278270722618e-06, "loss": 1.1579, "step": 616500 }, { "epoch": 1.82, "learning_rate": 7.862442324470163e-06, "loss": 1.1375, "step": 617000 }, { "epoch": 1.82, "learning_rate": 7.852606378217707e-06, "loss": 1.1297, "step": 617500 }, { "epoch": 1.82, "learning_rate": 7.842770431965251e-06, "loss": 1.155, "step": 618000 }, { "epoch": 1.83, "learning_rate": 7.832934485712796e-06, "loss": 1.1312, "step": 618500 }, { "epoch": 1.83, "learning_rate": 7.823098539460342e-06, "loss": 1.1337, "step": 619000 }, { "epoch": 1.83, "learning_rate": 7.813262593207886e-06, "loss": 1.1512, "step": 619500 }, { "epoch": 1.83, "learning_rate": 7.80342664695543e-06, "loss": 1.1596, "step": 620000 }, { "epoch": 1.83, "learning_rate": 7.793590700702975e-06, "loss": 1.1351, "step": 620500 }, { "epoch": 1.83, "learning_rate": 7.78375475445052e-06, "loss": 1.1535, "step": 621000 }, { "epoch": 1.83, "learning_rate": 7.773918808198065e-06, "loss": 1.1393, "step": 621500 }, { "epoch": 1.84, "learning_rate": 7.76408286194561e-06, "loss": 1.1533, "step": 622000 }, { "epoch": 1.84, "learning_rate": 7.754246915693154e-06, "loss": 1.1325, "step": 622500 }, { "epoch": 1.84, "learning_rate": 7.7444109694407e-06, "loss": 1.1354, "step": 623000 }, { "epoch": 1.84, "learning_rate": 7.734575023188244e-06, "loss": 1.1315, "step": 623500 }, { "epoch": 1.84, "learning_rate": 7.724739076935788e-06, "loss": 1.1496, "step": 624000 }, { "epoch": 1.84, "learning_rate": 7.714903130683333e-06, "loss": 1.1664, "step": 624500 }, { "epoch": 1.84, "learning_rate": 7.705067184430879e-06, "loss": 1.1482, "step": 625000 }, { "epoch": 1.85, "learning_rate": 7.695231238178423e-06, "loss": 1.1704, "step": 625500 }, { "epoch": 1.85, "learning_rate": 7.685395291925967e-06, "loss": 1.144, "step": 626000 }, { "epoch": 1.85, "learning_rate": 7.675559345673512e-06, "loss": 1.1422, "step": 626500 }, { "epoch": 1.85, "learning_rate": 7.665723399421058e-06, "loss": 1.1703, "step": 627000 }, { "epoch": 1.85, "learning_rate": 7.655887453168602e-06, "loss": 1.1496, "step": 627500 }, { "epoch": 1.85, "learning_rate": 7.646051506916146e-06, "loss": 1.1554, "step": 628000 }, { "epoch": 1.85, "learning_rate": 7.63621556066369e-06, "loss": 1.1401, "step": 628500 }, { "epoch": 1.86, "learning_rate": 7.626379614411236e-06, "loss": 1.1431, "step": 629000 }, { "epoch": 1.86, "learning_rate": 7.61654366815878e-06, "loss": 1.1439, "step": 629500 }, { "epoch": 1.86, "learning_rate": 7.606707721906324e-06, "loss": 1.1622, "step": 630000 }, { "epoch": 1.86, "learning_rate": 7.59687177565387e-06, "loss": 1.138, "step": 630500 }, { "epoch": 1.86, "learning_rate": 7.587035829401415e-06, "loss": 1.1369, "step": 631000 }, { "epoch": 1.86, "learning_rate": 7.577199883148959e-06, "loss": 1.1603, "step": 631500 }, { "epoch": 1.86, "learning_rate": 7.567363936896503e-06, "loss": 1.1376, "step": 632000 }, { "epoch": 1.87, "learning_rate": 7.557527990644049e-06, "loss": 1.1293, "step": 632500 }, { "epoch": 1.87, "learning_rate": 7.547692044391594e-06, "loss": 1.1422, "step": 633000 }, { "epoch": 1.87, "learning_rate": 7.537856098139138e-06, "loss": 1.1319, "step": 633500 }, { "epoch": 1.87, "learning_rate": 7.528020151886682e-06, "loss": 1.1335, "step": 634000 }, { "epoch": 1.87, "learning_rate": 7.5181842056342266e-06, "loss": 1.1392, "step": 634500 }, { "epoch": 1.87, "learning_rate": 7.508348259381773e-06, "loss": 1.1544, "step": 635000 }, { "epoch": 1.88, "learning_rate": 7.498512313129317e-06, "loss": 1.1482, "step": 635500 }, { "epoch": 1.88, "learning_rate": 7.488676366876861e-06, "loss": 1.1537, "step": 636000 }, { "epoch": 1.88, "learning_rate": 7.4788404206244055e-06, "loss": 1.1739, "step": 636500 }, { "epoch": 1.88, "learning_rate": 7.469004474371951e-06, "loss": 1.1487, "step": 637000 }, { "epoch": 1.88, "learning_rate": 7.459168528119496e-06, "loss": 1.1679, "step": 637500 }, { "epoch": 1.88, "learning_rate": 7.44933258186704e-06, "loss": 1.1488, "step": 638000 }, { "epoch": 1.88, "learning_rate": 7.4394966356145845e-06, "loss": 1.1386, "step": 638500 }, { "epoch": 1.89, "learning_rate": 7.42966068936213e-06, "loss": 1.1547, "step": 639000 }, { "epoch": 1.89, "learning_rate": 7.419824743109675e-06, "loss": 1.1563, "step": 639500 }, { "epoch": 1.89, "learning_rate": 7.409988796857219e-06, "loss": 1.1479, "step": 640000 }, { "epoch": 1.89, "learning_rate": 7.4001528506047635e-06, "loss": 1.1451, "step": 640500 }, { "epoch": 1.89, "learning_rate": 7.390316904352308e-06, "loss": 1.1396, "step": 641000 }, { "epoch": 1.89, "learning_rate": 7.380480958099853e-06, "loss": 1.1595, "step": 641500 }, { "epoch": 1.89, "learning_rate": 7.370645011847398e-06, "loss": 1.1699, "step": 642000 }, { "epoch": 1.9, "learning_rate": 7.3608090655949425e-06, "loss": 1.1437, "step": 642500 }, { "epoch": 1.9, "learning_rate": 7.350973119342487e-06, "loss": 1.1538, "step": 643000 }, { "epoch": 1.9, "learning_rate": 7.341137173090032e-06, "loss": 1.1416, "step": 643500 }, { "epoch": 1.9, "learning_rate": 7.331301226837577e-06, "loss": 1.1406, "step": 644000 }, { "epoch": 1.9, "learning_rate": 7.3214652805851214e-06, "loss": 1.1302, "step": 644500 }, { "epoch": 1.9, "learning_rate": 7.311629334332666e-06, "loss": 1.1431, "step": 645000 }, { "epoch": 1.9, "learning_rate": 7.301793388080211e-06, "loss": 1.1494, "step": 645500 }, { "epoch": 1.91, "learning_rate": 7.291957441827755e-06, "loss": 1.1484, "step": 646000 }, { "epoch": 1.91, "learning_rate": 7.2821214955753e-06, "loss": 1.1504, "step": 646500 }, { "epoch": 1.91, "learning_rate": 7.272285549322845e-06, "loss": 1.1366, "step": 647000 }, { "epoch": 1.91, "learning_rate": 7.26244960307039e-06, "loss": 1.1425, "step": 647500 }, { "epoch": 1.91, "learning_rate": 7.252613656817934e-06, "loss": 1.1347, "step": 648000 }, { "epoch": 1.91, "learning_rate": 7.242777710565479e-06, "loss": 1.1407, "step": 648500 }, { "epoch": 1.92, "learning_rate": 7.232941764313024e-06, "loss": 1.1418, "step": 649000 }, { "epoch": 1.92, "learning_rate": 7.223105818060569e-06, "loss": 1.1477, "step": 649500 }, { "epoch": 1.92, "learning_rate": 7.213269871808113e-06, "loss": 1.155, "step": 650000 }, { "epoch": 1.92, "learning_rate": 7.2034339255556575e-06, "loss": 1.155, "step": 650500 }, { "epoch": 1.92, "learning_rate": 7.193597979303203e-06, "loss": 1.1269, "step": 651000 }, { "epoch": 1.92, "learning_rate": 7.183762033050747e-06, "loss": 1.1572, "step": 651500 }, { "epoch": 1.92, "learning_rate": 7.173926086798292e-06, "loss": 1.1541, "step": 652000 }, { "epoch": 1.93, "learning_rate": 7.1640901405458365e-06, "loss": 1.1384, "step": 652500 }, { "epoch": 1.93, "learning_rate": 7.154254194293381e-06, "loss": 1.1599, "step": 653000 }, { "epoch": 1.93, "learning_rate": 7.144418248040926e-06, "loss": 1.131, "step": 653500 }, { "epoch": 1.93, "learning_rate": 7.134582301788471e-06, "loss": 1.1481, "step": 654000 }, { "epoch": 1.93, "learning_rate": 7.1247463555360154e-06, "loss": 1.148, "step": 654500 }, { "epoch": 1.93, "learning_rate": 7.11491040928356e-06, "loss": 1.1233, "step": 655000 }, { "epoch": 1.93, "learning_rate": 7.105074463031105e-06, "loss": 1.1338, "step": 655500 }, { "epoch": 1.94, "learning_rate": 7.09523851677865e-06, "loss": 1.157, "step": 656000 }, { "epoch": 1.94, "learning_rate": 7.085402570526194e-06, "loss": 1.1479, "step": 656500 }, { "epoch": 1.94, "learning_rate": 7.075566624273739e-06, "loss": 1.1483, "step": 657000 }, { "epoch": 1.94, "learning_rate": 7.065730678021283e-06, "loss": 1.1462, "step": 657500 }, { "epoch": 1.94, "learning_rate": 7.055894731768829e-06, "loss": 1.174, "step": 658000 }, { "epoch": 1.94, "learning_rate": 7.046058785516373e-06, "loss": 1.1364, "step": 658500 }, { "epoch": 1.94, "learning_rate": 7.036222839263918e-06, "loss": 1.1614, "step": 659000 }, { "epoch": 1.95, "learning_rate": 7.026386893011462e-06, "loss": 1.1284, "step": 659500 }, { "epoch": 1.95, "learning_rate": 7.016550946759008e-06, "loss": 1.148, "step": 660000 }, { "epoch": 1.95, "learning_rate": 7.006715000506552e-06, "loss": 1.1214, "step": 660500 }, { "epoch": 1.95, "learning_rate": 6.996879054254097e-06, "loss": 1.1409, "step": 661000 }, { "epoch": 1.95, "learning_rate": 6.987043108001641e-06, "loss": 1.1441, "step": 661500 }, { "epoch": 1.95, "learning_rate": 6.977207161749185e-06, "loss": 1.1383, "step": 662000 }, { "epoch": 1.95, "learning_rate": 6.967371215496731e-06, "loss": 1.1517, "step": 662500 }, { "epoch": 1.96, "learning_rate": 6.957535269244276e-06, "loss": 1.1476, "step": 663000 }, { "epoch": 1.96, "learning_rate": 6.94769932299182e-06, "loss": 1.1366, "step": 663500 }, { "epoch": 1.96, "learning_rate": 6.937863376739364e-06, "loss": 1.1449, "step": 664000 }, { "epoch": 1.96, "learning_rate": 6.92802743048691e-06, "loss": 1.1442, "step": 664500 }, { "epoch": 1.96, "learning_rate": 6.918191484234455e-06, "loss": 1.1594, "step": 665000 }, { "epoch": 1.96, "learning_rate": 6.908355537981999e-06, "loss": 1.1227, "step": 665500 }, { "epoch": 1.97, "learning_rate": 6.898519591729543e-06, "loss": 1.1287, "step": 666000 }, { "epoch": 1.97, "learning_rate": 6.8886836454770876e-06, "loss": 1.1309, "step": 666500 }, { "epoch": 1.97, "learning_rate": 6.878847699224634e-06, "loss": 1.1482, "step": 667000 }, { "epoch": 1.97, "learning_rate": 6.869011752972178e-06, "loss": 1.1392, "step": 667500 }, { "epoch": 1.97, "learning_rate": 6.859175806719722e-06, "loss": 1.1445, "step": 668000 }, { "epoch": 1.97, "learning_rate": 6.8493398604672665e-06, "loss": 1.1328, "step": 668500 }, { "epoch": 1.97, "learning_rate": 6.839503914214811e-06, "loss": 1.1342, "step": 669000 }, { "epoch": 1.98, "learning_rate": 6.829667967962357e-06, "loss": 1.1438, "step": 669500 }, { "epoch": 1.98, "learning_rate": 6.819832021709901e-06, "loss": 1.1389, "step": 670000 }, { "epoch": 1.98, "learning_rate": 6.8099960754574455e-06, "loss": 1.1385, "step": 670500 }, { "epoch": 1.98, "learning_rate": 6.80016012920499e-06, "loss": 1.1281, "step": 671000 }, { "epoch": 1.98, "learning_rate": 6.790324182952536e-06, "loss": 1.1405, "step": 671500 }, { "epoch": 1.98, "learning_rate": 6.78048823670008e-06, "loss": 1.1207, "step": 672000 }, { "epoch": 1.98, "learning_rate": 6.7706522904476245e-06, "loss": 1.1281, "step": 672500 }, { "epoch": 1.99, "learning_rate": 6.760816344195169e-06, "loss": 1.132, "step": 673000 }, { "epoch": 1.99, "learning_rate": 6.750980397942713e-06, "loss": 1.1431, "step": 673500 }, { "epoch": 1.99, "learning_rate": 6.741144451690259e-06, "loss": 1.1389, "step": 674000 }, { "epoch": 1.99, "learning_rate": 6.7313085054378035e-06, "loss": 1.1402, "step": 674500 }, { "epoch": 1.99, "learning_rate": 6.721472559185348e-06, "loss": 1.1332, "step": 675000 }, { "epoch": 1.99, "learning_rate": 6.711636612932892e-06, "loss": 1.1514, "step": 675500 }, { "epoch": 1.99, "learning_rate": 6.701800666680438e-06, "loss": 1.1412, "step": 676000 }, { "epoch": 2.0, "learning_rate": 6.6919647204279824e-06, "loss": 1.1441, "step": 676500 }, { "epoch": 2.0, "learning_rate": 6.682128774175527e-06, "loss": 1.1358, "step": 677000 }, { "epoch": 2.0, "learning_rate": 6.672292827923071e-06, "loss": 1.137, "step": 677500 }, { "epoch": 2.0, "eval_bleu": 43.0048, "eval_gen_len": 17.7835, "eval_loss": 1.1314483880996704, "eval_runtime": 900.2647, "eval_samples_per_second": 60.839, "eval_steps_per_second": 3.803, "step": 677786 }, { "epoch": 2.0, "learning_rate": 6.662456881670616e-06, "loss": 1.1188, "step": 678000 }, { "epoch": 2.0, "learning_rate": 6.652620935418161e-06, "loss": 1.0837, "step": 678500 }, { "epoch": 2.0, "learning_rate": 6.642784989165706e-06, "loss": 1.0845, "step": 679000 }, { "epoch": 2.01, "learning_rate": 6.63294904291325e-06, "loss": 1.0876, "step": 679500 }, { "epoch": 2.01, "learning_rate": 6.623113096660795e-06, "loss": 1.0745, "step": 680000 }, { "epoch": 2.01, "learning_rate": 6.61327715040834e-06, "loss": 1.0481, "step": 680500 }, { "epoch": 2.01, "learning_rate": 6.603441204155885e-06, "loss": 1.0778, "step": 681000 }, { "epoch": 2.01, "learning_rate": 6.593605257903429e-06, "loss": 1.087, "step": 681500 }, { "epoch": 2.01, "learning_rate": 6.583769311650974e-06, "loss": 1.086, "step": 682000 }, { "epoch": 2.01, "learning_rate": 6.5739333653985185e-06, "loss": 1.073, "step": 682500 }, { "epoch": 2.02, "learning_rate": 6.564097419146064e-06, "loss": 1.0943, "step": 683000 }, { "epoch": 2.02, "learning_rate": 6.554261472893608e-06, "loss": 1.0944, "step": 683500 }, { "epoch": 2.02, "learning_rate": 6.544425526641152e-06, "loss": 1.0893, "step": 684000 }, { "epoch": 2.02, "learning_rate": 6.5345895803886975e-06, "loss": 1.0685, "step": 684500 }, { "epoch": 2.02, "learning_rate": 6.524753634136242e-06, "loss": 1.0741, "step": 685000 }, { "epoch": 2.02, "learning_rate": 6.514917687883787e-06, "loss": 1.0855, "step": 685500 }, { "epoch": 2.02, "learning_rate": 6.505081741631331e-06, "loss": 1.0694, "step": 686000 }, { "epoch": 2.03, "learning_rate": 6.4952457953788764e-06, "loss": 1.0725, "step": 686500 }, { "epoch": 2.03, "learning_rate": 6.485409849126421e-06, "loss": 1.0824, "step": 687000 }, { "epoch": 2.03, "learning_rate": 6.475573902873966e-06, "loss": 1.1022, "step": 687500 }, { "epoch": 2.03, "learning_rate": 6.46573795662151e-06, "loss": 1.0634, "step": 688000 }, { "epoch": 2.03, "learning_rate": 6.455902010369055e-06, "loss": 1.0761, "step": 688500 }, { "epoch": 2.03, "learning_rate": 6.4460660641166e-06, "loss": 1.0733, "step": 689000 }, { "epoch": 2.03, "learning_rate": 6.436230117864144e-06, "loss": 1.0699, "step": 689500 }, { "epoch": 2.04, "learning_rate": 6.426394171611689e-06, "loss": 1.0912, "step": 690000 }, { "epoch": 2.04, "learning_rate": 6.416558225359234e-06, "loss": 1.0712, "step": 690500 }, { "epoch": 2.04, "learning_rate": 6.406722279106779e-06, "loss": 1.0987, "step": 691000 }, { "epoch": 2.04, "learning_rate": 6.396886332854323e-06, "loss": 1.0833, "step": 691500 }, { "epoch": 2.04, "learning_rate": 6.387050386601868e-06, "loss": 1.0749, "step": 692000 }, { "epoch": 2.04, "learning_rate": 6.377214440349413e-06, "loss": 1.0842, "step": 692500 }, { "epoch": 2.04, "learning_rate": 6.367378494096958e-06, "loss": 1.0813, "step": 693000 }, { "epoch": 2.05, "learning_rate": 6.357542547844502e-06, "loss": 1.079, "step": 693500 }, { "epoch": 2.05, "learning_rate": 6.347706601592046e-06, "loss": 1.0709, "step": 694000 }, { "epoch": 2.05, "learning_rate": 6.3378706553395915e-06, "loss": 1.082, "step": 694500 }, { "epoch": 2.05, "learning_rate": 6.328034709087137e-06, "loss": 1.0747, "step": 695000 }, { "epoch": 2.05, "learning_rate": 6.318198762834681e-06, "loss": 1.0882, "step": 695500 }, { "epoch": 2.05, "learning_rate": 6.308362816582225e-06, "loss": 1.0809, "step": 696000 }, { "epoch": 2.06, "learning_rate": 6.2985268703297704e-06, "loss": 1.0628, "step": 696500 }, { "epoch": 2.06, "learning_rate": 6.288690924077316e-06, "loss": 1.0858, "step": 697000 }, { "epoch": 2.06, "learning_rate": 6.27885497782486e-06, "loss": 1.0898, "step": 697500 }, { "epoch": 2.06, "learning_rate": 6.269019031572404e-06, "loss": 1.0976, "step": 698000 }, { "epoch": 2.06, "learning_rate": 6.2591830853199486e-06, "loss": 1.0838, "step": 698500 }, { "epoch": 2.06, "learning_rate": 6.249347139067495e-06, "loss": 1.0731, "step": 699000 }, { "epoch": 2.06, "learning_rate": 6.239511192815039e-06, "loss": 1.0662, "step": 699500 }, { "epoch": 2.07, "learning_rate": 6.229675246562583e-06, "loss": 1.065, "step": 700000 }, { "epoch": 2.07, "learning_rate": 6.2198393003101275e-06, "loss": 1.081, "step": 700500 }, { "epoch": 2.07, "learning_rate": 6.210003354057672e-06, "loss": 1.0853, "step": 701000 }, { "epoch": 2.07, "learning_rate": 6.200167407805218e-06, "loss": 1.0695, "step": 701500 }, { "epoch": 2.07, "learning_rate": 6.190331461552762e-06, "loss": 1.0674, "step": 702000 }, { "epoch": 2.07, "learning_rate": 6.1804955153003065e-06, "loss": 1.0841, "step": 702500 }, { "epoch": 2.07, "learning_rate": 6.170659569047851e-06, "loss": 1.0919, "step": 703000 }, { "epoch": 2.08, "learning_rate": 6.160823622795397e-06, "loss": 1.0748, "step": 703500 }, { "epoch": 2.08, "learning_rate": 6.150987676542941e-06, "loss": 1.0766, "step": 704000 }, { "epoch": 2.08, "learning_rate": 6.1411517302904855e-06, "loss": 1.0782, "step": 704500 }, { "epoch": 2.08, "learning_rate": 6.13131578403803e-06, "loss": 1.0852, "step": 705000 }, { "epoch": 2.08, "learning_rate": 6.121479837785574e-06, "loss": 1.0839, "step": 705500 }, { "epoch": 2.08, "learning_rate": 6.11164389153312e-06, "loss": 1.0856, "step": 706000 }, { "epoch": 2.08, "learning_rate": 6.1018079452806645e-06, "loss": 1.0992, "step": 706500 }, { "epoch": 2.09, "learning_rate": 6.091971999028209e-06, "loss": 1.0778, "step": 707000 }, { "epoch": 2.09, "learning_rate": 6.082136052775753e-06, "loss": 1.0863, "step": 707500 }, { "epoch": 2.09, "learning_rate": 6.072300106523299e-06, "loss": 1.077, "step": 708000 }, { "epoch": 2.09, "learning_rate": 6.0624641602708434e-06, "loss": 1.0825, "step": 708500 }, { "epoch": 2.09, "learning_rate": 6.052628214018388e-06, "loss": 1.0722, "step": 709000 }, { "epoch": 2.09, "learning_rate": 6.042792267765932e-06, "loss": 1.0726, "step": 709500 }, { "epoch": 2.1, "learning_rate": 6.032956321513476e-06, "loss": 1.0879, "step": 710000 }, { "epoch": 2.1, "learning_rate": 6.023120375261022e-06, "loss": 1.077, "step": 710500 }, { "epoch": 2.1, "learning_rate": 6.013284429008567e-06, "loss": 1.0815, "step": 711000 }, { "epoch": 2.1, "learning_rate": 6.003448482756111e-06, "loss": 1.0799, "step": 711500 }, { "epoch": 2.1, "learning_rate": 5.993612536503655e-06, "loss": 1.0744, "step": 712000 }, { "epoch": 2.1, "learning_rate": 5.9837765902512005e-06, "loss": 1.0737, "step": 712500 }, { "epoch": 2.1, "learning_rate": 5.973940643998746e-06, "loss": 1.0827, "step": 713000 }, { "epoch": 2.11, "learning_rate": 5.96410469774629e-06, "loss": 1.0817, "step": 713500 }, { "epoch": 2.11, "learning_rate": 5.954268751493834e-06, "loss": 1.0673, "step": 714000 }, { "epoch": 2.11, "learning_rate": 5.9444328052413795e-06, "loss": 1.0859, "step": 714500 }, { "epoch": 2.11, "learning_rate": 5.934596858988925e-06, "loss": 1.0859, "step": 715000 }, { "epoch": 2.11, "learning_rate": 5.924760912736469e-06, "loss": 1.0985, "step": 715500 }, { "epoch": 2.11, "learning_rate": 5.914924966484013e-06, "loss": 1.0841, "step": 716000 }, { "epoch": 2.11, "learning_rate": 5.9050890202315585e-06, "loss": 1.0937, "step": 716500 }, { "epoch": 2.12, "learning_rate": 5.895253073979103e-06, "loss": 1.0671, "step": 717000 }, { "epoch": 2.12, "learning_rate": 5.885417127726648e-06, "loss": 1.0915, "step": 717500 }, { "epoch": 2.12, "learning_rate": 5.875581181474192e-06, "loss": 1.0724, "step": 718000 }, { "epoch": 2.12, "learning_rate": 5.865745235221737e-06, "loss": 1.0908, "step": 718500 }, { "epoch": 2.12, "learning_rate": 5.855909288969282e-06, "loss": 1.0959, "step": 719000 }, { "epoch": 2.12, "learning_rate": 5.846073342716827e-06, "loss": 1.0669, "step": 719500 }, { "epoch": 2.12, "learning_rate": 5.836237396464371e-06, "loss": 1.0877, "step": 720000 }, { "epoch": 2.13, "learning_rate": 5.8264014502119156e-06, "loss": 1.0948, "step": 720500 }, { "epoch": 2.13, "learning_rate": 5.816565503959461e-06, "loss": 1.1006, "step": 721000 }, { "epoch": 2.13, "learning_rate": 5.806729557707005e-06, "loss": 1.0832, "step": 721500 }, { "epoch": 2.13, "learning_rate": 5.79689361145455e-06, "loss": 1.049, "step": 722000 }, { "epoch": 2.13, "learning_rate": 5.7870576652020945e-06, "loss": 1.0786, "step": 722500 }, { "epoch": 2.13, "learning_rate": 5.77722171894964e-06, "loss": 1.0841, "step": 723000 }, { "epoch": 2.13, "learning_rate": 5.767385772697184e-06, "loss": 1.071, "step": 723500 }, { "epoch": 2.14, "learning_rate": 5.757549826444729e-06, "loss": 1.0627, "step": 724000 }, { "epoch": 2.14, "learning_rate": 5.7477138801922735e-06, "loss": 1.0756, "step": 724500 }, { "epoch": 2.14, "learning_rate": 5.737877933939819e-06, "loss": 1.0691, "step": 725000 }, { "epoch": 2.14, "learning_rate": 5.728041987687363e-06, "loss": 1.0891, "step": 725500 }, { "epoch": 2.14, "learning_rate": 5.718206041434907e-06, "loss": 1.0775, "step": 726000 }, { "epoch": 2.14, "learning_rate": 5.7083700951824525e-06, "loss": 1.078, "step": 726500 }, { "epoch": 2.15, "learning_rate": 5.698534148929998e-06, "loss": 1.0813, "step": 727000 }, { "epoch": 2.15, "learning_rate": 5.688698202677542e-06, "loss": 1.0661, "step": 727500 }, { "epoch": 2.15, "learning_rate": 5.678862256425086e-06, "loss": 1.0756, "step": 728000 }, { "epoch": 2.15, "learning_rate": 5.669026310172631e-06, "loss": 1.0677, "step": 728500 }, { "epoch": 2.15, "learning_rate": 5.659190363920176e-06, "loss": 1.0918, "step": 729000 }, { "epoch": 2.15, "learning_rate": 5.649354417667721e-06, "loss": 1.0665, "step": 729500 }, { "epoch": 2.15, "learning_rate": 5.639518471415265e-06, "loss": 1.1031, "step": 730000 }, { "epoch": 2.16, "learning_rate": 5.62968252516281e-06, "loss": 1.0595, "step": 730500 }, { "epoch": 2.16, "learning_rate": 5.619846578910355e-06, "loss": 1.0715, "step": 731000 }, { "epoch": 2.16, "learning_rate": 5.6100106326579e-06, "loss": 1.0788, "step": 731500 }, { "epoch": 2.16, "learning_rate": 5.600174686405444e-06, "loss": 1.0937, "step": 732000 }, { "epoch": 2.16, "learning_rate": 5.5903387401529885e-06, "loss": 1.0716, "step": 732500 }, { "epoch": 2.16, "learning_rate": 5.580502793900533e-06, "loss": 1.0809, "step": 733000 }, { "epoch": 2.16, "learning_rate": 5.570666847648079e-06, "loss": 1.0743, "step": 733500 }, { "epoch": 2.17, "learning_rate": 5.560830901395623e-06, "loss": 1.0511, "step": 734000 }, { "epoch": 2.17, "learning_rate": 5.5509949551431675e-06, "loss": 1.0831, "step": 734500 }, { "epoch": 2.17, "learning_rate": 5.541159008890712e-06, "loss": 1.0816, "step": 735000 }, { "epoch": 2.17, "learning_rate": 5.531323062638258e-06, "loss": 1.0779, "step": 735500 }, { "epoch": 2.17, "learning_rate": 5.521487116385802e-06, "loss": 1.0693, "step": 736000 }, { "epoch": 2.17, "learning_rate": 5.5116511701333465e-06, "loss": 1.0775, "step": 736500 }, { "epoch": 2.17, "learning_rate": 5.501815223880891e-06, "loss": 1.0987, "step": 737000 }, { "epoch": 2.18, "learning_rate": 5.491979277628435e-06, "loss": 1.059, "step": 737500 }, { "epoch": 2.18, "learning_rate": 5.482143331375981e-06, "loss": 1.0693, "step": 738000 }, { "epoch": 2.18, "learning_rate": 5.4723073851235255e-06, "loss": 1.0966, "step": 738500 }, { "epoch": 2.18, "learning_rate": 5.46247143887107e-06, "loss": 1.0975, "step": 739000 }, { "epoch": 2.18, "learning_rate": 5.452635492618614e-06, "loss": 1.0666, "step": 739500 }, { "epoch": 2.18, "learning_rate": 5.44279954636616e-06, "loss": 1.0958, "step": 740000 }, { "epoch": 2.19, "learning_rate": 5.4329636001137044e-06, "loss": 1.0848, "step": 740500 }, { "epoch": 2.19, "learning_rate": 5.423127653861249e-06, "loss": 1.0795, "step": 741000 }, { "epoch": 2.19, "learning_rate": 5.413291707608793e-06, "loss": 1.074, "step": 741500 }, { "epoch": 2.19, "learning_rate": 5.403455761356337e-06, "loss": 1.0742, "step": 742000 }, { "epoch": 2.19, "learning_rate": 5.393619815103883e-06, "loss": 1.0794, "step": 742500 }, { "epoch": 2.19, "learning_rate": 5.383783868851428e-06, "loss": 1.0864, "step": 743000 }, { "epoch": 2.19, "learning_rate": 5.373947922598972e-06, "loss": 1.0664, "step": 743500 }, { "epoch": 2.2, "learning_rate": 5.364111976346516e-06, "loss": 1.088, "step": 744000 }, { "epoch": 2.2, "learning_rate": 5.354276030094061e-06, "loss": 1.0966, "step": 744500 }, { "epoch": 2.2, "learning_rate": 5.344440083841607e-06, "loss": 1.074, "step": 745000 }, { "epoch": 2.2, "learning_rate": 5.334604137589151e-06, "loss": 1.0814, "step": 745500 }, { "epoch": 2.2, "learning_rate": 5.324768191336695e-06, "loss": 1.0803, "step": 746000 }, { "epoch": 2.2, "learning_rate": 5.31493224508424e-06, "loss": 1.0768, "step": 746500 }, { "epoch": 2.2, "learning_rate": 5.305096298831786e-06, "loss": 1.0803, "step": 747000 }, { "epoch": 2.21, "learning_rate": 5.29526035257933e-06, "loss": 1.0851, "step": 747500 }, { "epoch": 2.21, "learning_rate": 5.285424406326874e-06, "loss": 1.0732, "step": 748000 }, { "epoch": 2.21, "learning_rate": 5.275588460074419e-06, "loss": 1.0753, "step": 748500 }, { "epoch": 2.21, "learning_rate": 5.265752513821964e-06, "loss": 1.0794, "step": 749000 }, { "epoch": 2.21, "learning_rate": 5.255916567569509e-06, "loss": 1.0921, "step": 749500 }, { "epoch": 2.21, "learning_rate": 5.246080621317053e-06, "loss": 1.0715, "step": 750000 }, { "epoch": 2.21, "learning_rate": 5.236244675064598e-06, "loss": 1.082, "step": 750500 }, { "epoch": 2.22, "learning_rate": 5.226408728812142e-06, "loss": 1.0868, "step": 751000 }, { "epoch": 2.22, "learning_rate": 5.216572782559688e-06, "loss": 1.067, "step": 751500 }, { "epoch": 2.22, "learning_rate": 5.206736836307232e-06, "loss": 1.0749, "step": 752000 }, { "epoch": 2.22, "learning_rate": 5.1969008900547766e-06, "loss": 1.0922, "step": 752500 }, { "epoch": 2.22, "learning_rate": 5.187064943802321e-06, "loss": 1.0807, "step": 753000 }, { "epoch": 2.22, "learning_rate": 5.177228997549866e-06, "loss": 1.0727, "step": 753500 }, { "epoch": 2.22, "learning_rate": 5.167393051297411e-06, "loss": 1.079, "step": 754000 }, { "epoch": 2.23, "learning_rate": 5.1575571050449555e-06, "loss": 1.0739, "step": 754500 }, { "epoch": 2.23, "learning_rate": 5.1477211587925e-06, "loss": 1.054, "step": 755000 }, { "epoch": 2.23, "learning_rate": 5.137885212540045e-06, "loss": 1.0764, "step": 755500 }, { "epoch": 2.23, "learning_rate": 5.12804926628759e-06, "loss": 1.0859, "step": 756000 }, { "epoch": 2.23, "learning_rate": 5.1182133200351345e-06, "loss": 1.082, "step": 756500 }, { "epoch": 2.23, "learning_rate": 5.108377373782679e-06, "loss": 1.0692, "step": 757000 }, { "epoch": 2.24, "learning_rate": 5.098541427530224e-06, "loss": 1.0668, "step": 757500 }, { "epoch": 2.24, "learning_rate": 5.088705481277768e-06, "loss": 1.096, "step": 758000 }, { "epoch": 2.24, "learning_rate": 5.0788695350253135e-06, "loss": 1.0699, "step": 758500 }, { "epoch": 2.24, "learning_rate": 5.069033588772858e-06, "loss": 1.077, "step": 759000 }, { "epoch": 2.24, "learning_rate": 5.059197642520403e-06, "loss": 1.0915, "step": 759500 }, { "epoch": 2.24, "learning_rate": 5.049361696267947e-06, "loss": 1.0577, "step": 760000 }, { "epoch": 2.24, "learning_rate": 5.039525750015492e-06, "loss": 1.0767, "step": 760500 }, { "epoch": 2.25, "learning_rate": 5.029689803763037e-06, "loss": 1.0963, "step": 761000 }, { "epoch": 2.25, "learning_rate": 5.019853857510582e-06, "loss": 1.1015, "step": 761500 }, { "epoch": 2.25, "learning_rate": 5.010017911258126e-06, "loss": 1.0964, "step": 762000 }, { "epoch": 2.25, "learning_rate": 5.000181965005671e-06, "loss": 1.0602, "step": 762500 }, { "epoch": 2.25, "learning_rate": 4.990346018753216e-06, "loss": 1.068, "step": 763000 }, { "epoch": 2.25, "learning_rate": 4.98051007250076e-06, "loss": 1.0728, "step": 763500 }, { "epoch": 2.25, "learning_rate": 4.970674126248305e-06, "loss": 1.0728, "step": 764000 }, { "epoch": 2.26, "learning_rate": 4.9608381799958496e-06, "loss": 1.0647, "step": 764500 }, { "epoch": 2.26, "learning_rate": 4.951002233743395e-06, "loss": 1.0872, "step": 765000 }, { "epoch": 2.26, "learning_rate": 4.941166287490939e-06, "loss": 1.0919, "step": 765500 }, { "epoch": 2.26, "learning_rate": 4.931330341238484e-06, "loss": 1.0777, "step": 766000 }, { "epoch": 2.26, "learning_rate": 4.9214943949860285e-06, "loss": 1.0703, "step": 766500 }, { "epoch": 2.26, "learning_rate": 4.911658448733574e-06, "loss": 1.0891, "step": 767000 }, { "epoch": 2.26, "learning_rate": 4.901822502481118e-06, "loss": 1.0993, "step": 767500 }, { "epoch": 2.27, "learning_rate": 4.891986556228663e-06, "loss": 1.0728, "step": 768000 }, { "epoch": 2.27, "learning_rate": 4.8821506099762075e-06, "loss": 1.073, "step": 768500 }, { "epoch": 2.27, "learning_rate": 4.872314663723752e-06, "loss": 1.0661, "step": 769000 }, { "epoch": 2.27, "learning_rate": 4.862478717471297e-06, "loss": 1.0909, "step": 769500 }, { "epoch": 2.27, "learning_rate": 4.852642771218841e-06, "loss": 1.0926, "step": 770000 }, { "epoch": 2.27, "learning_rate": 4.8428068249663865e-06, "loss": 1.0679, "step": 770500 }, { "epoch": 2.28, "learning_rate": 4.832970878713931e-06, "loss": 1.0656, "step": 771000 }, { "epoch": 2.28, "learning_rate": 4.823134932461476e-06, "loss": 1.0812, "step": 771500 }, { "epoch": 2.28, "learning_rate": 4.81329898620902e-06, "loss": 1.0833, "step": 772000 }, { "epoch": 2.28, "learning_rate": 4.803463039956565e-06, "loss": 1.0983, "step": 772500 }, { "epoch": 2.28, "learning_rate": 4.79362709370411e-06, "loss": 1.0749, "step": 773000 }, { "epoch": 2.28, "learning_rate": 4.783791147451654e-06, "loss": 1.0714, "step": 773500 }, { "epoch": 2.28, "learning_rate": 4.773955201199199e-06, "loss": 1.0885, "step": 774000 }, { "epoch": 2.29, "learning_rate": 4.7641192549467436e-06, "loss": 1.0811, "step": 774500 }, { "epoch": 2.29, "learning_rate": 4.754283308694289e-06, "loss": 1.0711, "step": 775000 }, { "epoch": 2.29, "learning_rate": 4.744447362441833e-06, "loss": 1.0839, "step": 775500 }, { "epoch": 2.29, "learning_rate": 4.734611416189378e-06, "loss": 1.0802, "step": 776000 }, { "epoch": 2.29, "learning_rate": 4.7247754699369225e-06, "loss": 1.0739, "step": 776500 }, { "epoch": 2.29, "learning_rate": 4.714939523684467e-06, "loss": 1.08, "step": 777000 }, { "epoch": 2.29, "learning_rate": 4.705103577432012e-06, "loss": 1.0773, "step": 777500 }, { "epoch": 2.3, "learning_rate": 4.695267631179556e-06, "loss": 1.0783, "step": 778000 }, { "epoch": 2.3, "learning_rate": 4.6854316849271015e-06, "loss": 1.0709, "step": 778500 }, { "epoch": 2.3, "learning_rate": 4.675595738674646e-06, "loss": 1.0673, "step": 779000 }, { "epoch": 2.3, "learning_rate": 4.665759792422191e-06, "loss": 1.0734, "step": 779500 }, { "epoch": 2.3, "learning_rate": 4.655923846169735e-06, "loss": 1.0852, "step": 780000 }, { "epoch": 2.3, "learning_rate": 4.64608789991728e-06, "loss": 1.0577, "step": 780500 }, { "epoch": 2.3, "learning_rate": 4.636251953664825e-06, "loss": 1.0654, "step": 781000 }, { "epoch": 2.31, "learning_rate": 4.626416007412369e-06, "loss": 1.072, "step": 781500 }, { "epoch": 2.31, "learning_rate": 4.616580061159914e-06, "loss": 1.0808, "step": 782000 }, { "epoch": 2.31, "learning_rate": 4.606744114907459e-06, "loss": 1.0972, "step": 782500 }, { "epoch": 2.31, "learning_rate": 4.596908168655004e-06, "loss": 1.0829, "step": 783000 }, { "epoch": 2.31, "learning_rate": 4.587072222402548e-06, "loss": 1.0667, "step": 783500 }, { "epoch": 2.31, "learning_rate": 4.577236276150093e-06, "loss": 1.0635, "step": 784000 }, { "epoch": 2.31, "learning_rate": 4.5674003298976376e-06, "loss": 1.0754, "step": 784500 }, { "epoch": 2.32, "learning_rate": 4.557564383645182e-06, "loss": 1.0763, "step": 785000 }, { "epoch": 2.32, "learning_rate": 4.547728437392727e-06, "loss": 1.0726, "step": 785500 }, { "epoch": 2.32, "learning_rate": 4.537892491140271e-06, "loss": 1.0707, "step": 786000 }, { "epoch": 2.32, "learning_rate": 4.5280565448878165e-06, "loss": 1.0779, "step": 786500 }, { "epoch": 2.32, "learning_rate": 4.518220598635361e-06, "loss": 1.0897, "step": 787000 }, { "epoch": 2.32, "learning_rate": 4.508384652382906e-06, "loss": 1.0834, "step": 787500 }, { "epoch": 2.33, "learning_rate": 4.49854870613045e-06, "loss": 1.0702, "step": 788000 }, { "epoch": 2.33, "learning_rate": 4.488712759877995e-06, "loss": 1.0755, "step": 788500 }, { "epoch": 2.33, "learning_rate": 4.47887681362554e-06, "loss": 1.0766, "step": 789000 }, { "epoch": 2.33, "learning_rate": 4.469040867373084e-06, "loss": 1.0973, "step": 789500 }, { "epoch": 2.33, "learning_rate": 4.459204921120629e-06, "loss": 1.0735, "step": 790000 }, { "epoch": 2.33, "learning_rate": 4.449368974868174e-06, "loss": 1.0642, "step": 790500 }, { "epoch": 2.33, "learning_rate": 4.439533028615719e-06, "loss": 1.0588, "step": 791000 }, { "epoch": 2.34, "learning_rate": 4.429697082363263e-06, "loss": 1.0954, "step": 791500 }, { "epoch": 2.34, "learning_rate": 4.419861136110808e-06, "loss": 1.0889, "step": 792000 }, { "epoch": 2.34, "learning_rate": 4.410025189858353e-06, "loss": 1.0686, "step": 792500 }, { "epoch": 2.34, "learning_rate": 4.400189243605898e-06, "loss": 1.0712, "step": 793000 }, { "epoch": 2.34, "learning_rate": 4.390353297353442e-06, "loss": 1.0962, "step": 793500 }, { "epoch": 2.34, "learning_rate": 4.380517351100987e-06, "loss": 1.077, "step": 794000 }, { "epoch": 2.34, "learning_rate": 4.370681404848532e-06, "loss": 1.0767, "step": 794500 }, { "epoch": 2.35, "learning_rate": 4.360845458596077e-06, "loss": 1.0781, "step": 795000 }, { "epoch": 2.35, "learning_rate": 4.351009512343621e-06, "loss": 1.0919, "step": 795500 }, { "epoch": 2.35, "learning_rate": 4.341173566091165e-06, "loss": 1.0593, "step": 796000 }, { "epoch": 2.35, "learning_rate": 4.3313376198387106e-06, "loss": 1.0887, "step": 796500 }, { "epoch": 2.35, "learning_rate": 4.321501673586255e-06, "loss": 1.0863, "step": 797000 }, { "epoch": 2.35, "learning_rate": 4.3116657273338e-06, "loss": 1.0607, "step": 797500 }, { "epoch": 2.35, "learning_rate": 4.301829781081344e-06, "loss": 1.0717, "step": 798000 }, { "epoch": 2.36, "learning_rate": 4.2919938348288895e-06, "loss": 1.0709, "step": 798500 }, { "epoch": 2.36, "learning_rate": 4.282157888576434e-06, "loss": 1.0891, "step": 799000 }, { "epoch": 2.36, "learning_rate": 4.272321942323979e-06, "loss": 1.0715, "step": 799500 }, { "epoch": 2.36, "learning_rate": 4.262485996071523e-06, "loss": 1.0664, "step": 800000 }, { "epoch": 2.36, "learning_rate": 4.2526500498190685e-06, "loss": 1.0866, "step": 800500 }, { "epoch": 2.36, "learning_rate": 4.242814103566613e-06, "loss": 1.0731, "step": 801000 }, { "epoch": 2.37, "learning_rate": 4.232978157314158e-06, "loss": 1.0954, "step": 801500 }, { "epoch": 2.37, "learning_rate": 4.223142211061702e-06, "loss": 1.0766, "step": 802000 }, { "epoch": 2.37, "learning_rate": 4.2133062648092475e-06, "loss": 1.0606, "step": 802500 }, { "epoch": 2.37, "learning_rate": 4.203470318556792e-06, "loss": 1.0587, "step": 803000 }, { "epoch": 2.37, "learning_rate": 4.193634372304337e-06, "loss": 1.0859, "step": 803500 }, { "epoch": 2.37, "learning_rate": 4.183798426051881e-06, "loss": 1.0801, "step": 804000 }, { "epoch": 2.37, "learning_rate": 4.173962479799426e-06, "loss": 1.071, "step": 804500 }, { "epoch": 2.38, "learning_rate": 4.164126533546971e-06, "loss": 1.0778, "step": 805000 }, { "epoch": 2.38, "learning_rate": 4.154290587294515e-06, "loss": 1.0808, "step": 805500 }, { "epoch": 2.38, "learning_rate": 4.14445464104206e-06, "loss": 1.0886, "step": 806000 }, { "epoch": 2.38, "learning_rate": 4.1346186947896046e-06, "loss": 1.0897, "step": 806500 }, { "epoch": 2.38, "learning_rate": 4.12478274853715e-06, "loss": 1.0787, "step": 807000 }, { "epoch": 2.38, "learning_rate": 4.114946802284694e-06, "loss": 1.0629, "step": 807500 }, { "epoch": 2.38, "learning_rate": 4.105110856032238e-06, "loss": 1.0855, "step": 808000 }, { "epoch": 2.39, "learning_rate": 4.0952749097797835e-06, "loss": 1.0579, "step": 808500 }, { "epoch": 2.39, "learning_rate": 4.085438963527328e-06, "loss": 1.0815, "step": 809000 }, { "epoch": 2.39, "learning_rate": 4.075603017274873e-06, "loss": 1.0661, "step": 809500 }, { "epoch": 2.39, "learning_rate": 4.065767071022417e-06, "loss": 1.0705, "step": 810000 }, { "epoch": 2.39, "learning_rate": 4.0559311247699625e-06, "loss": 1.064, "step": 810500 }, { "epoch": 2.39, "learning_rate": 4.046095178517507e-06, "loss": 1.0849, "step": 811000 }, { "epoch": 2.39, "learning_rate": 4.036259232265052e-06, "loss": 1.0803, "step": 811500 }, { "epoch": 2.4, "learning_rate": 4.026423286012596e-06, "loss": 1.0728, "step": 812000 }, { "epoch": 2.4, "learning_rate": 4.016587339760141e-06, "loss": 1.0794, "step": 812500 }, { "epoch": 2.4, "learning_rate": 4.006751393507686e-06, "loss": 1.0762, "step": 813000 }, { "epoch": 2.4, "learning_rate": 3.99691544725523e-06, "loss": 1.0863, "step": 813500 }, { "epoch": 2.4, "learning_rate": 3.987079501002775e-06, "loss": 1.0748, "step": 814000 }, { "epoch": 2.4, "learning_rate": 3.97724355475032e-06, "loss": 1.0745, "step": 814500 }, { "epoch": 2.4, "learning_rate": 3.967407608497865e-06, "loss": 1.0825, "step": 815000 }, { "epoch": 2.41, "learning_rate": 3.957571662245409e-06, "loss": 1.0966, "step": 815500 }, { "epoch": 2.41, "learning_rate": 3.947735715992953e-06, "loss": 1.0747, "step": 816000 }, { "epoch": 2.41, "learning_rate": 3.9378997697404986e-06, "loss": 1.0802, "step": 816500 }, { "epoch": 2.41, "learning_rate": 3.928063823488043e-06, "loss": 1.0915, "step": 817000 }, { "epoch": 2.41, "learning_rate": 3.918227877235588e-06, "loss": 1.0612, "step": 817500 }, { "epoch": 2.41, "learning_rate": 3.908391930983132e-06, "loss": 1.0737, "step": 818000 }, { "epoch": 2.42, "learning_rate": 3.8985559847306775e-06, "loss": 1.0693, "step": 818500 }, { "epoch": 2.42, "learning_rate": 3.888720038478222e-06, "loss": 1.0652, "step": 819000 }, { "epoch": 2.42, "learning_rate": 3.878884092225767e-06, "loss": 1.0725, "step": 819500 }, { "epoch": 2.42, "learning_rate": 3.869048145973311e-06, "loss": 1.0866, "step": 820000 }, { "epoch": 2.42, "learning_rate": 3.859212199720856e-06, "loss": 1.0705, "step": 820500 }, { "epoch": 2.42, "learning_rate": 3.849376253468401e-06, "loss": 1.0679, "step": 821000 }, { "epoch": 2.42, "learning_rate": 3.839540307215945e-06, "loss": 1.0795, "step": 821500 }, { "epoch": 2.43, "learning_rate": 3.82970436096349e-06, "loss": 1.0743, "step": 822000 }, { "epoch": 2.43, "learning_rate": 3.819868414711035e-06, "loss": 1.0621, "step": 822500 }, { "epoch": 2.43, "learning_rate": 3.81003246845858e-06, "loss": 1.0756, "step": 823000 }, { "epoch": 2.43, "learning_rate": 3.800196522206124e-06, "loss": 1.0622, "step": 823500 }, { "epoch": 2.43, "learning_rate": 3.790360575953669e-06, "loss": 1.0735, "step": 824000 }, { "epoch": 2.43, "learning_rate": 3.7805246297012136e-06, "loss": 1.0726, "step": 824500 }, { "epoch": 2.43, "learning_rate": 3.7706886834487584e-06, "loss": 1.0595, "step": 825000 }, { "epoch": 2.44, "learning_rate": 3.760852737196303e-06, "loss": 1.0625, "step": 825500 }, { "epoch": 2.44, "learning_rate": 3.751016790943848e-06, "loss": 1.0805, "step": 826000 }, { "epoch": 2.44, "learning_rate": 3.7411808446913926e-06, "loss": 1.0791, "step": 826500 }, { "epoch": 2.44, "learning_rate": 3.7313448984389373e-06, "loss": 1.0822, "step": 827000 }, { "epoch": 2.44, "learning_rate": 3.721508952186482e-06, "loss": 1.0559, "step": 827500 }, { "epoch": 2.44, "learning_rate": 3.711673005934027e-06, "loss": 1.0872, "step": 828000 }, { "epoch": 2.44, "learning_rate": 3.701837059681571e-06, "loss": 1.0799, "step": 828500 }, { "epoch": 2.45, "learning_rate": 3.6920011134291163e-06, "loss": 1.0667, "step": 829000 }, { "epoch": 2.45, "learning_rate": 3.6821651671766606e-06, "loss": 1.0582, "step": 829500 }, { "epoch": 2.45, "learning_rate": 3.6723292209242058e-06, "loss": 1.0805, "step": 830000 }, { "epoch": 2.45, "learning_rate": 3.66249327467175e-06, "loss": 1.0655, "step": 830500 }, { "epoch": 2.45, "learning_rate": 3.6526573284192953e-06, "loss": 1.0686, "step": 831000 }, { "epoch": 2.45, "learning_rate": 3.6428213821668396e-06, "loss": 1.0774, "step": 831500 }, { "epoch": 2.46, "learning_rate": 3.632985435914384e-06, "loss": 1.0903, "step": 832000 }, { "epoch": 2.46, "learning_rate": 3.623149489661929e-06, "loss": 1.0776, "step": 832500 }, { "epoch": 2.46, "learning_rate": 3.6133135434094734e-06, "loss": 1.0812, "step": 833000 }, { "epoch": 2.46, "learning_rate": 3.6034775971570186e-06, "loss": 1.0945, "step": 833500 }, { "epoch": 2.46, "learning_rate": 3.593641650904563e-06, "loss": 1.0571, "step": 834000 }, { "epoch": 2.46, "learning_rate": 3.583805704652108e-06, "loss": 1.0911, "step": 834500 }, { "epoch": 2.46, "learning_rate": 3.5739697583996524e-06, "loss": 1.0805, "step": 835000 }, { "epoch": 2.47, "learning_rate": 3.5641338121471975e-06, "loss": 1.0912, "step": 835500 }, { "epoch": 2.47, "learning_rate": 3.554297865894742e-06, "loss": 1.0653, "step": 836000 }, { "epoch": 2.47, "learning_rate": 3.544461919642286e-06, "loss": 1.0752, "step": 836500 }, { "epoch": 2.47, "learning_rate": 3.5346259733898313e-06, "loss": 1.0737, "step": 837000 }, { "epoch": 2.47, "learning_rate": 3.5247900271373757e-06, "loss": 1.0761, "step": 837500 }, { "epoch": 2.47, "learning_rate": 3.514954080884921e-06, "loss": 1.0644, "step": 838000 }, { "epoch": 2.47, "learning_rate": 3.505118134632465e-06, "loss": 1.0677, "step": 838500 }, { "epoch": 2.48, "learning_rate": 3.4952821883800103e-06, "loss": 1.0731, "step": 839000 }, { "epoch": 2.48, "learning_rate": 3.4854462421275546e-06, "loss": 1.0735, "step": 839500 }, { "epoch": 2.48, "learning_rate": 3.4756102958750994e-06, "loss": 1.0642, "step": 840000 }, { "epoch": 2.48, "learning_rate": 3.465774349622644e-06, "loss": 1.0705, "step": 840500 }, { "epoch": 2.48, "learning_rate": 3.455938403370189e-06, "loss": 1.087, "step": 841000 }, { "epoch": 2.48, "learning_rate": 3.4461024571177336e-06, "loss": 1.0696, "step": 841500 }, { "epoch": 2.48, "learning_rate": 3.4362665108652783e-06, "loss": 1.0801, "step": 842000 }, { "epoch": 2.49, "learning_rate": 3.426430564612823e-06, "loss": 1.0787, "step": 842500 }, { "epoch": 2.49, "learning_rate": 3.416594618360368e-06, "loss": 1.0788, "step": 843000 }, { "epoch": 2.49, "learning_rate": 3.4067586721079126e-06, "loss": 1.0757, "step": 843500 }, { "epoch": 2.49, "learning_rate": 3.3969227258554573e-06, "loss": 1.0712, "step": 844000 }, { "epoch": 2.49, "learning_rate": 3.3870867796030016e-06, "loss": 1.0767, "step": 844500 }, { "epoch": 2.49, "learning_rate": 3.377250833350547e-06, "loss": 1.0434, "step": 845000 }, { "epoch": 2.49, "learning_rate": 3.367414887098091e-06, "loss": 1.0548, "step": 845500 }, { "epoch": 2.5, "learning_rate": 3.357578940845636e-06, "loss": 1.0799, "step": 846000 }, { "epoch": 2.5, "learning_rate": 3.3477429945931806e-06, "loss": 1.0857, "step": 846500 }, { "epoch": 2.5, "learning_rate": 3.3379070483407254e-06, "loss": 1.0798, "step": 847000 }, { "epoch": 2.5, "learning_rate": 3.32807110208827e-06, "loss": 1.091, "step": 847500 }, { "epoch": 2.5, "learning_rate": 3.3182351558358144e-06, "loss": 1.0806, "step": 848000 }, { "epoch": 2.5, "learning_rate": 3.3083992095833596e-06, "loss": 1.0697, "step": 848500 }, { "epoch": 2.51, "learning_rate": 3.298563263330904e-06, "loss": 1.057, "step": 849000 }, { "epoch": 2.51, "learning_rate": 3.288727317078449e-06, "loss": 1.0856, "step": 849500 }, { "epoch": 2.51, "learning_rate": 3.2788913708259934e-06, "loss": 1.0841, "step": 850000 }, { "epoch": 2.51, "learning_rate": 3.2690554245735386e-06, "loss": 1.0616, "step": 850500 }, { "epoch": 2.51, "learning_rate": 3.259219478321083e-06, "loss": 1.0512, "step": 851000 }, { "epoch": 2.51, "learning_rate": 3.249383532068628e-06, "loss": 1.0662, "step": 851500 }, { "epoch": 2.51, "learning_rate": 3.2395475858161724e-06, "loss": 1.0665, "step": 852000 }, { "epoch": 2.52, "learning_rate": 3.2297116395637167e-06, "loss": 1.0792, "step": 852500 }, { "epoch": 2.52, "learning_rate": 3.219875693311262e-06, "loss": 1.0704, "step": 853000 }, { "epoch": 2.52, "learning_rate": 3.210039747058806e-06, "loss": 1.0851, "step": 853500 }, { "epoch": 2.52, "learning_rate": 3.2002038008063513e-06, "loss": 1.0711, "step": 854000 }, { "epoch": 2.52, "learning_rate": 3.1903678545538956e-06, "loss": 1.0727, "step": 854500 }, { "epoch": 2.52, "learning_rate": 3.180531908301441e-06, "loss": 1.0764, "step": 855000 }, { "epoch": 2.52, "learning_rate": 3.170695962048985e-06, "loss": 1.0701, "step": 855500 }, { "epoch": 2.53, "learning_rate": 3.16086001579653e-06, "loss": 1.0666, "step": 856000 }, { "epoch": 2.53, "learning_rate": 3.1510240695440746e-06, "loss": 1.0771, "step": 856500 }, { "epoch": 2.53, "learning_rate": 3.1411881232916194e-06, "loss": 1.0758, "step": 857000 }, { "epoch": 2.53, "learning_rate": 3.131352177039164e-06, "loss": 1.0872, "step": 857500 }, { "epoch": 2.53, "learning_rate": 3.1215162307867084e-06, "loss": 1.0674, "step": 858000 }, { "epoch": 2.53, "learning_rate": 3.1116802845342536e-06, "loss": 1.0647, "step": 858500 }, { "epoch": 2.53, "learning_rate": 3.101844338281798e-06, "loss": 1.0804, "step": 859000 }, { "epoch": 2.54, "learning_rate": 3.092008392029343e-06, "loss": 1.0715, "step": 859500 }, { "epoch": 2.54, "learning_rate": 3.0821724457768874e-06, "loss": 1.0678, "step": 860000 }, { "epoch": 2.54, "learning_rate": 3.072336499524432e-06, "loss": 1.0604, "step": 860500 }, { "epoch": 2.54, "learning_rate": 3.062500553271977e-06, "loss": 1.0747, "step": 861000 }, { "epoch": 2.54, "learning_rate": 3.0526646070195216e-06, "loss": 1.0813, "step": 861500 }, { "epoch": 2.54, "learning_rate": 3.0428286607670664e-06, "loss": 1.0609, "step": 862000 }, { "epoch": 2.55, "learning_rate": 3.032992714514611e-06, "loss": 1.0751, "step": 862500 }, { "epoch": 2.55, "learning_rate": 3.023156768262156e-06, "loss": 1.0637, "step": 863000 }, { "epoch": 2.55, "learning_rate": 3.0133208220097006e-06, "loss": 1.0719, "step": 863500 }, { "epoch": 2.55, "learning_rate": 3.003484875757245e-06, "loss": 1.0565, "step": 864000 }, { "epoch": 2.55, "learning_rate": 2.99364892950479e-06, "loss": 1.0598, "step": 864500 }, { "epoch": 2.55, "learning_rate": 2.9838129832523344e-06, "loss": 1.0625, "step": 865000 }, { "epoch": 2.55, "learning_rate": 2.9739770369998796e-06, "loss": 1.075, "step": 865500 }, { "epoch": 2.56, "learning_rate": 2.964141090747424e-06, "loss": 1.0727, "step": 866000 }, { "epoch": 2.56, "learning_rate": 2.954305144494969e-06, "loss": 1.0654, "step": 866500 }, { "epoch": 2.56, "learning_rate": 2.9444691982425134e-06, "loss": 1.0816, "step": 867000 }, { "epoch": 2.56, "learning_rate": 2.9346332519900585e-06, "loss": 1.0786, "step": 867500 }, { "epoch": 2.56, "learning_rate": 2.924797305737603e-06, "loss": 1.0741, "step": 868000 }, { "epoch": 2.56, "learning_rate": 2.914961359485147e-06, "loss": 1.0982, "step": 868500 }, { "epoch": 2.56, "learning_rate": 2.9051254132326923e-06, "loss": 1.0765, "step": 869000 }, { "epoch": 2.57, "learning_rate": 2.8952894669802367e-06, "loss": 1.0767, "step": 869500 }, { "epoch": 2.57, "learning_rate": 2.885453520727782e-06, "loss": 1.0652, "step": 870000 }, { "epoch": 2.57, "learning_rate": 2.875617574475326e-06, "loss": 1.0709, "step": 870500 }, { "epoch": 2.57, "learning_rate": 2.8657816282228713e-06, "loss": 1.071, "step": 871000 }, { "epoch": 2.57, "learning_rate": 2.8559456819704156e-06, "loss": 1.0814, "step": 871500 }, { "epoch": 2.57, "learning_rate": 2.84610973571796e-06, "loss": 1.0698, "step": 872000 }, { "epoch": 2.57, "learning_rate": 2.836273789465505e-06, "loss": 1.073, "step": 872500 }, { "epoch": 2.58, "learning_rate": 2.8264378432130494e-06, "loss": 1.0703, "step": 873000 }, { "epoch": 2.58, "learning_rate": 2.8166018969605946e-06, "loss": 1.0709, "step": 873500 }, { "epoch": 2.58, "learning_rate": 2.806765950708139e-06, "loss": 1.091, "step": 874000 }, { "epoch": 2.58, "learning_rate": 2.796930004455684e-06, "loss": 1.0624, "step": 874500 }, { "epoch": 2.58, "learning_rate": 2.7870940582032284e-06, "loss": 1.0702, "step": 875000 }, { "epoch": 2.58, "learning_rate": 2.7772581119507736e-06, "loss": 1.0665, "step": 875500 }, { "epoch": 2.58, "learning_rate": 2.767422165698318e-06, "loss": 1.0705, "step": 876000 }, { "epoch": 2.59, "learning_rate": 2.7575862194458626e-06, "loss": 1.0772, "step": 876500 }, { "epoch": 2.59, "learning_rate": 2.7477502731934074e-06, "loss": 1.0677, "step": 877000 }, { "epoch": 2.59, "learning_rate": 2.737914326940952e-06, "loss": 1.0638, "step": 877500 }, { "epoch": 2.59, "learning_rate": 2.728078380688497e-06, "loss": 1.0874, "step": 878000 }, { "epoch": 2.59, "learning_rate": 2.7182424344360416e-06, "loss": 1.0555, "step": 878500 }, { "epoch": 2.59, "learning_rate": 2.7084064881835864e-06, "loss": 1.0532, "step": 879000 }, { "epoch": 2.6, "learning_rate": 2.6985705419311307e-06, "loss": 1.0829, "step": 879500 }, { "epoch": 2.6, "learning_rate": 2.6887345956786754e-06, "loss": 1.0519, "step": 880000 }, { "epoch": 2.6, "learning_rate": 2.67889864942622e-06, "loss": 1.0577, "step": 880500 }, { "epoch": 2.6, "learning_rate": 2.669062703173765e-06, "loss": 1.074, "step": 881000 }, { "epoch": 2.6, "learning_rate": 2.6592267569213096e-06, "loss": 1.0672, "step": 881500 }, { "epoch": 2.6, "learning_rate": 2.6493908106688544e-06, "loss": 1.0709, "step": 882000 }, { "epoch": 2.6, "learning_rate": 2.639554864416399e-06, "loss": 1.0612, "step": 882500 }, { "epoch": 2.61, "learning_rate": 2.629718918163944e-06, "loss": 1.08, "step": 883000 }, { "epoch": 2.61, "learning_rate": 2.6198829719114886e-06, "loss": 1.0668, "step": 883500 }, { "epoch": 2.61, "learning_rate": 2.6100470256590334e-06, "loss": 1.0753, "step": 884000 }, { "epoch": 2.61, "learning_rate": 2.6002110794065777e-06, "loss": 1.0927, "step": 884500 }, { "epoch": 2.61, "learning_rate": 2.590375133154123e-06, "loss": 1.0666, "step": 885000 }, { "epoch": 2.61, "learning_rate": 2.580539186901667e-06, "loss": 1.08, "step": 885500 }, { "epoch": 2.61, "learning_rate": 2.5707032406492123e-06, "loss": 1.0751, "step": 886000 }, { "epoch": 2.62, "learning_rate": 2.5608672943967567e-06, "loss": 1.0576, "step": 886500 }, { "epoch": 2.62, "learning_rate": 2.551031348144302e-06, "loss": 1.0685, "step": 887000 }, { "epoch": 2.62, "learning_rate": 2.541195401891846e-06, "loss": 1.0493, "step": 887500 }, { "epoch": 2.62, "learning_rate": 2.5313594556393905e-06, "loss": 1.0809, "step": 888000 }, { "epoch": 2.62, "learning_rate": 2.5215235093869356e-06, "loss": 1.07, "step": 888500 }, { "epoch": 2.62, "learning_rate": 2.51168756313448e-06, "loss": 1.0641, "step": 889000 }, { "epoch": 2.62, "learning_rate": 2.501851616882025e-06, "loss": 1.0555, "step": 889500 }, { "epoch": 2.63, "learning_rate": 2.4920156706295694e-06, "loss": 1.0687, "step": 890000 }, { "epoch": 2.63, "learning_rate": 2.482179724377114e-06, "loss": 1.0578, "step": 890500 }, { "epoch": 2.63, "learning_rate": 2.472343778124659e-06, "loss": 1.0818, "step": 891000 }, { "epoch": 2.63, "learning_rate": 2.4625078318722037e-06, "loss": 1.0705, "step": 891500 }, { "epoch": 2.63, "learning_rate": 2.4526718856197484e-06, "loss": 1.0748, "step": 892000 }, { "epoch": 2.63, "learning_rate": 2.442835939367293e-06, "loss": 1.0589, "step": 892500 }, { "epoch": 2.64, "learning_rate": 2.432999993114838e-06, "loss": 1.072, "step": 893000 }, { "epoch": 2.64, "learning_rate": 2.4231640468623826e-06, "loss": 1.0639, "step": 893500 }, { "epoch": 2.64, "learning_rate": 2.413328100609927e-06, "loss": 1.0664, "step": 894000 }, { "epoch": 2.64, "learning_rate": 2.4034921543574717e-06, "loss": 1.0638, "step": 894500 }, { "epoch": 2.64, "learning_rate": 2.3936562081050164e-06, "loss": 1.0638, "step": 895000 }, { "epoch": 2.64, "learning_rate": 2.383820261852561e-06, "loss": 1.1004, "step": 895500 }, { "epoch": 2.64, "learning_rate": 2.373984315600106e-06, "loss": 1.0557, "step": 896000 }, { "epoch": 2.65, "learning_rate": 2.3641483693476507e-06, "loss": 1.0726, "step": 896500 }, { "epoch": 2.65, "learning_rate": 2.3543124230951954e-06, "loss": 1.0661, "step": 897000 }, { "epoch": 2.65, "learning_rate": 2.34447647684274e-06, "loss": 1.0675, "step": 897500 }, { "epoch": 2.65, "learning_rate": 2.334640530590285e-06, "loss": 1.0573, "step": 898000 }, { "epoch": 2.65, "learning_rate": 2.3248045843378296e-06, "loss": 1.0557, "step": 898500 }, { "epoch": 2.65, "learning_rate": 2.3149686380853744e-06, "loss": 1.0739, "step": 899000 }, { "epoch": 2.65, "learning_rate": 2.305132691832919e-06, "loss": 1.069, "step": 899500 }, { "epoch": 2.66, "learning_rate": 2.295296745580464e-06, "loss": 1.0825, "step": 900000 }, { "epoch": 2.66, "learning_rate": 2.2854607993280086e-06, "loss": 1.0525, "step": 900500 }, { "epoch": 2.66, "learning_rate": 2.2756248530755533e-06, "loss": 1.0599, "step": 901000 }, { "epoch": 2.66, "learning_rate": 2.265788906823098e-06, "loss": 1.0645, "step": 901500 }, { "epoch": 2.66, "learning_rate": 2.2559529605706424e-06, "loss": 1.063, "step": 902000 }, { "epoch": 2.66, "learning_rate": 2.246117014318187e-06, "loss": 1.0842, "step": 902500 }, { "epoch": 2.66, "learning_rate": 2.236281068065732e-06, "loss": 1.0677, "step": 903000 }, { "epoch": 2.67, "learning_rate": 2.2264451218132766e-06, "loss": 1.0681, "step": 903500 }, { "epoch": 2.67, "learning_rate": 2.2166091755608214e-06, "loss": 1.0679, "step": 904000 }, { "epoch": 2.67, "learning_rate": 2.206773229308366e-06, "loss": 1.0612, "step": 904500 }, { "epoch": 2.67, "learning_rate": 2.196937283055911e-06, "loss": 1.0735, "step": 905000 }, { "epoch": 2.67, "learning_rate": 2.1871013368034556e-06, "loss": 1.058, "step": 905500 }, { "epoch": 2.67, "learning_rate": 2.177265390551e-06, "loss": 1.0889, "step": 906000 }, { "epoch": 2.67, "learning_rate": 2.1674294442985447e-06, "loss": 1.0602, "step": 906500 }, { "epoch": 2.68, "learning_rate": 2.1575934980460894e-06, "loss": 1.0671, "step": 907000 }, { "epoch": 2.68, "learning_rate": 2.147757551793634e-06, "loss": 1.045, "step": 907500 }, { "epoch": 2.68, "learning_rate": 2.137921605541179e-06, "loss": 1.0644, "step": 908000 }, { "epoch": 2.68, "learning_rate": 2.1280856592887236e-06, "loss": 1.0696, "step": 908500 }, { "epoch": 2.68, "learning_rate": 2.1182497130362684e-06, "loss": 1.0682, "step": 909000 }, { "epoch": 2.68, "learning_rate": 2.1084137667838127e-06, "loss": 1.077, "step": 909500 }, { "epoch": 2.69, "learning_rate": 2.0985778205313574e-06, "loss": 1.0846, "step": 910000 }, { "epoch": 2.69, "learning_rate": 2.088741874278902e-06, "loss": 1.0712, "step": 910500 }, { "epoch": 2.69, "learning_rate": 2.078905928026447e-06, "loss": 1.058, "step": 911000 }, { "epoch": 2.69, "learning_rate": 2.0690699817739917e-06, "loss": 1.0891, "step": 911500 }, { "epoch": 2.69, "learning_rate": 2.0592340355215364e-06, "loss": 1.0819, "step": 912000 }, { "epoch": 2.69, "learning_rate": 2.049398089269081e-06, "loss": 1.0739, "step": 912500 }, { "epoch": 2.69, "learning_rate": 2.039562143016626e-06, "loss": 1.0673, "step": 913000 }, { "epoch": 2.7, "learning_rate": 2.0297261967641702e-06, "loss": 1.0534, "step": 913500 }, { "epoch": 2.7, "learning_rate": 2.019890250511715e-06, "loss": 1.0501, "step": 914000 }, { "epoch": 2.7, "learning_rate": 2.0100543042592597e-06, "loss": 1.0585, "step": 914500 }, { "epoch": 2.7, "learning_rate": 2.0002183580068045e-06, "loss": 1.053, "step": 915000 }, { "epoch": 2.7, "learning_rate": 1.990382411754349e-06, "loss": 1.0671, "step": 915500 }, { "epoch": 2.7, "learning_rate": 1.980546465501894e-06, "loss": 1.0584, "step": 916000 }, { "epoch": 2.7, "learning_rate": 1.9707105192494387e-06, "loss": 1.0509, "step": 916500 }, { "epoch": 2.71, "learning_rate": 1.9608745729969834e-06, "loss": 1.0549, "step": 917000 }, { "epoch": 2.71, "learning_rate": 1.951038626744528e-06, "loss": 1.0701, "step": 917500 }, { "epoch": 2.71, "learning_rate": 1.941202680492073e-06, "loss": 1.0718, "step": 918000 }, { "epoch": 2.71, "learning_rate": 1.9313667342396177e-06, "loss": 1.0597, "step": 918500 }, { "epoch": 2.71, "learning_rate": 1.9215307879871624e-06, "loss": 1.0695, "step": 919000 }, { "epoch": 2.71, "learning_rate": 1.911694841734707e-06, "loss": 1.0634, "step": 919500 }, { "epoch": 2.71, "learning_rate": 1.9018588954822517e-06, "loss": 1.07, "step": 920000 }, { "epoch": 2.72, "learning_rate": 1.8920229492297964e-06, "loss": 1.0682, "step": 920500 }, { "epoch": 2.72, "learning_rate": 1.8821870029773412e-06, "loss": 1.0645, "step": 921000 }, { "epoch": 2.72, "learning_rate": 1.8723510567248857e-06, "loss": 1.0857, "step": 921500 }, { "epoch": 2.72, "learning_rate": 1.8625151104724304e-06, "loss": 1.0725, "step": 922000 }, { "epoch": 2.72, "learning_rate": 1.8526791642199752e-06, "loss": 1.0588, "step": 922500 }, { "epoch": 2.72, "learning_rate": 1.84284321796752e-06, "loss": 1.0675, "step": 923000 }, { "epoch": 2.73, "learning_rate": 1.8330072717150647e-06, "loss": 1.0725, "step": 923500 }, { "epoch": 2.73, "learning_rate": 1.8231713254626094e-06, "loss": 1.0648, "step": 924000 }, { "epoch": 2.73, "learning_rate": 1.8133353792101541e-06, "loss": 1.0576, "step": 924500 }, { "epoch": 2.73, "learning_rate": 1.8034994329576989e-06, "loss": 1.0472, "step": 925000 }, { "epoch": 2.73, "learning_rate": 1.7936634867052432e-06, "loss": 1.0471, "step": 925500 }, { "epoch": 2.73, "learning_rate": 1.783827540452788e-06, "loss": 1.0691, "step": 926000 }, { "epoch": 2.73, "learning_rate": 1.7739915942003327e-06, "loss": 1.0729, "step": 926500 }, { "epoch": 2.74, "learning_rate": 1.7641556479478774e-06, "loss": 1.072, "step": 927000 }, { "epoch": 2.74, "learning_rate": 1.7543197016954222e-06, "loss": 1.0647, "step": 927500 }, { "epoch": 2.74, "learning_rate": 1.744483755442967e-06, "loss": 1.0784, "step": 928000 }, { "epoch": 2.74, "learning_rate": 1.7346478091905117e-06, "loss": 1.0636, "step": 928500 }, { "epoch": 2.74, "learning_rate": 1.7248118629380564e-06, "loss": 1.071, "step": 929000 }, { "epoch": 2.74, "learning_rate": 1.714975916685601e-06, "loss": 1.0519, "step": 929500 }, { "epoch": 2.74, "learning_rate": 1.7051399704331457e-06, "loss": 1.0733, "step": 930000 }, { "epoch": 2.75, "learning_rate": 1.6953040241806904e-06, "loss": 1.0642, "step": 930500 }, { "epoch": 2.75, "learning_rate": 1.6854680779282352e-06, "loss": 1.0689, "step": 931000 }, { "epoch": 2.75, "learning_rate": 1.67563213167578e-06, "loss": 1.0826, "step": 931500 }, { "epoch": 2.75, "learning_rate": 1.6657961854233247e-06, "loss": 1.0737, "step": 932000 }, { "epoch": 2.75, "learning_rate": 1.6559602391708692e-06, "loss": 1.0768, "step": 932500 }, { "epoch": 2.75, "learning_rate": 1.646124292918414e-06, "loss": 1.0788, "step": 933000 }, { "epoch": 2.75, "learning_rate": 1.6362883466659585e-06, "loss": 1.0652, "step": 933500 }, { "epoch": 2.76, "learning_rate": 1.6264524004135032e-06, "loss": 1.0584, "step": 934000 }, { "epoch": 2.76, "learning_rate": 1.616616454161048e-06, "loss": 1.0722, "step": 934500 }, { "epoch": 2.76, "learning_rate": 1.6067805079085927e-06, "loss": 1.0583, "step": 935000 }, { "epoch": 2.76, "learning_rate": 1.5969445616561374e-06, "loss": 1.071, "step": 935500 }, { "epoch": 2.76, "learning_rate": 1.5871086154036822e-06, "loss": 1.0728, "step": 936000 }, { "epoch": 2.76, "learning_rate": 1.577272669151227e-06, "loss": 1.0551, "step": 936500 }, { "epoch": 2.76, "learning_rate": 1.5674367228987717e-06, "loss": 1.0738, "step": 937000 }, { "epoch": 2.77, "learning_rate": 1.5576007766463162e-06, "loss": 1.0733, "step": 937500 }, { "epoch": 2.77, "learning_rate": 1.547764830393861e-06, "loss": 1.0614, "step": 938000 }, { "epoch": 2.77, "learning_rate": 1.5379288841414055e-06, "loss": 1.0598, "step": 938500 }, { "epoch": 2.77, "learning_rate": 1.5280929378889502e-06, "loss": 1.066, "step": 939000 }, { "epoch": 2.77, "learning_rate": 1.518256991636495e-06, "loss": 1.0481, "step": 939500 }, { "epoch": 2.77, "learning_rate": 1.5084210453840397e-06, "loss": 1.0516, "step": 940000 }, { "epoch": 2.78, "learning_rate": 1.4985850991315844e-06, "loss": 1.0765, "step": 940500 }, { "epoch": 2.78, "learning_rate": 1.4887491528791292e-06, "loss": 1.064, "step": 941000 }, { "epoch": 2.78, "learning_rate": 1.4789132066266737e-06, "loss": 1.0596, "step": 941500 }, { "epoch": 2.78, "learning_rate": 1.4690772603742185e-06, "loss": 1.0637, "step": 942000 }, { "epoch": 2.78, "learning_rate": 1.4592413141217632e-06, "loss": 1.0567, "step": 942500 }, { "epoch": 2.78, "learning_rate": 1.449405367869308e-06, "loss": 1.0638, "step": 943000 }, { "epoch": 2.78, "learning_rate": 1.4395694216168527e-06, "loss": 1.0733, "step": 943500 }, { "epoch": 2.79, "learning_rate": 1.4297334753643974e-06, "loss": 1.0402, "step": 944000 }, { "epoch": 2.79, "learning_rate": 1.4198975291119422e-06, "loss": 1.0629, "step": 944500 }, { "epoch": 2.79, "learning_rate": 1.410061582859487e-06, "loss": 1.0587, "step": 945000 }, { "epoch": 2.79, "learning_rate": 1.4002256366070312e-06, "loss": 1.0591, "step": 945500 }, { "epoch": 2.79, "learning_rate": 1.390389690354576e-06, "loss": 1.0581, "step": 946000 }, { "epoch": 2.79, "learning_rate": 1.3805537441021207e-06, "loss": 1.0559, "step": 946500 }, { "epoch": 2.79, "learning_rate": 1.3707177978496655e-06, "loss": 1.084, "step": 947000 }, { "epoch": 2.8, "learning_rate": 1.3608818515972102e-06, "loss": 1.0724, "step": 947500 }, { "epoch": 2.8, "learning_rate": 1.351045905344755e-06, "loss": 1.073, "step": 948000 }, { "epoch": 2.8, "learning_rate": 1.3412099590922997e-06, "loss": 1.0679, "step": 948500 }, { "epoch": 2.8, "learning_rate": 1.3313740128398444e-06, "loss": 1.0682, "step": 949000 }, { "epoch": 2.8, "learning_rate": 1.321538066587389e-06, "loss": 1.0576, "step": 949500 }, { "epoch": 2.8, "learning_rate": 1.3117021203349337e-06, "loss": 1.0672, "step": 950000 }, { "epoch": 2.8, "learning_rate": 1.3018661740824784e-06, "loss": 1.0693, "step": 950500 }, { "epoch": 2.81, "learning_rate": 1.2920302278300232e-06, "loss": 1.0725, "step": 951000 }, { "epoch": 2.81, "learning_rate": 1.282194281577568e-06, "loss": 1.0742, "step": 951500 }, { "epoch": 2.81, "learning_rate": 1.2723583353251127e-06, "loss": 1.0871, "step": 952000 }, { "epoch": 2.81, "learning_rate": 1.2625223890726574e-06, "loss": 1.0582, "step": 952500 }, { "epoch": 2.81, "learning_rate": 1.2526864428202022e-06, "loss": 1.0593, "step": 953000 }, { "epoch": 2.81, "learning_rate": 1.2428504965677467e-06, "loss": 1.0502, "step": 953500 }, { "epoch": 2.82, "learning_rate": 1.2330145503152914e-06, "loss": 1.0593, "step": 954000 }, { "epoch": 2.82, "learning_rate": 1.223178604062836e-06, "loss": 1.072, "step": 954500 }, { "epoch": 2.82, "learning_rate": 1.2133426578103807e-06, "loss": 1.0678, "step": 955000 }, { "epoch": 2.82, "learning_rate": 1.2035067115579255e-06, "loss": 1.0649, "step": 955500 }, { "epoch": 2.82, "learning_rate": 1.1936707653054702e-06, "loss": 1.0618, "step": 956000 }, { "epoch": 2.82, "learning_rate": 1.1838348190530147e-06, "loss": 1.0681, "step": 956500 }, { "epoch": 2.82, "learning_rate": 1.1739988728005595e-06, "loss": 1.068, "step": 957000 }, { "epoch": 2.83, "learning_rate": 1.1641629265481042e-06, "loss": 1.0531, "step": 957500 }, { "epoch": 2.83, "learning_rate": 1.154326980295649e-06, "loss": 1.0548, "step": 958000 }, { "epoch": 2.83, "learning_rate": 1.1444910340431937e-06, "loss": 1.0608, "step": 958500 }, { "epoch": 2.83, "learning_rate": 1.1346550877907384e-06, "loss": 1.0901, "step": 959000 }, { "epoch": 2.83, "learning_rate": 1.1248191415382832e-06, "loss": 1.0682, "step": 959500 }, { "epoch": 2.83, "learning_rate": 1.114983195285828e-06, "loss": 1.062, "step": 960000 }, { "epoch": 2.83, "learning_rate": 1.1051472490333725e-06, "loss": 1.0621, "step": 960500 }, { "epoch": 2.84, "learning_rate": 1.0953113027809172e-06, "loss": 1.0651, "step": 961000 }, { "epoch": 2.84, "learning_rate": 1.085475356528462e-06, "loss": 1.0823, "step": 961500 }, { "epoch": 2.84, "learning_rate": 1.0756394102760067e-06, "loss": 1.0763, "step": 962000 }, { "epoch": 2.84, "learning_rate": 1.0658034640235512e-06, "loss": 1.0737, "step": 962500 }, { "epoch": 2.84, "learning_rate": 1.055967517771096e-06, "loss": 1.0589, "step": 963000 }, { "epoch": 2.84, "learning_rate": 1.0461315715186407e-06, "loss": 1.0828, "step": 963500 }, { "epoch": 2.84, "learning_rate": 1.0362956252661854e-06, "loss": 1.0599, "step": 964000 }, { "epoch": 2.85, "learning_rate": 1.02645967901373e-06, "loss": 1.0687, "step": 964500 }, { "epoch": 2.85, "learning_rate": 1.0166237327612747e-06, "loss": 1.0696, "step": 965000 }, { "epoch": 2.85, "learning_rate": 1.0067877865088195e-06, "loss": 1.0899, "step": 965500 }, { "epoch": 2.85, "learning_rate": 9.969518402563642e-07, "loss": 1.0749, "step": 966000 }, { "epoch": 2.85, "learning_rate": 9.871158940039087e-07, "loss": 1.0469, "step": 966500 }, { "epoch": 2.85, "learning_rate": 9.772799477514535e-07, "loss": 1.0638, "step": 967000 }, { "epoch": 2.85, "learning_rate": 9.674440014989982e-07, "loss": 1.063, "step": 967500 }, { "epoch": 2.86, "learning_rate": 9.57608055246543e-07, "loss": 1.06, "step": 968000 }, { "epoch": 2.86, "learning_rate": 9.477721089940876e-07, "loss": 1.0411, "step": 968500 }, { "epoch": 2.86, "learning_rate": 9.379361627416323e-07, "loss": 1.0727, "step": 969000 }, { "epoch": 2.86, "learning_rate": 9.281002164891771e-07, "loss": 1.0577, "step": 969500 }, { "epoch": 2.86, "learning_rate": 9.182642702367218e-07, "loss": 1.0571, "step": 970000 }, { "epoch": 2.86, "learning_rate": 9.084283239842665e-07, "loss": 1.058, "step": 970500 }, { "epoch": 2.87, "learning_rate": 8.985923777318112e-07, "loss": 1.0859, "step": 971000 }, { "epoch": 2.87, "learning_rate": 8.88756431479356e-07, "loss": 1.064, "step": 971500 }, { "epoch": 2.87, "learning_rate": 8.789204852269007e-07, "loss": 1.062, "step": 972000 }, { "epoch": 2.87, "learning_rate": 8.690845389744452e-07, "loss": 1.0767, "step": 972500 }, { "epoch": 2.87, "learning_rate": 8.5924859272199e-07, "loss": 1.0731, "step": 973000 }, { "epoch": 2.87, "learning_rate": 8.494126464695347e-07, "loss": 1.0731, "step": 973500 }, { "epoch": 2.87, "learning_rate": 8.395767002170795e-07, "loss": 1.0502, "step": 974000 }, { "epoch": 2.88, "learning_rate": 8.297407539646241e-07, "loss": 1.0612, "step": 974500 }, { "epoch": 2.88, "learning_rate": 8.199048077121688e-07, "loss": 1.0682, "step": 975000 }, { "epoch": 2.88, "learning_rate": 8.100688614597136e-07, "loss": 1.0602, "step": 975500 }, { "epoch": 2.88, "learning_rate": 8.002329152072582e-07, "loss": 1.0457, "step": 976000 }, { "epoch": 2.88, "learning_rate": 7.903969689548029e-07, "loss": 1.0759, "step": 976500 }, { "epoch": 2.88, "learning_rate": 7.805610227023476e-07, "loss": 1.0728, "step": 977000 }, { "epoch": 2.88, "learning_rate": 7.707250764498923e-07, "loss": 1.0434, "step": 977500 }, { "epoch": 2.89, "learning_rate": 7.608891301974371e-07, "loss": 1.0529, "step": 978000 }, { "epoch": 2.89, "learning_rate": 7.510531839449817e-07, "loss": 1.0426, "step": 978500 }, { "epoch": 2.89, "learning_rate": 7.412172376925264e-07, "loss": 1.054, "step": 979000 }, { "epoch": 2.89, "learning_rate": 7.313812914400711e-07, "loss": 1.0679, "step": 979500 }, { "epoch": 2.89, "learning_rate": 7.215453451876158e-07, "loss": 1.0531, "step": 980000 }, { "epoch": 2.89, "learning_rate": 7.117093989351605e-07, "loss": 1.0634, "step": 980500 }, { "epoch": 2.89, "learning_rate": 7.018734526827052e-07, "loss": 1.0429, "step": 981000 }, { "epoch": 2.9, "learning_rate": 6.9203750643025e-07, "loss": 1.0598, "step": 981500 }, { "epoch": 2.9, "learning_rate": 6.822015601777947e-07, "loss": 1.0659, "step": 982000 }, { "epoch": 2.9, "learning_rate": 6.723656139253392e-07, "loss": 1.0589, "step": 982500 }, { "epoch": 2.9, "learning_rate": 6.62529667672884e-07, "loss": 1.0688, "step": 983000 }, { "epoch": 2.9, "learning_rate": 6.526937214204287e-07, "loss": 1.0592, "step": 983500 }, { "epoch": 2.9, "learning_rate": 6.428577751679735e-07, "loss": 1.0754, "step": 984000 }, { "epoch": 2.91, "learning_rate": 6.330218289155181e-07, "loss": 1.0706, "step": 984500 }, { "epoch": 2.91, "learning_rate": 6.231858826630628e-07, "loss": 1.0546, "step": 985000 }, { "epoch": 2.91, "learning_rate": 6.133499364106076e-07, "loss": 1.0467, "step": 985500 }, { "epoch": 2.91, "learning_rate": 6.035139901581522e-07, "loss": 1.0606, "step": 986000 }, { "epoch": 2.91, "learning_rate": 5.93678043905697e-07, "loss": 1.0536, "step": 986500 }, { "epoch": 2.91, "learning_rate": 5.838420976532416e-07, "loss": 1.0576, "step": 987000 }, { "epoch": 2.91, "learning_rate": 5.740061514007863e-07, "loss": 1.0527, "step": 987500 }, { "epoch": 2.92, "learning_rate": 5.64170205148331e-07, "loss": 1.0592, "step": 988000 }, { "epoch": 2.92, "learning_rate": 5.543342588958757e-07, "loss": 1.0507, "step": 988500 }, { "epoch": 2.92, "learning_rate": 5.444983126434205e-07, "loss": 1.068, "step": 989000 }, { "epoch": 2.92, "learning_rate": 5.346623663909652e-07, "loss": 1.0556, "step": 989500 }, { "epoch": 2.92, "learning_rate": 5.248264201385099e-07, "loss": 1.0631, "step": 990000 }, { "epoch": 2.92, "learning_rate": 5.149904738860546e-07, "loss": 1.0641, "step": 990500 }, { "epoch": 2.92, "learning_rate": 5.051545276335992e-07, "loss": 1.068, "step": 991000 }, { "epoch": 2.93, "learning_rate": 4.95318581381144e-07, "loss": 1.0684, "step": 991500 }, { "epoch": 2.93, "learning_rate": 4.854826351286886e-07, "loss": 1.063, "step": 992000 }, { "epoch": 2.93, "learning_rate": 4.7564668887623335e-07, "loss": 1.0606, "step": 992500 }, { "epoch": 2.93, "learning_rate": 4.6581074262377804e-07, "loss": 1.0638, "step": 993000 }, { "epoch": 2.93, "learning_rate": 4.559747963713228e-07, "loss": 1.0722, "step": 993500 }, { "epoch": 2.93, "learning_rate": 4.461388501188674e-07, "loss": 1.0742, "step": 994000 }, { "epoch": 2.93, "learning_rate": 4.3630290386641217e-07, "loss": 1.0496, "step": 994500 }, { "epoch": 2.94, "learning_rate": 4.2646695761395686e-07, "loss": 1.0819, "step": 995000 }, { "epoch": 2.94, "learning_rate": 4.166310113615016e-07, "loss": 1.0519, "step": 995500 }, { "epoch": 2.94, "learning_rate": 4.0679506510904624e-07, "loss": 1.0646, "step": 996000 }, { "epoch": 2.94, "learning_rate": 3.96959118856591e-07, "loss": 1.0887, "step": 996500 }, { "epoch": 2.94, "learning_rate": 3.8712317260413567e-07, "loss": 1.0571, "step": 997000 }, { "epoch": 2.94, "learning_rate": 3.772872263516804e-07, "loss": 1.0719, "step": 997500 }, { "epoch": 2.94, "learning_rate": 3.6745128009922505e-07, "loss": 1.0698, "step": 998000 }, { "epoch": 2.95, "learning_rate": 3.576153338467698e-07, "loss": 1.0651, "step": 998500 }, { "epoch": 2.95, "learning_rate": 3.477793875943145e-07, "loss": 1.0578, "step": 999000 }, { "epoch": 2.95, "learning_rate": 3.3794344134185917e-07, "loss": 1.0586, "step": 999500 }, { "epoch": 2.95, "learning_rate": 3.2810749508940386e-07, "loss": 1.0545, "step": 1000000 }, { "epoch": 2.95, "learning_rate": 3.182715488369486e-07, "loss": 1.0543, "step": 1000500 }, { "epoch": 2.95, "learning_rate": 3.0843560258449324e-07, "loss": 1.0622, "step": 1001000 }, { "epoch": 2.96, "learning_rate": 2.98599656332038e-07, "loss": 1.0667, "step": 1001500 }, { "epoch": 2.96, "learning_rate": 2.887637100795827e-07, "loss": 1.0615, "step": 1002000 }, { "epoch": 2.96, "learning_rate": 2.7892776382712736e-07, "loss": 1.0537, "step": 1002500 }, { "epoch": 2.96, "learning_rate": 2.6909181757467205e-07, "loss": 1.053, "step": 1003000 }, { "epoch": 2.96, "learning_rate": 2.5925587132221674e-07, "loss": 1.0541, "step": 1003500 }, { "epoch": 2.96, "learning_rate": 2.494199250697615e-07, "loss": 1.0652, "step": 1004000 }, { "epoch": 2.96, "learning_rate": 2.395839788173062e-07, "loss": 1.0706, "step": 1004500 }, { "epoch": 2.97, "learning_rate": 2.2974803256485087e-07, "loss": 1.0795, "step": 1005000 }, { "epoch": 2.97, "learning_rate": 2.1991208631239558e-07, "loss": 1.0621, "step": 1005500 }, { "epoch": 2.97, "learning_rate": 2.1007614005994027e-07, "loss": 1.0711, "step": 1006000 }, { "epoch": 2.97, "learning_rate": 2.0024019380748496e-07, "loss": 1.0668, "step": 1006500 }, { "epoch": 2.97, "learning_rate": 1.9040424755502968e-07, "loss": 1.0694, "step": 1007000 }, { "epoch": 2.97, "learning_rate": 1.8056830130257437e-07, "loss": 1.0708, "step": 1007500 }, { "epoch": 2.97, "learning_rate": 1.7073235505011909e-07, "loss": 1.0694, "step": 1008000 }, { "epoch": 2.98, "learning_rate": 1.6089640879766378e-07, "loss": 1.0435, "step": 1008500 }, { "epoch": 2.98, "learning_rate": 1.510604625452085e-07, "loss": 1.0691, "step": 1009000 }, { "epoch": 2.98, "learning_rate": 1.4122451629275318e-07, "loss": 1.0646, "step": 1009500 }, { "epoch": 2.98, "learning_rate": 1.3138857004029787e-07, "loss": 1.0761, "step": 1010000 }, { "epoch": 2.98, "learning_rate": 1.215526237878426e-07, "loss": 1.0473, "step": 1010500 }, { "epoch": 2.98, "learning_rate": 1.1171667753538729e-07, "loss": 1.0831, "step": 1011000 }, { "epoch": 2.98, "learning_rate": 1.0188073128293198e-07, "loss": 1.0827, "step": 1011500 }, { "epoch": 2.99, "learning_rate": 9.204478503047668e-08, "loss": 1.0644, "step": 1012000 }, { "epoch": 2.99, "learning_rate": 8.220883877802139e-08, "loss": 1.0511, "step": 1012500 }, { "epoch": 2.99, "learning_rate": 7.237289252556609e-08, "loss": 1.0466, "step": 1013000 }, { "epoch": 2.99, "learning_rate": 6.25369462731108e-08, "loss": 1.0609, "step": 1013500 }, { "epoch": 2.99, "learning_rate": 5.270100002065549e-08, "loss": 1.0562, "step": 1014000 }, { "epoch": 2.99, "learning_rate": 4.2865053768200194e-08, "loss": 1.0699, "step": 1014500 }, { "epoch": 3.0, "learning_rate": 3.302910751574489e-08, "loss": 1.0711, "step": 1015000 }, { "epoch": 3.0, "learning_rate": 2.3193161263289593e-08, "loss": 1.059, "step": 1015500 }, { "epoch": 3.0, "learning_rate": 1.3357215010834297e-08, "loss": 1.058, "step": 1016000 }, { "epoch": 3.0, "learning_rate": 3.521268758378997e-09, "loss": 1.0544, "step": 1016500 } ], "logging_steps": 500, "max_steps": 1016679, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7.198356590588068e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }