diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12759 @@ +{ + "best_metric": 0.03287291918282044, + "best_model_checkpoint": "00_medieval/v2_medieval_escriptorium_masks/models/checkpoint-87000", + "epoch": 19.9244622037331, + "eval_steps": 500, + "global_step": 91000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.6e-06, + "loss": 8.5577, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 9.600000000000001e-06, + "loss": 5.1307, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 1.4599999999999999e-05, + "loss": 3.9933, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 1.9600000000000002e-05, + "loss": 3.5669, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 2.46e-05, + "loss": 3.3447, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 2.96e-05, + "loss": 3.1734, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 3.45e-05, + "loss": 3.1923, + "step": 350 + }, + { + "epoch": 0.09, + "learning_rate": 3.9500000000000005e-05, + "loss": 3.094, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 4.4500000000000004e-05, + "loss": 3.1098, + "step": 450 + }, + { + "epoch": 0.11, + "learning_rate": 4.9500000000000004e-05, + "loss": 3.077, + "step": 500 + }, + { + "epoch": 0.11, + "eval_acc": 0.020410816430003795, + "eval_cer": 0.5912892888206028, + "eval_loss": 2.8952395915985107, + "eval_runtime": 4854.9711, + "eval_samples_per_second": 1.584, + "eval_steps_per_second": 0.198, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 4.997523117569353e-05, + "loss": 3.0275, + "step": 550 + }, + { + "epoch": 0.13, + "learning_rate": 4.994771025979745e-05, + "loss": 2.8246, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 4.992018934390137e-05, + "loss": 2.6547, + "step": 650 + }, + { + "epoch": 0.15, + "learning_rate": 4.9892668428005284e-05, + "loss": 2.562, + "step": 700 + }, + { + "epoch": 0.16, + "learning_rate": 4.986514751210921e-05, + "loss": 2.4166, + "step": 750 + }, + { + "epoch": 0.18, + "learning_rate": 4.983762659621312e-05, + "loss": 2.3276, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 4.981010568031704e-05, + "loss": 2.2942, + "step": 850 + }, + { + "epoch": 0.2, + "learning_rate": 4.9782584764420966e-05, + "loss": 2.2679, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 4.975506384852488e-05, + "loss": 2.2626, + "step": 950 + }, + { + "epoch": 0.22, + "learning_rate": 4.97275429326288e-05, + "loss": 2.1657, + "step": 1000 + }, + { + "epoch": 0.22, + "eval_acc": 0.025871034838030285, + "eval_cer": 0.4587138894788984, + "eval_loss": 1.9457383155822754, + "eval_runtime": 3670.9235, + "eval_samples_per_second": 2.095, + "eval_steps_per_second": 0.262, + "step": 1000 + }, + { + "epoch": 0.23, + "learning_rate": 4.970002201673272e-05, + "loss": 2.0625, + "step": 1050 + }, + { + "epoch": 0.24, + "learning_rate": 4.9672501100836635e-05, + "loss": 2.0504, + "step": 1100 + }, + { + "epoch": 0.25, + "learning_rate": 4.964498018494056e-05, + "loss": 1.9837, + "step": 1150 + }, + { + "epoch": 0.26, + "learning_rate": 4.9617459269044476e-05, + "loss": 1.9131, + "step": 1200 + }, + { + "epoch": 0.27, + "learning_rate": 4.958993835314839e-05, + "loss": 1.8233, + "step": 1250 + }, + { + "epoch": 0.28, + "learning_rate": 4.956241743725231e-05, + "loss": 1.8375, + "step": 1300 + }, + { + "epoch": 0.3, + "learning_rate": 4.9534896521356234e-05, + "loss": 1.8702, + "step": 1350 + }, + { + "epoch": 0.31, + "learning_rate": 4.950737560546015e-05, + "loss": 1.7618, + "step": 1400 + }, + { + "epoch": 0.32, + "learning_rate": 4.947985468956407e-05, + "loss": 1.7752, + "step": 1450 + }, + { + "epoch": 0.33, + "learning_rate": 4.945233377366799e-05, + "loss": 1.7533, + "step": 1500 + }, + { + "epoch": 0.33, + "eval_acc": 0.04017160685905205, + "eval_cer": 0.311746219761581, + "eval_loss": 1.5871385335922241, + "eval_runtime": 2919.4107, + "eval_samples_per_second": 2.635, + "eval_steps_per_second": 0.33, + "step": 1500 + }, + { + "epoch": 0.34, + "learning_rate": 4.942481285777191e-05, + "loss": 1.6499, + "step": 1550 + }, + { + "epoch": 0.35, + "learning_rate": 4.939729194187583e-05, + "loss": 1.646, + "step": 1600 + }, + { + "epoch": 0.36, + "learning_rate": 4.936977102597975e-05, + "loss": 1.5978, + "step": 1650 + }, + { + "epoch": 0.37, + "learning_rate": 4.934225011008367e-05, + "loss": 1.6438, + "step": 1700 + }, + { + "epoch": 0.38, + "learning_rate": 4.9314729194187585e-05, + "loss": 1.6361, + "step": 1750 + }, + { + "epoch": 0.39, + "learning_rate": 4.92872082782915e-05, + "loss": 1.5909, + "step": 1800 + }, + { + "epoch": 0.41, + "learning_rate": 4.9259687362395426e-05, + "loss": 1.5129, + "step": 1850 + }, + { + "epoch": 0.42, + "learning_rate": 4.923216644649934e-05, + "loss": 1.5617, + "step": 1900 + }, + { + "epoch": 0.43, + "learning_rate": 4.920464553060326e-05, + "loss": 1.4992, + "step": 1950 + }, + { + "epoch": 0.44, + "learning_rate": 4.9177124614707185e-05, + "loss": 1.4751, + "step": 2000 + }, + { + "epoch": 0.44, + "eval_acc": 0.05200208007644278, + "eval_cer": 0.26249498061237303, + "eval_loss": 1.336438536643982, + "eval_runtime": 2734.4386, + "eval_samples_per_second": 2.813, + "eval_steps_per_second": 0.352, + "step": 2000 + }, + { + "epoch": 0.45, + "learning_rate": 4.9149603698811095e-05, + "loss": 1.4507, + "step": 2050 + }, + { + "epoch": 0.46, + "learning_rate": 4.912208278291502e-05, + "loss": 1.4969, + "step": 2100 + }, + { + "epoch": 0.47, + "learning_rate": 4.9094561867018936e-05, + "loss": 1.424, + "step": 2150 + }, + { + "epoch": 0.48, + "learning_rate": 4.906704095112285e-05, + "loss": 1.4178, + "step": 2200 + }, + { + "epoch": 0.49, + "learning_rate": 4.903952003522678e-05, + "loss": 1.4282, + "step": 2250 + }, + { + "epoch": 0.5, + "learning_rate": 4.9011999119330694e-05, + "loss": 1.331, + "step": 2300 + }, + { + "epoch": 0.51, + "learning_rate": 4.898447820343461e-05, + "loss": 1.3595, + "step": 2350 + }, + { + "epoch": 0.53, + "learning_rate": 4.895695728753853e-05, + "loss": 1.3561, + "step": 2400 + }, + { + "epoch": 0.54, + "learning_rate": 4.892943637164245e-05, + "loss": 1.368, + "step": 2450 + }, + { + "epoch": 0.55, + "learning_rate": 4.890191545574637e-05, + "loss": 1.3775, + "step": 2500 + }, + { + "epoch": 0.55, + "eval_acc": 0.05564222568179378, + "eval_cer": 0.3026113218780184, + "eval_loss": 1.2641615867614746, + "eval_runtime": 3245.8116, + "eval_samples_per_second": 2.37, + "eval_steps_per_second": 0.296, + "step": 2500 + }, + { + "epoch": 0.56, + "learning_rate": 4.887439453985029e-05, + "loss": 1.3244, + "step": 2550 + }, + { + "epoch": 0.57, + "learning_rate": 4.884687362395421e-05, + "loss": 1.3514, + "step": 2600 + }, + { + "epoch": 0.58, + "learning_rate": 4.881935270805813e-05, + "loss": 1.3349, + "step": 2650 + }, + { + "epoch": 0.59, + "learning_rate": 4.8791831792162045e-05, + "loss": 1.2717, + "step": 2700 + }, + { + "epoch": 0.6, + "learning_rate": 4.876431087626596e-05, + "loss": 1.2243, + "step": 2750 + }, + { + "epoch": 0.61, + "learning_rate": 4.873678996036988e-05, + "loss": 1.2679, + "step": 2800 + }, + { + "epoch": 0.62, + "learning_rate": 4.8709269044473804e-05, + "loss": 1.2447, + "step": 2850 + }, + { + "epoch": 0.63, + "learning_rate": 4.868174812857772e-05, + "loss": 1.2269, + "step": 2900 + }, + { + "epoch": 0.65, + "learning_rate": 4.865422721268164e-05, + "loss": 1.2036, + "step": 2950 + }, + { + "epoch": 0.66, + "learning_rate": 4.8626706296785555e-05, + "loss": 1.2241, + "step": 3000 + }, + { + "epoch": 0.66, + "eval_acc": 0.06604264169708234, + "eval_cer": 0.23580191597196276, + "eval_loss": 1.1367168426513672, + "eval_runtime": 2884.4123, + "eval_samples_per_second": 2.667, + "eval_steps_per_second": 0.334, + "step": 3000 + }, + { + "epoch": 0.67, + "learning_rate": 4.859918538088948e-05, + "loss": 1.2466, + "step": 3050 + }, + { + "epoch": 0.68, + "learning_rate": 4.8571664464993396e-05, + "loss": 1.2071, + "step": 3100 + }, + { + "epoch": 0.69, + "learning_rate": 4.854414354909731e-05, + "loss": 1.2059, + "step": 3150 + }, + { + "epoch": 0.7, + "learning_rate": 4.851662263320124e-05, + "loss": 1.1822, + "step": 3200 + }, + { + "epoch": 0.71, + "learning_rate": 4.8489101717305154e-05, + "loss": 1.0968, + "step": 3250 + }, + { + "epoch": 0.72, + "learning_rate": 4.846158080140907e-05, + "loss": 1.1966, + "step": 3300 + }, + { + "epoch": 0.73, + "learning_rate": 4.8434059885512996e-05, + "loss": 1.1477, + "step": 3350 + }, + { + "epoch": 0.74, + "learning_rate": 4.840653896961691e-05, + "loss": 1.1433, + "step": 3400 + }, + { + "epoch": 0.76, + "learning_rate": 4.837901805372083e-05, + "loss": 1.138, + "step": 3450 + }, + { + "epoch": 0.77, + "learning_rate": 4.835149713782475e-05, + "loss": 1.0963, + "step": 3500 + }, + { + "epoch": 0.77, + "eval_acc": 0.07384295370854875, + "eval_cer": 0.195494876736243, + "eval_loss": 1.0342167615890503, + "eval_runtime": 2491.4875, + "eval_samples_per_second": 3.087, + "eval_steps_per_second": 0.386, + "step": 3500 + }, + { + "epoch": 0.78, + "learning_rate": 4.832397622192867e-05, + "loss": 1.0988, + "step": 3550 + }, + { + "epoch": 0.79, + "learning_rate": 4.829645530603259e-05, + "loss": 1.1284, + "step": 3600 + }, + { + "epoch": 0.8, + "learning_rate": 4.8268934390136505e-05, + "loss": 1.1235, + "step": 3650 + }, + { + "epoch": 0.81, + "learning_rate": 4.824196389255835e-05, + "loss": 1.1225, + "step": 3700 + }, + { + "epoch": 0.82, + "learning_rate": 4.8214442976662265e-05, + "loss": 1.0763, + "step": 3750 + }, + { + "epoch": 0.83, + "learning_rate": 4.818692206076618e-05, + "loss": 1.0841, + "step": 3800 + }, + { + "epoch": 0.84, + "learning_rate": 4.8159401144870106e-05, + "loss": 1.0994, + "step": 3850 + }, + { + "epoch": 0.85, + "learning_rate": 4.813188022897402e-05, + "loss": 1.0723, + "step": 3900 + }, + { + "epoch": 0.86, + "learning_rate": 4.810435931307794e-05, + "loss": 1.0384, + "step": 3950 + }, + { + "epoch": 0.88, + "learning_rate": 4.8076838397181864e-05, + "loss": 1.0655, + "step": 4000 + }, + { + "epoch": 0.88, + "eval_acc": 0.07514300571045983, + "eval_cer": 0.24283758579470666, + "eval_loss": 1.1094621419906616, + "eval_runtime": 3232.3014, + "eval_samples_per_second": 2.38, + "eval_steps_per_second": 0.298, + "step": 4000 + }, + { + "epoch": 0.89, + "learning_rate": 4.8049317481285775e-05, + "loss": 1.0818, + "step": 4050 + }, + { + "epoch": 0.9, + "learning_rate": 4.80217965653897e-05, + "loss": 1.0594, + "step": 4100 + }, + { + "epoch": 0.91, + "learning_rate": 4.799427564949362e-05, + "loss": 1.0087, + "step": 4150 + }, + { + "epoch": 0.92, + "learning_rate": 4.796675473359753e-05, + "loss": 1.0247, + "step": 4200 + }, + { + "epoch": 0.93, + "learning_rate": 4.793923381770146e-05, + "loss": 1.0896, + "step": 4250 + }, + { + "epoch": 0.94, + "learning_rate": 4.7911712901805374e-05, + "loss": 1.0826, + "step": 4300 + }, + { + "epoch": 0.95, + "learning_rate": 4.788419198590929e-05, + "loss": 0.996, + "step": 4350 + }, + { + "epoch": 0.96, + "learning_rate": 4.7856671070013215e-05, + "loss": 0.9957, + "step": 4400 + }, + { + "epoch": 0.97, + "learning_rate": 4.782915015411713e-05, + "loss": 1.0103, + "step": 4450 + }, + { + "epoch": 0.99, + "learning_rate": 4.780162923822105e-05, + "loss": 0.9841, + "step": 4500 + }, + { + "epoch": 0.99, + "eval_acc": 0.08931357253129048, + "eval_cer": 0.17093824951666592, + "eval_loss": 0.9081698656082153, + "eval_runtime": 2397.9448, + "eval_samples_per_second": 3.208, + "eval_steps_per_second": 0.401, + "step": 4500 + }, + { + "epoch": 1.0, + "learning_rate": 4.777410832232497e-05, + "loss": 1.014, + "step": 4550 + }, + { + "epoch": 1.01, + "learning_rate": 4.774658740642889e-05, + "loss": 0.9617, + "step": 4600 + }, + { + "epoch": 1.02, + "learning_rate": 4.771906649053281e-05, + "loss": 0.9232, + "step": 4650 + }, + { + "epoch": 1.03, + "learning_rate": 4.7691545574636725e-05, + "loss": 0.96, + "step": 4700 + }, + { + "epoch": 1.04, + "learning_rate": 4.766402465874065e-05, + "loss": 0.9087, + "step": 4750 + }, + { + "epoch": 1.05, + "learning_rate": 4.763650374284456e-05, + "loss": 0.9226, + "step": 4800 + }, + { + "epoch": 1.06, + "learning_rate": 4.760898282694848e-05, + "loss": 0.9249, + "step": 4850 + }, + { + "epoch": 1.07, + "learning_rate": 4.75814619110524e-05, + "loss": 0.8718, + "step": 4900 + }, + { + "epoch": 1.08, + "learning_rate": 4.755394099515632e-05, + "loss": 0.8789, + "step": 4950 + }, + { + "epoch": 1.09, + "learning_rate": 4.752642007926024e-05, + "loss": 0.8746, + "step": 5000 + }, + { + "epoch": 1.09, + "eval_acc": 0.09100364013377488, + "eval_cer": 0.16938940990606188, + "eval_loss": 0.8924765586853027, + "eval_runtime": 2509.8917, + "eval_samples_per_second": 3.065, + "eval_steps_per_second": 0.383, + "step": 5000 + }, + { + "epoch": 1.11, + "learning_rate": 4.749889916336416e-05, + "loss": 0.9212, + "step": 5050 + }, + { + "epoch": 1.12, + "learning_rate": 4.7471378247468076e-05, + "loss": 0.8748, + "step": 5100 + }, + { + "epoch": 1.13, + "learning_rate": 4.744385733157199e-05, + "loss": 0.8864, + "step": 5150 + }, + { + "epoch": 1.14, + "learning_rate": 4.741633641567592e-05, + "loss": 0.8509, + "step": 5200 + }, + { + "epoch": 1.15, + "learning_rate": 4.7388815499779834e-05, + "loss": 0.8594, + "step": 5250 + }, + { + "epoch": 1.16, + "learning_rate": 4.736129458388375e-05, + "loss": 0.8814, + "step": 5300 + }, + { + "epoch": 1.17, + "learning_rate": 4.7333773667987675e-05, + "loss": 0.8689, + "step": 5350 + }, + { + "epoch": 1.18, + "learning_rate": 4.730625275209159e-05, + "loss": 0.8678, + "step": 5400 + }, + { + "epoch": 1.19, + "learning_rate": 4.727873183619551e-05, + "loss": 0.8475, + "step": 5450 + }, + { + "epoch": 1.2, + "learning_rate": 4.7251210920299434e-05, + "loss": 0.8549, + "step": 5500 + }, + { + "epoch": 1.2, + "eval_acc": 0.08385335412326399, + "eval_cer": 0.17893981230978653, + "eval_loss": 0.8790249824523926, + "eval_runtime": 2637.8013, + "eval_samples_per_second": 2.916, + "eval_steps_per_second": 0.365, + "step": 5500 + }, + { + "epoch": 1.22, + "learning_rate": 4.722369000440335e-05, + "loss": 0.8873, + "step": 5550 + }, + { + "epoch": 1.23, + "learning_rate": 4.719616908850727e-05, + "loss": 0.8834, + "step": 5600 + }, + { + "epoch": 1.24, + "learning_rate": 4.7168648172611185e-05, + "loss": 0.855, + "step": 5650 + }, + { + "epoch": 1.25, + "learning_rate": 4.714112725671511e-05, + "loss": 0.8473, + "step": 5700 + }, + { + "epoch": 1.26, + "learning_rate": 4.711360634081902e-05, + "loss": 0.8253, + "step": 5750 + }, + { + "epoch": 1.27, + "learning_rate": 4.708608542492294e-05, + "loss": 0.8476, + "step": 5800 + }, + { + "epoch": 1.28, + "learning_rate": 4.705856450902687e-05, + "loss": 0.8522, + "step": 5850 + }, + { + "epoch": 1.29, + "learning_rate": 4.703104359313078e-05, + "loss": 0.8159, + "step": 5900 + }, + { + "epoch": 1.3, + "learning_rate": 4.70035226772347e-05, + "loss": 0.8073, + "step": 5950 + }, + { + "epoch": 1.31, + "learning_rate": 4.697600176133862e-05, + "loss": 0.8467, + "step": 6000 + }, + { + "epoch": 1.31, + "eval_acc": 0.09620384814141915, + "eval_cer": 0.16569328014462037, + "eval_loss": 0.8687126040458679, + "eval_runtime": 2433.3407, + "eval_samples_per_second": 3.161, + "eval_steps_per_second": 0.395, + "step": 6000 + }, + { + "epoch": 1.32, + "learning_rate": 4.6948480845442536e-05, + "loss": 0.837, + "step": 6050 + }, + { + "epoch": 1.34, + "learning_rate": 4.692095992954646e-05, + "loss": 0.8188, + "step": 6100 + }, + { + "epoch": 1.35, + "learning_rate": 4.689343901365038e-05, + "loss": 0.8061, + "step": 6150 + }, + { + "epoch": 1.36, + "learning_rate": 4.6865918097754294e-05, + "loss": 0.8355, + "step": 6200 + }, + { + "epoch": 1.37, + "learning_rate": 4.683839718185821e-05, + "loss": 0.7924, + "step": 6250 + }, + { + "epoch": 1.38, + "learning_rate": 4.6810876265962135e-05, + "loss": 0.7915, + "step": 6300 + }, + { + "epoch": 1.39, + "learning_rate": 4.678335535006605e-05, + "loss": 0.7906, + "step": 6350 + }, + { + "epoch": 1.4, + "learning_rate": 4.675583443416997e-05, + "loss": 0.8021, + "step": 6400 + }, + { + "epoch": 1.41, + "learning_rate": 4.6728313518273894e-05, + "loss": 0.8202, + "step": 6450 + }, + { + "epoch": 1.42, + "learning_rate": 4.670079260237781e-05, + "loss": 0.7687, + "step": 6500 + }, + { + "epoch": 1.42, + "eval_acc": 0.10348413935212114, + "eval_cer": 0.15004829464851885, + "eval_loss": 0.7853290438652039, + "eval_runtime": 2338.8646, + "eval_samples_per_second": 3.289, + "eval_steps_per_second": 0.411, + "step": 6500 + }, + { + "epoch": 1.43, + "learning_rate": 4.667327168648173e-05, + "loss": 0.7909, + "step": 6550 + }, + { + "epoch": 1.45, + "learning_rate": 4.664575077058565e-05, + "loss": 0.7908, + "step": 6600 + }, + { + "epoch": 1.46, + "learning_rate": 4.661822985468957e-05, + "loss": 0.7847, + "step": 6650 + }, + { + "epoch": 1.47, + "learning_rate": 4.6590708938793486e-05, + "loss": 0.7905, + "step": 6700 + }, + { + "epoch": 1.48, + "learning_rate": 4.6563188022897403e-05, + "loss": 0.778, + "step": 6750 + }, + { + "epoch": 1.49, + "learning_rate": 4.653566710700132e-05, + "loss": 0.7986, + "step": 6800 + }, + { + "epoch": 1.5, + "learning_rate": 4.650814619110524e-05, + "loss": 0.7472, + "step": 6850 + }, + { + "epoch": 1.51, + "learning_rate": 4.648062527520916e-05, + "loss": 0.8086, + "step": 6900 + }, + { + "epoch": 1.52, + "learning_rate": 4.645310435931308e-05, + "loss": 0.7977, + "step": 6950 + }, + { + "epoch": 1.53, + "learning_rate": 4.6425583443416996e-05, + "loss": 0.7824, + "step": 7000 + }, + { + "epoch": 1.53, + "eval_acc": 0.11687467497180516, + "eval_cer": 0.15055527217871656, + "eval_loss": 0.7585737705230713, + "eval_runtime": 2451.3177, + "eval_samples_per_second": 3.138, + "eval_steps_per_second": 0.392, + "step": 7000 + }, + { + "epoch": 1.54, + "learning_rate": 4.639806252752092e-05, + "loss": 0.7635, + "step": 7050 + }, + { + "epoch": 1.55, + "learning_rate": 4.637054161162484e-05, + "loss": 0.7356, + "step": 7100 + }, + { + "epoch": 1.57, + "learning_rate": 4.6343020695728754e-05, + "loss": 0.7346, + "step": 7150 + }, + { + "epoch": 1.58, + "learning_rate": 4.631549977983268e-05, + "loss": 0.7301, + "step": 7200 + }, + { + "epoch": 1.59, + "learning_rate": 4.6287978863936595e-05, + "loss": 0.7068, + "step": 7250 + }, + { + "epoch": 1.6, + "learning_rate": 4.626045794804051e-05, + "loss": 0.7685, + "step": 7300 + }, + { + "epoch": 1.61, + "learning_rate": 4.623293703214443e-05, + "loss": 0.8037, + "step": 7350 + }, + { + "epoch": 1.62, + "learning_rate": 4.6205416116248354e-05, + "loss": 0.7372, + "step": 7400 + }, + { + "epoch": 1.63, + "learning_rate": 4.617789520035227e-05, + "loss": 0.8084, + "step": 7450 + }, + { + "epoch": 1.64, + "learning_rate": 4.615037428445619e-05, + "loss": 0.7914, + "step": 7500 + }, + { + "epoch": 1.64, + "eval_acc": 0.11440457616817412, + "eval_cer": 0.14829325316783437, + "eval_loss": 0.7524902820587158, + "eval_runtime": 2409.0581, + "eval_samples_per_second": 3.193, + "eval_steps_per_second": 0.399, + "step": 7500 + }, + { + "epoch": 1.65, + "learning_rate": 4.612285336856011e-05, + "loss": 0.729, + "step": 7550 + }, + { + "epoch": 1.66, + "learning_rate": 4.609533245266402e-05, + "loss": 0.7403, + "step": 7600 + }, + { + "epoch": 1.67, + "learning_rate": 4.6067811536767946e-05, + "loss": 0.7689, + "step": 7650 + }, + { + "epoch": 1.69, + "learning_rate": 4.6040290620871863e-05, + "loss": 0.7634, + "step": 7700 + }, + { + "epoch": 1.7, + "learning_rate": 4.601276970497578e-05, + "loss": 0.7226, + "step": 7750 + }, + { + "epoch": 1.71, + "learning_rate": 4.5985248789079705e-05, + "loss": 0.7229, + "step": 7800 + }, + { + "epoch": 1.72, + "learning_rate": 4.595772787318362e-05, + "loss": 0.7214, + "step": 7850 + }, + { + "epoch": 1.73, + "learning_rate": 4.593020695728754e-05, + "loss": 0.7426, + "step": 7900 + }, + { + "epoch": 1.74, + "learning_rate": 4.5902686041391456e-05, + "loss": 0.7308, + "step": 7950 + }, + { + "epoch": 1.75, + "learning_rate": 4.587516512549538e-05, + "loss": 0.7028, + "step": 8000 + }, + { + "epoch": 1.75, + "eval_acc": 0.11960478417581841, + "eval_cer": 0.14120952125507172, + "eval_loss": 0.7158035039901733, + "eval_runtime": 2504.3586, + "eval_samples_per_second": 3.071, + "eval_steps_per_second": 0.384, + "step": 8000 + }, + { + "epoch": 1.76, + "learning_rate": 4.58476442095993e-05, + "loss": 0.7227, + "step": 8050 + }, + { + "epoch": 1.77, + "learning_rate": 4.5820123293703214e-05, + "loss": 0.7226, + "step": 8100 + }, + { + "epoch": 1.78, + "learning_rate": 4.579260237780714e-05, + "loss": 0.7356, + "step": 8150 + }, + { + "epoch": 1.8, + "learning_rate": 4.5765081461911055e-05, + "loss": 0.7623, + "step": 8200 + }, + { + "epoch": 1.81, + "learning_rate": 4.573756054601497e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 1.82, + "learning_rate": 4.5710039630118897e-05, + "loss": 0.7187, + "step": 8300 + }, + { + "epoch": 1.83, + "learning_rate": 4.5682518714222814e-05, + "loss": 0.699, + "step": 8350 + }, + { + "epoch": 1.84, + "learning_rate": 4.565499779832673e-05, + "loss": 0.7306, + "step": 8400 + }, + { + "epoch": 1.85, + "learning_rate": 4.562747688243065e-05, + "loss": 0.6923, + "step": 8450 + }, + { + "epoch": 1.86, + "learning_rate": 4.559995596653457e-05, + "loss": 0.695, + "step": 8500 + }, + { + "epoch": 1.86, + "eval_acc": 0.12077483097753837, + "eval_cer": 0.13451183645245962, + "eval_loss": 0.6999772787094116, + "eval_runtime": 2428.0435, + "eval_samples_per_second": 3.168, + "eval_steps_per_second": 0.396, + "step": 8500 + }, + { + "epoch": 1.87, + "learning_rate": 4.557243505063849e-05, + "loss": 0.7093, + "step": 8550 + }, + { + "epoch": 1.88, + "learning_rate": 4.5544914134742406e-05, + "loss": 0.7235, + "step": 8600 + }, + { + "epoch": 1.89, + "learning_rate": 4.551739321884633e-05, + "loss": 0.7058, + "step": 8650 + }, + { + "epoch": 1.9, + "learning_rate": 4.548987230295024e-05, + "loss": 0.6814, + "step": 8700 + }, + { + "epoch": 1.92, + "learning_rate": 4.5462351387054165e-05, + "loss": 0.6825, + "step": 8750 + }, + { + "epoch": 1.93, + "learning_rate": 4.543483047115808e-05, + "loss": 0.6913, + "step": 8800 + }, + { + "epoch": 1.94, + "learning_rate": 4.5407309555262e-05, + "loss": 0.7448, + "step": 8850 + }, + { + "epoch": 1.95, + "learning_rate": 4.537978863936592e-05, + "loss": 0.6972, + "step": 8900 + }, + { + "epoch": 1.96, + "learning_rate": 4.535226772346984e-05, + "loss": 0.6655, + "step": 8950 + }, + { + "epoch": 1.97, + "learning_rate": 4.532474680757376e-05, + "loss": 0.7233, + "step": 9000 + }, + { + "epoch": 1.97, + "eval_acc": 0.12688507538652039, + "eval_cer": 0.1338389671921972, + "eval_loss": 0.6706005930900574, + "eval_runtime": 2472.9865, + "eval_samples_per_second": 3.11, + "eval_steps_per_second": 0.389, + "step": 9000 + }, + { + "epoch": 1.98, + "learning_rate": 4.5297225891677674e-05, + "loss": 0.6875, + "step": 9050 + }, + { + "epoch": 1.99, + "learning_rate": 4.52697049757816e-05, + "loss": 0.6878, + "step": 9100 + }, + { + "epoch": 2.0, + "learning_rate": 4.5242184059885516e-05, + "loss": 0.6486, + "step": 9150 + }, + { + "epoch": 2.01, + "learning_rate": 4.521466314398943e-05, + "loss": 0.5747, + "step": 9200 + }, + { + "epoch": 2.03, + "learning_rate": 4.518714222809336e-05, + "loss": 0.5716, + "step": 9250 + }, + { + "epoch": 2.04, + "learning_rate": 4.515962131219727e-05, + "loss": 0.5689, + "step": 9300 + }, + { + "epoch": 2.05, + "learning_rate": 4.513210039630119e-05, + "loss": 0.5613, + "step": 9350 + }, + { + "epoch": 2.06, + "learning_rate": 4.5104579480405115e-05, + "loss": 0.5692, + "step": 9400 + }, + { + "epoch": 2.07, + "learning_rate": 4.5077058564509025e-05, + "loss": 0.5767, + "step": 9450 + }, + { + "epoch": 2.08, + "learning_rate": 4.504953764861295e-05, + "loss": 0.5931, + "step": 9500 + }, + { + "epoch": 2.08, + "eval_acc": 0.1327353093951202, + "eval_cer": 0.1271722901895972, + "eval_loss": 0.657380223274231, + "eval_runtime": 2374.7935, + "eval_samples_per_second": 3.239, + "eval_steps_per_second": 0.405, + "step": 9500 + }, + { + "epoch": 2.09, + "learning_rate": 4.5022016732716866e-05, + "loss": 0.5764, + "step": 9550 + }, + { + "epoch": 2.1, + "learning_rate": 4.4994495816820784e-05, + "loss": 0.6077, + "step": 9600 + }, + { + "epoch": 2.11, + "learning_rate": 4.496697490092471e-05, + "loss": 0.5916, + "step": 9650 + }, + { + "epoch": 2.12, + "learning_rate": 4.4939453985028625e-05, + "loss": 0.5912, + "step": 9700 + }, + { + "epoch": 2.13, + "learning_rate": 4.491193306913254e-05, + "loss": 0.5922, + "step": 9750 + }, + { + "epoch": 2.15, + "learning_rate": 4.488441215323646e-05, + "loss": 0.5762, + "step": 9800 + }, + { + "epoch": 2.16, + "learning_rate": 4.485689123734038e-05, + "loss": 0.5326, + "step": 9850 + }, + { + "epoch": 2.17, + "learning_rate": 4.48293703214443e-05, + "loss": 0.559, + "step": 9900 + }, + { + "epoch": 2.18, + "learning_rate": 4.480184940554822e-05, + "loss": 0.5893, + "step": 9950 + }, + { + "epoch": 2.19, + "learning_rate": 4.477432848965214e-05, + "loss": 0.6099, + "step": 10000 + }, + { + "epoch": 2.19, + "eval_acc": 0.13598543939989788, + "eval_cer": 0.12414437851841631, + "eval_loss": 0.6273950934410095, + "eval_runtime": 2477.2194, + "eval_samples_per_second": 3.105, + "eval_steps_per_second": 0.388, + "step": 10000 + }, + { + "epoch": 2.2, + "learning_rate": 4.474680757375606e-05, + "loss": 0.5248, + "step": 10050 + }, + { + "epoch": 2.21, + "learning_rate": 4.4719286657859976e-05, + "loss": 0.5711, + "step": 10100 + }, + { + "epoch": 2.22, + "learning_rate": 4.469176574196389e-05, + "loss": 0.5764, + "step": 10150 + }, + { + "epoch": 2.23, + "learning_rate": 4.466424482606782e-05, + "loss": 0.5665, + "step": 10200 + }, + { + "epoch": 2.24, + "learning_rate": 4.4636723910171734e-05, + "loss": 0.559, + "step": 10250 + }, + { + "epoch": 2.26, + "learning_rate": 4.460920299427565e-05, + "loss": 0.5561, + "step": 10300 + }, + { + "epoch": 2.27, + "learning_rate": 4.4581682078379575e-05, + "loss": 0.6077, + "step": 10350 + }, + { + "epoch": 2.28, + "learning_rate": 4.4554161162483485e-05, + "loss": 0.5665, + "step": 10400 + }, + { + "epoch": 2.29, + "learning_rate": 4.452664024658741e-05, + "loss": 0.5709, + "step": 10450 + }, + { + "epoch": 2.3, + "learning_rate": 4.449911933069133e-05, + "loss": 0.5675, + "step": 10500 + }, + { + "epoch": 2.3, + "eval_acc": 0.13624544980028008, + "eval_cer": 0.11983739509673659, + "eval_loss": 0.6147823333740234, + "eval_runtime": 2420.4162, + "eval_samples_per_second": 3.178, + "eval_steps_per_second": 0.397, + "step": 10500 + }, + { + "epoch": 2.31, + "learning_rate": 4.4471598414795244e-05, + "loss": 0.5561, + "step": 10550 + }, + { + "epoch": 2.32, + "learning_rate": 4.444407749889917e-05, + "loss": 0.5691, + "step": 10600 + }, + { + "epoch": 2.33, + "learning_rate": 4.4416556583003085e-05, + "loss": 0.5794, + "step": 10650 + }, + { + "epoch": 2.34, + "learning_rate": 4.4389035667107e-05, + "loss": 0.585, + "step": 10700 + }, + { + "epoch": 2.35, + "learning_rate": 4.436151475121092e-05, + "loss": 0.5594, + "step": 10750 + }, + { + "epoch": 2.36, + "learning_rate": 4.433399383531484e-05, + "loss": 0.5559, + "step": 10800 + }, + { + "epoch": 2.38, + "learning_rate": 4.430647291941876e-05, + "loss": 0.5723, + "step": 10850 + }, + { + "epoch": 2.39, + "learning_rate": 4.427895200352268e-05, + "loss": 0.5518, + "step": 10900 + }, + { + "epoch": 2.4, + "learning_rate": 4.42514310876266e-05, + "loss": 0.5658, + "step": 10950 + }, + { + "epoch": 2.41, + "learning_rate": 4.422446059004844e-05, + "loss": 0.5909, + "step": 11000 + }, + { + "epoch": 2.41, + "eval_acc": 0.14911596461919968, + "eval_cer": 0.1165412659554511, + "eval_loss": 0.6066173911094666, + "eval_runtime": 2395.5518, + "eval_samples_per_second": 3.211, + "eval_steps_per_second": 0.402, + "step": 11000 + }, + { + "epoch": 2.42, + "learning_rate": 4.419693967415236e-05, + "loss": 0.5511, + "step": 11050 + }, + { + "epoch": 2.43, + "learning_rate": 4.416941875825628e-05, + "loss": 0.5715, + "step": 11100 + }, + { + "epoch": 2.44, + "learning_rate": 4.4141897842360195e-05, + "loss": 0.5424, + "step": 11150 + }, + { + "epoch": 2.45, + "learning_rate": 4.411437692646411e-05, + "loss": 0.5902, + "step": 11200 + }, + { + "epoch": 2.46, + "learning_rate": 4.4086856010568036e-05, + "loss": 0.5642, + "step": 11250 + }, + { + "epoch": 2.47, + "learning_rate": 4.4059335094671954e-05, + "loss": 0.5261, + "step": 11300 + }, + { + "epoch": 2.49, + "learning_rate": 4.403181417877587e-05, + "loss": 0.548, + "step": 11350 + }, + { + "epoch": 2.5, + "learning_rate": 4.4004293262879795e-05, + "loss": 0.5623, + "step": 11400 + }, + { + "epoch": 2.51, + "learning_rate": 4.3976772346983705e-05, + "loss": 0.5736, + "step": 11450 + }, + { + "epoch": 2.52, + "learning_rate": 4.394925143108763e-05, + "loss": 0.5088, + "step": 11500 + }, + { + "epoch": 2.52, + "eval_acc": 0.15275611022455068, + "eval_cer": 0.11419397549453565, + "eval_loss": 0.5838978886604309, + "eval_runtime": 2490.473, + "eval_samples_per_second": 3.089, + "eval_steps_per_second": 0.386, + "step": 11500 + }, + { + "epoch": 2.53, + "learning_rate": 4.392173051519155e-05, + "loss": 0.5502, + "step": 11550 + }, + { + "epoch": 2.54, + "learning_rate": 4.389420959929546e-05, + "loss": 0.5694, + "step": 11600 + }, + { + "epoch": 2.55, + "learning_rate": 4.386668868339939e-05, + "loss": 0.5612, + "step": 11650 + }, + { + "epoch": 2.56, + "learning_rate": 4.3839167767503304e-05, + "loss": 0.5259, + "step": 11700 + }, + { + "epoch": 2.57, + "learning_rate": 4.381164685160722e-05, + "loss": 0.5491, + "step": 11750 + }, + { + "epoch": 2.58, + "learning_rate": 4.378412593571114e-05, + "loss": 0.5464, + "step": 11800 + }, + { + "epoch": 2.59, + "learning_rate": 4.375660501981506e-05, + "loss": 0.5407, + "step": 11850 + }, + { + "epoch": 2.61, + "learning_rate": 4.372908410391898e-05, + "loss": 0.511, + "step": 11900 + }, + { + "epoch": 2.62, + "learning_rate": 4.37015631880229e-05, + "loss": 0.5804, + "step": 11950 + }, + { + "epoch": 2.63, + "learning_rate": 4.367404227212682e-05, + "loss": 0.5597, + "step": 12000 + }, + { + "epoch": 2.63, + "eval_acc": 0.15236609462397735, + "eval_cer": 0.11861258699625891, + "eval_loss": 0.5860298275947571, + "eval_runtime": 2486.6893, + "eval_samples_per_second": 3.093, + "eval_steps_per_second": 0.387, + "step": 12000 + }, + { + "epoch": 2.64, + "learning_rate": 4.364652135623074e-05, + "loss": 0.5672, + "step": 12050 + }, + { + "epoch": 2.65, + "learning_rate": 4.3619000440334655e-05, + "loss": 0.535, + "step": 12100 + }, + { + "epoch": 2.66, + "learning_rate": 4.359147952443858e-05, + "loss": 0.527, + "step": 12150 + }, + { + "epoch": 2.67, + "learning_rate": 4.3563958608542496e-05, + "loss": 0.5377, + "step": 12200 + }, + { + "epoch": 2.68, + "learning_rate": 4.3536437692646414e-05, + "loss": 0.5611, + "step": 12250 + }, + { + "epoch": 2.69, + "learning_rate": 4.350891677675033e-05, + "loss": 0.5487, + "step": 12300 + }, + { + "epoch": 2.7, + "learning_rate": 4.3481395860854255e-05, + "loss": 0.5627, + "step": 12350 + }, + { + "epoch": 2.71, + "learning_rate": 4.345387494495817e-05, + "loss": 0.5527, + "step": 12400 + }, + { + "epoch": 2.73, + "learning_rate": 4.342635402906209e-05, + "loss": 0.527, + "step": 12450 + }, + { + "epoch": 2.74, + "learning_rate": 4.339883311316601e-05, + "loss": 0.5305, + "step": 12500 + }, + { + "epoch": 2.74, + "eval_acc": 0.1570462818308572, + "eval_cer": 0.11371335459434821, + "eval_loss": 0.5711297392845154, + "eval_runtime": 2415.9809, + "eval_samples_per_second": 3.184, + "eval_steps_per_second": 0.398, + "step": 12500 + }, + { + "epoch": 2.75, + "learning_rate": 4.337131219726992e-05, + "loss": 0.531, + "step": 12550 + }, + { + "epoch": 2.76, + "learning_rate": 4.334379128137385e-05, + "loss": 0.5056, + "step": 12600 + }, + { + "epoch": 2.77, + "learning_rate": 4.3316270365477764e-05, + "loss": 0.5423, + "step": 12650 + }, + { + "epoch": 2.78, + "learning_rate": 4.328874944958168e-05, + "loss": 0.5293, + "step": 12700 + }, + { + "epoch": 2.79, + "learning_rate": 4.3261228533685606e-05, + "loss": 0.5174, + "step": 12750 + }, + { + "epoch": 2.8, + "learning_rate": 4.323370761778952e-05, + "loss": 0.5212, + "step": 12800 + }, + { + "epoch": 2.81, + "learning_rate": 4.320618670189344e-05, + "loss": 0.5368, + "step": 12850 + }, + { + "epoch": 2.82, + "learning_rate": 4.317866578599736e-05, + "loss": 0.526, + "step": 12900 + }, + { + "epoch": 2.84, + "learning_rate": 4.315114487010128e-05, + "loss": 0.5288, + "step": 12950 + }, + { + "epoch": 2.85, + "learning_rate": 4.31236239542052e-05, + "loss": 0.5453, + "step": 13000 + }, + { + "epoch": 2.85, + "eval_acc": 0.1608164326363993, + "eval_cer": 0.11214901108373812, + "eval_loss": 0.5519789457321167, + "eval_runtime": 2476.1549, + "eval_samples_per_second": 3.106, + "eval_steps_per_second": 0.389, + "step": 13000 + }, + { + "epoch": 2.86, + "learning_rate": 4.3096103038309115e-05, + "loss": 0.5121, + "step": 13050 + }, + { + "epoch": 2.87, + "learning_rate": 4.306858212241304e-05, + "loss": 0.5292, + "step": 13100 + }, + { + "epoch": 2.88, + "learning_rate": 4.304106120651695e-05, + "loss": 0.5381, + "step": 13150 + }, + { + "epoch": 2.89, + "learning_rate": 4.3013540290620874e-05, + "loss": 0.4871, + "step": 13200 + }, + { + "epoch": 2.9, + "learning_rate": 4.29860193747248e-05, + "loss": 0.4917, + "step": 13250 + }, + { + "epoch": 2.91, + "learning_rate": 4.295849845882871e-05, + "loss": 0.5253, + "step": 13300 + }, + { + "epoch": 2.92, + "learning_rate": 4.293097754293263e-05, + "loss": 0.5127, + "step": 13350 + }, + { + "epoch": 2.93, + "learning_rate": 4.290345662703655e-05, + "loss": 0.4916, + "step": 13400 + }, + { + "epoch": 2.94, + "learning_rate": 4.2875935711140466e-05, + "loss": 0.5053, + "step": 13450 + }, + { + "epoch": 2.96, + "learning_rate": 4.284841479524439e-05, + "loss": 0.4931, + "step": 13500 + }, + { + "epoch": 2.96, + "eval_acc": 0.16666666664499913, + "eval_cer": 0.10775055465202271, + "eval_loss": 0.5448443293571472, + "eval_runtime": 2437.1677, + "eval_samples_per_second": 3.156, + "eval_steps_per_second": 0.395, + "step": 13500 + }, + { + "epoch": 2.97, + "learning_rate": 4.282089387934831e-05, + "loss": 0.5284, + "step": 13550 + }, + { + "epoch": 2.98, + "learning_rate": 4.2793372963452225e-05, + "loss": 0.4843, + "step": 13600 + }, + { + "epoch": 2.99, + "learning_rate": 4.276585204755614e-05, + "loss": 0.5259, + "step": 13650 + }, + { + "epoch": 3.0, + "learning_rate": 4.2738331131660066e-05, + "loss": 0.5194, + "step": 13700 + }, + { + "epoch": 3.01, + "learning_rate": 4.271081021576398e-05, + "loss": 0.4109, + "step": 13750 + }, + { + "epoch": 3.02, + "learning_rate": 4.26832892998679e-05, + "loss": 0.412, + "step": 13800 + }, + { + "epoch": 3.03, + "learning_rate": 4.2655768383971824e-05, + "loss": 0.4071, + "step": 13850 + }, + { + "epoch": 3.04, + "learning_rate": 4.262824746807574e-05, + "loss": 0.434, + "step": 13900 + }, + { + "epoch": 3.05, + "learning_rate": 4.260072655217966e-05, + "loss": 0.4159, + "step": 13950 + }, + { + "epoch": 3.07, + "learning_rate": 4.2573205636283575e-05, + "loss": 0.4154, + "step": 14000 + }, + { + "epoch": 3.07, + "eval_acc": 0.17095683825130564, + "eval_cer": 0.10412419244060843, + "eval_loss": 0.5264742970466614, + "eval_runtime": 2375.6119, + "eval_samples_per_second": 3.238, + "eval_steps_per_second": 0.405, + "step": 14000 + }, + { + "epoch": 3.08, + "learning_rate": 4.25456847203875e-05, + "loss": 0.4043, + "step": 14050 + }, + { + "epoch": 3.09, + "learning_rate": 4.2518163804491417e-05, + "loss": 0.4101, + "step": 14100 + }, + { + "epoch": 3.1, + "learning_rate": 4.2490642888595334e-05, + "loss": 0.4034, + "step": 14150 + }, + { + "epoch": 3.11, + "learning_rate": 4.246312197269926e-05, + "loss": 0.4307, + "step": 14200 + }, + { + "epoch": 3.12, + "learning_rate": 4.243560105680317e-05, + "loss": 0.408, + "step": 14250 + }, + { + "epoch": 3.13, + "learning_rate": 4.240808014090709e-05, + "loss": 0.42, + "step": 14300 + }, + { + "epoch": 3.14, + "learning_rate": 4.2380559225011016e-05, + "loss": 0.4213, + "step": 14350 + }, + { + "epoch": 3.15, + "learning_rate": 4.2353038309114926e-05, + "loss": 0.3993, + "step": 14400 + }, + { + "epoch": 3.16, + "learning_rate": 4.232551739321885e-05, + "loss": 0.4172, + "step": 14450 + }, + { + "epoch": 3.17, + "learning_rate": 4.229799647732277e-05, + "loss": 0.4151, + "step": 14500 + }, + { + "epoch": 3.17, + "eval_acc": 0.181097243866212, + "eval_cer": 0.10292264019013983, + "eval_loss": 0.5197580456733704, + "eval_runtime": 2415.2798, + "eval_samples_per_second": 3.185, + "eval_steps_per_second": 0.398, + "step": 14500 + }, + { + "epoch": 3.19, + "learning_rate": 4.2270475561426685e-05, + "loss": 0.4562, + "step": 14550 + }, + { + "epoch": 3.2, + "learning_rate": 4.224295464553061e-05, + "loss": 0.4017, + "step": 14600 + }, + { + "epoch": 3.21, + "learning_rate": 4.2215433729634526e-05, + "loss": 0.4453, + "step": 14650 + }, + { + "epoch": 3.22, + "learning_rate": 4.218791281373844e-05, + "loss": 0.4333, + "step": 14700 + }, + { + "epoch": 3.23, + "learning_rate": 4.216039189784236e-05, + "loss": 0.431, + "step": 14750 + }, + { + "epoch": 3.24, + "learning_rate": 4.2132870981946284e-05, + "loss": 0.4047, + "step": 14800 + }, + { + "epoch": 3.25, + "learning_rate": 4.2105350066050194e-05, + "loss": 0.4046, + "step": 14850 + }, + { + "epoch": 3.26, + "learning_rate": 4.207782915015412e-05, + "loss": 0.4192, + "step": 14900 + }, + { + "epoch": 3.27, + "learning_rate": 4.205030823425804e-05, + "loss": 0.4344, + "step": 14950 + }, + { + "epoch": 3.28, + "learning_rate": 4.202278731836195e-05, + "loss": 0.3972, + "step": 15000 + }, + { + "epoch": 3.28, + "eval_acc": 0.17641705665933216, + "eval_cer": 0.10194434409975829, + "eval_loss": 0.5112528800964355, + "eval_runtime": 2422.1094, + "eval_samples_per_second": 3.176, + "eval_steps_per_second": 0.397, + "step": 15000 + }, + { + "epoch": 3.3, + "learning_rate": 4.1995266402465877e-05, + "loss": 0.4183, + "step": 15050 + }, + { + "epoch": 3.31, + "learning_rate": 4.1967745486569794e-05, + "loss": 0.4075, + "step": 15100 + }, + { + "epoch": 3.32, + "learning_rate": 4.194022457067371e-05, + "loss": 0.4087, + "step": 15150 + }, + { + "epoch": 3.33, + "learning_rate": 4.1912703654777635e-05, + "loss": 0.4282, + "step": 15200 + }, + { + "epoch": 3.34, + "learning_rate": 4.188518273888155e-05, + "loss": 0.3998, + "step": 15250 + }, + { + "epoch": 3.35, + "learning_rate": 4.185766182298547e-05, + "loss": 0.4356, + "step": 15300 + }, + { + "epoch": 3.36, + "learning_rate": 4.1830140907089386e-05, + "loss": 0.4222, + "step": 15350 + }, + { + "epoch": 3.37, + "learning_rate": 4.180261999119331e-05, + "loss": 0.4528, + "step": 15400 + }, + { + "epoch": 3.38, + "learning_rate": 4.177509907529723e-05, + "loss": 0.3943, + "step": 15450 + }, + { + "epoch": 3.39, + "learning_rate": 4.1747578159401145e-05, + "loss": 0.4126, + "step": 15500 + }, + { + "epoch": 3.39, + "eval_acc": 0.1777171086612432, + "eval_cer": 0.10119860650946745, + "eval_loss": 0.5097116231918335, + "eval_runtime": 2416.1113, + "eval_samples_per_second": 3.184, + "eval_steps_per_second": 0.398, + "step": 15500 + }, + { + "epoch": 3.4, + "learning_rate": 4.172005724350507e-05, + "loss": 0.4223, + "step": 15550 + }, + { + "epoch": 3.42, + "learning_rate": 4.1692536327608986e-05, + "loss": 0.4272, + "step": 15600 + }, + { + "epoch": 3.43, + "learning_rate": 4.16650154117129e-05, + "loss": 0.4322, + "step": 15650 + }, + { + "epoch": 3.44, + "learning_rate": 4.163749449581682e-05, + "loss": 0.4199, + "step": 15700 + }, + { + "epoch": 3.45, + "learning_rate": 4.1609973579920744e-05, + "loss": 0.4032, + "step": 15750 + }, + { + "epoch": 3.46, + "learning_rate": 4.158245266402466e-05, + "loss": 0.415, + "step": 15800 + }, + { + "epoch": 3.47, + "learning_rate": 4.155493174812858e-05, + "loss": 0.417, + "step": 15850 + }, + { + "epoch": 3.48, + "learning_rate": 4.15274108322325e-05, + "loss": 0.4071, + "step": 15900 + }, + { + "epoch": 3.49, + "learning_rate": 4.149988991633641e-05, + "loss": 0.4119, + "step": 15950 + }, + { + "epoch": 3.5, + "learning_rate": 4.1472369000440337e-05, + "loss": 0.4082, + "step": 16000 + }, + { + "epoch": 3.5, + "eval_acc": 0.18616744667366517, + "eval_cer": 0.09961720870885071, + "eval_loss": 0.49047818779945374, + "eval_runtime": 2463.5838, + "eval_samples_per_second": 3.122, + "eval_steps_per_second": 0.39, + "step": 16000 + }, + { + "epoch": 3.51, + "learning_rate": 4.144484808454426e-05, + "loss": 0.4502, + "step": 16050 + }, + { + "epoch": 3.53, + "learning_rate": 4.141732716864817e-05, + "loss": 0.4244, + "step": 16100 + }, + { + "epoch": 3.54, + "learning_rate": 4.1389806252752095e-05, + "loss": 0.3983, + "step": 16150 + }, + { + "epoch": 3.55, + "learning_rate": 4.136228533685601e-05, + "loss": 0.394, + "step": 16200 + }, + { + "epoch": 3.56, + "learning_rate": 4.133476442095993e-05, + "loss": 0.4326, + "step": 16250 + }, + { + "epoch": 3.57, + "learning_rate": 4.130779392338177e-05, + "loss": 0.4019, + "step": 16300 + }, + { + "epoch": 3.58, + "learning_rate": 4.1280273007485696e-05, + "loss": 0.402, + "step": 16350 + }, + { + "epoch": 3.59, + "learning_rate": 4.1252752091589606e-05, + "loss": 0.4258, + "step": 16400 + }, + { + "epoch": 3.6, + "learning_rate": 4.122523117569353e-05, + "loss": 0.4178, + "step": 16450 + }, + { + "epoch": 3.61, + "learning_rate": 4.1197710259797454e-05, + "loss": 0.4013, + "step": 16500 + }, + { + "epoch": 3.61, + "eval_acc": 0.19175767028188276, + "eval_cer": 0.09475518566695453, + "eval_loss": 0.4757140874862671, + "eval_runtime": 2404.3281, + "eval_samples_per_second": 3.199, + "eval_steps_per_second": 0.4, + "step": 16500 + }, + { + "epoch": 3.62, + "learning_rate": 4.1170189343901364e-05, + "loss": 0.3808, + "step": 16550 + }, + { + "epoch": 3.63, + "learning_rate": 4.114266842800529e-05, + "loss": 0.4034, + "step": 16600 + }, + { + "epoch": 3.65, + "learning_rate": 4.1115147512109205e-05, + "loss": 0.4042, + "step": 16650 + }, + { + "epoch": 3.66, + "learning_rate": 4.108762659621312e-05, + "loss": 0.4304, + "step": 16700 + }, + { + "epoch": 3.67, + "learning_rate": 4.106010568031704e-05, + "loss": 0.427, + "step": 16750 + }, + { + "epoch": 3.68, + "learning_rate": 4.1032584764420964e-05, + "loss": 0.3849, + "step": 16800 + }, + { + "epoch": 3.69, + "learning_rate": 4.100506384852488e-05, + "loss": 0.412, + "step": 16850 + }, + { + "epoch": 3.7, + "learning_rate": 4.09775429326288e-05, + "loss": 0.4099, + "step": 16900 + }, + { + "epoch": 3.71, + "learning_rate": 4.095002201673272e-05, + "loss": 0.4052, + "step": 16950 + }, + { + "epoch": 3.72, + "learning_rate": 4.092250110083663e-05, + "loss": 0.4014, + "step": 17000 + }, + { + "epoch": 3.72, + "eval_acc": 0.19825793029143812, + "eval_cer": 0.09623580811753196, + "eval_loss": 0.47613686323165894, + "eval_runtime": 2485.0631, + "eval_samples_per_second": 3.095, + "eval_steps_per_second": 0.387, + "step": 17000 + }, + { + "epoch": 3.73, + "learning_rate": 4.0894980184940556e-05, + "loss": 0.4268, + "step": 17050 + }, + { + "epoch": 3.74, + "learning_rate": 4.086745926904448e-05, + "loss": 0.384, + "step": 17100 + }, + { + "epoch": 3.75, + "learning_rate": 4.083993835314839e-05, + "loss": 0.3994, + "step": 17150 + }, + { + "epoch": 3.77, + "learning_rate": 4.0812417437252315e-05, + "loss": 0.4024, + "step": 17200 + }, + { + "epoch": 3.78, + "learning_rate": 4.078489652135623e-05, + "loss": 0.3895, + "step": 17250 + }, + { + "epoch": 3.79, + "learning_rate": 4.075737560546015e-05, + "loss": 0.387, + "step": 17300 + }, + { + "epoch": 3.8, + "learning_rate": 4.072985468956407e-05, + "loss": 0.4229, + "step": 17350 + }, + { + "epoch": 3.81, + "learning_rate": 4.070233377366799e-05, + "loss": 0.4237, + "step": 17400 + }, + { + "epoch": 3.82, + "learning_rate": 4.0675363276089826e-05, + "loss": 0.3938, + "step": 17450 + }, + { + "epoch": 3.83, + "learning_rate": 4.064784236019375e-05, + "loss": 0.3766, + "step": 17500 + }, + { + "epoch": 3.83, + "eval_acc": 0.19656786268895374, + "eval_cer": 0.09395828520664373, + "eval_loss": 0.468285471200943, + "eval_runtime": 2433.7209, + "eval_samples_per_second": 3.161, + "eval_steps_per_second": 0.395, + "step": 17500 + }, + { + "epoch": 3.84, + "learning_rate": 4.0620321444297674e-05, + "loss": 0.4031, + "step": 17550 + }, + { + "epoch": 3.85, + "learning_rate": 4.0592800528401584e-05, + "loss": 0.4033, + "step": 17600 + }, + { + "epoch": 3.86, + "learning_rate": 4.056527961250551e-05, + "loss": 0.4201, + "step": 17650 + }, + { + "epoch": 3.88, + "learning_rate": 4.0537758696609425e-05, + "loss": 0.3757, + "step": 17700 + }, + { + "epoch": 3.89, + "learning_rate": 4.051023778071334e-05, + "loss": 0.3922, + "step": 17750 + }, + { + "epoch": 3.9, + "learning_rate": 4.048271686481726e-05, + "loss": 0.3964, + "step": 17800 + }, + { + "epoch": 3.91, + "learning_rate": 4.045519594892118e-05, + "loss": 0.3917, + "step": 17850 + }, + { + "epoch": 3.92, + "learning_rate": 4.04276750330251e-05, + "loss": 0.3899, + "step": 17900 + }, + { + "epoch": 3.93, + "learning_rate": 4.040015411712902e-05, + "loss": 0.3806, + "step": 17950 + }, + { + "epoch": 3.94, + "learning_rate": 4.037263320123294e-05, + "loss": 0.3757, + "step": 18000 + }, + { + "epoch": 3.94, + "eval_acc": 0.20319812789870018, + "eval_cer": 0.09292107429623922, + "eval_loss": 0.46000081300735474, + "eval_runtime": 2445.2781, + "eval_samples_per_second": 3.146, + "eval_steps_per_second": 0.393, + "step": 18000 + }, + { + "epoch": 3.95, + "learning_rate": 4.034511228533686e-05, + "loss": 0.4001, + "step": 18050 + }, + { + "epoch": 3.96, + "learning_rate": 4.0317591369440776e-05, + "loss": 0.3737, + "step": 18100 + }, + { + "epoch": 3.97, + "learning_rate": 4.02900704535447e-05, + "loss": 0.4042, + "step": 18150 + }, + { + "epoch": 3.98, + "learning_rate": 4.026254953764862e-05, + "loss": 0.3745, + "step": 18200 + }, + { + "epoch": 4.0, + "learning_rate": 4.0235028621752534e-05, + "loss": 0.3906, + "step": 18250 + }, + { + "epoch": 4.01, + "learning_rate": 4.020750770585645e-05, + "loss": 0.312, + "step": 18300 + }, + { + "epoch": 4.02, + "learning_rate": 4.0179986789960375e-05, + "loss": 0.3038, + "step": 18350 + }, + { + "epoch": 4.03, + "learning_rate": 4.015246587406429e-05, + "loss": 0.3159, + "step": 18400 + }, + { + "epoch": 4.04, + "learning_rate": 4.012494495816821e-05, + "loss": 0.2978, + "step": 18450 + }, + { + "epoch": 4.05, + "learning_rate": 4.0097424042272134e-05, + "loss": 0.2936, + "step": 18500 + }, + { + "epoch": 4.05, + "eval_acc": 0.20800832030577113, + "eval_cer": 0.09400944807666368, + "eval_loss": 0.4694964587688446, + "eval_runtime": 2466.2638, + "eval_samples_per_second": 3.119, + "eval_steps_per_second": 0.39, + "step": 18500 + }, + { + "epoch": 4.06, + "learning_rate": 4.0069903126376044e-05, + "loss": 0.3163, + "step": 18550 + }, + { + "epoch": 4.07, + "learning_rate": 4.004238221047997e-05, + "loss": 0.2903, + "step": 18600 + }, + { + "epoch": 4.08, + "learning_rate": 4.0014861294583885e-05, + "loss": 0.3251, + "step": 18650 + }, + { + "epoch": 4.09, + "learning_rate": 3.99873403786878e-05, + "loss": 0.3194, + "step": 18700 + }, + { + "epoch": 4.11, + "learning_rate": 3.9959819462791726e-05, + "loss": 0.3029, + "step": 18750 + }, + { + "epoch": 4.12, + "learning_rate": 3.9932298546895643e-05, + "loss": 0.3113, + "step": 18800 + }, + { + "epoch": 4.13, + "learning_rate": 3.990477763099956e-05, + "loss": 0.3039, + "step": 18850 + }, + { + "epoch": 4.14, + "learning_rate": 3.987725671510348e-05, + "loss": 0.298, + "step": 18900 + }, + { + "epoch": 4.15, + "learning_rate": 3.98497357992074e-05, + "loss": 0.3092, + "step": 18950 + }, + { + "epoch": 4.16, + "learning_rate": 3.982221488331132e-05, + "loss": 0.3142, + "step": 19000 + }, + { + "epoch": 4.16, + "eval_acc": 0.21970878832297078, + "eval_cer": 0.08872261817460182, + "eval_loss": 0.44163626432418823, + "eval_runtime": 2477.8186, + "eval_samples_per_second": 3.104, + "eval_steps_per_second": 0.388, + "step": 19000 + }, + { + "epoch": 4.17, + "learning_rate": 3.9794693967415236e-05, + "loss": 0.3072, + "step": 19050 + }, + { + "epoch": 4.18, + "learning_rate": 3.976717305151916e-05, + "loss": 0.3123, + "step": 19100 + }, + { + "epoch": 4.19, + "learning_rate": 3.973965213562307e-05, + "loss": 0.2902, + "step": 19150 + }, + { + "epoch": 4.2, + "learning_rate": 3.9712131219726994e-05, + "loss": 0.3212, + "step": 19200 + }, + { + "epoch": 4.21, + "learning_rate": 3.968461030383092e-05, + "loss": 0.2916, + "step": 19250 + }, + { + "epoch": 4.23, + "learning_rate": 3.965708938793483e-05, + "loss": 0.3308, + "step": 19300 + }, + { + "epoch": 4.24, + "learning_rate": 3.962956847203875e-05, + "loss": 0.3016, + "step": 19350 + }, + { + "epoch": 4.25, + "learning_rate": 3.960204755614267e-05, + "loss": 0.3116, + "step": 19400 + }, + { + "epoch": 4.26, + "learning_rate": 3.957452664024659e-05, + "loss": 0.3083, + "step": 19450 + }, + { + "epoch": 4.27, + "learning_rate": 3.954700572435051e-05, + "loss": 0.3144, + "step": 19500 + }, + { + "epoch": 4.27, + "eval_acc": 0.2143785751151354, + "eval_cer": 0.0860528465935606, + "eval_loss": 0.437505841255188, + "eval_runtime": 2403.934, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 19500 + }, + { + "epoch": 4.28, + "learning_rate": 3.951948480845443e-05, + "loss": 0.2962, + "step": 19550 + }, + { + "epoch": 4.29, + "learning_rate": 3.9491963892558345e-05, + "loss": 0.3088, + "step": 19600 + }, + { + "epoch": 4.3, + "learning_rate": 3.946444297666226e-05, + "loss": 0.3045, + "step": 19650 + }, + { + "epoch": 4.31, + "learning_rate": 3.9436922060766186e-05, + "loss": 0.3207, + "step": 19700 + }, + { + "epoch": 4.32, + "learning_rate": 3.9409401144870103e-05, + "loss": 0.3149, + "step": 19750 + }, + { + "epoch": 4.34, + "learning_rate": 3.938188022897402e-05, + "loss": 0.3031, + "step": 19800 + }, + { + "epoch": 4.35, + "learning_rate": 3.9354359313077945e-05, + "loss": 0.3168, + "step": 19850 + }, + { + "epoch": 4.36, + "learning_rate": 3.932683839718186e-05, + "loss": 0.3027, + "step": 19900 + }, + { + "epoch": 4.37, + "learning_rate": 3.929931748128578e-05, + "loss": 0.3243, + "step": 19950 + }, + { + "epoch": 4.38, + "learning_rate": 3.9271796565389696e-05, + "loss": 0.3092, + "step": 20000 + }, + { + "epoch": 4.38, + "eval_acc": 0.21697867911895752, + "eval_cer": 0.08637222693368517, + "eval_loss": 0.4346335232257843, + "eval_runtime": 2403.4504, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 20000 + }, + { + "epoch": 4.39, + "learning_rate": 3.924427564949362e-05, + "loss": 0.3074, + "step": 20050 + }, + { + "epoch": 4.4, + "learning_rate": 3.921675473359754e-05, + "loss": 0.3082, + "step": 20100 + }, + { + "epoch": 4.41, + "learning_rate": 3.9189233817701454e-05, + "loss": 0.3088, + "step": 20150 + }, + { + "epoch": 4.42, + "learning_rate": 3.916171290180538e-05, + "loss": 0.3118, + "step": 20200 + }, + { + "epoch": 4.43, + "learning_rate": 3.913419198590929e-05, + "loss": 0.3229, + "step": 20250 + }, + { + "epoch": 4.44, + "learning_rate": 3.910667107001321e-05, + "loss": 0.3273, + "step": 20300 + }, + { + "epoch": 4.46, + "learning_rate": 3.9079150154117137e-05, + "loss": 0.3068, + "step": 20350 + }, + { + "epoch": 4.47, + "learning_rate": 3.905162923822105e-05, + "loss": 0.3177, + "step": 20400 + }, + { + "epoch": 4.48, + "learning_rate": 3.902410832232497e-05, + "loss": 0.3011, + "step": 20450 + }, + { + "epoch": 4.49, + "learning_rate": 3.899658740642889e-05, + "loss": 0.3275, + "step": 20500 + }, + { + "epoch": 4.49, + "eval_acc": 0.2238689547290862, + "eval_cer": 0.08576757483344935, + "eval_loss": 0.42684033513069153, + "eval_runtime": 2437.8724, + "eval_samples_per_second": 3.155, + "eval_steps_per_second": 0.395, + "step": 20500 + }, + { + "epoch": 4.5, + "learning_rate": 3.8969066490532805e-05, + "loss": 0.3103, + "step": 20550 + }, + { + "epoch": 4.51, + "learning_rate": 3.894154557463672e-05, + "loss": 0.3184, + "step": 20600 + }, + { + "epoch": 4.52, + "learning_rate": 3.8914024658740646e-05, + "loss": 0.3149, + "step": 20650 + }, + { + "epoch": 4.53, + "learning_rate": 3.8886503742844564e-05, + "loss": 0.3312, + "step": 20700 + }, + { + "epoch": 4.54, + "learning_rate": 3.885898282694848e-05, + "loss": 0.3176, + "step": 20750 + }, + { + "epoch": 4.55, + "learning_rate": 3.8831461911052405e-05, + "loss": 0.3085, + "step": 20800 + }, + { + "epoch": 4.57, + "learning_rate": 3.8803940995156315e-05, + "loss": 0.3118, + "step": 20850 + }, + { + "epoch": 4.58, + "learning_rate": 3.877642007926024e-05, + "loss": 0.3103, + "step": 20900 + }, + { + "epoch": 4.59, + "learning_rate": 3.874889916336416e-05, + "loss": 0.3097, + "step": 20950 + }, + { + "epoch": 4.6, + "learning_rate": 3.872137824746807e-05, + "loss": 0.3132, + "step": 21000 + }, + { + "epoch": 4.6, + "eval_acc": 0.23218928754131704, + "eval_cer": 0.08339392774252363, + "eval_loss": 0.42323964834213257, + "eval_runtime": 2399.2661, + "eval_samples_per_second": 3.206, + "eval_steps_per_second": 0.401, + "step": 21000 + }, + { + "epoch": 4.61, + "learning_rate": 3.8693857331572e-05, + "loss": 0.3174, + "step": 21050 + }, + { + "epoch": 4.62, + "learning_rate": 3.8666336415675914e-05, + "loss": 0.2985, + "step": 21100 + }, + { + "epoch": 4.63, + "learning_rate": 3.863881549977983e-05, + "loss": 0.3264, + "step": 21150 + }, + { + "epoch": 4.64, + "learning_rate": 3.8611294583883756e-05, + "loss": 0.2915, + "step": 21200 + }, + { + "epoch": 4.65, + "learning_rate": 3.858377366798767e-05, + "loss": 0.2875, + "step": 21250 + }, + { + "epoch": 4.66, + "learning_rate": 3.855625275209159e-05, + "loss": 0.3061, + "step": 21300 + }, + { + "epoch": 4.67, + "learning_rate": 3.852873183619551e-05, + "loss": 0.311, + "step": 21350 + }, + { + "epoch": 4.69, + "learning_rate": 3.850121092029943e-05, + "loss": 0.3053, + "step": 21400 + }, + { + "epoch": 4.7, + "learning_rate": 3.847369000440335e-05, + "loss": 0.3197, + "step": 21450 + }, + { + "epoch": 4.71, + "learning_rate": 3.8446169088507265e-05, + "loss": 0.3039, + "step": 21500 + }, + { + "epoch": 4.71, + "eval_acc": 0.22971918873768601, + "eval_cer": 0.08402803725277093, + "eval_loss": 0.4141163229942322, + "eval_runtime": 2448.8052, + "eval_samples_per_second": 3.141, + "eval_steps_per_second": 0.393, + "step": 21500 + }, + { + "epoch": 4.72, + "learning_rate": 3.841864817261119e-05, + "loss": 0.273, + "step": 21550 + }, + { + "epoch": 4.73, + "learning_rate": 3.8391127256715106e-05, + "loss": 0.3525, + "step": 21600 + }, + { + "epoch": 4.74, + "learning_rate": 3.8363606340819024e-05, + "loss": 0.3004, + "step": 21650 + }, + { + "epoch": 4.75, + "learning_rate": 3.833608542492294e-05, + "loss": 0.3112, + "step": 21700 + }, + { + "epoch": 4.76, + "learning_rate": 3.8308564509026865e-05, + "loss": 0.3008, + "step": 21750 + }, + { + "epoch": 4.77, + "learning_rate": 3.828104359313078e-05, + "loss": 0.2954, + "step": 21800 + }, + { + "epoch": 4.78, + "learning_rate": 3.82535226772347e-05, + "loss": 0.2975, + "step": 21850 + }, + { + "epoch": 4.8, + "learning_rate": 3.822600176133862e-05, + "loss": 0.2923, + "step": 21900 + }, + { + "epoch": 4.81, + "learning_rate": 3.819848084544253e-05, + "loss": 0.2934, + "step": 21950 + }, + { + "epoch": 4.82, + "learning_rate": 3.817095992954646e-05, + "loss": 0.3063, + "step": 22000 + }, + { + "epoch": 4.82, + "eval_acc": 0.236999479948388, + "eval_cer": 0.08368695145263791, + "eval_loss": 0.40729042887687683, + "eval_runtime": 2434.9032, + "eval_samples_per_second": 3.159, + "eval_steps_per_second": 0.395, + "step": 22000 + }, + { + "epoch": 4.83, + "learning_rate": 3.814343901365038e-05, + "loss": 0.3022, + "step": 22050 + }, + { + "epoch": 4.84, + "learning_rate": 3.811591809775429e-05, + "loss": 0.2964, + "step": 22100 + }, + { + "epoch": 4.85, + "learning_rate": 3.8088397181858216e-05, + "loss": 0.3109, + "step": 22150 + }, + { + "epoch": 4.86, + "learning_rate": 3.806087626596213e-05, + "loss": 0.3044, + "step": 22200 + }, + { + "epoch": 4.87, + "learning_rate": 3.803335535006605e-05, + "loss": 0.3082, + "step": 22250 + }, + { + "epoch": 4.88, + "learning_rate": 3.8005834434169974e-05, + "loss": 0.3162, + "step": 22300 + }, + { + "epoch": 4.89, + "learning_rate": 3.797831351827389e-05, + "loss": 0.3013, + "step": 22350 + }, + { + "epoch": 4.9, + "learning_rate": 3.795079260237781e-05, + "loss": 0.306, + "step": 22400 + }, + { + "epoch": 4.92, + "learning_rate": 3.7923271686481725e-05, + "loss": 0.299, + "step": 22450 + }, + { + "epoch": 4.93, + "learning_rate": 3.789575077058565e-05, + "loss": 0.3085, + "step": 22500 + }, + { + "epoch": 4.93, + "eval_acc": 0.24193967755565005, + "eval_cer": 0.07995981389118433, + "eval_loss": 0.3981165289878845, + "eval_runtime": 2412.3309, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 22500 + }, + { + "epoch": 4.94, + "learning_rate": 3.7868229854689566e-05, + "loss": 0.2896, + "step": 22550 + }, + { + "epoch": 4.95, + "learning_rate": 3.7840708938793484e-05, + "loss": 0.3118, + "step": 22600 + }, + { + "epoch": 4.96, + "learning_rate": 3.781318802289741e-05, + "loss": 0.2927, + "step": 22650 + }, + { + "epoch": 4.97, + "learning_rate": 3.778566710700132e-05, + "loss": 0.3019, + "step": 22700 + }, + { + "epoch": 4.98, + "learning_rate": 3.775814619110524e-05, + "loss": 0.2849, + "step": 22750 + }, + { + "epoch": 4.99, + "learning_rate": 3.773062527520916e-05, + "loss": 0.3009, + "step": 22800 + }, + { + "epoch": 5.0, + "learning_rate": 3.7703104359313076e-05, + "loss": 0.2766, + "step": 22850 + }, + { + "epoch": 5.01, + "learning_rate": 3.7675583443417e-05, + "loss": 0.2198, + "step": 22900 + }, + { + "epoch": 5.02, + "learning_rate": 3.764806252752092e-05, + "loss": 0.2204, + "step": 22950 + }, + { + "epoch": 5.04, + "learning_rate": 3.7620541611624835e-05, + "loss": 0.2059, + "step": 23000 + }, + { + "epoch": 5.04, + "eval_acc": 0.2506500259684542, + "eval_cer": 0.0784187262305833, + "eval_loss": 0.3935554623603821, + "eval_runtime": 2451.7215, + "eval_samples_per_second": 3.137, + "eval_steps_per_second": 0.392, + "step": 23000 + }, + { + "epoch": 5.05, + "learning_rate": 3.759302069572875e-05, + "loss": 0.2265, + "step": 23050 + }, + { + "epoch": 5.06, + "learning_rate": 3.7565499779832676e-05, + "loss": 0.2116, + "step": 23100 + }, + { + "epoch": 5.07, + "learning_rate": 3.753797886393659e-05, + "loss": 0.2365, + "step": 23150 + }, + { + "epoch": 5.08, + "learning_rate": 3.751045794804051e-05, + "loss": 0.2479, + "step": 23200 + }, + { + "epoch": 5.09, + "learning_rate": 3.7482937032144434e-05, + "loss": 0.2294, + "step": 23250 + }, + { + "epoch": 5.1, + "learning_rate": 3.745541611624835e-05, + "loss": 0.2348, + "step": 23300 + }, + { + "epoch": 5.11, + "learning_rate": 3.742789520035227e-05, + "loss": 0.2182, + "step": 23350 + }, + { + "epoch": 5.12, + "learning_rate": 3.740037428445619e-05, + "loss": 0.2341, + "step": 23400 + }, + { + "epoch": 5.13, + "learning_rate": 3.737285336856011e-05, + "loss": 0.2202, + "step": 23450 + }, + { + "epoch": 5.15, + "learning_rate": 3.7345332452664027e-05, + "loss": 0.2505, + "step": 23500 + }, + { + "epoch": 5.15, + "eval_acc": 0.2485699427653965, + "eval_cer": 0.07783422920035535, + "eval_loss": 0.3888697326183319, + "eval_runtime": 2447.0114, + "eval_samples_per_second": 3.143, + "eval_steps_per_second": 0.393, + "step": 23500 + }, + { + "epoch": 5.16, + "learning_rate": 3.731836195508587e-05, + "loss": 0.2226, + "step": 23550 + }, + { + "epoch": 5.17, + "learning_rate": 3.7290841039189786e-05, + "loss": 0.2489, + "step": 23600 + }, + { + "epoch": 5.18, + "learning_rate": 3.72633201232937e-05, + "loss": 0.2134, + "step": 23650 + }, + { + "epoch": 5.19, + "learning_rate": 3.723579920739763e-05, + "loss": 0.2285, + "step": 23700 + }, + { + "epoch": 5.2, + "learning_rate": 3.7208278291501544e-05, + "loss": 0.2436, + "step": 23750 + }, + { + "epoch": 5.21, + "learning_rate": 3.718075737560546e-05, + "loss": 0.234, + "step": 23800 + }, + { + "epoch": 5.22, + "learning_rate": 3.715323645970938e-05, + "loss": 0.2278, + "step": 23850 + }, + { + "epoch": 5.23, + "learning_rate": 3.71257155438133e-05, + "loss": 0.2226, + "step": 23900 + }, + { + "epoch": 5.24, + "learning_rate": 3.709819462791722e-05, + "loss": 0.2259, + "step": 23950 + }, + { + "epoch": 5.25, + "learning_rate": 3.707067371202114e-05, + "loss": 0.2251, + "step": 24000 + }, + { + "epoch": 5.25, + "eval_acc": 0.26183047318488945, + "eval_cer": 0.07873345540070605, + "eval_loss": 0.38558822870254517, + "eval_runtime": 2441.3865, + "eval_samples_per_second": 3.151, + "eval_steps_per_second": 0.394, + "step": 24000 + }, + { + "epoch": 5.27, + "learning_rate": 3.704315279612506e-05, + "loss": 0.2283, + "step": 24050 + }, + { + "epoch": 5.28, + "learning_rate": 3.701563188022897e-05, + "loss": 0.2267, + "step": 24100 + }, + { + "epoch": 5.29, + "learning_rate": 3.6988110964332895e-05, + "loss": 0.2295, + "step": 24150 + }, + { + "epoch": 5.3, + "learning_rate": 3.696059004843682e-05, + "loss": 0.2367, + "step": 24200 + }, + { + "epoch": 5.31, + "learning_rate": 3.693306913254073e-05, + "loss": 0.2212, + "step": 24250 + }, + { + "epoch": 5.32, + "learning_rate": 3.6905548216644654e-05, + "loss": 0.2417, + "step": 24300 + }, + { + "epoch": 5.33, + "learning_rate": 3.687802730074857e-05, + "loss": 0.2277, + "step": 24350 + }, + { + "epoch": 5.34, + "learning_rate": 3.685050638485249e-05, + "loss": 0.2199, + "step": 24400 + }, + { + "epoch": 5.35, + "learning_rate": 3.682298546895641e-05, + "loss": 0.2416, + "step": 24450 + }, + { + "epoch": 5.36, + "learning_rate": 3.679546455306033e-05, + "loss": 0.2337, + "step": 24500 + }, + { + "epoch": 5.36, + "eval_acc": 0.2687207487950181, + "eval_cer": 0.07631484699976279, + "eval_loss": 0.38175275921821594, + "eval_runtime": 2401.0005, + "eval_samples_per_second": 3.204, + "eval_steps_per_second": 0.401, + "step": 24500 + }, + { + "epoch": 5.38, + "learning_rate": 3.6767943637164246e-05, + "loss": 0.2184, + "step": 24550 + }, + { + "epoch": 5.39, + "learning_rate": 3.674042272126816e-05, + "loss": 0.2285, + "step": 24600 + }, + { + "epoch": 5.4, + "learning_rate": 3.671290180537209e-05, + "loss": 0.2263, + "step": 24650 + }, + { + "epoch": 5.41, + "learning_rate": 3.6685380889476e-05, + "loss": 0.2257, + "step": 24700 + }, + { + "epoch": 5.42, + "learning_rate": 3.665785997357992e-05, + "loss": 0.2422, + "step": 24750 + }, + { + "epoch": 5.43, + "learning_rate": 3.6630339057683846e-05, + "loss": 0.2271, + "step": 24800 + }, + { + "epoch": 5.44, + "learning_rate": 3.6602818141787756e-05, + "loss": 0.2312, + "step": 24850 + }, + { + "epoch": 5.45, + "learning_rate": 3.657529722589168e-05, + "loss": 0.2221, + "step": 24900 + }, + { + "epoch": 5.46, + "learning_rate": 3.65477763099956e-05, + "loss": 0.2382, + "step": 24950 + }, + { + "epoch": 5.47, + "learning_rate": 3.6520255394099514e-05, + "loss": 0.2173, + "step": 25000 + }, + { + "epoch": 5.47, + "eval_acc": 0.27171086839941355, + "eval_cer": 0.07626523451974344, + "eval_loss": 0.38215985894203186, + "eval_runtime": 2452.1174, + "eval_samples_per_second": 3.137, + "eval_steps_per_second": 0.392, + "step": 25000 + }, + { + "epoch": 5.48, + "learning_rate": 3.649273447820344e-05, + "loss": 0.2393, + "step": 25050 + }, + { + "epoch": 5.5, + "learning_rate": 3.6465213562307355e-05, + "loss": 0.2445, + "step": 25100 + }, + { + "epoch": 5.51, + "learning_rate": 3.643769264641127e-05, + "loss": 0.2258, + "step": 25150 + }, + { + "epoch": 5.52, + "learning_rate": 3.641017173051519e-05, + "loss": 0.2308, + "step": 25200 + }, + { + "epoch": 5.53, + "learning_rate": 3.6382650814619114e-05, + "loss": 0.2456, + "step": 25250 + }, + { + "epoch": 5.54, + "learning_rate": 3.635512989872303e-05, + "loss": 0.2332, + "step": 25300 + }, + { + "epoch": 5.55, + "learning_rate": 3.632760898282695e-05, + "loss": 0.2377, + "step": 25350 + }, + { + "epoch": 5.56, + "learning_rate": 3.630008806693087e-05, + "loss": 0.2393, + "step": 25400 + }, + { + "epoch": 5.57, + "learning_rate": 3.627256715103479e-05, + "loss": 0.2172, + "step": 25450 + }, + { + "epoch": 5.58, + "learning_rate": 3.6245046235138706e-05, + "loss": 0.2316, + "step": 25500 + }, + { + "epoch": 5.58, + "eval_acc": 0.27626105040610227, + "eval_cer": 0.07521251970933289, + "eval_loss": 0.37909185886383057, + "eval_runtime": 2402.1196, + "eval_samples_per_second": 3.202, + "eval_steps_per_second": 0.4, + "step": 25500 + }, + { + "epoch": 5.59, + "learning_rate": 3.6217525319242623e-05, + "loss": 0.2604, + "step": 25550 + }, + { + "epoch": 5.61, + "learning_rate": 3.619000440334655e-05, + "loss": 0.2378, + "step": 25600 + }, + { + "epoch": 5.62, + "learning_rate": 3.6162483487450465e-05, + "loss": 0.2424, + "step": 25650 + }, + { + "epoch": 5.63, + "learning_rate": 3.613551298987231e-05, + "loss": 0.2439, + "step": 25700 + }, + { + "epoch": 5.64, + "learning_rate": 3.6107992073976224e-05, + "loss": 0.2377, + "step": 25750 + }, + { + "epoch": 5.65, + "learning_rate": 3.608047115808014e-05, + "loss": 0.2263, + "step": 25800 + }, + { + "epoch": 5.66, + "learning_rate": 3.6052950242184065e-05, + "loss": 0.2228, + "step": 25850 + }, + { + "epoch": 5.67, + "learning_rate": 3.602542932628798e-05, + "loss": 0.2231, + "step": 25900 + }, + { + "epoch": 5.68, + "learning_rate": 3.59979084103919e-05, + "loss": 0.2247, + "step": 25950 + }, + { + "epoch": 5.69, + "learning_rate": 3.597038749449582e-05, + "loss": 0.2348, + "step": 26000 + }, + { + "epoch": 5.69, + "eval_acc": 0.2852314092192887, + "eval_cer": 0.07336600521861274, + "eval_loss": 0.36554059386253357, + "eval_runtime": 2406.905, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 26000 + }, + { + "epoch": 5.7, + "learning_rate": 3.594286657859974e-05, + "loss": 0.2381, + "step": 26050 + }, + { + "epoch": 5.71, + "learning_rate": 3.591534566270366e-05, + "loss": 0.2189, + "step": 26100 + }, + { + "epoch": 5.73, + "learning_rate": 3.5887824746807575e-05, + "loss": 0.2467, + "step": 26150 + }, + { + "epoch": 5.74, + "learning_rate": 3.58603038309115e-05, + "loss": 0.2192, + "step": 26200 + }, + { + "epoch": 5.75, + "learning_rate": 3.583278291501541e-05, + "loss": 0.2484, + "step": 26250 + }, + { + "epoch": 5.76, + "learning_rate": 3.580526199911933e-05, + "loss": 0.2236, + "step": 26300 + }, + { + "epoch": 5.77, + "learning_rate": 3.577774108322326e-05, + "loss": 0.2344, + "step": 26350 + }, + { + "epoch": 5.78, + "learning_rate": 3.575022016732717e-05, + "loss": 0.2343, + "step": 26400 + }, + { + "epoch": 5.79, + "learning_rate": 3.572269925143109e-05, + "loss": 0.2252, + "step": 26450 + }, + { + "epoch": 5.8, + "learning_rate": 3.569517833553501e-05, + "loss": 0.2553, + "step": 26500 + }, + { + "epoch": 5.8, + "eval_acc": 0.2951118044338128, + "eval_cer": 0.07258925982830981, + "eval_loss": 0.3526802659034729, + "eval_runtime": 2459.0427, + "eval_samples_per_second": 3.128, + "eval_steps_per_second": 0.391, + "step": 26500 + }, + { + "epoch": 5.81, + "learning_rate": 3.5667657419638926e-05, + "loss": 0.2235, + "step": 26550 + }, + { + "epoch": 5.82, + "learning_rate": 3.564013650374284e-05, + "loss": 0.2257, + "step": 26600 + }, + { + "epoch": 5.84, + "learning_rate": 3.561261558784677e-05, + "loss": 0.2309, + "step": 26650 + }, + { + "epoch": 5.85, + "learning_rate": 3.5585094671950684e-05, + "loss": 0.2291, + "step": 26700 + }, + { + "epoch": 5.86, + "learning_rate": 3.55575737560546e-05, + "loss": 0.2064, + "step": 26750 + }, + { + "epoch": 5.87, + "learning_rate": 3.5530052840158525e-05, + "loss": 0.237, + "step": 26800 + }, + { + "epoch": 5.88, + "learning_rate": 3.5502531924262436e-05, + "loss": 0.2322, + "step": 26850 + }, + { + "epoch": 5.89, + "learning_rate": 3.547501100836636e-05, + "loss": 0.2466, + "step": 26900 + }, + { + "epoch": 5.9, + "learning_rate": 3.5447490092470284e-05, + "loss": 0.2296, + "step": 26950 + }, + { + "epoch": 5.91, + "learning_rate": 3.5419969176574194e-05, + "loss": 0.2319, + "step": 27000 + }, + { + "epoch": 5.91, + "eval_acc": 0.30330213204585255, + "eval_cer": 0.07118305609776139, + "eval_loss": 0.35267382860183716, + "eval_runtime": 2453.5416, + "eval_samples_per_second": 3.135, + "eval_steps_per_second": 0.392, + "step": 27000 + }, + { + "epoch": 5.92, + "learning_rate": 3.539244826067812e-05, + "loss": 0.2354, + "step": 27050 + }, + { + "epoch": 5.93, + "learning_rate": 3.5364927344782035e-05, + "loss": 0.2217, + "step": 27100 + }, + { + "epoch": 5.94, + "learning_rate": 3.533740642888595e-05, + "loss": 0.2285, + "step": 27150 + }, + { + "epoch": 5.96, + "learning_rate": 3.5309885512989876e-05, + "loss": 0.2071, + "step": 27200 + }, + { + "epoch": 5.97, + "learning_rate": 3.528236459709379e-05, + "loss": 0.2207, + "step": 27250 + }, + { + "epoch": 5.98, + "learning_rate": 3.525484368119771e-05, + "loss": 0.2173, + "step": 27300 + }, + { + "epoch": 5.99, + "learning_rate": 3.522732276530163e-05, + "loss": 0.2414, + "step": 27350 + }, + { + "epoch": 6.0, + "learning_rate": 3.519980184940555e-05, + "loss": 0.231, + "step": 27400 + }, + { + "epoch": 6.01, + "learning_rate": 3.517228093350947e-05, + "loss": 0.1624, + "step": 27450 + }, + { + "epoch": 6.02, + "learning_rate": 3.5144760017613386e-05, + "loss": 0.1641, + "step": 27500 + }, + { + "epoch": 6.02, + "eval_acc": 0.3092823712546435, + "eval_cer": 0.06900475814691186, + "eval_loss": 0.3480505049228668, + "eval_runtime": 2442.26, + "eval_samples_per_second": 3.15, + "eval_steps_per_second": 0.394, + "step": 27500 + }, + { + "epoch": 6.03, + "learning_rate": 3.511723910171731e-05, + "loss": 0.1622, + "step": 27550 + }, + { + "epoch": 6.04, + "learning_rate": 3.508971818582123e-05, + "loss": 0.1549, + "step": 27600 + }, + { + "epoch": 6.05, + "learning_rate": 3.5062197269925144e-05, + "loss": 0.1649, + "step": 27650 + }, + { + "epoch": 6.06, + "learning_rate": 3.503467635402906e-05, + "loss": 0.1519, + "step": 27700 + }, + { + "epoch": 6.08, + "learning_rate": 3.5007155438132985e-05, + "loss": 0.1632, + "step": 27750 + }, + { + "epoch": 6.09, + "learning_rate": 3.49796345222369e-05, + "loss": 0.173, + "step": 27800 + }, + { + "epoch": 6.1, + "learning_rate": 3.495211360634082e-05, + "loss": 0.1684, + "step": 27850 + }, + { + "epoch": 6.11, + "learning_rate": 3.4924592690444744e-05, + "loss": 0.1682, + "step": 27900 + }, + { + "epoch": 6.12, + "learning_rate": 3.4897071774548654e-05, + "loss": 0.1652, + "step": 27950 + }, + { + "epoch": 6.13, + "learning_rate": 3.486955085865258e-05, + "loss": 0.1691, + "step": 28000 + }, + { + "epoch": 6.13, + "eval_acc": 0.3095423816550257, + "eval_cer": 0.07106367606771484, + "eval_loss": 0.34748131036758423, + "eval_runtime": 2438.156, + "eval_samples_per_second": 3.155, + "eval_steps_per_second": 0.395, + "step": 28000 + }, + { + "epoch": 6.14, + "learning_rate": 3.48420299427565e-05, + "loss": 0.1665, + "step": 28050 + }, + { + "epoch": 6.15, + "learning_rate": 3.481450902686041e-05, + "loss": 0.1652, + "step": 28100 + }, + { + "epoch": 6.16, + "learning_rate": 3.4786988110964336e-05, + "loss": 0.1716, + "step": 28150 + }, + { + "epoch": 6.17, + "learning_rate": 3.4759467195068253e-05, + "loss": 0.1717, + "step": 28200 + }, + { + "epoch": 6.19, + "learning_rate": 3.473194627917217e-05, + "loss": 0.1633, + "step": 28250 + }, + { + "epoch": 6.2, + "learning_rate": 3.4704425363276095e-05, + "loss": 0.1724, + "step": 28300 + }, + { + "epoch": 6.21, + "learning_rate": 3.467690444738001e-05, + "loss": 0.1676, + "step": 28350 + }, + { + "epoch": 6.22, + "learning_rate": 3.464938353148393e-05, + "loss": 0.1761, + "step": 28400 + }, + { + "epoch": 6.23, + "learning_rate": 3.4621862615587846e-05, + "loss": 0.1665, + "step": 28450 + }, + { + "epoch": 6.24, + "learning_rate": 3.459434169969177e-05, + "loss": 0.1777, + "step": 28500 + }, + { + "epoch": 6.24, + "eval_acc": 0.3095423816550257, + "eval_cer": 0.0700838295873327, + "eval_loss": 0.3448556661605835, + "eval_runtime": 2427.8966, + "eval_samples_per_second": 3.168, + "eval_steps_per_second": 0.396, + "step": 28500 + }, + { + "epoch": 6.25, + "learning_rate": 3.456682078379568e-05, + "loss": 0.1831, + "step": 28550 + }, + { + "epoch": 6.26, + "learning_rate": 3.4539299867899604e-05, + "loss": 0.1543, + "step": 28600 + }, + { + "epoch": 6.27, + "learning_rate": 3.451177895200353e-05, + "loss": 0.1754, + "step": 28650 + }, + { + "epoch": 6.28, + "learning_rate": 3.448425803610744e-05, + "loss": 0.185, + "step": 28700 + }, + { + "epoch": 6.29, + "learning_rate": 3.445673712021136e-05, + "loss": 0.1825, + "step": 28750 + }, + { + "epoch": 6.31, + "learning_rate": 3.442921620431528e-05, + "loss": 0.1781, + "step": 28800 + }, + { + "epoch": 6.32, + "learning_rate": 3.44016952884192e-05, + "loss": 0.183, + "step": 28850 + }, + { + "epoch": 6.33, + "learning_rate": 3.437417437252312e-05, + "loss": 0.1726, + "step": 28900 + }, + { + "epoch": 6.34, + "learning_rate": 3.434720387494496e-05, + "loss": 0.1857, + "step": 28950 + }, + { + "epoch": 6.35, + "learning_rate": 3.4319682959048874e-05, + "loss": 0.1812, + "step": 29000 + }, + { + "epoch": 6.35, + "eval_acc": 0.3226729068743275, + "eval_cer": 0.06831483459664278, + "eval_loss": 0.3413793742656708, + "eval_runtime": 2402.3605, + "eval_samples_per_second": 3.202, + "eval_steps_per_second": 0.4, + "step": 29000 + }, + { + "epoch": 6.36, + "learning_rate": 3.42921620431528e-05, + "loss": 0.1814, + "step": 29050 + }, + { + "epoch": 6.37, + "learning_rate": 3.426464112725672e-05, + "loss": 0.172, + "step": 29100 + }, + { + "epoch": 6.38, + "learning_rate": 3.423712021136063e-05, + "loss": 0.1774, + "step": 29150 + }, + { + "epoch": 6.39, + "learning_rate": 3.4209599295464556e-05, + "loss": 0.165, + "step": 29200 + }, + { + "epoch": 6.4, + "learning_rate": 3.418207837956847e-05, + "loss": 0.1818, + "step": 29250 + }, + { + "epoch": 6.42, + "learning_rate": 3.415455746367239e-05, + "loss": 0.1792, + "step": 29300 + }, + { + "epoch": 6.43, + "learning_rate": 3.4127036547776314e-05, + "loss": 0.1761, + "step": 29350 + }, + { + "epoch": 6.44, + "learning_rate": 3.409951563188023e-05, + "loss": 0.1896, + "step": 29400 + }, + { + "epoch": 6.45, + "learning_rate": 3.407199471598415e-05, + "loss": 0.17, + "step": 29450 + }, + { + "epoch": 6.46, + "learning_rate": 3.4044473800088066e-05, + "loss": 0.176, + "step": 29500 + }, + { + "epoch": 6.46, + "eval_acc": 0.31448257926228773, + "eval_cer": 0.06894274254688768, + "eval_loss": 0.34407129883766174, + "eval_runtime": 2409.0404, + "eval_samples_per_second": 3.193, + "eval_steps_per_second": 0.399, + "step": 29500 + }, + { + "epoch": 6.47, + "learning_rate": 3.401695288419199e-05, + "loss": 0.1697, + "step": 29550 + }, + { + "epoch": 6.48, + "learning_rate": 3.398943196829591e-05, + "loss": 0.1635, + "step": 29600 + }, + { + "epoch": 6.49, + "learning_rate": 3.3961911052399824e-05, + "loss": 0.1768, + "step": 29650 + }, + { + "epoch": 6.5, + "learning_rate": 3.393439013650375e-05, + "loss": 0.1658, + "step": 29700 + }, + { + "epoch": 6.51, + "learning_rate": 3.3906869220607665e-05, + "loss": 0.1718, + "step": 29750 + }, + { + "epoch": 6.52, + "learning_rate": 3.387934830471158e-05, + "loss": 0.1778, + "step": 29800 + }, + { + "epoch": 6.54, + "learning_rate": 3.38518273888155e-05, + "loss": 0.1834, + "step": 29850 + }, + { + "epoch": 6.55, + "learning_rate": 3.3824306472919423e-05, + "loss": 0.1727, + "step": 29900 + }, + { + "epoch": 6.56, + "learning_rate": 3.379678555702334e-05, + "loss": 0.1676, + "step": 29950 + }, + { + "epoch": 6.57, + "learning_rate": 3.376926464112726e-05, + "loss": 0.1851, + "step": 30000 + }, + { + "epoch": 6.57, + "eval_acc": 0.341523660902038, + "eval_cer": 0.06639855255589544, + "eval_loss": 0.33033427596092224, + "eval_runtime": 2449.8438, + "eval_samples_per_second": 3.14, + "eval_steps_per_second": 0.393, + "step": 30000 + }, + { + "epoch": 6.58, + "learning_rate": 3.374174372523118e-05, + "loss": 0.1848, + "step": 30050 + }, + { + "epoch": 6.59, + "learning_rate": 3.371422280933509e-05, + "loss": 0.1665, + "step": 30100 + }, + { + "epoch": 6.6, + "learning_rate": 3.3686701893439016e-05, + "loss": 0.1852, + "step": 30150 + }, + { + "epoch": 6.61, + "learning_rate": 3.365918097754294e-05, + "loss": 0.1784, + "step": 30200 + }, + { + "epoch": 6.62, + "learning_rate": 3.363166006164685e-05, + "loss": 0.1773, + "step": 30250 + }, + { + "epoch": 6.63, + "learning_rate": 3.3604139145750774e-05, + "loss": 0.1649, + "step": 30300 + }, + { + "epoch": 6.65, + "learning_rate": 3.357661822985469e-05, + "loss": 0.1757, + "step": 30350 + }, + { + "epoch": 6.66, + "learning_rate": 3.354964773227653e-05, + "loss": 0.1737, + "step": 30400 + }, + { + "epoch": 6.67, + "learning_rate": 3.352212681638045e-05, + "loss": 0.17, + "step": 30450 + }, + { + "epoch": 6.68, + "learning_rate": 3.349460590048437e-05, + "loss": 0.1821, + "step": 30500 + }, + { + "epoch": 6.68, + "eval_acc": 0.3542641705207665, + "eval_cer": 0.0656853731556173, + "eval_loss": 0.32759806513786316, + "eval_runtime": 2443.4895, + "eval_samples_per_second": 3.148, + "eval_steps_per_second": 0.394, + "step": 30500 + }, + { + "epoch": 6.69, + "learning_rate": 3.3467084984588285e-05, + "loss": 0.184, + "step": 30550 + }, + { + "epoch": 6.7, + "learning_rate": 3.343956406869221e-05, + "loss": 0.1599, + "step": 30600 + }, + { + "epoch": 6.71, + "learning_rate": 3.3412043152796127e-05, + "loss": 0.1702, + "step": 30650 + }, + { + "epoch": 6.72, + "learning_rate": 3.3384522236900044e-05, + "loss": 0.1789, + "step": 30700 + }, + { + "epoch": 6.73, + "learning_rate": 3.335700132100397e-05, + "loss": 0.1894, + "step": 30750 + }, + { + "epoch": 6.74, + "learning_rate": 3.3329480405107885e-05, + "loss": 0.1738, + "step": 30800 + }, + { + "epoch": 6.75, + "learning_rate": 3.33019594892118e-05, + "loss": 0.1634, + "step": 30850 + }, + { + "epoch": 6.77, + "learning_rate": 3.327443857331572e-05, + "loss": 0.1645, + "step": 30900 + }, + { + "epoch": 6.78, + "learning_rate": 3.324691765741964e-05, + "loss": 0.1718, + "step": 30950 + }, + { + "epoch": 6.79, + "learning_rate": 3.321939674152356e-05, + "loss": 0.1841, + "step": 31000 + }, + { + "epoch": 6.79, + "eval_acc": 0.3491939677133133, + "eval_cer": 0.06529002370546311, + "eval_loss": 0.32412809133529663, + "eval_runtime": 2425.8573, + "eval_samples_per_second": 3.171, + "eval_steps_per_second": 0.397, + "step": 31000 + }, + { + "epoch": 6.8, + "learning_rate": 3.319187582562748e-05, + "loss": 0.1749, + "step": 31050 + }, + { + "epoch": 6.81, + "learning_rate": 3.31643549097314e-05, + "loss": 0.1653, + "step": 31100 + }, + { + "epoch": 6.82, + "learning_rate": 3.313683399383531e-05, + "loss": 0.1583, + "step": 31150 + }, + { + "epoch": 6.83, + "learning_rate": 3.3109313077939236e-05, + "loss": 0.1632, + "step": 31200 + }, + { + "epoch": 6.84, + "learning_rate": 3.308179216204316e-05, + "loss": 0.1738, + "step": 31250 + }, + { + "epoch": 6.85, + "learning_rate": 3.305427124614707e-05, + "loss": 0.166, + "step": 31300 + }, + { + "epoch": 6.86, + "learning_rate": 3.3026750330250994e-05, + "loss": 0.167, + "step": 31350 + }, + { + "epoch": 6.88, + "learning_rate": 3.299922941435491e-05, + "loss": 0.1644, + "step": 31400 + }, + { + "epoch": 6.89, + "learning_rate": 3.297170849845883e-05, + "loss": 0.1716, + "step": 31450 + }, + { + "epoch": 6.9, + "learning_rate": 3.2944187582562745e-05, + "loss": 0.1772, + "step": 31500 + }, + { + "epoch": 6.9, + "eval_acc": 0.35205408211751765, + "eval_cer": 0.06414893666501809, + "eval_loss": 0.321593701839447, + "eval_runtime": 2418.8594, + "eval_samples_per_second": 3.18, + "eval_steps_per_second": 0.398, + "step": 31500 + }, + { + "epoch": 6.91, + "learning_rate": 3.291666666666667e-05, + "loss": 0.1648, + "step": 31550 + }, + { + "epoch": 6.92, + "learning_rate": 3.2889145750770587e-05, + "loss": 0.1846, + "step": 31600 + }, + { + "epoch": 6.93, + "learning_rate": 3.2861624834874504e-05, + "loss": 0.1678, + "step": 31650 + }, + { + "epoch": 6.94, + "learning_rate": 3.283410391897843e-05, + "loss": 0.1614, + "step": 31700 + }, + { + "epoch": 6.95, + "learning_rate": 3.2806583003082345e-05, + "loss": 0.1832, + "step": 31750 + }, + { + "epoch": 6.96, + "learning_rate": 3.277906208718626e-05, + "loss": 0.1755, + "step": 31800 + }, + { + "epoch": 6.97, + "learning_rate": 3.2751541171290186e-05, + "loss": 0.1679, + "step": 31850 + }, + { + "epoch": 6.98, + "learning_rate": 3.27240202553941e-05, + "loss": 0.1672, + "step": 31900 + }, + { + "epoch": 7.0, + "learning_rate": 3.269649933949802e-05, + "loss": 0.1728, + "step": 31950 + }, + { + "epoch": 7.01, + "learning_rate": 3.266897842360194e-05, + "loss": 0.1417, + "step": 32000 + }, + { + "epoch": 7.01, + "eval_acc": 0.36648465933873053, + "eval_cer": 0.06339854790472543, + "eval_loss": 0.31661146879196167, + "eval_runtime": 2439.7983, + "eval_samples_per_second": 3.153, + "eval_steps_per_second": 0.394, + "step": 32000 + }, + { + "epoch": 7.02, + "learning_rate": 3.264145750770586e-05, + "loss": 0.1188, + "step": 32050 + }, + { + "epoch": 7.03, + "learning_rate": 3.261393659180978e-05, + "loss": 0.1098, + "step": 32100 + }, + { + "epoch": 7.04, + "learning_rate": 3.2586415675913696e-05, + "loss": 0.1097, + "step": 32150 + }, + { + "epoch": 7.05, + "learning_rate": 3.255889476001762e-05, + "loss": 0.1268, + "step": 32200 + }, + { + "epoch": 7.06, + "learning_rate": 3.253137384412153e-05, + "loss": 0.1251, + "step": 32250 + }, + { + "epoch": 7.07, + "learning_rate": 3.2503852928225454e-05, + "loss": 0.1307, + "step": 32300 + }, + { + "epoch": 7.08, + "learning_rate": 3.247633201232938e-05, + "loss": 0.1111, + "step": 32350 + }, + { + "epoch": 7.09, + "learning_rate": 3.244881109643329e-05, + "loss": 0.11, + "step": 32400 + }, + { + "epoch": 7.1, + "learning_rate": 3.242129018053721e-05, + "loss": 0.1324, + "step": 32450 + }, + { + "epoch": 7.12, + "learning_rate": 3.239376926464113e-05, + "loss": 0.1426, + "step": 32500 + }, + { + "epoch": 7.12, + "eval_acc": 0.35816432652649965, + "eval_cer": 0.0637876957948772, + "eval_loss": 0.32509633898735046, + "eval_runtime": 2419.5538, + "eval_samples_per_second": 3.179, + "eval_steps_per_second": 0.398, + "step": 32500 + }, + { + "epoch": 7.13, + "learning_rate": 3.236624834874505e-05, + "loss": 0.1214, + "step": 32550 + }, + { + "epoch": 7.14, + "learning_rate": 3.2338727432848964e-05, + "loss": 0.119, + "step": 32600 + }, + { + "epoch": 7.15, + "learning_rate": 3.231120651695289e-05, + "loss": 0.1233, + "step": 32650 + }, + { + "epoch": 7.16, + "learning_rate": 3.2283685601056805e-05, + "loss": 0.1237, + "step": 32700 + }, + { + "epoch": 7.17, + "learning_rate": 3.225616468516072e-05, + "loss": 0.1298, + "step": 32750 + }, + { + "epoch": 7.18, + "learning_rate": 3.2228643769264646e-05, + "loss": 0.1168, + "step": 32800 + }, + { + "epoch": 7.19, + "learning_rate": 3.2201122853368556e-05, + "loss": 0.1283, + "step": 32850 + }, + { + "epoch": 7.2, + "learning_rate": 3.217360193747248e-05, + "loss": 0.123, + "step": 32900 + }, + { + "epoch": 7.21, + "learning_rate": 3.2146081021576404e-05, + "loss": 0.1198, + "step": 32950 + }, + { + "epoch": 7.23, + "learning_rate": 3.2118560105680315e-05, + "loss": 0.1251, + "step": 33000 + }, + { + "epoch": 7.23, + "eval_acc": 0.3653146125370106, + "eval_cer": 0.06344195882474236, + "eval_loss": 0.3220234513282776, + "eval_runtime": 2413.7335, + "eval_samples_per_second": 3.187, + "eval_steps_per_second": 0.399, + "step": 33000 + }, + { + "epoch": 7.24, + "learning_rate": 3.209103918978424e-05, + "loss": 0.1201, + "step": 33050 + }, + { + "epoch": 7.25, + "learning_rate": 3.2063518273888156e-05, + "loss": 0.1239, + "step": 33100 + }, + { + "epoch": 7.26, + "learning_rate": 3.203599735799207e-05, + "loss": 0.1272, + "step": 33150 + }, + { + "epoch": 7.27, + "learning_rate": 3.2008476442096e-05, + "loss": 0.1176, + "step": 33200 + }, + { + "epoch": 7.28, + "learning_rate": 3.1980955526199914e-05, + "loss": 0.1229, + "step": 33250 + }, + { + "epoch": 7.29, + "learning_rate": 3.195398502862175e-05, + "loss": 0.1251, + "step": 33300 + }, + { + "epoch": 7.3, + "learning_rate": 3.1926464112725674e-05, + "loss": 0.1271, + "step": 33350 + }, + { + "epoch": 7.31, + "learning_rate": 3.189894319682959e-05, + "loss": 0.1204, + "step": 33400 + }, + { + "epoch": 7.32, + "learning_rate": 3.187142228093351e-05, + "loss": 0.1339, + "step": 33450 + }, + { + "epoch": 7.33, + "learning_rate": 3.184390136503743e-05, + "loss": 0.131, + "step": 33500 + }, + { + "epoch": 7.33, + "eval_acc": 0.3733749349488592, + "eval_cer": 0.06140319597394724, + "eval_loss": 0.3167717754840851, + "eval_runtime": 2416.3007, + "eval_samples_per_second": 3.183, + "eval_steps_per_second": 0.398, + "step": 33500 + }, + { + "epoch": 7.35, + "learning_rate": 3.181638044914135e-05, + "loss": 0.1267, + "step": 33550 + }, + { + "epoch": 7.36, + "learning_rate": 3.1788859533245266e-05, + "loss": 0.1179, + "step": 33600 + }, + { + "epoch": 7.37, + "learning_rate": 3.1761338617349184e-05, + "loss": 0.1275, + "step": 33650 + }, + { + "epoch": 7.38, + "learning_rate": 3.173381770145311e-05, + "loss": 0.1249, + "step": 33700 + }, + { + "epoch": 7.39, + "learning_rate": 3.1706296785557025e-05, + "loss": 0.1244, + "step": 33750 + }, + { + "epoch": 7.4, + "learning_rate": 3.167877586966094e-05, + "loss": 0.1268, + "step": 33800 + }, + { + "epoch": 7.41, + "learning_rate": 3.1651254953764866e-05, + "loss": 0.1255, + "step": 33850 + }, + { + "epoch": 7.42, + "learning_rate": 3.162373403786878e-05, + "loss": 0.1454, + "step": 33900 + }, + { + "epoch": 7.43, + "learning_rate": 3.15962131219727e-05, + "loss": 0.1169, + "step": 33950 + }, + { + "epoch": 7.44, + "learning_rate": 3.1568692206076624e-05, + "loss": 0.1504, + "step": 34000 + }, + { + "epoch": 7.44, + "eval_acc": 0.3848153925656766, + "eval_cer": 0.062052809384200595, + "eval_loss": 0.31372004747390747, + "eval_runtime": 2434.2574, + "eval_samples_per_second": 3.16, + "eval_steps_per_second": 0.395, + "step": 34000 + }, + { + "epoch": 7.46, + "learning_rate": 3.154117129018054e-05, + "loss": 0.1325, + "step": 34050 + }, + { + "epoch": 7.47, + "learning_rate": 3.151365037428446e-05, + "loss": 0.1274, + "step": 34100 + }, + { + "epoch": 7.48, + "learning_rate": 3.1486129458388376e-05, + "loss": 0.1428, + "step": 34150 + }, + { + "epoch": 7.49, + "learning_rate": 3.145860854249229e-05, + "loss": 0.1386, + "step": 34200 + }, + { + "epoch": 7.5, + "learning_rate": 3.143108762659622e-05, + "loss": 0.1441, + "step": 34250 + }, + { + "epoch": 7.51, + "learning_rate": 3.1403566710700134e-05, + "loss": 0.1411, + "step": 34300 + }, + { + "epoch": 7.52, + "learning_rate": 3.137604579480405e-05, + "loss": 0.1215, + "step": 34350 + }, + { + "epoch": 7.53, + "learning_rate": 3.134852487890797e-05, + "loss": 0.1198, + "step": 34400 + }, + { + "epoch": 7.54, + "learning_rate": 3.132100396301189e-05, + "loss": 0.1292, + "step": 34450 + }, + { + "epoch": 7.55, + "learning_rate": 3.129348304711581e-05, + "loss": 0.1204, + "step": 34500 + }, + { + "epoch": 7.55, + "eval_acc": 0.38715548616911655, + "eval_cer": 0.05957218538323315, + "eval_loss": 0.312330961227417, + "eval_runtime": 2394.8652, + "eval_samples_per_second": 3.212, + "eval_steps_per_second": 0.402, + "step": 34500 + }, + { + "epoch": 7.56, + "learning_rate": 3.1265962131219726e-05, + "loss": 0.1424, + "step": 34550 + }, + { + "epoch": 7.58, + "learning_rate": 3.123844121532365e-05, + "loss": 0.1363, + "step": 34600 + }, + { + "epoch": 7.59, + "learning_rate": 3.121092029942757e-05, + "loss": 0.1412, + "step": 34650 + }, + { + "epoch": 7.6, + "learning_rate": 3.1183399383531485e-05, + "loss": 0.1187, + "step": 34700 + }, + { + "epoch": 7.61, + "learning_rate": 3.11558784676354e-05, + "loss": 0.1213, + "step": 34750 + }, + { + "epoch": 7.62, + "learning_rate": 3.1128357551739326e-05, + "loss": 0.1238, + "step": 34800 + }, + { + "epoch": 7.63, + "learning_rate": 3.110083663584324e-05, + "loss": 0.1351, + "step": 34850 + }, + { + "epoch": 7.64, + "learning_rate": 3.107331571994716e-05, + "loss": 0.1293, + "step": 34900 + }, + { + "epoch": 7.65, + "learning_rate": 3.1045794804051084e-05, + "loss": 0.1248, + "step": 34950 + }, + { + "epoch": 7.66, + "learning_rate": 3.1018273888154994e-05, + "loss": 0.1319, + "step": 35000 + }, + { + "epoch": 7.66, + "eval_acc": 0.40795631819969363, + "eval_cer": 0.059308619083130364, + "eval_loss": 0.303150475025177, + "eval_runtime": 2433.7241, + "eval_samples_per_second": 3.161, + "eval_steps_per_second": 0.395, + "step": 35000 + }, + { + "epoch": 7.67, + "learning_rate": 3.099075297225892e-05, + "loss": 0.1334, + "step": 35050 + }, + { + "epoch": 7.69, + "learning_rate": 3.096323205636284e-05, + "loss": 0.1513, + "step": 35100 + }, + { + "epoch": 7.7, + "learning_rate": 3.093571114046675e-05, + "loss": 0.1355, + "step": 35150 + }, + { + "epoch": 7.71, + "learning_rate": 3.090819022457068e-05, + "loss": 0.1327, + "step": 35200 + }, + { + "epoch": 7.72, + "learning_rate": 3.0880669308674594e-05, + "loss": 0.1423, + "step": 35250 + }, + { + "epoch": 7.73, + "learning_rate": 3.085314839277851e-05, + "loss": 0.1371, + "step": 35300 + }, + { + "epoch": 7.74, + "learning_rate": 3.082562747688243e-05, + "loss": 0.129, + "step": 35350 + }, + { + "epoch": 7.75, + "learning_rate": 3.079810656098635e-05, + "loss": 0.1376, + "step": 35400 + }, + { + "epoch": 7.76, + "learning_rate": 3.077058564509027e-05, + "loss": 0.1265, + "step": 35450 + }, + { + "epoch": 7.77, + "learning_rate": 3.0743064729194186e-05, + "loss": 0.1355, + "step": 35500 + }, + { + "epoch": 7.77, + "eval_acc": 0.41419656780886677, + "eval_cer": 0.058490013162811105, + "eval_loss": 0.2999359667301178, + "eval_runtime": 2422.3339, + "eval_samples_per_second": 3.175, + "eval_steps_per_second": 0.397, + "step": 35500 + }, + { + "epoch": 7.78, + "learning_rate": 3.071554381329811e-05, + "loss": 0.1298, + "step": 35550 + }, + { + "epoch": 7.79, + "learning_rate": 3.068802289740203e-05, + "loss": 0.123, + "step": 35600 + }, + { + "epoch": 7.81, + "learning_rate": 3.0660501981505945e-05, + "loss": 0.1333, + "step": 35650 + }, + { + "epoch": 7.82, + "learning_rate": 3.063298106560987e-05, + "loss": 0.1413, + "step": 35700 + }, + { + "epoch": 7.83, + "learning_rate": 3.0605460149713786e-05, + "loss": 0.1272, + "step": 35750 + }, + { + "epoch": 7.84, + "learning_rate": 3.05779392338177e-05, + "loss": 0.1461, + "step": 35800 + }, + { + "epoch": 7.85, + "learning_rate": 3.055041831792162e-05, + "loss": 0.124, + "step": 35850 + }, + { + "epoch": 7.86, + "learning_rate": 3.0522897402025544e-05, + "loss": 0.1459, + "step": 35900 + }, + { + "epoch": 7.87, + "learning_rate": 3.049537648612946e-05, + "loss": 0.1232, + "step": 35950 + }, + { + "epoch": 7.88, + "learning_rate": 3.046785557023338e-05, + "loss": 0.1235, + "step": 36000 + }, + { + "epoch": 7.88, + "eval_acc": 0.4006760269889917, + "eval_cer": 0.05912722345305962, + "eval_loss": 0.30008625984191895, + "eval_runtime": 2418.0589, + "eval_samples_per_second": 3.181, + "eval_steps_per_second": 0.398, + "step": 36000 + }, + { + "epoch": 7.89, + "learning_rate": 3.04403346543373e-05, + "loss": 0.1278, + "step": 36050 + }, + { + "epoch": 7.9, + "learning_rate": 3.0412813738441216e-05, + "loss": 0.1375, + "step": 36100 + }, + { + "epoch": 7.92, + "learning_rate": 3.0385292822545137e-05, + "loss": 0.1287, + "step": 36150 + }, + { + "epoch": 7.93, + "learning_rate": 3.0357771906649057e-05, + "loss": 0.1281, + "step": 36200 + }, + { + "epoch": 7.94, + "learning_rate": 3.0330250990752974e-05, + "loss": 0.1262, + "step": 36250 + }, + { + "epoch": 7.95, + "learning_rate": 3.0302730074856895e-05, + "loss": 0.1331, + "step": 36300 + }, + { + "epoch": 7.96, + "learning_rate": 3.027520915896081e-05, + "loss": 0.1328, + "step": 36350 + }, + { + "epoch": 7.97, + "learning_rate": 3.0247688243064733e-05, + "loss": 0.1299, + "step": 36400 + }, + { + "epoch": 7.98, + "learning_rate": 3.0220167327168646e-05, + "loss": 0.1315, + "step": 36450 + }, + { + "epoch": 7.99, + "learning_rate": 3.0192646411272567e-05, + "loss": 0.1359, + "step": 36500 + }, + { + "epoch": 7.99, + "eval_acc": 0.42108684341899544, + "eval_cer": 0.058217144522704684, + "eval_loss": 0.2916148602962494, + "eval_runtime": 2429.407, + "eval_samples_per_second": 3.166, + "eval_steps_per_second": 0.396, + "step": 36500 + }, + { + "epoch": 8.0, + "learning_rate": 3.016512549537649e-05, + "loss": 0.1183, + "step": 36550 + }, + { + "epoch": 8.01, + "learning_rate": 3.0137604579480405e-05, + "loss": 0.0797, + "step": 36600 + }, + { + "epoch": 8.02, + "learning_rate": 3.0110083663584325e-05, + "loss": 0.0818, + "step": 36650 + }, + { + "epoch": 8.04, + "learning_rate": 3.0082562747688242e-05, + "loss": 0.0873, + "step": 36700 + }, + { + "epoch": 8.05, + "learning_rate": 3.0055041831792163e-05, + "loss": 0.0832, + "step": 36750 + }, + { + "epoch": 8.06, + "learning_rate": 3.0027520915896084e-05, + "loss": 0.0891, + "step": 36800 + }, + { + "epoch": 8.07, + "learning_rate": 3e-05, + "loss": 0.0808, + "step": 36850 + }, + { + "epoch": 8.08, + "learning_rate": 2.997247908410392e-05, + "loss": 0.09, + "step": 36900 + }, + { + "epoch": 8.09, + "learning_rate": 2.994495816820784e-05, + "loss": 0.09, + "step": 36950 + }, + { + "epoch": 8.1, + "learning_rate": 2.991743725231176e-05, + "loss": 0.0861, + "step": 37000 + }, + { + "epoch": 8.1, + "eval_acc": 0.42667706702721303, + "eval_cer": 0.05797993485261217, + "eval_loss": 0.2984052300453186, + "eval_runtime": 2430.5266, + "eval_samples_per_second": 3.165, + "eval_steps_per_second": 0.396, + "step": 37000 + }, + { + "epoch": 8.11, + "learning_rate": 2.988991633641568e-05, + "loss": 0.0973, + "step": 37050 + }, + { + "epoch": 8.12, + "learning_rate": 2.9862395420519597e-05, + "loss": 0.0949, + "step": 37100 + }, + { + "epoch": 8.13, + "learning_rate": 2.9834874504623517e-05, + "loss": 0.0886, + "step": 37150 + }, + { + "epoch": 8.14, + "learning_rate": 2.980735358872743e-05, + "loss": 0.0875, + "step": 37200 + }, + { + "epoch": 8.16, + "learning_rate": 2.9779832672831355e-05, + "loss": 0.0888, + "step": 37250 + }, + { + "epoch": 8.17, + "learning_rate": 2.9752311756935276e-05, + "loss": 0.0913, + "step": 37300 + }, + { + "epoch": 8.18, + "learning_rate": 2.972479084103919e-05, + "loss": 0.0912, + "step": 37350 + }, + { + "epoch": 8.19, + "learning_rate": 2.9697269925143113e-05, + "loss": 0.0904, + "step": 37400 + }, + { + "epoch": 8.2, + "learning_rate": 2.9669749009247027e-05, + "loss": 0.0888, + "step": 37450 + }, + { + "epoch": 8.21, + "learning_rate": 2.9642228093350948e-05, + "loss": 0.0857, + "step": 37500 + }, + { + "epoch": 8.21, + "eval_acc": 0.42407696302339093, + "eval_cer": 0.05735047651236669, + "eval_loss": 0.29710087180137634, + "eval_runtime": 2434.9281, + "eval_samples_per_second": 3.159, + "eval_steps_per_second": 0.395, + "step": 37500 + }, + { + "epoch": 8.22, + "learning_rate": 2.9614707177454865e-05, + "loss": 0.0946, + "step": 37550 + }, + { + "epoch": 8.23, + "learning_rate": 2.9587186261558785e-05, + "loss": 0.0943, + "step": 37600 + }, + { + "epoch": 8.24, + "learning_rate": 2.9559665345662706e-05, + "loss": 0.0993, + "step": 37650 + }, + { + "epoch": 8.25, + "learning_rate": 2.9532144429766623e-05, + "loss": 0.0859, + "step": 37700 + }, + { + "epoch": 8.27, + "learning_rate": 2.9504623513870544e-05, + "loss": 0.0987, + "step": 37750 + }, + { + "epoch": 8.28, + "learning_rate": 2.947710259797446e-05, + "loss": 0.0885, + "step": 37800 + }, + { + "epoch": 8.29, + "learning_rate": 2.944958168207838e-05, + "loss": 0.0898, + "step": 37850 + }, + { + "epoch": 8.3, + "learning_rate": 2.9422060766182302e-05, + "loss": 0.1001, + "step": 37900 + }, + { + "epoch": 8.31, + "learning_rate": 2.939453985028622e-05, + "loss": 0.109, + "step": 37950 + }, + { + "epoch": 8.32, + "learning_rate": 2.936701893439014e-05, + "loss": 0.0947, + "step": 38000 + }, + { + "epoch": 8.32, + "eval_acc": 0.4429277170511014, + "eval_cer": 0.05624039727193376, + "eval_loss": 0.2943662106990814, + "eval_runtime": 2422.719, + "eval_samples_per_second": 3.175, + "eval_steps_per_second": 0.397, + "step": 38000 + }, + { + "epoch": 8.33, + "learning_rate": 2.9339498018494053e-05, + "loss": 0.0991, + "step": 38050 + }, + { + "epoch": 8.34, + "learning_rate": 2.9311977102597977e-05, + "loss": 0.0995, + "step": 38100 + }, + { + "epoch": 8.35, + "learning_rate": 2.9284456186701898e-05, + "loss": 0.0896, + "step": 38150 + }, + { + "epoch": 8.36, + "learning_rate": 2.925693527080581e-05, + "loss": 0.0948, + "step": 38200 + }, + { + "epoch": 8.37, + "learning_rate": 2.9229414354909736e-05, + "loss": 0.105, + "step": 38250 + }, + { + "epoch": 8.39, + "learning_rate": 2.920189343901365e-05, + "loss": 0.0986, + "step": 38300 + }, + { + "epoch": 8.4, + "learning_rate": 2.917437252311757e-05, + "loss": 0.1048, + "step": 38350 + }, + { + "epoch": 8.41, + "learning_rate": 2.9146851607221487e-05, + "loss": 0.1033, + "step": 38400 + }, + { + "epoch": 8.42, + "learning_rate": 2.9119330691325408e-05, + "loss": 0.0923, + "step": 38450 + }, + { + "epoch": 8.43, + "learning_rate": 2.9091809775429328e-05, + "loss": 0.0946, + "step": 38500 + }, + { + "epoch": 8.43, + "eval_acc": 0.45059802386237674, + "eval_cer": 0.05642644407200631, + "eval_loss": 0.29671648144721985, + "eval_runtime": 2404.4375, + "eval_samples_per_second": 3.199, + "eval_steps_per_second": 0.4, + "step": 38500 + }, + { + "epoch": 8.44, + "learning_rate": 2.9064288859533245e-05, + "loss": 0.1035, + "step": 38550 + }, + { + "epoch": 8.45, + "learning_rate": 2.9036767943637166e-05, + "loss": 0.1006, + "step": 38600 + }, + { + "epoch": 8.46, + "learning_rate": 2.9009247027741083e-05, + "loss": 0.1006, + "step": 38650 + }, + { + "epoch": 8.47, + "learning_rate": 2.8981726111845004e-05, + "loss": 0.0997, + "step": 38700 + }, + { + "epoch": 8.48, + "learning_rate": 2.8954205195948924e-05, + "loss": 0.0979, + "step": 38750 + }, + { + "epoch": 8.5, + "learning_rate": 2.892668428005284e-05, + "loss": 0.0924, + "step": 38800 + }, + { + "epoch": 8.51, + "learning_rate": 2.8899163364156762e-05, + "loss": 0.0983, + "step": 38850 + }, + { + "epoch": 8.52, + "learning_rate": 2.8871642448260676e-05, + "loss": 0.0963, + "step": 38900 + }, + { + "epoch": 8.53, + "learning_rate": 2.88441215323646e-05, + "loss": 0.1025, + "step": 38950 + }, + { + "epoch": 8.54, + "learning_rate": 2.881660061646852e-05, + "loss": 0.0925, + "step": 39000 + }, + { + "epoch": 8.54, + "eval_acc": 0.4494279770606568, + "eval_cer": 0.0552233414315371, + "eval_loss": 0.2928614020347595, + "eval_runtime": 2430.8041, + "eval_samples_per_second": 3.164, + "eval_steps_per_second": 0.396, + "step": 39000 + }, + { + "epoch": 8.55, + "learning_rate": 2.8789079700572434e-05, + "loss": 0.0912, + "step": 39050 + }, + { + "epoch": 8.56, + "learning_rate": 2.8761558784676358e-05, + "loss": 0.0937, + "step": 39100 + }, + { + "epoch": 8.57, + "learning_rate": 2.8734037868780272e-05, + "loss": 0.0989, + "step": 39150 + }, + { + "epoch": 8.58, + "learning_rate": 2.8706516952884192e-05, + "loss": 0.0957, + "step": 39200 + }, + { + "epoch": 8.59, + "learning_rate": 2.8678996036988116e-05, + "loss": 0.0942, + "step": 39250 + }, + { + "epoch": 8.6, + "learning_rate": 2.865147512109203e-05, + "loss": 0.0929, + "step": 39300 + }, + { + "epoch": 8.62, + "learning_rate": 2.862395420519595e-05, + "loss": 0.111, + "step": 39350 + }, + { + "epoch": 8.63, + "learning_rate": 2.8596433289299868e-05, + "loss": 0.0936, + "step": 39400 + }, + { + "epoch": 8.64, + "learning_rate": 2.8568912373403788e-05, + "loss": 0.0991, + "step": 39450 + }, + { + "epoch": 8.65, + "learning_rate": 2.8541391457507705e-05, + "loss": 0.1092, + "step": 39500 + }, + { + "epoch": 8.65, + "eval_acc": 0.46203848147919413, + "eval_cer": 0.05521713987153468, + "eval_loss": 0.2892671227455139, + "eval_runtime": 2443.2305, + "eval_samples_per_second": 3.148, + "eval_steps_per_second": 0.394, + "step": 39500 + }, + { + "epoch": 8.66, + "learning_rate": 2.851442095992955e-05, + "loss": 0.0868, + "step": 39550 + }, + { + "epoch": 8.67, + "learning_rate": 2.8486900044033465e-05, + "loss": 0.099, + "step": 39600 + }, + { + "epoch": 8.68, + "learning_rate": 2.8459379128137386e-05, + "loss": 0.095, + "step": 39650 + }, + { + "epoch": 8.69, + "learning_rate": 2.8431858212241303e-05, + "loss": 0.0928, + "step": 39700 + }, + { + "epoch": 8.7, + "learning_rate": 2.8404337296345223e-05, + "loss": 0.0954, + "step": 39750 + }, + { + "epoch": 8.71, + "learning_rate": 2.8376816380449144e-05, + "loss": 0.0973, + "step": 39800 + }, + { + "epoch": 8.73, + "learning_rate": 2.834929546455306e-05, + "loss": 0.0921, + "step": 39850 + }, + { + "epoch": 8.74, + "learning_rate": 2.832177454865698e-05, + "loss": 0.101, + "step": 39900 + }, + { + "epoch": 8.75, + "learning_rate": 2.82942536327609e-05, + "loss": 0.1072, + "step": 39950 + }, + { + "epoch": 8.76, + "learning_rate": 2.826673271686482e-05, + "loss": 0.0945, + "step": 40000 + }, + { + "epoch": 8.76, + "eval_acc": 0.4663286530855007, + "eval_cer": 0.054930317721422824, + "eval_loss": 0.2871682941913605, + "eval_runtime": 2419.7382, + "eval_samples_per_second": 3.179, + "eval_steps_per_second": 0.398, + "step": 40000 + }, + { + "epoch": 8.77, + "learning_rate": 2.823921180096874e-05, + "loss": 0.1034, + "step": 40050 + }, + { + "epoch": 8.78, + "learning_rate": 2.8211690885072657e-05, + "loss": 0.0949, + "step": 40100 + }, + { + "epoch": 8.79, + "learning_rate": 2.8184169969176578e-05, + "loss": 0.1056, + "step": 40150 + }, + { + "epoch": 8.8, + "learning_rate": 2.815664905328049e-05, + "loss": 0.1044, + "step": 40200 + }, + { + "epoch": 8.81, + "learning_rate": 2.8129128137384415e-05, + "loss": 0.0857, + "step": 40250 + }, + { + "epoch": 8.82, + "learning_rate": 2.8101607221488336e-05, + "loss": 0.105, + "step": 40300 + }, + { + "epoch": 8.83, + "learning_rate": 2.807408630559225e-05, + "loss": 0.1038, + "step": 40350 + }, + { + "epoch": 8.85, + "learning_rate": 2.8046565389696174e-05, + "loss": 0.11, + "step": 40400 + }, + { + "epoch": 8.86, + "learning_rate": 2.8019044473800087e-05, + "loss": 0.098, + "step": 40450 + }, + { + "epoch": 8.87, + "learning_rate": 2.7991523557904008e-05, + "loss": 0.1001, + "step": 40500 + }, + { + "epoch": 8.87, + "eval_acc": 0.4759490378996426, + "eval_cer": 0.053654346750925196, + "eval_loss": 0.28374138474464417, + "eval_runtime": 2405.4491, + "eval_samples_per_second": 3.198, + "eval_steps_per_second": 0.4, + "step": 40500 + }, + { + "epoch": 8.88, + "learning_rate": 2.7964002642007925e-05, + "loss": 0.0927, + "step": 40550 + }, + { + "epoch": 8.89, + "learning_rate": 2.7936481726111846e-05, + "loss": 0.0938, + "step": 40600 + }, + { + "epoch": 8.9, + "learning_rate": 2.7908960810215766e-05, + "loss": 0.1148, + "step": 40650 + }, + { + "epoch": 8.91, + "learning_rate": 2.7881439894319683e-05, + "loss": 0.1037, + "step": 40700 + }, + { + "epoch": 8.92, + "learning_rate": 2.7853918978423604e-05, + "loss": 0.0882, + "step": 40750 + }, + { + "epoch": 8.93, + "learning_rate": 2.782639806252752e-05, + "loss": 0.0999, + "step": 40800 + }, + { + "epoch": 8.94, + "learning_rate": 2.7798877146631442e-05, + "loss": 0.1035, + "step": 40850 + }, + { + "epoch": 8.96, + "learning_rate": 2.7771356230735362e-05, + "loss": 0.0908, + "step": 40900 + }, + { + "epoch": 8.97, + "learning_rate": 2.774383531483928e-05, + "loss": 0.1049, + "step": 40950 + }, + { + "epoch": 8.98, + "learning_rate": 2.77163143989432e-05, + "loss": 0.101, + "step": 41000 + }, + { + "epoch": 8.98, + "eval_acc": 0.49232969312372205, + "eval_cer": 0.05347605190085566, + "eval_loss": 0.2792474329471588, + "eval_runtime": 2433.842, + "eval_samples_per_second": 3.16, + "eval_steps_per_second": 0.395, + "step": 41000 + }, + { + "epoch": 8.99, + "learning_rate": 2.7688793483047114e-05, + "loss": 0.0845, + "step": 41050 + }, + { + "epoch": 9.0, + "learning_rate": 2.7661272567151038e-05, + "loss": 0.0994, + "step": 41100 + }, + { + "epoch": 9.01, + "learning_rate": 2.7633751651254958e-05, + "loss": 0.0719, + "step": 41150 + }, + { + "epoch": 9.02, + "learning_rate": 2.7606230735358872e-05, + "loss": 0.0663, + "step": 41200 + }, + { + "epoch": 9.03, + "learning_rate": 2.7578709819462796e-05, + "loss": 0.0644, + "step": 41250 + }, + { + "epoch": 9.04, + "learning_rate": 2.755118890356671e-05, + "loss": 0.0698, + "step": 41300 + }, + { + "epoch": 9.05, + "learning_rate": 2.752366798767063e-05, + "loss": 0.0657, + "step": 41350 + }, + { + "epoch": 9.06, + "learning_rate": 2.7496147071774547e-05, + "loss": 0.0686, + "step": 41400 + }, + { + "epoch": 9.08, + "learning_rate": 2.7468626155878468e-05, + "loss": 0.0643, + "step": 41450 + }, + { + "epoch": 9.09, + "learning_rate": 2.744110523998239e-05, + "loss": 0.072, + "step": 41500 + }, + { + "epoch": 9.09, + "eval_acc": 0.4877795111170333, + "eval_cer": 0.05345899761084901, + "eval_loss": 0.2827744781970978, + "eval_runtime": 2418.7702, + "eval_samples_per_second": 3.18, + "eval_steps_per_second": 0.398, + "step": 41500 + }, + { + "epoch": 9.1, + "learning_rate": 2.7413584324086306e-05, + "loss": 0.073, + "step": 41550 + }, + { + "epoch": 9.11, + "learning_rate": 2.7386063408190226e-05, + "loss": 0.064, + "step": 41600 + }, + { + "epoch": 9.12, + "learning_rate": 2.7358542492294143e-05, + "loss": 0.0693, + "step": 41650 + }, + { + "epoch": 9.13, + "learning_rate": 2.7331021576398064e-05, + "loss": 0.0669, + "step": 41700 + }, + { + "epoch": 9.14, + "learning_rate": 2.7304051078819903e-05, + "loss": 0.0641, + "step": 41750 + }, + { + "epoch": 9.15, + "learning_rate": 2.7276530162923824e-05, + "loss": 0.0681, + "step": 41800 + }, + { + "epoch": 9.16, + "learning_rate": 2.724900924702774e-05, + "loss": 0.0629, + "step": 41850 + }, + { + "epoch": 9.17, + "learning_rate": 2.722148833113166e-05, + "loss": 0.0694, + "step": 41900 + }, + { + "epoch": 9.18, + "learning_rate": 2.7193967415235582e-05, + "loss": 0.0798, + "step": 41950 + }, + { + "epoch": 9.2, + "learning_rate": 2.71664464993395e-05, + "loss": 0.0729, + "step": 42000 + }, + { + "epoch": 9.2, + "eval_acc": 0.48452938111225563, + "eval_cer": 0.05131170746001157, + "eval_loss": 0.28362253308296204, + "eval_runtime": 2420.0313, + "eval_samples_per_second": 3.178, + "eval_steps_per_second": 0.398, + "step": 42000 + }, + { + "epoch": 9.21, + "learning_rate": 2.713892558344342e-05, + "loss": 0.0652, + "step": 42050 + }, + { + "epoch": 9.22, + "learning_rate": 2.7111404667547337e-05, + "loss": 0.0757, + "step": 42100 + }, + { + "epoch": 9.23, + "learning_rate": 2.7083883751651257e-05, + "loss": 0.063, + "step": 42150 + }, + { + "epoch": 9.24, + "learning_rate": 2.7056362835755178e-05, + "loss": 0.0695, + "step": 42200 + }, + { + "epoch": 9.25, + "learning_rate": 2.7028841919859095e-05, + "loss": 0.0743, + "step": 42250 + }, + { + "epoch": 9.26, + "learning_rate": 2.7001321003963016e-05, + "loss": 0.0705, + "step": 42300 + }, + { + "epoch": 9.27, + "learning_rate": 2.697380008806693e-05, + "loss": 0.0773, + "step": 42350 + }, + { + "epoch": 9.28, + "learning_rate": 2.6946279172170853e-05, + "loss": 0.0663, + "step": 42400 + }, + { + "epoch": 9.29, + "learning_rate": 2.6918758256274767e-05, + "loss": 0.0712, + "step": 42450 + }, + { + "epoch": 9.31, + "learning_rate": 2.6891237340378688e-05, + "loss": 0.0681, + "step": 42500 + }, + { + "epoch": 9.31, + "eval_acc": 0.48998959952028215, + "eval_cer": 0.05191790995024798, + "eval_loss": 0.2831544876098633, + "eval_runtime": 2415.2607, + "eval_samples_per_second": 3.185, + "eval_steps_per_second": 0.398, + "step": 42500 + }, + { + "epoch": 9.32, + "learning_rate": 2.686371642448261e-05, + "loss": 0.0778, + "step": 42550 + }, + { + "epoch": 9.33, + "learning_rate": 2.6836195508586525e-05, + "loss": 0.0703, + "step": 42600 + }, + { + "epoch": 9.34, + "learning_rate": 2.6808674592690446e-05, + "loss": 0.0618, + "step": 42650 + }, + { + "epoch": 9.35, + "learning_rate": 2.6781153676794363e-05, + "loss": 0.0721, + "step": 42700 + }, + { + "epoch": 9.36, + "learning_rate": 2.6753632760898284e-05, + "loss": 0.0661, + "step": 42750 + }, + { + "epoch": 9.37, + "learning_rate": 2.6726111845002204e-05, + "loss": 0.0746, + "step": 42800 + }, + { + "epoch": 9.38, + "learning_rate": 2.669859092910612e-05, + "loss": 0.0652, + "step": 42850 + }, + { + "epoch": 9.39, + "learning_rate": 2.6671070013210042e-05, + "loss": 0.063, + "step": 42900 + }, + { + "epoch": 9.4, + "learning_rate": 2.664354909731396e-05, + "loss": 0.0797, + "step": 42950 + }, + { + "epoch": 9.41, + "learning_rate": 2.661602818141788e-05, + "loss": 0.0691, + "step": 43000 + }, + { + "epoch": 9.41, + "eval_acc": 0.4944097763267798, + "eval_cer": 0.05157682415011496, + "eval_loss": 0.28549954295158386, + "eval_runtime": 2402.7984, + "eval_samples_per_second": 3.201, + "eval_steps_per_second": 0.4, + "step": 43000 + }, + { + "epoch": 9.43, + "learning_rate": 2.65885072655218e-05, + "loss": 0.0733, + "step": 43050 + }, + { + "epoch": 9.44, + "learning_rate": 2.6560986349625717e-05, + "loss": 0.0792, + "step": 43100 + }, + { + "epoch": 9.45, + "learning_rate": 2.6533465433729638e-05, + "loss": 0.0673, + "step": 43150 + }, + { + "epoch": 9.46, + "learning_rate": 2.6505944517833552e-05, + "loss": 0.0756, + "step": 43200 + }, + { + "epoch": 9.47, + "learning_rate": 2.6478423601937476e-05, + "loss": 0.0727, + "step": 43250 + }, + { + "epoch": 9.48, + "learning_rate": 2.645090268604139e-05, + "loss": 0.0759, + "step": 43300 + }, + { + "epoch": 9.49, + "learning_rate": 2.642338177014531e-05, + "loss": 0.0636, + "step": 43350 + }, + { + "epoch": 9.5, + "learning_rate": 2.6395860854249234e-05, + "loss": 0.0728, + "step": 43400 + }, + { + "epoch": 9.51, + "learning_rate": 2.6368339938353148e-05, + "loss": 0.075, + "step": 43450 + }, + { + "epoch": 9.52, + "learning_rate": 2.634081902245707e-05, + "loss": 0.0716, + "step": 43500 + }, + { + "epoch": 9.52, + "eval_acc": 0.5058502339435972, + "eval_cer": 0.0509907767298864, + "eval_loss": 0.2799459993839264, + "eval_runtime": 2413.6447, + "eval_samples_per_second": 3.187, + "eval_steps_per_second": 0.399, + "step": 43500 + }, + { + "epoch": 9.54, + "learning_rate": 2.6313298106560986e-05, + "loss": 0.0671, + "step": 43550 + }, + { + "epoch": 9.55, + "learning_rate": 2.6285777190664906e-05, + "loss": 0.0664, + "step": 43600 + }, + { + "epoch": 9.56, + "learning_rate": 2.6258256274768827e-05, + "loss": 0.0721, + "step": 43650 + }, + { + "epoch": 9.57, + "learning_rate": 2.6230735358872744e-05, + "loss": 0.0649, + "step": 43700 + }, + { + "epoch": 9.58, + "learning_rate": 2.6203214442976664e-05, + "loss": 0.0679, + "step": 43750 + }, + { + "epoch": 9.59, + "learning_rate": 2.617569352708058e-05, + "loss": 0.0704, + "step": 43800 + }, + { + "epoch": 9.6, + "learning_rate": 2.6148172611184502e-05, + "loss": 0.0746, + "step": 43850 + }, + { + "epoch": 9.61, + "learning_rate": 2.612120211360634e-05, + "loss": 0.0787, + "step": 43900 + }, + { + "epoch": 9.62, + "learning_rate": 2.6093681197710262e-05, + "loss": 0.076, + "step": 43950 + }, + { + "epoch": 9.63, + "learning_rate": 2.606616028181418e-05, + "loss": 0.0805, + "step": 44000 + }, + { + "epoch": 9.63, + "eval_acc": 0.5122204887529614, + "eval_cer": 0.051007831019893056, + "eval_loss": 0.28180253505706787, + "eval_runtime": 2416.357, + "eval_samples_per_second": 3.183, + "eval_steps_per_second": 0.398, + "step": 44000 + }, + { + "epoch": 9.64, + "learning_rate": 2.60386393659181e-05, + "loss": 0.068, + "step": 44050 + }, + { + "epoch": 9.66, + "learning_rate": 2.601111845002202e-05, + "loss": 0.0765, + "step": 44100 + }, + { + "epoch": 9.67, + "learning_rate": 2.5983597534125937e-05, + "loss": 0.0748, + "step": 44150 + }, + { + "epoch": 9.68, + "learning_rate": 2.5956076618229858e-05, + "loss": 0.0749, + "step": 44200 + }, + { + "epoch": 9.69, + "learning_rate": 2.5928555702333775e-05, + "loss": 0.0757, + "step": 44250 + }, + { + "epoch": 9.7, + "learning_rate": 2.5901034786437695e-05, + "loss": 0.0761, + "step": 44300 + }, + { + "epoch": 9.71, + "learning_rate": 2.587351387054161e-05, + "loss": 0.0698, + "step": 44350 + }, + { + "epoch": 9.72, + "learning_rate": 2.5845992954645533e-05, + "loss": 0.0716, + "step": 44400 + }, + { + "epoch": 9.73, + "learning_rate": 2.5818472038749454e-05, + "loss": 0.0719, + "step": 44450 + }, + { + "epoch": 9.74, + "learning_rate": 2.5790951122853367e-05, + "loss": 0.0773, + "step": 44500 + }, + { + "epoch": 9.74, + "eval_acc": 0.5215808631667211, + "eval_cer": 0.04890550217907315, + "eval_loss": 0.2750154733657837, + "eval_runtime": 2420.5632, + "eval_samples_per_second": 3.178, + "eval_steps_per_second": 0.397, + "step": 44500 + }, + { + "epoch": 9.75, + "learning_rate": 2.5763430206957288e-05, + "loss": 0.0713, + "step": 44550 + }, + { + "epoch": 9.77, + "learning_rate": 2.5735909291061205e-05, + "loss": 0.0703, + "step": 44600 + }, + { + "epoch": 9.78, + "learning_rate": 2.5708388375165126e-05, + "loss": 0.0701, + "step": 44650 + }, + { + "epoch": 9.79, + "learning_rate": 2.5680867459269046e-05, + "loss": 0.074, + "step": 44700 + }, + { + "epoch": 9.8, + "learning_rate": 2.5653346543372963e-05, + "loss": 0.071, + "step": 44750 + }, + { + "epoch": 9.81, + "learning_rate": 2.5625825627476884e-05, + "loss": 0.0702, + "step": 44800 + }, + { + "epoch": 9.82, + "learning_rate": 2.55983047115808e-05, + "loss": 0.074, + "step": 44850 + }, + { + "epoch": 9.83, + "learning_rate": 2.5570783795684722e-05, + "loss": 0.0752, + "step": 44900 + }, + { + "epoch": 9.84, + "learning_rate": 2.5543262879788642e-05, + "loss": 0.0796, + "step": 44950 + }, + { + "epoch": 9.85, + "learning_rate": 2.551574196389256e-05, + "loss": 0.0754, + "step": 45000 + }, + { + "epoch": 9.85, + "eval_acc": 0.5257410295728365, + "eval_cer": 0.04867759484898426, + "eval_loss": 0.27724218368530273, + "eval_runtime": 2426.3329, + "eval_samples_per_second": 3.17, + "eval_steps_per_second": 0.396, + "step": 45000 + }, + { + "epoch": 9.86, + "learning_rate": 2.548822104799648e-05, + "loss": 0.0691, + "step": 45050 + }, + { + "epoch": 9.87, + "learning_rate": 2.5460700132100397e-05, + "loss": 0.0832, + "step": 45100 + }, + { + "epoch": 9.89, + "learning_rate": 2.5433179216204318e-05, + "loss": 0.0752, + "step": 45150 + }, + { + "epoch": 9.9, + "learning_rate": 2.540565830030824e-05, + "loss": 0.0717, + "step": 45200 + }, + { + "epoch": 9.91, + "learning_rate": 2.5378137384412155e-05, + "loss": 0.074, + "step": 45250 + }, + { + "epoch": 9.92, + "learning_rate": 2.5350616468516076e-05, + "loss": 0.0846, + "step": 45300 + }, + { + "epoch": 9.93, + "learning_rate": 2.532309555261999e-05, + "loss": 0.0768, + "step": 45350 + }, + { + "epoch": 9.94, + "learning_rate": 2.5295574636723914e-05, + "loss": 0.0718, + "step": 45400 + }, + { + "epoch": 9.95, + "learning_rate": 2.5268053720827828e-05, + "loss": 0.0704, + "step": 45450 + }, + { + "epoch": 9.96, + "learning_rate": 2.5240532804931748e-05, + "loss": 0.0681, + "step": 45500 + }, + { + "epoch": 9.96, + "eval_acc": 0.530031201179143, + "eval_cer": 0.04874581200901087, + "eval_loss": 0.2730477452278137, + "eval_runtime": 2408.063, + "eval_samples_per_second": 3.194, + "eval_steps_per_second": 0.399, + "step": 45500 + }, + { + "epoch": 9.97, + "learning_rate": 2.5213011889035672e-05, + "loss": 0.0724, + "step": 45550 + }, + { + "epoch": 9.98, + "learning_rate": 2.5185490973139586e-05, + "loss": 0.0707, + "step": 45600 + }, + { + "epoch": 10.0, + "learning_rate": 2.5157970057243506e-05, + "loss": 0.0693, + "step": 45650 + }, + { + "epoch": 10.01, + "learning_rate": 2.5130449141347424e-05, + "loss": 0.0596, + "step": 45700 + }, + { + "epoch": 10.02, + "learning_rate": 2.5102928225451344e-05, + "loss": 0.0487, + "step": 45750 + }, + { + "epoch": 10.03, + "learning_rate": 2.5075407309555265e-05, + "loss": 0.0455, + "step": 45800 + }, + { + "epoch": 10.04, + "learning_rate": 2.5047886393659182e-05, + "loss": 0.0541, + "step": 45850 + }, + { + "epoch": 10.05, + "learning_rate": 2.5020365477763102e-05, + "loss": 0.0526, + "step": 45900 + }, + { + "epoch": 10.06, + "learning_rate": 2.499284456186702e-05, + "loss": 0.0524, + "step": 45950 + }, + { + "epoch": 10.07, + "learning_rate": 2.496532364597094e-05, + "loss": 0.0469, + "step": 46000 + }, + { + "epoch": 10.07, + "eval_acc": 0.537181487189654, + "eval_cer": 0.0483907726988724, + "eval_loss": 0.2742944061756134, + "eval_runtime": 2441.5786, + "eval_samples_per_second": 3.15, + "eval_steps_per_second": 0.394, + "step": 46000 + }, + { + "epoch": 10.08, + "learning_rate": 2.493835314839278e-05, + "loss": 0.0515, + "step": 46050 + }, + { + "epoch": 10.09, + "learning_rate": 2.4910832232496696e-05, + "loss": 0.0491, + "step": 46100 + }, + { + "epoch": 10.1, + "learning_rate": 2.488331131660062e-05, + "loss": 0.0444, + "step": 46150 + }, + { + "epoch": 10.12, + "learning_rate": 2.4855790400704537e-05, + "loss": 0.0501, + "step": 46200 + }, + { + "epoch": 10.13, + "learning_rate": 2.4828269484808455e-05, + "loss": 0.0479, + "step": 46250 + }, + { + "epoch": 10.14, + "learning_rate": 2.4800748568912375e-05, + "loss": 0.0488, + "step": 46300 + }, + { + "epoch": 10.15, + "learning_rate": 2.4773227653016292e-05, + "loss": 0.0472, + "step": 46350 + }, + { + "epoch": 10.16, + "learning_rate": 2.4745706737120213e-05, + "loss": 0.0534, + "step": 46400 + }, + { + "epoch": 10.17, + "learning_rate": 2.4718185821224133e-05, + "loss": 0.0521, + "step": 46450 + }, + { + "epoch": 10.18, + "learning_rate": 2.469066490532805e-05, + "loss": 0.0475, + "step": 46500 + }, + { + "epoch": 10.18, + "eval_acc": 0.5322412895823919, + "eval_cer": 0.047517903128531984, + "eval_loss": 0.2764817178249359, + "eval_runtime": 2427.6992, + "eval_samples_per_second": 3.168, + "eval_steps_per_second": 0.396, + "step": 46500 + }, + { + "epoch": 10.19, + "learning_rate": 2.4663143989431968e-05, + "loss": 0.0658, + "step": 46550 + }, + { + "epoch": 10.2, + "learning_rate": 2.463562307353589e-05, + "loss": 0.0578, + "step": 46600 + }, + { + "epoch": 10.21, + "learning_rate": 2.4608102157639806e-05, + "loss": 0.0495, + "step": 46650 + }, + { + "epoch": 10.22, + "learning_rate": 2.4580581241743726e-05, + "loss": 0.0526, + "step": 46700 + }, + { + "epoch": 10.24, + "learning_rate": 2.4553060325847647e-05, + "loss": 0.0475, + "step": 46750 + }, + { + "epoch": 10.25, + "learning_rate": 2.4525539409951564e-05, + "loss": 0.0586, + "step": 46800 + }, + { + "epoch": 10.26, + "learning_rate": 2.4498018494055484e-05, + "loss": 0.0507, + "step": 46850 + }, + { + "epoch": 10.27, + "learning_rate": 2.44704975781594e-05, + "loss": 0.0495, + "step": 46900 + }, + { + "epoch": 10.28, + "learning_rate": 2.444297666226332e-05, + "loss": 0.0481, + "step": 46950 + }, + { + "epoch": 10.29, + "learning_rate": 2.4415455746367243e-05, + "loss": 0.0586, + "step": 47000 + }, + { + "epoch": 10.29, + "eval_acc": 0.5439417575995915, + "eval_cer": 0.04793495803869463, + "eval_loss": 0.2732994556427002, + "eval_runtime": 2419.184, + "eval_samples_per_second": 3.18, + "eval_steps_per_second": 0.398, + "step": 47000 + }, + { + "epoch": 10.3, + "learning_rate": 2.438793483047116e-05, + "loss": 0.0487, + "step": 47050 + }, + { + "epoch": 10.31, + "learning_rate": 2.4360413914575077e-05, + "loss": 0.0552, + "step": 47100 + }, + { + "epoch": 10.32, + "learning_rate": 2.4332892998678997e-05, + "loss": 0.051, + "step": 47150 + }, + { + "epoch": 10.33, + "learning_rate": 2.430592250110084e-05, + "loss": 0.0518, + "step": 47200 + }, + { + "epoch": 10.35, + "learning_rate": 2.4278401585204757e-05, + "loss": 0.0478, + "step": 47250 + }, + { + "epoch": 10.36, + "learning_rate": 2.4250880669308674e-05, + "loss": 0.0518, + "step": 47300 + }, + { + "epoch": 10.37, + "learning_rate": 2.4223359753412595e-05, + "loss": 0.0559, + "step": 47350 + }, + { + "epoch": 10.38, + "learning_rate": 2.4195838837516512e-05, + "loss": 0.0503, + "step": 47400 + }, + { + "epoch": 10.39, + "learning_rate": 2.4168317921620433e-05, + "loss": 0.0512, + "step": 47450 + }, + { + "epoch": 10.4, + "learning_rate": 2.4140797005724353e-05, + "loss": 0.0592, + "step": 47500 + }, + { + "epoch": 10.4, + "eval_acc": 0.5471918876043692, + "eval_cer": 0.04605433496796119, + "eval_loss": 0.2718336880207062, + "eval_runtime": 2441.2739, + "eval_samples_per_second": 3.151, + "eval_steps_per_second": 0.394, + "step": 47500 + }, + { + "epoch": 10.41, + "learning_rate": 2.411327608982827e-05, + "loss": 0.0501, + "step": 47550 + }, + { + "epoch": 10.42, + "learning_rate": 2.408575517393219e-05, + "loss": 0.058, + "step": 47600 + }, + { + "epoch": 10.43, + "learning_rate": 2.4058234258036108e-05, + "loss": 0.0536, + "step": 47650 + }, + { + "epoch": 10.44, + "learning_rate": 2.4030713342140025e-05, + "loss": 0.0479, + "step": 47700 + }, + { + "epoch": 10.45, + "learning_rate": 2.400319242624395e-05, + "loss": 0.0519, + "step": 47750 + }, + { + "epoch": 10.47, + "learning_rate": 2.3975671510347866e-05, + "loss": 0.0589, + "step": 47800 + }, + { + "epoch": 10.48, + "learning_rate": 2.3948150594451783e-05, + "loss": 0.0509, + "step": 47850 + }, + { + "epoch": 10.49, + "learning_rate": 2.3920629678555704e-05, + "loss": 0.0592, + "step": 47900 + }, + { + "epoch": 10.5, + "learning_rate": 2.389310876265962e-05, + "loss": 0.0604, + "step": 47950 + }, + { + "epoch": 10.51, + "learning_rate": 2.3865587846763542e-05, + "loss": 0.0507, + "step": 48000 + }, + { + "epoch": 10.51, + "eval_acc": 0.5499219968083825, + "eval_cer": 0.045651233567803984, + "eval_loss": 0.27467742562294006, + "eval_runtime": 2421.7457, + "eval_samples_per_second": 3.176, + "eval_steps_per_second": 0.397, + "step": 48000 + }, + { + "epoch": 10.52, + "learning_rate": 2.3838066930867462e-05, + "loss": 0.0564, + "step": 48050 + }, + { + "epoch": 10.53, + "learning_rate": 2.381054601497138e-05, + "loss": 0.0573, + "step": 48100 + }, + { + "epoch": 10.54, + "learning_rate": 2.37830250990753e-05, + "loss": 0.0565, + "step": 48150 + }, + { + "epoch": 10.55, + "learning_rate": 2.3755504183179217e-05, + "loss": 0.0569, + "step": 48200 + }, + { + "epoch": 10.56, + "learning_rate": 2.3727983267283134e-05, + "loss": 0.0567, + "step": 48250 + }, + { + "epoch": 10.58, + "learning_rate": 2.370046235138706e-05, + "loss": 0.0536, + "step": 48300 + }, + { + "epoch": 10.59, + "learning_rate": 2.3672941435490975e-05, + "loss": 0.0527, + "step": 48350 + }, + { + "epoch": 10.6, + "learning_rate": 2.3645420519594893e-05, + "loss": 0.0538, + "step": 48400 + }, + { + "epoch": 10.61, + "learning_rate": 2.3617899603698813e-05, + "loss": 0.0557, + "step": 48450 + }, + { + "epoch": 10.62, + "learning_rate": 2.359037868780273e-05, + "loss": 0.0506, + "step": 48500 + }, + { + "epoch": 10.62, + "eval_acc": 0.5536921476139246, + "eval_cer": 0.04698301857832338, + "eval_loss": 0.27325424551963806, + "eval_runtime": 2412.269, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 48500 + }, + { + "epoch": 10.63, + "learning_rate": 2.3562857771906648e-05, + "loss": 0.0534, + "step": 48550 + }, + { + "epoch": 10.64, + "learning_rate": 2.353533685601057e-05, + "loss": 0.056, + "step": 48600 + }, + { + "epoch": 10.65, + "learning_rate": 2.350781594011449e-05, + "loss": 0.0527, + "step": 48650 + }, + { + "epoch": 10.66, + "learning_rate": 2.3480295024218406e-05, + "loss": 0.0519, + "step": 48700 + }, + { + "epoch": 10.67, + "learning_rate": 2.3452774108322326e-05, + "loss": 0.0509, + "step": 48750 + }, + { + "epoch": 10.68, + "learning_rate": 2.3425253192426244e-05, + "loss": 0.0568, + "step": 48800 + }, + { + "epoch": 10.7, + "learning_rate": 2.3397732276530164e-05, + "loss": 0.0508, + "step": 48850 + }, + { + "epoch": 10.71, + "learning_rate": 2.3370211360634085e-05, + "loss": 0.0524, + "step": 48900 + }, + { + "epoch": 10.72, + "learning_rate": 2.3342690444738002e-05, + "loss": 0.0527, + "step": 48950 + }, + { + "epoch": 10.73, + "learning_rate": 2.3315169528841922e-05, + "loss": 0.0537, + "step": 49000 + }, + { + "epoch": 10.73, + "eval_acc": 0.5526521060123957, + "eval_cer": 0.045635729667797935, + "eval_loss": 0.27340859174728394, + "eval_runtime": 2413.049, + "eval_samples_per_second": 3.188, + "eval_steps_per_second": 0.399, + "step": 49000 + }, + { + "epoch": 10.74, + "learning_rate": 2.328764861294584e-05, + "loss": 0.0585, + "step": 49050 + }, + { + "epoch": 10.75, + "learning_rate": 2.3260127697049757e-05, + "loss": 0.0502, + "step": 49100 + }, + { + "epoch": 10.76, + "learning_rate": 2.323260678115368e-05, + "loss": 0.0572, + "step": 49150 + }, + { + "epoch": 10.77, + "learning_rate": 2.3205085865257598e-05, + "loss": 0.0575, + "step": 49200 + }, + { + "epoch": 10.78, + "learning_rate": 2.3177564949361515e-05, + "loss": 0.0495, + "step": 49250 + }, + { + "epoch": 10.79, + "learning_rate": 2.3150044033465436e-05, + "loss": 0.0614, + "step": 49300 + }, + { + "epoch": 10.81, + "learning_rate": 2.3122523117569353e-05, + "loss": 0.0519, + "step": 49350 + }, + { + "epoch": 10.82, + "learning_rate": 2.309500220167327e-05, + "loss": 0.0605, + "step": 49400 + }, + { + "epoch": 10.83, + "learning_rate": 2.3067481285777194e-05, + "loss": 0.051, + "step": 49450 + }, + { + "epoch": 10.84, + "learning_rate": 2.303996036988111e-05, + "loss": 0.053, + "step": 49500 + }, + { + "epoch": 10.84, + "eval_acc": 0.5647425896301687, + "eval_cer": 0.04586363699788682, + "eval_loss": 0.27086734771728516, + "eval_runtime": 2433.5124, + "eval_samples_per_second": 3.161, + "eval_steps_per_second": 0.395, + "step": 49500 + }, + { + "epoch": 10.85, + "learning_rate": 2.3012439453985028e-05, + "loss": 0.0509, + "step": 49550 + }, + { + "epoch": 10.86, + "learning_rate": 2.298491853808895e-05, + "loss": 0.0547, + "step": 49600 + }, + { + "epoch": 10.87, + "learning_rate": 2.2957397622192866e-05, + "loss": 0.0511, + "step": 49650 + }, + { + "epoch": 10.88, + "learning_rate": 2.2929876706296786e-05, + "loss": 0.0539, + "step": 49700 + }, + { + "epoch": 10.89, + "learning_rate": 2.2902355790400707e-05, + "loss": 0.0499, + "step": 49750 + }, + { + "epoch": 10.9, + "learning_rate": 2.2874834874504624e-05, + "loss": 0.0509, + "step": 49800 + }, + { + "epoch": 10.91, + "learning_rate": 2.2847313958608545e-05, + "loss": 0.0557, + "step": 49850 + }, + { + "epoch": 10.93, + "learning_rate": 2.2819793042712462e-05, + "loss": 0.0489, + "step": 49900 + }, + { + "epoch": 10.94, + "learning_rate": 2.279227212681638e-05, + "loss": 0.0545, + "step": 49950 + }, + { + "epoch": 10.95, + "learning_rate": 2.2764751210920303e-05, + "loss": 0.0571, + "step": 50000 + }, + { + "epoch": 10.95, + "eval_acc": 0.5704628184385774, + "eval_cer": 0.04416130877722291, + "eval_loss": 0.26752549409866333, + "eval_runtime": 2423.2125, + "eval_samples_per_second": 3.174, + "eval_steps_per_second": 0.397, + "step": 50000 + }, + { + "epoch": 10.96, + "learning_rate": 2.273723029502422e-05, + "loss": 0.0535, + "step": 50050 + }, + { + "epoch": 10.97, + "learning_rate": 2.2709709379128137e-05, + "loss": 0.0542, + "step": 50100 + }, + { + "epoch": 10.98, + "learning_rate": 2.2682188463232058e-05, + "loss": 0.0502, + "step": 50150 + }, + { + "epoch": 10.99, + "learning_rate": 2.2654667547335975e-05, + "loss": 0.0495, + "step": 50200 + }, + { + "epoch": 11.0, + "learning_rate": 2.2627146631439896e-05, + "loss": 0.0475, + "step": 50250 + }, + { + "epoch": 11.01, + "learning_rate": 2.2599625715543816e-05, + "loss": 0.0356, + "step": 50300 + }, + { + "epoch": 11.02, + "learning_rate": 2.2572104799647733e-05, + "loss": 0.0355, + "step": 50350 + }, + { + "epoch": 11.04, + "learning_rate": 2.254458388375165e-05, + "loss": 0.0369, + "step": 50400 + }, + { + "epoch": 11.05, + "learning_rate": 2.251706296785557e-05, + "loss": 0.0373, + "step": 50450 + }, + { + "epoch": 11.06, + "learning_rate": 2.2489542051959488e-05, + "loss": 0.0351, + "step": 50500 + }, + { + "epoch": 11.06, + "eval_acc": 0.577353094048706, + "eval_cer": 0.043815571807088075, + "eval_loss": 0.2698224186897278, + "eval_runtime": 2426.9108, + "eval_samples_per_second": 3.169, + "eval_steps_per_second": 0.396, + "step": 50500 + }, + { + "epoch": 11.07, + "learning_rate": 2.246202113606341e-05, + "loss": 0.0364, + "step": 50550 + }, + { + "epoch": 11.08, + "learning_rate": 2.243450022016733e-05, + "loss": 0.0352, + "step": 50600 + }, + { + "epoch": 11.09, + "learning_rate": 2.2406979304271246e-05, + "loss": 0.0345, + "step": 50650 + }, + { + "epoch": 11.1, + "learning_rate": 2.2379458388375167e-05, + "loss": 0.0358, + "step": 50700 + }, + { + "epoch": 11.11, + "learning_rate": 2.2351937472479084e-05, + "loss": 0.0394, + "step": 50750 + }, + { + "epoch": 11.12, + "learning_rate": 2.2324416556583005e-05, + "loss": 0.0387, + "step": 50800 + }, + { + "epoch": 11.13, + "learning_rate": 2.2296895640686925e-05, + "loss": 0.0418, + "step": 50850 + }, + { + "epoch": 11.14, + "learning_rate": 2.2269374724790842e-05, + "loss": 0.0398, + "step": 50900 + }, + { + "epoch": 11.16, + "learning_rate": 2.224185380889476e-05, + "loss": 0.0382, + "step": 50950 + }, + { + "epoch": 11.17, + "learning_rate": 2.221433289299868e-05, + "loss": 0.0391, + "step": 51000 + }, + { + "epoch": 11.17, + "eval_acc": 0.5743629744443106, + "eval_cer": 0.04380937024708566, + "eval_loss": 0.2706660032272339, + "eval_runtime": 2428.3853, + "eval_samples_per_second": 3.168, + "eval_steps_per_second": 0.396, + "step": 51000 + }, + { + "epoch": 11.18, + "learning_rate": 2.2186811977102597e-05, + "loss": 0.0401, + "step": 51050 + }, + { + "epoch": 11.19, + "learning_rate": 2.2159291061206518e-05, + "loss": 0.0417, + "step": 51100 + }, + { + "epoch": 11.2, + "learning_rate": 2.213177014531044e-05, + "loss": 0.0388, + "step": 51150 + }, + { + "epoch": 11.21, + "learning_rate": 2.2104249229414356e-05, + "loss": 0.0409, + "step": 51200 + }, + { + "epoch": 11.22, + "learning_rate": 2.2076728313518276e-05, + "loss": 0.0389, + "step": 51250 + }, + { + "epoch": 11.23, + "learning_rate": 2.2049207397622193e-05, + "loss": 0.0378, + "step": 51300 + }, + { + "epoch": 11.24, + "learning_rate": 2.2021686481726114e-05, + "loss": 0.0389, + "step": 51350 + }, + { + "epoch": 11.25, + "learning_rate": 2.199416556583003e-05, + "loss": 0.0369, + "step": 51400 + }, + { + "epoch": 11.26, + "learning_rate": 2.196664464993395e-05, + "loss": 0.0338, + "step": 51450 + }, + { + "epoch": 11.28, + "learning_rate": 2.193912373403787e-05, + "loss": 0.0343, + "step": 51500 + }, + { + "epoch": 11.28, + "eval_acc": 0.5776131044490882, + "eval_cer": 0.044525650427365004, + "eval_loss": 0.2710331678390503, + "eval_runtime": 2447.563, + "eval_samples_per_second": 3.143, + "eval_steps_per_second": 0.393, + "step": 51500 + }, + { + "epoch": 11.29, + "learning_rate": 2.191160281814179e-05, + "loss": 0.0411, + "step": 51550 + }, + { + "epoch": 11.3, + "learning_rate": 2.1884081902245707e-05, + "loss": 0.0439, + "step": 51600 + }, + { + "epoch": 11.31, + "learning_rate": 2.1856560986349627e-05, + "loss": 0.0392, + "step": 51650 + }, + { + "epoch": 11.32, + "learning_rate": 2.1829040070453548e-05, + "loss": 0.0382, + "step": 51700 + }, + { + "epoch": 11.33, + "learning_rate": 2.1801519154557465e-05, + "loss": 0.0406, + "step": 51750 + }, + { + "epoch": 11.34, + "learning_rate": 2.1773998238661382e-05, + "loss": 0.0359, + "step": 51800 + }, + { + "epoch": 11.35, + "learning_rate": 2.1746477322765303e-05, + "loss": 0.0376, + "step": 51850 + }, + { + "epoch": 11.36, + "learning_rate": 2.171895640686922e-05, + "loss": 0.0334, + "step": 51900 + }, + { + "epoch": 11.37, + "learning_rate": 2.169143549097314e-05, + "loss": 0.0385, + "step": 51950 + }, + { + "epoch": 11.39, + "learning_rate": 2.166391457507706e-05, + "loss": 0.0399, + "step": 52000 + }, + { + "epoch": 11.39, + "eval_acc": 0.5835933436578792, + "eval_cer": 0.044110145907202955, + "eval_loss": 0.27085205912590027, + "eval_runtime": 2433.2038, + "eval_samples_per_second": 3.161, + "eval_steps_per_second": 0.395, + "step": 52000 + }, + { + "epoch": 11.4, + "learning_rate": 2.1636393659180978e-05, + "loss": 0.0355, + "step": 52050 + }, + { + "epoch": 11.41, + "learning_rate": 2.16088727432849e-05, + "loss": 0.0365, + "step": 52100 + }, + { + "epoch": 11.42, + "learning_rate": 2.1581351827388816e-05, + "loss": 0.0355, + "step": 52150 + }, + { + "epoch": 11.43, + "learning_rate": 2.1553830911492736e-05, + "loss": 0.0415, + "step": 52200 + }, + { + "epoch": 11.44, + "learning_rate": 2.1526309995596657e-05, + "loss": 0.0416, + "step": 52250 + }, + { + "epoch": 11.45, + "learning_rate": 2.1498789079700574e-05, + "loss": 0.043, + "step": 52300 + }, + { + "epoch": 11.46, + "learning_rate": 2.147126816380449e-05, + "loss": 0.0341, + "step": 52350 + }, + { + "epoch": 11.47, + "learning_rate": 2.144429766622633e-05, + "loss": 0.0339, + "step": 52400 + }, + { + "epoch": 11.48, + "learning_rate": 2.1416776750330254e-05, + "loss": 0.0419, + "step": 52450 + }, + { + "epoch": 11.49, + "learning_rate": 2.138925583443417e-05, + "loss": 0.0374, + "step": 52500 + }, + { + "epoch": 11.49, + "eval_acc": 0.5847633904595991, + "eval_cer": 0.04424502983725556, + "eval_loss": 0.2762584090232849, + "eval_runtime": 2430.2459, + "eval_samples_per_second": 3.165, + "eval_steps_per_second": 0.396, + "step": 52500 + }, + { + "epoch": 11.51, + "learning_rate": 2.136173491853809e-05, + "loss": 0.0375, + "step": 52550 + }, + { + "epoch": 11.52, + "learning_rate": 2.133421400264201e-05, + "loss": 0.0433, + "step": 52600 + }, + { + "epoch": 11.53, + "learning_rate": 2.1306693086745926e-05, + "loss": 0.0387, + "step": 52650 + }, + { + "epoch": 11.54, + "learning_rate": 2.1279172170849847e-05, + "loss": 0.0398, + "step": 52700 + }, + { + "epoch": 11.55, + "learning_rate": 2.1251651254953767e-05, + "loss": 0.0348, + "step": 52750 + }, + { + "epoch": 11.56, + "learning_rate": 2.1224130339057684e-05, + "loss": 0.0381, + "step": 52800 + }, + { + "epoch": 11.57, + "learning_rate": 2.1196609423161605e-05, + "loss": 0.0389, + "step": 52850 + }, + { + "epoch": 11.58, + "learning_rate": 2.1169088507265522e-05, + "loss": 0.0401, + "step": 52900 + }, + { + "epoch": 11.59, + "learning_rate": 2.114156759136944e-05, + "loss": 0.0372, + "step": 52950 + }, + { + "epoch": 11.6, + "learning_rate": 2.1114046675473363e-05, + "loss": 0.0418, + "step": 53000 + }, + { + "epoch": 11.6, + "eval_acc": 0.5852834112603635, + "eval_cer": 0.04214890255643807, + "eval_loss": 0.27044418454170227, + "eval_runtime": 2419.8311, + "eval_samples_per_second": 3.179, + "eval_steps_per_second": 0.398, + "step": 53000 + }, + { + "epoch": 11.62, + "learning_rate": 2.108652575957728e-05, + "loss": 0.0423, + "step": 53050 + }, + { + "epoch": 11.63, + "learning_rate": 2.1059004843681198e-05, + "loss": 0.0386, + "step": 53100 + }, + { + "epoch": 11.64, + "learning_rate": 2.1031483927785118e-05, + "loss": 0.0466, + "step": 53150 + }, + { + "epoch": 11.65, + "learning_rate": 2.1003963011889035e-05, + "loss": 0.0385, + "step": 53200 + }, + { + "epoch": 11.66, + "learning_rate": 2.0976442095992956e-05, + "loss": 0.041, + "step": 53250 + }, + { + "epoch": 11.67, + "learning_rate": 2.0948921180096876e-05, + "loss": 0.0397, + "step": 53300 + }, + { + "epoch": 11.68, + "learning_rate": 2.0921400264200794e-05, + "loss": 0.0384, + "step": 53350 + }, + { + "epoch": 11.69, + "learning_rate": 2.089387934830471e-05, + "loss": 0.0357, + "step": 53400 + }, + { + "epoch": 11.7, + "learning_rate": 2.086635843240863e-05, + "loss": 0.0361, + "step": 53450 + }, + { + "epoch": 11.71, + "learning_rate": 2.083883751651255e-05, + "loss": 0.0386, + "step": 53500 + }, + { + "epoch": 11.71, + "eval_acc": 0.5934737388724033, + "eval_cer": 0.04196130536636491, + "eval_loss": 0.26918068528175354, + "eval_runtime": 2422.4006, + "eval_samples_per_second": 3.175, + "eval_steps_per_second": 0.397, + "step": 53500 + }, + { + "epoch": 11.72, + "learning_rate": 2.081131660061647e-05, + "loss": 0.0345, + "step": 53550 + }, + { + "epoch": 11.74, + "learning_rate": 2.078379568472039e-05, + "loss": 0.039, + "step": 53600 + }, + { + "epoch": 11.75, + "learning_rate": 2.0756274768824307e-05, + "loss": 0.0389, + "step": 53650 + }, + { + "epoch": 11.76, + "learning_rate": 2.0728753852928227e-05, + "loss": 0.0425, + "step": 53700 + }, + { + "epoch": 11.77, + "learning_rate": 2.0701232937032145e-05, + "loss": 0.0363, + "step": 53750 + }, + { + "epoch": 11.78, + "learning_rate": 2.0673712021136065e-05, + "loss": 0.0411, + "step": 53800 + }, + { + "epoch": 11.79, + "learning_rate": 2.0646191105239986e-05, + "loss": 0.0411, + "step": 53850 + }, + { + "epoch": 11.8, + "learning_rate": 2.0618670189343903e-05, + "loss": 0.0419, + "step": 53900 + }, + { + "epoch": 11.81, + "learning_rate": 2.059114927344782e-05, + "loss": 0.0375, + "step": 53950 + }, + { + "epoch": 11.82, + "learning_rate": 2.056362835755174e-05, + "loss": 0.0398, + "step": 54000 + }, + { + "epoch": 11.82, + "eval_acc": 0.5977639104787098, + "eval_cer": 0.041258978696091, + "eval_loss": 0.26716116070747375, + "eval_runtime": 2416.0995, + "eval_samples_per_second": 3.184, + "eval_steps_per_second": 0.398, + "step": 54000 + }, + { + "epoch": 11.83, + "learning_rate": 2.0536107441655658e-05, + "loss": 0.0503, + "step": 54050 + }, + { + "epoch": 11.85, + "learning_rate": 2.0508586525759578e-05, + "loss": 0.0335, + "step": 54100 + }, + { + "epoch": 11.86, + "learning_rate": 2.04810656098635e-05, + "loss": 0.0428, + "step": 54150 + }, + { + "epoch": 11.87, + "learning_rate": 2.0453544693967416e-05, + "loss": 0.0377, + "step": 54200 + }, + { + "epoch": 11.88, + "learning_rate": 2.0426023778071337e-05, + "loss": 0.0444, + "step": 54250 + }, + { + "epoch": 11.89, + "learning_rate": 2.0398502862175254e-05, + "loss": 0.0392, + "step": 54300 + }, + { + "epoch": 11.9, + "learning_rate": 2.037098194627917e-05, + "loss": 0.0385, + "step": 54350 + }, + { + "epoch": 11.91, + "learning_rate": 2.034346103038309e-05, + "loss": 0.0413, + "step": 54400 + }, + { + "epoch": 11.92, + "learning_rate": 2.0315940114487012e-05, + "loss": 0.0397, + "step": 54450 + }, + { + "epoch": 11.93, + "learning_rate": 2.028841919859093e-05, + "loss": 0.0367, + "step": 54500 + }, + { + "epoch": 11.93, + "eval_acc": 0.5965938636769899, + "eval_cer": 0.04078300896590537, + "eval_loss": 0.2696912884712219, + "eval_runtime": 2403.6892, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 54500 + }, + { + "epoch": 11.94, + "learning_rate": 2.026089828269485e-05, + "loss": 0.0405, + "step": 54550 + }, + { + "epoch": 11.95, + "learning_rate": 2.0233377366798767e-05, + "loss": 0.0317, + "step": 54600 + }, + { + "epoch": 11.97, + "learning_rate": 2.0205856450902687e-05, + "loss": 0.0477, + "step": 54650 + }, + { + "epoch": 11.98, + "learning_rate": 2.0178335535006608e-05, + "loss": 0.04, + "step": 54700 + }, + { + "epoch": 11.99, + "learning_rate": 2.0150814619110525e-05, + "loss": 0.0421, + "step": 54750 + }, + { + "epoch": 12.0, + "learning_rate": 2.0123844121532364e-05, + "loss": 0.045, + "step": 54800 + }, + { + "epoch": 12.01, + "learning_rate": 2.0096323205636285e-05, + "loss": 0.0279, + "step": 54850 + }, + { + "epoch": 12.02, + "learning_rate": 2.0068802289740205e-05, + "loss": 0.0243, + "step": 54900 + }, + { + "epoch": 12.03, + "learning_rate": 2.0041281373844122e-05, + "loss": 0.0224, + "step": 54950 + }, + { + "epoch": 12.04, + "learning_rate": 2.0013760457948043e-05, + "loss": 0.0259, + "step": 55000 + }, + { + "epoch": 12.04, + "eval_acc": 0.6123244929001138, + "eval_cer": 0.04014579867565686, + "eval_loss": 0.2668377161026001, + "eval_runtime": 2417.0943, + "eval_samples_per_second": 3.182, + "eval_steps_per_second": 0.398, + "step": 55000 + }, + { + "epoch": 12.05, + "learning_rate": 1.998623954205196e-05, + "loss": 0.0245, + "step": 55050 + }, + { + "epoch": 12.06, + "learning_rate": 1.9958718626155877e-05, + "loss": 0.0267, + "step": 55100 + }, + { + "epoch": 12.08, + "learning_rate": 1.99311977102598e-05, + "loss": 0.0247, + "step": 55150 + }, + { + "epoch": 12.09, + "learning_rate": 1.990367679436372e-05, + "loss": 0.0263, + "step": 55200 + }, + { + "epoch": 12.1, + "learning_rate": 1.9876155878467636e-05, + "loss": 0.0249, + "step": 55250 + }, + { + "epoch": 12.11, + "learning_rate": 1.9848634962571556e-05, + "loss": 0.0269, + "step": 55300 + }, + { + "epoch": 12.12, + "learning_rate": 1.9821114046675473e-05, + "loss": 0.0254, + "step": 55350 + }, + { + "epoch": 12.13, + "learning_rate": 1.979359313077939e-05, + "loss": 0.0288, + "step": 55400 + }, + { + "epoch": 12.14, + "learning_rate": 1.9766072214883314e-05, + "loss": 0.0262, + "step": 55450 + }, + { + "epoch": 12.15, + "learning_rate": 1.973855129898723e-05, + "loss": 0.0302, + "step": 55500 + }, + { + "epoch": 12.15, + "eval_acc": 0.6038741548876918, + "eval_cer": 0.04154735123620347, + "eval_loss": 0.2702733278274536, + "eval_runtime": 2435.2652, + "eval_samples_per_second": 3.159, + "eval_steps_per_second": 0.395, + "step": 55500 + }, + { + "epoch": 12.16, + "learning_rate": 1.971103038309115e-05, + "loss": 0.0272, + "step": 55550 + }, + { + "epoch": 12.17, + "learning_rate": 1.968350946719507e-05, + "loss": 0.0241, + "step": 55600 + }, + { + "epoch": 12.18, + "learning_rate": 1.9655988551298987e-05, + "loss": 0.0271, + "step": 55650 + }, + { + "epoch": 12.2, + "learning_rate": 1.9628467635402907e-05, + "loss": 0.0306, + "step": 55700 + }, + { + "epoch": 12.21, + "learning_rate": 1.9600946719506828e-05, + "loss": 0.0288, + "step": 55750 + }, + { + "epoch": 12.22, + "learning_rate": 1.9573425803610745e-05, + "loss": 0.0272, + "step": 55800 + }, + { + "epoch": 12.23, + "learning_rate": 1.9545904887714665e-05, + "loss": 0.0288, + "step": 55850 + }, + { + "epoch": 12.24, + "learning_rate": 1.9518383971818583e-05, + "loss": 0.0253, + "step": 55900 + }, + { + "epoch": 12.25, + "learning_rate": 1.94908630559225e-05, + "loss": 0.0293, + "step": 55950 + }, + { + "epoch": 12.26, + "learning_rate": 1.9463342140026424e-05, + "loss": 0.0266, + "step": 56000 + }, + { + "epoch": 12.26, + "eval_acc": 0.6030941236865452, + "eval_cer": 0.041094637356026906, + "eval_loss": 0.2693229615688324, + "eval_runtime": 2418.4882, + "eval_samples_per_second": 3.18, + "eval_steps_per_second": 0.398, + "step": 56000 + }, + { + "epoch": 12.27, + "learning_rate": 1.943582122413034e-05, + "loss": 0.028, + "step": 56050 + }, + { + "epoch": 12.28, + "learning_rate": 1.9408300308234258e-05, + "loss": 0.0316, + "step": 56100 + }, + { + "epoch": 12.29, + "learning_rate": 1.938077939233818e-05, + "loss": 0.0266, + "step": 56150 + }, + { + "epoch": 12.3, + "learning_rate": 1.9353258476442096e-05, + "loss": 0.0267, + "step": 56200 + }, + { + "epoch": 12.32, + "learning_rate": 1.9325737560546016e-05, + "loss": 0.028, + "step": 56250 + }, + { + "epoch": 12.33, + "learning_rate": 1.9298216644649937e-05, + "loss": 0.0286, + "step": 56300 + }, + { + "epoch": 12.34, + "learning_rate": 1.9270695728753854e-05, + "loss": 0.0326, + "step": 56350 + }, + { + "epoch": 12.35, + "learning_rate": 1.924317481285777e-05, + "loss": 0.03, + "step": 56400 + }, + { + "epoch": 12.36, + "learning_rate": 1.9215653896961692e-05, + "loss": 0.0289, + "step": 56450 + }, + { + "epoch": 12.37, + "learning_rate": 1.918813298106561e-05, + "loss": 0.0316, + "step": 56500 + }, + { + "epoch": 12.37, + "eval_acc": 0.6142745709029804, + "eval_cer": 0.04024192285569435, + "eval_loss": 0.2692607641220093, + "eval_runtime": 2418.0408, + "eval_samples_per_second": 3.181, + "eval_steps_per_second": 0.398, + "step": 56500 + }, + { + "epoch": 12.38, + "learning_rate": 1.916061206516953e-05, + "loss": 0.0285, + "step": 56550 + }, + { + "epoch": 12.39, + "learning_rate": 1.913309114927345e-05, + "loss": 0.0271, + "step": 56600 + }, + { + "epoch": 12.4, + "learning_rate": 1.9105570233377367e-05, + "loss": 0.0278, + "step": 56650 + }, + { + "epoch": 12.41, + "learning_rate": 1.9078049317481288e-05, + "loss": 0.0245, + "step": 56700 + }, + { + "epoch": 12.43, + "learning_rate": 1.9050528401585205e-05, + "loss": 0.0245, + "step": 56750 + }, + { + "epoch": 12.44, + "learning_rate": 1.9023557904007047e-05, + "loss": 0.0266, + "step": 56800 + }, + { + "epoch": 12.45, + "learning_rate": 1.8996036988110965e-05, + "loss": 0.0293, + "step": 56850 + }, + { + "epoch": 12.46, + "learning_rate": 1.8968516072214885e-05, + "loss": 0.026, + "step": 56900 + }, + { + "epoch": 12.47, + "learning_rate": 1.8940995156318802e-05, + "loss": 0.0299, + "step": 56950 + }, + { + "epoch": 12.48, + "learning_rate": 1.891347424042272e-05, + "loss": 0.0279, + "step": 57000 + }, + { + "epoch": 12.48, + "eval_acc": 0.6073842952928518, + "eval_cer": 0.04096130381597491, + "eval_loss": 0.27060824632644653, + "eval_runtime": 2418.1417, + "eval_samples_per_second": 3.181, + "eval_steps_per_second": 0.398, + "step": 57000 + }, + { + "epoch": 12.49, + "learning_rate": 1.8885953324526643e-05, + "loss": 0.0291, + "step": 57050 + }, + { + "epoch": 12.5, + "learning_rate": 1.885843240863056e-05, + "loss": 0.0284, + "step": 57100 + }, + { + "epoch": 12.51, + "learning_rate": 1.8830911492734478e-05, + "loss": 0.0268, + "step": 57150 + }, + { + "epoch": 12.52, + "learning_rate": 1.8803390576838398e-05, + "loss": 0.0304, + "step": 57200 + }, + { + "epoch": 12.53, + "learning_rate": 1.8775869660942315e-05, + "loss": 0.0376, + "step": 57250 + }, + { + "epoch": 12.55, + "learning_rate": 1.8748348745046236e-05, + "loss": 0.0262, + "step": 57300 + }, + { + "epoch": 12.56, + "learning_rate": 1.8720827829150157e-05, + "loss": 0.0256, + "step": 57350 + }, + { + "epoch": 12.57, + "learning_rate": 1.8693306913254074e-05, + "loss": 0.03, + "step": 57400 + }, + { + "epoch": 12.58, + "learning_rate": 1.8665785997357994e-05, + "loss": 0.0288, + "step": 57450 + }, + { + "epoch": 12.59, + "learning_rate": 1.863826508146191e-05, + "loss": 0.0322, + "step": 57500 + }, + { + "epoch": 12.59, + "eval_acc": 0.6142745709029804, + "eval_cer": 0.039677580895474254, + "eval_loss": 0.27082282304763794, + "eval_runtime": 2424.4199, + "eval_samples_per_second": 3.173, + "eval_steps_per_second": 0.397, + "step": 57500 + }, + { + "epoch": 12.6, + "learning_rate": 1.861074416556583e-05, + "loss": 0.0287, + "step": 57550 + }, + { + "epoch": 12.61, + "learning_rate": 1.8583223249669753e-05, + "loss": 0.0265, + "step": 57600 + }, + { + "epoch": 12.62, + "learning_rate": 1.855570233377367e-05, + "loss": 0.0256, + "step": 57650 + }, + { + "epoch": 12.63, + "learning_rate": 1.8528181417877587e-05, + "loss": 0.0292, + "step": 57700 + }, + { + "epoch": 12.64, + "learning_rate": 1.8500660501981507e-05, + "loss": 0.0252, + "step": 57750 + }, + { + "epoch": 12.66, + "learning_rate": 1.8473139586085425e-05, + "loss": 0.025, + "step": 57800 + }, + { + "epoch": 12.67, + "learning_rate": 1.8445618670189345e-05, + "loss": 0.028, + "step": 57850 + }, + { + "epoch": 12.68, + "learning_rate": 1.8418097754293266e-05, + "loss": 0.0261, + "step": 57900 + }, + { + "epoch": 12.69, + "learning_rate": 1.8390576838397183e-05, + "loss": 0.0291, + "step": 57950 + }, + { + "epoch": 12.7, + "learning_rate": 1.8363055922501103e-05, + "loss": 0.0305, + "step": 58000 + }, + { + "epoch": 12.7, + "eval_acc": 0.6158346333052737, + "eval_cer": 0.04006052722562361, + "eval_loss": 0.2681460380554199, + "eval_runtime": 2417.4931, + "eval_samples_per_second": 3.182, + "eval_steps_per_second": 0.398, + "step": 58000 + }, + { + "epoch": 12.71, + "learning_rate": 1.833553500660502e-05, + "loss": 0.0234, + "step": 58050 + }, + { + "epoch": 12.72, + "learning_rate": 1.8308014090708938e-05, + "loss": 0.0296, + "step": 58100 + }, + { + "epoch": 12.73, + "learning_rate": 1.8280493174812858e-05, + "loss": 0.025, + "step": 58150 + }, + { + "epoch": 12.74, + "learning_rate": 1.825297225891678e-05, + "loss": 0.0267, + "step": 58200 + }, + { + "epoch": 12.75, + "learning_rate": 1.8225451343020696e-05, + "loss": 0.0232, + "step": 58250 + }, + { + "epoch": 12.76, + "learning_rate": 1.8197930427124617e-05, + "loss": 0.0284, + "step": 58300 + }, + { + "epoch": 12.78, + "learning_rate": 1.8170409511228534e-05, + "loss": 0.0295, + "step": 58350 + }, + { + "epoch": 12.79, + "learning_rate": 1.814288859533245e-05, + "loss": 0.0308, + "step": 58400 + }, + { + "epoch": 12.8, + "learning_rate": 1.8115367679436375e-05, + "loss": 0.0295, + "step": 58450 + }, + { + "epoch": 12.81, + "learning_rate": 1.8087846763540292e-05, + "loss": 0.0279, + "step": 58500 + }, + { + "epoch": 12.81, + "eval_acc": 0.6254550181194156, + "eval_cer": 0.039162851415273515, + "eval_loss": 0.26786795258522034, + "eval_runtime": 2425.3747, + "eval_samples_per_second": 3.171, + "eval_steps_per_second": 0.397, + "step": 58500 + }, + { + "epoch": 12.82, + "learning_rate": 1.806032584764421e-05, + "loss": 0.0267, + "step": 58550 + }, + { + "epoch": 12.83, + "learning_rate": 1.803280493174813e-05, + "loss": 0.0283, + "step": 58600 + }, + { + "epoch": 12.84, + "learning_rate": 1.8005284015852047e-05, + "loss": 0.0296, + "step": 58650 + }, + { + "epoch": 12.85, + "learning_rate": 1.7977763099955967e-05, + "loss": 0.0278, + "step": 58700 + }, + { + "epoch": 12.86, + "learning_rate": 1.7950242184059888e-05, + "loss": 0.0284, + "step": 58750 + }, + { + "epoch": 12.87, + "learning_rate": 1.7922721268163805e-05, + "loss": 0.0263, + "step": 58800 + }, + { + "epoch": 12.89, + "learning_rate": 1.7895200352267726e-05, + "loss": 0.0246, + "step": 58850 + }, + { + "epoch": 12.9, + "learning_rate": 1.7867679436371643e-05, + "loss": 0.027, + "step": 58900 + }, + { + "epoch": 12.91, + "learning_rate": 1.784015852047556e-05, + "loss": 0.0323, + "step": 58950 + }, + { + "epoch": 12.92, + "learning_rate": 1.7812637604579484e-05, + "loss": 0.0273, + "step": 59000 + }, + { + "epoch": 12.92, + "eval_acc": 0.6214248569134913, + "eval_cer": 0.03904502177522756, + "eval_loss": 0.26956528425216675, + "eval_runtime": 2416.1553, + "eval_samples_per_second": 3.184, + "eval_steps_per_second": 0.398, + "step": 59000 + }, + { + "epoch": 12.93, + "learning_rate": 1.77851166886834e-05, + "loss": 0.0295, + "step": 59050 + }, + { + "epoch": 12.94, + "learning_rate": 1.775759577278732e-05, + "loss": 0.0284, + "step": 59100 + }, + { + "epoch": 12.95, + "learning_rate": 1.773007485689124e-05, + "loss": 0.0266, + "step": 59150 + }, + { + "epoch": 12.96, + "learning_rate": 1.7702553940995156e-05, + "loss": 0.0311, + "step": 59200 + }, + { + "epoch": 12.97, + "learning_rate": 1.7675033025099073e-05, + "loss": 0.0276, + "step": 59250 + }, + { + "epoch": 12.98, + "learning_rate": 1.7647512109202997e-05, + "loss": 0.0285, + "step": 59300 + }, + { + "epoch": 12.99, + "learning_rate": 1.7619991193306914e-05, + "loss": 0.0299, + "step": 59350 + }, + { + "epoch": 13.01, + "learning_rate": 1.759247027741083e-05, + "loss": 0.0266, + "step": 59400 + }, + { + "epoch": 13.02, + "learning_rate": 1.7564949361514752e-05, + "loss": 0.0196, + "step": 59450 + }, + { + "epoch": 13.03, + "learning_rate": 1.753742844561867e-05, + "loss": 0.0166, + "step": 59500 + }, + { + "epoch": 13.03, + "eval_acc": 0.6300052001261044, + "eval_cer": 0.03765122116468397, + "eval_loss": 0.26843369007110596, + "eval_runtime": 2419.8398, + "eval_samples_per_second": 3.179, + "eval_steps_per_second": 0.398, + "step": 59500 + }, + { + "epoch": 13.04, + "learning_rate": 1.750990752972259e-05, + "loss": 0.0165, + "step": 59550 + }, + { + "epoch": 13.05, + "learning_rate": 1.748238661382651e-05, + "loss": 0.0168, + "step": 59600 + }, + { + "epoch": 13.06, + "learning_rate": 1.7454865697930427e-05, + "loss": 0.0195, + "step": 59650 + }, + { + "epoch": 13.07, + "learning_rate": 1.7427344782034348e-05, + "loss": 0.019, + "step": 59700 + }, + { + "epoch": 13.08, + "learning_rate": 1.7399823866138265e-05, + "loss": 0.018, + "step": 59750 + }, + { + "epoch": 13.09, + "learning_rate": 1.7372302950242182e-05, + "loss": 0.0188, + "step": 59800 + }, + { + "epoch": 13.1, + "learning_rate": 1.7344782034346106e-05, + "loss": 0.0192, + "step": 59850 + }, + { + "epoch": 13.12, + "learning_rate": 1.7317261118450023e-05, + "loss": 0.0201, + "step": 59900 + }, + { + "epoch": 13.13, + "learning_rate": 1.728974020255394e-05, + "loss": 0.0193, + "step": 59950 + }, + { + "epoch": 13.14, + "learning_rate": 1.726221928665786e-05, + "loss": 0.0178, + "step": 60000 + }, + { + "epoch": 13.14, + "eval_acc": 0.6283151325236199, + "eval_cer": 0.03863571881506793, + "eval_loss": 0.26756173372268677, + "eval_runtime": 2429.9446, + "eval_samples_per_second": 3.166, + "eval_steps_per_second": 0.396, + "step": 60000 + }, + { + "epoch": 13.15, + "learning_rate": 1.723469837076178e-05, + "loss": 0.0204, + "step": 60050 + }, + { + "epoch": 13.16, + "learning_rate": 1.72071774548657e-05, + "loss": 0.0182, + "step": 60100 + }, + { + "epoch": 13.17, + "learning_rate": 1.717965653896962e-05, + "loss": 0.02, + "step": 60150 + }, + { + "epoch": 13.18, + "learning_rate": 1.7152135623073537e-05, + "loss": 0.0218, + "step": 60200 + }, + { + "epoch": 13.19, + "learning_rate": 1.7124614707177454e-05, + "loss": 0.0205, + "step": 60250 + }, + { + "epoch": 13.2, + "learning_rate": 1.7097093791281374e-05, + "loss": 0.0164, + "step": 60300 + }, + { + "epoch": 13.21, + "learning_rate": 1.706957287538529e-05, + "loss": 0.0191, + "step": 60350 + }, + { + "epoch": 13.22, + "learning_rate": 1.7042051959489212e-05, + "loss": 0.019, + "step": 60400 + }, + { + "epoch": 13.24, + "learning_rate": 1.7014531043593133e-05, + "loss": 0.019, + "step": 60450 + }, + { + "epoch": 13.25, + "learning_rate": 1.698701012769705e-05, + "loss": 0.0191, + "step": 60500 + }, + { + "epoch": 13.25, + "eval_acc": 0.6319552781289709, + "eval_cer": 0.03849618371501351, + "eval_loss": 0.2676541805267334, + "eval_runtime": 2429.477, + "eval_samples_per_second": 3.166, + "eval_steps_per_second": 0.396, + "step": 60500 + }, + { + "epoch": 13.26, + "learning_rate": 1.695948921180097e-05, + "loss": 0.0165, + "step": 60550 + }, + { + "epoch": 13.27, + "learning_rate": 1.6931968295904888e-05, + "loss": 0.0189, + "step": 60600 + }, + { + "epoch": 13.28, + "learning_rate": 1.6904447380008808e-05, + "loss": 0.0184, + "step": 60650 + }, + { + "epoch": 13.29, + "learning_rate": 1.687692646411273e-05, + "loss": 0.0165, + "step": 60700 + }, + { + "epoch": 13.3, + "learning_rate": 1.6849405548216646e-05, + "loss": 0.019, + "step": 60750 + }, + { + "epoch": 13.31, + "learning_rate": 1.6821884632320563e-05, + "loss": 0.0203, + "step": 60800 + }, + { + "epoch": 13.32, + "learning_rate": 1.6794363716424484e-05, + "loss": 0.0193, + "step": 60850 + }, + { + "epoch": 13.33, + "learning_rate": 1.67668428005284e-05, + "loss": 0.0191, + "step": 60900 + }, + { + "epoch": 13.35, + "learning_rate": 1.673932188463232e-05, + "loss": 0.0208, + "step": 60950 + }, + { + "epoch": 13.36, + "learning_rate": 1.6711800968736242e-05, + "loss": 0.0189, + "step": 61000 + }, + { + "epoch": 13.36, + "eval_acc": 0.635595423734322, + "eval_cer": 0.03698610385442458, + "eval_loss": 0.268144428730011, + "eval_runtime": 2411.8799, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 61000 + }, + { + "epoch": 13.37, + "learning_rate": 1.668428005284016e-05, + "loss": 0.0202, + "step": 61050 + }, + { + "epoch": 13.38, + "learning_rate": 1.665675913694408e-05, + "loss": 0.0176, + "step": 61100 + }, + { + "epoch": 13.39, + "learning_rate": 1.662978863936592e-05, + "loss": 0.0198, + "step": 61150 + }, + { + "epoch": 13.4, + "learning_rate": 1.660226772346984e-05, + "loss": 0.0209, + "step": 61200 + }, + { + "epoch": 13.41, + "learning_rate": 1.6574746807573756e-05, + "loss": 0.0199, + "step": 61250 + }, + { + "epoch": 13.42, + "learning_rate": 1.6547225891677677e-05, + "loss": 0.0195, + "step": 61300 + }, + { + "epoch": 13.43, + "learning_rate": 1.6519704975781594e-05, + "loss": 0.0173, + "step": 61350 + }, + { + "epoch": 13.44, + "learning_rate": 1.649218405988551e-05, + "loss": 0.0187, + "step": 61400 + }, + { + "epoch": 13.45, + "learning_rate": 1.6464663143989435e-05, + "loss": 0.0234, + "step": 61450 + }, + { + "epoch": 13.47, + "learning_rate": 1.6437142228093352e-05, + "loss": 0.0168, + "step": 61500 + }, + { + "epoch": 13.47, + "eval_acc": 0.6306552261270598, + "eval_cer": 0.038207811174901046, + "eval_loss": 0.2689424753189087, + "eval_runtime": 2415.0879, + "eval_samples_per_second": 3.185, + "eval_steps_per_second": 0.398, + "step": 61500 + }, + { + "epoch": 13.48, + "learning_rate": 1.640962131219727e-05, + "loss": 0.0206, + "step": 61550 + }, + { + "epoch": 13.49, + "learning_rate": 1.638210039630119e-05, + "loss": 0.0195, + "step": 61600 + }, + { + "epoch": 13.5, + "learning_rate": 1.6354579480405107e-05, + "loss": 0.0177, + "step": 61650 + }, + { + "epoch": 13.51, + "learning_rate": 1.6327058564509028e-05, + "loss": 0.0198, + "step": 61700 + }, + { + "epoch": 13.52, + "learning_rate": 1.629953764861295e-05, + "loss": 0.0192, + "step": 61750 + }, + { + "epoch": 13.53, + "learning_rate": 1.6272016732716866e-05, + "loss": 0.0175, + "step": 61800 + }, + { + "epoch": 13.54, + "learning_rate": 1.6244495816820786e-05, + "loss": 0.0176, + "step": 61850 + }, + { + "epoch": 13.55, + "learning_rate": 1.6216974900924703e-05, + "loss": 0.0174, + "step": 61900 + }, + { + "epoch": 13.56, + "learning_rate": 1.618945398502862e-05, + "loss": 0.0178, + "step": 61950 + }, + { + "epoch": 13.57, + "learning_rate": 1.6161933069132544e-05, + "loss": 0.0193, + "step": 62000 + }, + { + "epoch": 13.57, + "eval_acc": 0.6322152885293532, + "eval_cer": 0.03755199620464528, + "eval_loss": 0.26729756593704224, + "eval_runtime": 2401.4022, + "eval_samples_per_second": 3.203, + "eval_steps_per_second": 0.401, + "step": 62000 + }, + { + "epoch": 13.59, + "learning_rate": 1.613441215323646e-05, + "loss": 0.0199, + "step": 62050 + }, + { + "epoch": 13.6, + "learning_rate": 1.610689123734038e-05, + "loss": 0.0223, + "step": 62100 + }, + { + "epoch": 13.61, + "learning_rate": 1.60793703214443e-05, + "loss": 0.0206, + "step": 62150 + }, + { + "epoch": 13.62, + "learning_rate": 1.6051849405548216e-05, + "loss": 0.0205, + "step": 62200 + }, + { + "epoch": 13.63, + "learning_rate": 1.6024328489652134e-05, + "loss": 0.0201, + "step": 62250 + }, + { + "epoch": 13.64, + "learning_rate": 1.5996807573756058e-05, + "loss": 0.0197, + "step": 62300 + }, + { + "epoch": 13.65, + "learning_rate": 1.5969286657859975e-05, + "loss": 0.0183, + "step": 62350 + }, + { + "epoch": 13.66, + "learning_rate": 1.5941765741963892e-05, + "loss": 0.0184, + "step": 62400 + }, + { + "epoch": 13.67, + "learning_rate": 1.5914244826067812e-05, + "loss": 0.0197, + "step": 62450 + }, + { + "epoch": 13.68, + "learning_rate": 1.588672391017173e-05, + "loss": 0.0184, + "step": 62500 + }, + { + "epoch": 13.68, + "eval_acc": 0.6339053561318375, + "eval_cer": 0.037992306964817, + "eval_loss": 0.26707085967063904, + "eval_runtime": 2418.7114, + "eval_samples_per_second": 3.18, + "eval_steps_per_second": 0.398, + "step": 62500 + }, + { + "epoch": 13.7, + "learning_rate": 1.585920299427565e-05, + "loss": 0.0202, + "step": 62550 + }, + { + "epoch": 13.71, + "learning_rate": 1.583168207837957e-05, + "loss": 0.0191, + "step": 62600 + }, + { + "epoch": 13.72, + "learning_rate": 1.5804161162483488e-05, + "loss": 0.0217, + "step": 62650 + }, + { + "epoch": 13.73, + "learning_rate": 1.577664024658741e-05, + "loss": 0.0214, + "step": 62700 + }, + { + "epoch": 13.74, + "learning_rate": 1.5749119330691326e-05, + "loss": 0.0191, + "step": 62750 + }, + { + "epoch": 13.75, + "learning_rate": 1.5721598414795243e-05, + "loss": 0.0167, + "step": 62800 + }, + { + "epoch": 13.76, + "learning_rate": 1.5694077498899167e-05, + "loss": 0.0187, + "step": 62850 + }, + { + "epoch": 13.77, + "learning_rate": 1.5666556583003084e-05, + "loss": 0.0185, + "step": 62900 + }, + { + "epoch": 13.78, + "learning_rate": 1.5639035667107e-05, + "loss": 0.0185, + "step": 62950 + }, + { + "epoch": 13.79, + "learning_rate": 1.561151475121092e-05, + "loss": 0.0184, + "step": 63000 + }, + { + "epoch": 13.79, + "eval_acc": 0.6333853353310731, + "eval_cer": 0.037341143164563044, + "eval_loss": 0.266510546207428, + "eval_runtime": 2421.5513, + "eval_samples_per_second": 3.176, + "eval_steps_per_second": 0.397, + "step": 63000 + }, + { + "epoch": 13.8, + "learning_rate": 1.558399383531484e-05, + "loss": 0.0197, + "step": 63050 + }, + { + "epoch": 13.82, + "learning_rate": 1.555647291941876e-05, + "loss": 0.0174, + "step": 63100 + }, + { + "epoch": 13.83, + "learning_rate": 1.552895200352268e-05, + "loss": 0.0182, + "step": 63150 + }, + { + "epoch": 13.84, + "learning_rate": 1.5501431087626597e-05, + "loss": 0.0191, + "step": 63200 + }, + { + "epoch": 13.85, + "learning_rate": 1.5473910171730514e-05, + "loss": 0.0202, + "step": 63250 + }, + { + "epoch": 13.86, + "learning_rate": 1.5446389255834435e-05, + "loss": 0.0194, + "step": 63300 + }, + { + "epoch": 13.87, + "learning_rate": 1.5418868339938352e-05, + "loss": 0.0179, + "step": 63350 + }, + { + "epoch": 13.88, + "learning_rate": 1.5391347424042272e-05, + "loss": 0.0167, + "step": 63400 + }, + { + "epoch": 13.89, + "learning_rate": 1.5363826508146193e-05, + "loss": 0.0179, + "step": 63450 + }, + { + "epoch": 13.9, + "learning_rate": 1.533630559225011e-05, + "loss": 0.0176, + "step": 63500 + }, + { + "epoch": 13.9, + "eval_acc": 0.6387155485389086, + "eval_cer": 0.0379597487748043, + "eval_loss": 0.26856788992881775, + "eval_runtime": 2412.2522, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 63500 + }, + { + "epoch": 13.91, + "learning_rate": 1.530878467635403e-05, + "loss": 0.0198, + "step": 63550 + }, + { + "epoch": 13.93, + "learning_rate": 1.5281263760457948e-05, + "loss": 0.0187, + "step": 63600 + }, + { + "epoch": 13.94, + "learning_rate": 1.5253742844561868e-05, + "loss": 0.0218, + "step": 63650 + }, + { + "epoch": 13.95, + "learning_rate": 1.5226221928665787e-05, + "loss": 0.0207, + "step": 63700 + }, + { + "epoch": 13.96, + "learning_rate": 1.5198701012769706e-05, + "loss": 0.0175, + "step": 63750 + }, + { + "epoch": 13.97, + "learning_rate": 1.5171180096873625e-05, + "loss": 0.0203, + "step": 63800 + }, + { + "epoch": 13.98, + "learning_rate": 1.5143659180977542e-05, + "loss": 0.0187, + "step": 63850 + }, + { + "epoch": 13.99, + "learning_rate": 1.5116138265081461e-05, + "loss": 0.0182, + "step": 63900 + }, + { + "epoch": 14.0, + "learning_rate": 1.5088617349185383e-05, + "loss": 0.0195, + "step": 63950 + }, + { + "epoch": 14.01, + "learning_rate": 1.50610964332893e-05, + "loss": 0.0126, + "step": 64000 + }, + { + "epoch": 14.01, + "eval_acc": 0.639235569339673, + "eval_cer": 0.037051220234449976, + "eval_loss": 0.2667127847671509, + "eval_runtime": 2419.4475, + "eval_samples_per_second": 3.179, + "eval_steps_per_second": 0.398, + "step": 64000 + }, + { + "epoch": 14.02, + "learning_rate": 1.503357551739322e-05, + "loss": 0.0141, + "step": 64050 + }, + { + "epoch": 14.03, + "learning_rate": 1.5006054601497138e-05, + "loss": 0.012, + "step": 64100 + }, + { + "epoch": 14.05, + "learning_rate": 1.4978533685601057e-05, + "loss": 0.0117, + "step": 64150 + }, + { + "epoch": 14.06, + "learning_rate": 1.4951012769704978e-05, + "loss": 0.0114, + "step": 64200 + }, + { + "epoch": 14.07, + "learning_rate": 1.4923491853808896e-05, + "loss": 0.0118, + "step": 64250 + }, + { + "epoch": 14.08, + "learning_rate": 1.4895970937912815e-05, + "loss": 0.0124, + "step": 64300 + }, + { + "epoch": 14.09, + "learning_rate": 1.4868450022016733e-05, + "loss": 0.0124, + "step": 64350 + }, + { + "epoch": 14.1, + "learning_rate": 1.4840929106120651e-05, + "loss": 0.0125, + "step": 64400 + }, + { + "epoch": 14.11, + "learning_rate": 1.481340819022457e-05, + "loss": 0.0136, + "step": 64450 + }, + { + "epoch": 14.12, + "learning_rate": 1.478588727432849e-05, + "loss": 0.0139, + "step": 64500 + }, + { + "epoch": 14.12, + "eval_acc": 0.6450858033482727, + "eval_cer": 0.03637680058418695, + "eval_loss": 0.2650182843208313, + "eval_runtime": 2417.3045, + "eval_samples_per_second": 3.182, + "eval_steps_per_second": 0.398, + "step": 64500 + }, + { + "epoch": 14.13, + "learning_rate": 1.475836635843241e-05, + "loss": 0.0126, + "step": 64550 + }, + { + "epoch": 14.14, + "learning_rate": 1.4730845442536328e-05, + "loss": 0.0128, + "step": 64600 + }, + { + "epoch": 14.16, + "learning_rate": 1.4703324526640247e-05, + "loss": 0.0128, + "step": 64650 + }, + { + "epoch": 14.17, + "learning_rate": 1.4675803610744166e-05, + "loss": 0.012, + "step": 64700 + }, + { + "epoch": 14.18, + "learning_rate": 1.4648282694848083e-05, + "loss": 0.0126, + "step": 64750 + }, + { + "epoch": 14.19, + "learning_rate": 1.4620761778952006e-05, + "loss": 0.012, + "step": 64800 + }, + { + "epoch": 14.2, + "learning_rate": 1.4593240863055923e-05, + "loss": 0.0114, + "step": 64850 + }, + { + "epoch": 14.21, + "learning_rate": 1.4565719947159842e-05, + "loss": 0.0111, + "step": 64900 + }, + { + "epoch": 14.22, + "learning_rate": 1.453819903126376e-05, + "loss": 0.0123, + "step": 64950 + }, + { + "epoch": 14.23, + "learning_rate": 1.451067811536768e-05, + "loss": 0.0111, + "step": 65000 + }, + { + "epoch": 14.23, + "eval_acc": 0.6524960997591659, + "eval_cer": 0.03636594785418272, + "eval_loss": 0.26658621430397034, + "eval_runtime": 2418.1674, + "eval_samples_per_second": 3.181, + "eval_steps_per_second": 0.398, + "step": 65000 + }, + { + "epoch": 14.24, + "learning_rate": 1.44831571994716e-05, + "loss": 0.0126, + "step": 65050 + }, + { + "epoch": 14.25, + "learning_rate": 1.4455636283575519e-05, + "loss": 0.0128, + "step": 65100 + }, + { + "epoch": 14.26, + "learning_rate": 1.4428115367679438e-05, + "loss": 0.0121, + "step": 65150 + }, + { + "epoch": 14.28, + "learning_rate": 1.4400594451783357e-05, + "loss": 0.0121, + "step": 65200 + }, + { + "epoch": 14.29, + "learning_rate": 1.4373073535887274e-05, + "loss": 0.0126, + "step": 65250 + }, + { + "epoch": 14.3, + "learning_rate": 1.4345552619991193e-05, + "loss": 0.0113, + "step": 65300 + }, + { + "epoch": 14.31, + "learning_rate": 1.4318031704095115e-05, + "loss": 0.0148, + "step": 65350 + }, + { + "epoch": 14.32, + "learning_rate": 1.4290510788199032e-05, + "loss": 0.0126, + "step": 65400 + }, + { + "epoch": 14.33, + "learning_rate": 1.426298987230295e-05, + "loss": 0.0122, + "step": 65450 + }, + { + "epoch": 14.34, + "learning_rate": 1.423546895640687e-05, + "loss": 0.0113, + "step": 65500 + }, + { + "epoch": 14.34, + "eval_acc": 0.6461258449498016, + "eval_cer": 0.03683881680436714, + "eval_loss": 0.2688175439834595, + "eval_runtime": 2406.3904, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 65500 + }, + { + "epoch": 14.35, + "learning_rate": 1.4207948040510789e-05, + "loss": 0.0108, + "step": 65550 + }, + { + "epoch": 14.36, + "learning_rate": 1.4180427124614709e-05, + "loss": 0.0137, + "step": 65600 + }, + { + "epoch": 14.37, + "learning_rate": 1.4153456627036548e-05, + "loss": 0.0121, + "step": 65650 + }, + { + "epoch": 14.39, + "learning_rate": 1.4125935711140467e-05, + "loss": 0.0106, + "step": 65700 + }, + { + "epoch": 14.4, + "learning_rate": 1.4098414795244386e-05, + "loss": 0.0174, + "step": 65750 + }, + { + "epoch": 14.41, + "learning_rate": 1.4070893879348305e-05, + "loss": 0.0121, + "step": 65800 + }, + { + "epoch": 14.42, + "learning_rate": 1.4043923381770147e-05, + "loss": 0.0117, + "step": 65850 + }, + { + "epoch": 14.43, + "learning_rate": 1.4016402465874064e-05, + "loss": 0.0113, + "step": 65900 + }, + { + "epoch": 14.44, + "learning_rate": 1.3988881549977983e-05, + "loss": 0.0112, + "step": 65950 + }, + { + "epoch": 14.45, + "learning_rate": 1.3961360634081902e-05, + "loss": 0.0139, + "step": 66000 + }, + { + "epoch": 14.45, + "eval_acc": 0.6544461777620324, + "eval_cer": 0.03642796345420691, + "eval_loss": 0.2679358124732971, + "eval_runtime": 2423.2201, + "eval_samples_per_second": 3.174, + "eval_steps_per_second": 0.397, + "step": 66000 + }, + { + "epoch": 14.46, + "learning_rate": 1.3933839718185823e-05, + "loss": 0.014, + "step": 66050 + }, + { + "epoch": 14.47, + "learning_rate": 1.3906318802289742e-05, + "loss": 0.015, + "step": 66100 + }, + { + "epoch": 14.48, + "learning_rate": 1.387879788639366e-05, + "loss": 0.0131, + "step": 66150 + }, + { + "epoch": 14.49, + "learning_rate": 1.385127697049758e-05, + "loss": 0.012, + "step": 66200 + }, + { + "epoch": 14.51, + "learning_rate": 1.3823756054601496e-05, + "loss": 0.0132, + "step": 66250 + }, + { + "epoch": 14.52, + "learning_rate": 1.3796235138705415e-05, + "loss": 0.0108, + "step": 66300 + }, + { + "epoch": 14.53, + "learning_rate": 1.3768714222809338e-05, + "loss": 0.0103, + "step": 66350 + }, + { + "epoch": 14.54, + "learning_rate": 1.3741193306913255e-05, + "loss": 0.0136, + "step": 66400 + }, + { + "epoch": 14.55, + "learning_rate": 1.3713672391017174e-05, + "loss": 0.0123, + "step": 66450 + }, + { + "epoch": 14.56, + "learning_rate": 1.3686151475121092e-05, + "loss": 0.0125, + "step": 66500 + }, + { + "epoch": 14.56, + "eval_acc": 0.650286011355917, + "eval_cer": 0.03602951322405151, + "eval_loss": 0.26753196120262146, + "eval_runtime": 2416.4508, + "eval_samples_per_second": 3.183, + "eval_steps_per_second": 0.398, + "step": 66500 + }, + { + "epoch": 14.57, + "learning_rate": 1.3658630559225011e-05, + "loss": 0.0124, + "step": 66550 + }, + { + "epoch": 14.58, + "learning_rate": 1.3631109643328928e-05, + "loss": 0.0129, + "step": 66600 + }, + { + "epoch": 14.59, + "learning_rate": 1.360358872743285e-05, + "loss": 0.0118, + "step": 66650 + }, + { + "epoch": 14.6, + "learning_rate": 1.357606781153677e-05, + "loss": 0.0118, + "step": 66700 + }, + { + "epoch": 14.61, + "learning_rate": 1.3548546895640687e-05, + "loss": 0.0099, + "step": 66750 + }, + { + "epoch": 14.63, + "learning_rate": 1.3521025979744606e-05, + "loss": 0.0118, + "step": 66800 + }, + { + "epoch": 14.64, + "learning_rate": 1.3493505063848524e-05, + "loss": 0.0113, + "step": 66850 + }, + { + "epoch": 14.65, + "learning_rate": 1.3465984147952445e-05, + "loss": 0.0108, + "step": 66900 + }, + { + "epoch": 14.66, + "learning_rate": 1.3438463232056364e-05, + "loss": 0.0113, + "step": 66950 + }, + { + "epoch": 14.67, + "learning_rate": 1.3410942316160283e-05, + "loss": 0.0153, + "step": 67000 + }, + { + "epoch": 14.67, + "eval_acc": 0.6463858553501838, + "eval_cer": 0.03614424208409626, + "eval_loss": 0.2684445381164551, + "eval_runtime": 2411.1808, + "eval_samples_per_second": 3.19, + "eval_steps_per_second": 0.399, + "step": 67000 + }, + { + "epoch": 14.68, + "learning_rate": 1.3383421400264202e-05, + "loss": 0.0128, + "step": 67050 + }, + { + "epoch": 14.69, + "learning_rate": 1.3355900484368119e-05, + "loss": 0.0101, + "step": 67100 + }, + { + "epoch": 14.7, + "learning_rate": 1.3328379568472038e-05, + "loss": 0.0115, + "step": 67150 + }, + { + "epoch": 14.71, + "learning_rate": 1.330085865257596e-05, + "loss": 0.0113, + "step": 67200 + }, + { + "epoch": 14.72, + "learning_rate": 1.3273337736679877e-05, + "loss": 0.0129, + "step": 67250 + }, + { + "epoch": 14.74, + "learning_rate": 1.3245816820783796e-05, + "loss": 0.0133, + "step": 67300 + }, + { + "epoch": 14.75, + "learning_rate": 1.3218295904887715e-05, + "loss": 0.0134, + "step": 67350 + }, + { + "epoch": 14.76, + "learning_rate": 1.3190774988991634e-05, + "loss": 0.0116, + "step": 67400 + }, + { + "epoch": 14.77, + "learning_rate": 1.3163254073095554e-05, + "loss": 0.0101, + "step": 67450 + }, + { + "epoch": 14.78, + "learning_rate": 1.3135733157199473e-05, + "loss": 0.0126, + "step": 67500 + }, + { + "epoch": 14.78, + "eval_acc": 0.6523660945589748, + "eval_cer": 0.03534889201378607, + "eval_loss": 0.26629167795181274, + "eval_runtime": 2409.1288, + "eval_samples_per_second": 3.193, + "eval_steps_per_second": 0.399, + "step": 67500 + }, + { + "epoch": 14.79, + "learning_rate": 1.3108212241303392e-05, + "loss": 0.0127, + "step": 67550 + }, + { + "epoch": 14.8, + "learning_rate": 1.3080691325407309e-05, + "loss": 0.0104, + "step": 67600 + }, + { + "epoch": 14.81, + "learning_rate": 1.3053170409511228e-05, + "loss": 0.0119, + "step": 67650 + }, + { + "epoch": 14.82, + "learning_rate": 1.3025649493615147e-05, + "loss": 0.0142, + "step": 67700 + }, + { + "epoch": 14.83, + "learning_rate": 1.2998128577719067e-05, + "loss": 0.0123, + "step": 67750 + }, + { + "epoch": 14.84, + "learning_rate": 1.2970607661822986e-05, + "loss": 0.0124, + "step": 67800 + }, + { + "epoch": 14.86, + "learning_rate": 1.2943086745926905e-05, + "loss": 0.0118, + "step": 67850 + }, + { + "epoch": 14.87, + "learning_rate": 1.2915565830030824e-05, + "loss": 0.0127, + "step": 67900 + }, + { + "epoch": 14.88, + "learning_rate": 1.2888044914134743e-05, + "loss": 0.0141, + "step": 67950 + }, + { + "epoch": 14.89, + "learning_rate": 1.2860523998238663e-05, + "loss": 0.0123, + "step": 68000 + }, + { + "epoch": 14.89, + "eval_acc": 0.6497659905551526, + "eval_cer": 0.035796954723960814, + "eval_loss": 0.2682942748069763, + "eval_runtime": 2405.0061, + "eval_samples_per_second": 3.198, + "eval_steps_per_second": 0.4, + "step": 68000 + }, + { + "epoch": 14.9, + "learning_rate": 1.2833003082342582e-05, + "loss": 0.0138, + "step": 68050 + }, + { + "epoch": 14.91, + "learning_rate": 1.28054821664465e-05, + "loss": 0.0124, + "step": 68100 + }, + { + "epoch": 14.92, + "learning_rate": 1.2777961250550418e-05, + "loss": 0.0113, + "step": 68150 + }, + { + "epoch": 14.93, + "learning_rate": 1.2750440334654337e-05, + "loss": 0.0113, + "step": 68200 + }, + { + "epoch": 14.94, + "learning_rate": 1.2722919418758256e-05, + "loss": 0.015, + "step": 68250 + }, + { + "epoch": 14.95, + "learning_rate": 1.2695398502862177e-05, + "loss": 0.0124, + "step": 68300 + }, + { + "epoch": 14.97, + "learning_rate": 1.2667877586966095e-05, + "loss": 0.0132, + "step": 68350 + }, + { + "epoch": 14.98, + "learning_rate": 1.2640356671070014e-05, + "loss": 0.0118, + "step": 68400 + }, + { + "epoch": 14.99, + "learning_rate": 1.2612835755173933e-05, + "loss": 0.0132, + "step": 68450 + }, + { + "epoch": 15.0, + "learning_rate": 1.258531483927785e-05, + "loss": 0.0119, + "step": 68500 + }, + { + "epoch": 15.0, + "eval_acc": 0.6535361413606947, + "eval_cer": 0.035972148794029137, + "eval_loss": 0.26940032839775085, + "eval_runtime": 2401.4191, + "eval_samples_per_second": 3.203, + "eval_steps_per_second": 0.401, + "step": 68500 + }, + { + "epoch": 15.01, + "learning_rate": 1.2557793923381773e-05, + "loss": 0.0098, + "step": 68550 + }, + { + "epoch": 15.02, + "learning_rate": 1.2530273007485691e-05, + "loss": 0.0087, + "step": 68600 + }, + { + "epoch": 15.03, + "learning_rate": 1.2502752091589609e-05, + "loss": 0.0081, + "step": 68650 + }, + { + "epoch": 15.04, + "learning_rate": 1.2475231175693527e-05, + "loss": 0.0081, + "step": 68700 + }, + { + "epoch": 15.05, + "learning_rate": 1.2447710259797446e-05, + "loss": 0.0081, + "step": 68750 + }, + { + "epoch": 15.06, + "learning_rate": 1.2420189343901365e-05, + "loss": 0.0087, + "step": 68800 + }, + { + "epoch": 15.07, + "learning_rate": 1.2392668428005284e-05, + "loss": 0.0093, + "step": 68850 + }, + { + "epoch": 15.09, + "learning_rate": 1.2365147512109205e-05, + "loss": 0.0082, + "step": 68900 + }, + { + "epoch": 15.1, + "learning_rate": 1.2337626596213123e-05, + "loss": 0.0089, + "step": 68950 + }, + { + "epoch": 15.11, + "learning_rate": 1.231010568031704e-05, + "loss": 0.0077, + "step": 69000 + }, + { + "epoch": 15.11, + "eval_acc": 0.6544461777620324, + "eval_cer": 0.035460520093829606, + "eval_loss": 0.2669413983821869, + "eval_runtime": 2412.9546, + "eval_samples_per_second": 3.188, + "eval_steps_per_second": 0.399, + "step": 69000 + }, + { + "epoch": 15.12, + "learning_rate": 1.2282584764420961e-05, + "loss": 0.0081, + "step": 69050 + }, + { + "epoch": 15.13, + "learning_rate": 1.225506384852488e-05, + "loss": 0.007, + "step": 69100 + }, + { + "epoch": 15.14, + "learning_rate": 1.2227542932628797e-05, + "loss": 0.0088, + "step": 69150 + }, + { + "epoch": 15.15, + "learning_rate": 1.2200022016732718e-05, + "loss": 0.0091, + "step": 69200 + }, + { + "epoch": 15.16, + "learning_rate": 1.2172501100836637e-05, + "loss": 0.0089, + "step": 69250 + }, + { + "epoch": 15.17, + "learning_rate": 1.2144980184940555e-05, + "loss": 0.0072, + "step": 69300 + }, + { + "epoch": 15.18, + "learning_rate": 1.2117459269044474e-05, + "loss": 0.0072, + "step": 69350 + }, + { + "epoch": 15.2, + "learning_rate": 1.2089938353148393e-05, + "loss": 0.0067, + "step": 69400 + }, + { + "epoch": 15.21, + "learning_rate": 1.2062417437252314e-05, + "loss": 0.0068, + "step": 69450 + }, + { + "epoch": 15.22, + "learning_rate": 1.2034896521356231e-05, + "loss": 0.0086, + "step": 69500 + }, + { + "epoch": 15.22, + "eval_acc": 0.6547061881624147, + "eval_cer": 0.03550083023384532, + "eval_loss": 0.26964884996414185, + "eval_runtime": 2402.8785, + "eval_samples_per_second": 3.201, + "eval_steps_per_second": 0.4, + "step": 69500 + }, + { + "epoch": 15.23, + "learning_rate": 1.200737560546015e-05, + "loss": 0.0091, + "step": 69550 + }, + { + "epoch": 15.24, + "learning_rate": 1.197985468956407e-05, + "loss": 0.0077, + "step": 69600 + }, + { + "epoch": 15.25, + "learning_rate": 1.1952333773667987e-05, + "loss": 0.0082, + "step": 69650 + }, + { + "epoch": 15.26, + "learning_rate": 1.1924812857771906e-05, + "loss": 0.0069, + "step": 69700 + }, + { + "epoch": 15.27, + "learning_rate": 1.1897291941875827e-05, + "loss": 0.0078, + "step": 69750 + }, + { + "epoch": 15.28, + "learning_rate": 1.1869771025979746e-05, + "loss": 0.0089, + "step": 69800 + }, + { + "epoch": 15.29, + "learning_rate": 1.1842800528401587e-05, + "loss": 0.0079, + "step": 69850 + }, + { + "epoch": 15.3, + "learning_rate": 1.1815279612505505e-05, + "loss": 0.0092, + "step": 69900 + }, + { + "epoch": 15.32, + "learning_rate": 1.1787758696609423e-05, + "loss": 0.0077, + "step": 69950 + }, + { + "epoch": 15.33, + "learning_rate": 1.1760237780713343e-05, + "loss": 0.0072, + "step": 70000 + }, + { + "epoch": 15.33, + "eval_acc": 0.6570462817658546, + "eval_cer": 0.03543881463382114, + "eval_loss": 0.2687400281429291, + "eval_runtime": 2406.8006, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 70000 + }, + { + "epoch": 15.34, + "learning_rate": 1.1732716864817262e-05, + "loss": 0.0069, + "step": 70050 + }, + { + "epoch": 15.35, + "learning_rate": 1.170519594892118e-05, + "loss": 0.0075, + "step": 70100 + }, + { + "epoch": 15.36, + "learning_rate": 1.16776750330251e-05, + "loss": 0.0074, + "step": 70150 + }, + { + "epoch": 15.37, + "learning_rate": 1.1650154117129019e-05, + "loss": 0.0072, + "step": 70200 + }, + { + "epoch": 15.38, + "learning_rate": 1.1622633201232937e-05, + "loss": 0.0073, + "step": 70250 + }, + { + "epoch": 15.39, + "learning_rate": 1.1595112285336856e-05, + "loss": 0.007, + "step": 70300 + }, + { + "epoch": 15.4, + "learning_rate": 1.1567591369440775e-05, + "loss": 0.0081, + "step": 70350 + }, + { + "epoch": 15.41, + "learning_rate": 1.1540070453544696e-05, + "loss": 0.0086, + "step": 70400 + }, + { + "epoch": 15.43, + "learning_rate": 1.1512549537648613e-05, + "loss": 0.0066, + "step": 70450 + }, + { + "epoch": 15.44, + "learning_rate": 1.1485028621752532e-05, + "loss": 0.0087, + "step": 70500 + }, + { + "epoch": 15.44, + "eval_acc": 0.659906396170059, + "eval_cer": 0.034970596853638534, + "eval_loss": 0.2686842978000641, + "eval_runtime": 2416.3582, + "eval_samples_per_second": 3.183, + "eval_steps_per_second": 0.398, + "step": 70500 + }, + { + "epoch": 15.45, + "learning_rate": 1.1457507705856452e-05, + "loss": 0.0068, + "step": 70550 + }, + { + "epoch": 15.46, + "learning_rate": 1.142998678996037e-05, + "loss": 0.0077, + "step": 70600 + }, + { + "epoch": 15.47, + "learning_rate": 1.140246587406429e-05, + "loss": 0.0089, + "step": 70650 + }, + { + "epoch": 15.48, + "learning_rate": 1.1374944958168209e-05, + "loss": 0.0098, + "step": 70700 + }, + { + "epoch": 15.49, + "learning_rate": 1.1347424042272128e-05, + "loss": 0.0088, + "step": 70750 + }, + { + "epoch": 15.5, + "learning_rate": 1.1319903126376047e-05, + "loss": 0.0071, + "step": 70800 + }, + { + "epoch": 15.51, + "learning_rate": 1.1292382210479965e-05, + "loss": 0.0068, + "step": 70850 + }, + { + "epoch": 15.52, + "learning_rate": 1.1264861294583884e-05, + "loss": 0.0078, + "step": 70900 + }, + { + "epoch": 15.53, + "learning_rate": 1.1237340378687803e-05, + "loss": 0.0083, + "step": 70950 + }, + { + "epoch": 15.55, + "learning_rate": 1.1209819462791722e-05, + "loss": 0.0082, + "step": 71000 + }, + { + "epoch": 15.55, + "eval_acc": 0.6561362453645169, + "eval_cer": 0.035308581873770346, + "eval_loss": 0.2689681351184845, + "eval_runtime": 2406.9298, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 71000 + }, + { + "epoch": 15.56, + "learning_rate": 1.1182298546895641e-05, + "loss": 0.0091, + "step": 71050 + }, + { + "epoch": 15.57, + "learning_rate": 1.115477763099956e-05, + "loss": 0.0084, + "step": 71100 + }, + { + "epoch": 15.58, + "learning_rate": 1.1127256715103479e-05, + "loss": 0.0082, + "step": 71150 + }, + { + "epoch": 15.59, + "learning_rate": 1.1099735799207397e-05, + "loss": 0.0082, + "step": 71200 + }, + { + "epoch": 15.6, + "learning_rate": 1.1072214883311318e-05, + "loss": 0.0065, + "step": 71250 + }, + { + "epoch": 15.61, + "learning_rate": 1.1044693967415235e-05, + "loss": 0.0078, + "step": 71300 + }, + { + "epoch": 15.62, + "learning_rate": 1.1017173051519156e-05, + "loss": 0.008, + "step": 71350 + }, + { + "epoch": 15.63, + "learning_rate": 1.0989652135623075e-05, + "loss": 0.0085, + "step": 71400 + }, + { + "epoch": 15.64, + "learning_rate": 1.0962131219726993e-05, + "loss": 0.008, + "step": 71450 + }, + { + "epoch": 15.65, + "learning_rate": 1.0934610303830912e-05, + "loss": 0.008, + "step": 71500 + }, + { + "epoch": 15.65, + "eval_acc": 0.659906396170059, + "eval_cer": 0.03512098468369718, + "eval_loss": 0.26970669627189636, + "eval_runtime": 2418.3397, + "eval_samples_per_second": 3.181, + "eval_steps_per_second": 0.398, + "step": 71500 + }, + { + "epoch": 15.67, + "learning_rate": 1.0907089387934831e-05, + "loss": 0.008, + "step": 71550 + }, + { + "epoch": 15.68, + "learning_rate": 1.087956847203875e-05, + "loss": 0.0079, + "step": 71600 + }, + { + "epoch": 15.69, + "learning_rate": 1.0852047556142669e-05, + "loss": 0.0073, + "step": 71650 + }, + { + "epoch": 15.7, + "learning_rate": 1.0824526640246588e-05, + "loss": 0.0105, + "step": 71700 + }, + { + "epoch": 15.71, + "learning_rate": 1.0797005724350507e-05, + "loss": 0.0089, + "step": 71750 + }, + { + "epoch": 15.72, + "learning_rate": 1.0769484808454425e-05, + "loss": 0.0086, + "step": 71800 + }, + { + "epoch": 15.73, + "learning_rate": 1.0741963892558344e-05, + "loss": 0.0078, + "step": 71850 + }, + { + "epoch": 15.74, + "learning_rate": 1.0714442976662265e-05, + "loss": 0.0075, + "step": 71900 + }, + { + "epoch": 15.75, + "learning_rate": 1.0686922060766184e-05, + "loss": 0.0079, + "step": 71950 + }, + { + "epoch": 15.76, + "learning_rate": 1.0659401144870101e-05, + "loss": 0.0161, + "step": 72000 + }, + { + "epoch": 15.76, + "eval_acc": 0.6601664065704411, + "eval_cer": 0.03435199124339728, + "eval_loss": 0.2675269842147827, + "eval_runtime": 2410.8647, + "eval_samples_per_second": 3.191, + "eval_steps_per_second": 0.399, + "step": 72000 + }, + { + "epoch": 15.78, + "learning_rate": 1.0631880228974021e-05, + "loss": 0.009, + "step": 72050 + }, + { + "epoch": 15.79, + "learning_rate": 1.060435931307794e-05, + "loss": 0.0082, + "step": 72100 + }, + { + "epoch": 15.8, + "learning_rate": 1.0576838397181857e-05, + "loss": 0.0076, + "step": 72150 + }, + { + "epoch": 15.81, + "learning_rate": 1.0549317481285778e-05, + "loss": 0.0078, + "step": 72200 + }, + { + "epoch": 15.82, + "learning_rate": 1.0521796565389697e-05, + "loss": 0.0081, + "step": 72250 + }, + { + "epoch": 15.83, + "learning_rate": 1.0494275649493616e-05, + "loss": 0.0089, + "step": 72300 + }, + { + "epoch": 15.84, + "learning_rate": 1.0466754733597535e-05, + "loss": 0.0077, + "step": 72350 + }, + { + "epoch": 15.85, + "learning_rate": 1.0439233817701453e-05, + "loss": 0.0099, + "step": 72400 + }, + { + "epoch": 15.86, + "learning_rate": 1.0411712901805372e-05, + "loss": 0.0078, + "step": 72450 + }, + { + "epoch": 15.87, + "learning_rate": 1.0384191985909291e-05, + "loss": 0.0092, + "step": 72500 + }, + { + "epoch": 15.87, + "eval_acc": 0.6631565261748367, + "eval_cer": 0.03487912384360286, + "eval_loss": 0.2682670056819916, + "eval_runtime": 2406.0992, + "eval_samples_per_second": 3.197, + "eval_steps_per_second": 0.4, + "step": 72500 + }, + { + "epoch": 15.88, + "learning_rate": 1.035667107001321e-05, + "loss": 0.0075, + "step": 72550 + }, + { + "epoch": 15.9, + "learning_rate": 1.032915015411713e-05, + "loss": 0.0071, + "step": 72600 + }, + { + "epoch": 15.91, + "learning_rate": 1.0301629238221048e-05, + "loss": 0.0083, + "step": 72650 + }, + { + "epoch": 15.92, + "learning_rate": 1.0274108322324967e-05, + "loss": 0.0104, + "step": 72700 + }, + { + "epoch": 15.93, + "learning_rate": 1.0246587406428887e-05, + "loss": 0.0069, + "step": 72750 + }, + { + "epoch": 15.94, + "learning_rate": 1.0219066490532806e-05, + "loss": 0.0078, + "step": 72800 + }, + { + "epoch": 15.95, + "learning_rate": 1.0191545574636723e-05, + "loss": 0.008, + "step": 72850 + }, + { + "epoch": 15.96, + "learning_rate": 1.0164024658740644e-05, + "loss": 0.0076, + "step": 72900 + }, + { + "epoch": 15.97, + "learning_rate": 1.0136503742844563e-05, + "loss": 0.0098, + "step": 72950 + }, + { + "epoch": 15.98, + "learning_rate": 1.0108982826948482e-05, + "loss": 0.0083, + "step": 73000 + }, + { + "epoch": 15.98, + "eval_acc": 0.6601664065704411, + "eval_cer": 0.035355093573788486, + "eval_loss": 0.26840099692344666, + "eval_runtime": 2406.8352, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 73000 + }, + { + "epoch": 15.99, + "learning_rate": 1.00814619110524e-05, + "loss": 0.0075, + "step": 73050 + }, + { + "epoch": 16.01, + "learning_rate": 1.005394099515632e-05, + "loss": 0.0073, + "step": 73100 + }, + { + "epoch": 16.02, + "learning_rate": 1.0026420079260238e-05, + "loss": 0.0059, + "step": 73150 + }, + { + "epoch": 16.03, + "learning_rate": 9.998899163364157e-06, + "loss": 0.005, + "step": 73200 + }, + { + "epoch": 16.04, + "learning_rate": 9.971378247468076e-06, + "loss": 0.0049, + "step": 73250 + }, + { + "epoch": 16.05, + "learning_rate": 9.943857331571996e-06, + "loss": 0.007, + "step": 73300 + }, + { + "epoch": 16.06, + "learning_rate": 9.916336415675914e-06, + "loss": 0.0125, + "step": 73350 + }, + { + "epoch": 16.07, + "learning_rate": 9.888815499779832e-06, + "loss": 0.0051, + "step": 73400 + }, + { + "epoch": 16.08, + "learning_rate": 9.861294583883753e-06, + "loss": 0.0048, + "step": 73450 + }, + { + "epoch": 16.09, + "learning_rate": 9.833773667987672e-06, + "loss": 0.0066, + "step": 73500 + }, + { + "epoch": 16.09, + "eval_acc": 0.6582163285675745, + "eval_cer": 0.03471323211353816, + "eval_loss": 0.2682417035102844, + "eval_runtime": 2398.8589, + "eval_samples_per_second": 3.207, + "eval_steps_per_second": 0.401, + "step": 73500 + }, + { + "epoch": 16.1, + "learning_rate": 9.806803170409513e-06, + "loss": 0.0059, + "step": 73550 + }, + { + "epoch": 16.11, + "learning_rate": 9.77928225451343e-06, + "loss": 0.0056, + "step": 73600 + }, + { + "epoch": 16.13, + "learning_rate": 9.751761338617349e-06, + "loss": 0.0053, + "step": 73650 + }, + { + "epoch": 16.14, + "learning_rate": 9.72424042272127e-06, + "loss": 0.0055, + "step": 73700 + }, + { + "epoch": 16.15, + "learning_rate": 9.696719506825188e-06, + "loss": 0.006, + "step": 73750 + }, + { + "epoch": 16.16, + "learning_rate": 9.669198590929107e-06, + "loss": 0.0063, + "step": 73800 + }, + { + "epoch": 16.17, + "learning_rate": 9.641677675033026e-06, + "loss": 0.0059, + "step": 73850 + }, + { + "epoch": 16.18, + "learning_rate": 9.614156759136945e-06, + "loss": 0.0051, + "step": 73900 + }, + { + "epoch": 16.19, + "learning_rate": 9.586635843240863e-06, + "loss": 0.0048, + "step": 73950 + }, + { + "epoch": 16.2, + "learning_rate": 9.559114927344782e-06, + "loss": 0.0052, + "step": 74000 + }, + { + "epoch": 16.2, + "eval_acc": 0.6578263129670012, + "eval_cer": 0.03493338749362402, + "eval_loss": 0.2687562108039856, + "eval_runtime": 2401.8685, + "eval_samples_per_second": 3.203, + "eval_steps_per_second": 0.401, + "step": 74000 + }, + { + "epoch": 16.21, + "learning_rate": 9.531594011448701e-06, + "loss": 0.0052, + "step": 74050 + }, + { + "epoch": 16.22, + "learning_rate": 9.50407309555262e-06, + "loss": 0.0056, + "step": 74100 + }, + { + "epoch": 16.24, + "learning_rate": 9.476552179656539e-06, + "loss": 0.0052, + "step": 74150 + }, + { + "epoch": 16.25, + "learning_rate": 9.449031263760458e-06, + "loss": 0.0052, + "step": 74200 + }, + { + "epoch": 16.26, + "learning_rate": 9.421510347864378e-06, + "loss": 0.0052, + "step": 74250 + }, + { + "epoch": 16.27, + "learning_rate": 9.393989431968296e-06, + "loss": 0.0051, + "step": 74300 + }, + { + "epoch": 16.28, + "learning_rate": 9.366468516072216e-06, + "loss": 0.0055, + "step": 74350 + }, + { + "epoch": 16.29, + "learning_rate": 9.338947600176135e-06, + "loss": 0.0051, + "step": 74400 + }, + { + "epoch": 16.3, + "learning_rate": 9.311426684280052e-06, + "loss": 0.005, + "step": 74450 + }, + { + "epoch": 16.31, + "learning_rate": 9.283905768383973e-06, + "loss": 0.0048, + "step": 74500 + }, + { + "epoch": 16.31, + "eval_acc": 0.6605564221710145, + "eval_cer": 0.03437834787340756, + "eval_loss": 0.26815786957740784, + "eval_runtime": 2399.2403, + "eval_samples_per_second": 3.206, + "eval_steps_per_second": 0.401, + "step": 74500 + }, + { + "epoch": 16.32, + "learning_rate": 9.256384852487892e-06, + "loss": 0.0048, + "step": 74550 + }, + { + "epoch": 16.33, + "learning_rate": 9.22886393659181e-06, + "loss": 0.0054, + "step": 74600 + }, + { + "epoch": 16.34, + "learning_rate": 9.20134302069573e-06, + "loss": 0.0048, + "step": 74650 + }, + { + "epoch": 16.36, + "learning_rate": 9.173822104799648e-06, + "loss": 0.005, + "step": 74700 + }, + { + "epoch": 16.37, + "learning_rate": 9.146301188903567e-06, + "loss": 0.0058, + "step": 74750 + }, + { + "epoch": 16.38, + "learning_rate": 9.118780273007486e-06, + "loss": 0.0051, + "step": 74800 + }, + { + "epoch": 16.39, + "learning_rate": 9.091259357111405e-06, + "loss": 0.0053, + "step": 74850 + }, + { + "epoch": 16.4, + "learning_rate": 9.063738441215324e-06, + "loss": 0.0062, + "step": 74900 + }, + { + "epoch": 16.41, + "learning_rate": 9.036217525319244e-06, + "loss": 0.0046, + "step": 74950 + }, + { + "epoch": 16.42, + "learning_rate": 9.008696609423161e-06, + "loss": 0.0048, + "step": 75000 + }, + { + "epoch": 16.42, + "eval_acc": 0.6596463857696767, + "eval_cer": 0.03411323118330416, + "eval_loss": 0.2690982222557068, + "eval_runtime": 2398.8209, + "eval_samples_per_second": 3.207, + "eval_steps_per_second": 0.401, + "step": 75000 + }, + { + "epoch": 16.43, + "learning_rate": 8.981175693527082e-06, + "loss": 0.0052, + "step": 75050 + }, + { + "epoch": 16.44, + "learning_rate": 8.953654777631e-06, + "loss": 0.0055, + "step": 75100 + }, + { + "epoch": 16.45, + "learning_rate": 8.926133861734918e-06, + "loss": 0.0062, + "step": 75150 + }, + { + "epoch": 16.47, + "learning_rate": 8.898612945838838e-06, + "loss": 0.0051, + "step": 75200 + }, + { + "epoch": 16.48, + "learning_rate": 8.871092029942757e-06, + "loss": 0.0058, + "step": 75250 + }, + { + "epoch": 16.49, + "learning_rate": 8.843571114046676e-06, + "loss": 0.0056, + "step": 75300 + }, + { + "epoch": 16.5, + "learning_rate": 8.816050198150595e-06, + "loss": 0.0047, + "step": 75350 + }, + { + "epoch": 16.51, + "learning_rate": 8.788529282254514e-06, + "loss": 0.0061, + "step": 75400 + }, + { + "epoch": 16.52, + "learning_rate": 8.761008366358433e-06, + "loss": 0.0056, + "step": 75450 + }, + { + "epoch": 16.53, + "learning_rate": 8.733487450462352e-06, + "loss": 0.0056, + "step": 75500 + }, + { + "epoch": 16.53, + "eval_acc": 0.6636765469756011, + "eval_cer": 0.03454578999347286, + "eval_loss": 0.2687157988548279, + "eval_runtime": 2411.9433, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 75500 + }, + { + "epoch": 16.54, + "learning_rate": 8.70596653456627e-06, + "loss": 0.0053, + "step": 75550 + }, + { + "epoch": 16.55, + "learning_rate": 8.678445618670191e-06, + "loss": 0.0051, + "step": 75600 + }, + { + "epoch": 16.56, + "learning_rate": 8.650924702774108e-06, + "loss": 0.0047, + "step": 75650 + }, + { + "epoch": 16.57, + "learning_rate": 8.623403786878027e-06, + "loss": 0.0056, + "step": 75700 + }, + { + "epoch": 16.59, + "learning_rate": 8.595882870981948e-06, + "loss": 0.0059, + "step": 75750 + }, + { + "epoch": 16.6, + "learning_rate": 8.568361955085866e-06, + "loss": 0.0057, + "step": 75800 + }, + { + "epoch": 16.61, + "learning_rate": 8.540841039189784e-06, + "loss": 0.0061, + "step": 75850 + }, + { + "epoch": 16.62, + "learning_rate": 8.513320123293704e-06, + "loss": 0.0047, + "step": 75900 + }, + { + "epoch": 16.63, + "learning_rate": 8.485799207397623e-06, + "loss": 0.0048, + "step": 75950 + }, + { + "epoch": 16.64, + "learning_rate": 8.45827829150154e-06, + "loss": 0.0058, + "step": 76000 + }, + { + "epoch": 16.64, + "eval_acc": 0.6631565261748367, + "eval_cer": 0.034531836483467415, + "eval_loss": 0.2691940367221832, + "eval_runtime": 2404.9494, + "eval_samples_per_second": 3.198, + "eval_steps_per_second": 0.4, + "step": 76000 + }, + { + "epoch": 16.65, + "learning_rate": 8.43075737560546e-06, + "loss": 0.0049, + "step": 76050 + }, + { + "epoch": 16.66, + "learning_rate": 8.40323645970938e-06, + "loss": 0.0049, + "step": 76100 + }, + { + "epoch": 16.67, + "learning_rate": 8.375715543813298e-06, + "loss": 0.0053, + "step": 76150 + }, + { + "epoch": 16.68, + "learning_rate": 8.348194627917217e-06, + "loss": 0.0057, + "step": 76200 + }, + { + "epoch": 16.69, + "learning_rate": 8.320673712021136e-06, + "loss": 0.0046, + "step": 76250 + }, + { + "epoch": 16.71, + "learning_rate": 8.293152796125057e-06, + "loss": 0.0048, + "step": 76300 + }, + { + "epoch": 16.72, + "learning_rate": 8.265631880228974e-06, + "loss": 0.0044, + "step": 76350 + }, + { + "epoch": 16.73, + "learning_rate": 8.238110964332893e-06, + "loss": 0.0056, + "step": 76400 + }, + { + "epoch": 16.74, + "learning_rate": 8.210590048436813e-06, + "loss": 0.0047, + "step": 76450 + }, + { + "epoch": 16.75, + "learning_rate": 8.18306913254073e-06, + "loss": 0.0052, + "step": 76500 + }, + { + "epoch": 16.75, + "eval_acc": 0.6653666145780854, + "eval_cer": 0.033903928533222535, + "eval_loss": 0.26831379532814026, + "eval_runtime": 2408.609, + "eval_samples_per_second": 3.194, + "eval_steps_per_second": 0.399, + "step": 76500 + }, + { + "epoch": 16.76, + "learning_rate": 8.15554821664465e-06, + "loss": 0.0053, + "step": 76550 + }, + { + "epoch": 16.77, + "learning_rate": 8.12802730074857e-06, + "loss": 0.0047, + "step": 76600 + }, + { + "epoch": 16.78, + "learning_rate": 8.100506384852489e-06, + "loss": 0.0055, + "step": 76650 + }, + { + "epoch": 16.79, + "learning_rate": 8.072985468956406e-06, + "loss": 0.0045, + "step": 76700 + }, + { + "epoch": 16.8, + "learning_rate": 8.045464553060326e-06, + "loss": 0.0068, + "step": 76750 + }, + { + "epoch": 16.82, + "learning_rate": 8.017943637164245e-06, + "loss": 0.0069, + "step": 76800 + }, + { + "epoch": 16.83, + "learning_rate": 7.990422721268164e-06, + "loss": 0.0057, + "step": 76850 + }, + { + "epoch": 16.84, + "learning_rate": 7.962901805372083e-06, + "loss": 0.0059, + "step": 76900 + }, + { + "epoch": 16.85, + "learning_rate": 7.935380889476002e-06, + "loss": 0.0056, + "step": 76950 + }, + { + "epoch": 16.86, + "learning_rate": 7.907859973579922e-06, + "loss": 0.0046, + "step": 77000 + }, + { + "epoch": 16.86, + "eval_acc": 0.6649765989775122, + "eval_cer": 0.034423309183425094, + "eval_loss": 0.2681449055671692, + "eval_runtime": 2406.5322, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 77000 + }, + { + "epoch": 16.87, + "learning_rate": 7.88033905768384e-06, + "loss": 0.0053, + "step": 77050 + }, + { + "epoch": 16.88, + "learning_rate": 7.852818141787758e-06, + "loss": 0.0048, + "step": 77100 + }, + { + "epoch": 16.89, + "learning_rate": 7.825297225891679e-06, + "loss": 0.0061, + "step": 77150 + }, + { + "epoch": 16.9, + "learning_rate": 7.797776309995596e-06, + "loss": 0.0051, + "step": 77200 + }, + { + "epoch": 16.91, + "learning_rate": 7.770255394099515e-06, + "loss": 0.0047, + "step": 77250 + }, + { + "epoch": 16.92, + "learning_rate": 7.742734478203436e-06, + "loss": 0.006, + "step": 77300 + }, + { + "epoch": 16.94, + "learning_rate": 7.715213562307354e-06, + "loss": 0.0049, + "step": 77350 + }, + { + "epoch": 16.95, + "learning_rate": 7.687692646411273e-06, + "loss": 0.0055, + "step": 77400 + }, + { + "epoch": 16.96, + "learning_rate": 7.660171730515192e-06, + "loss": 0.0053, + "step": 77450 + }, + { + "epoch": 16.97, + "learning_rate": 7.632650814619111e-06, + "loss": 0.0046, + "step": 77500 + }, + { + "epoch": 16.97, + "eval_acc": 0.6622464897734989, + "eval_cer": 0.034120983133307185, + "eval_loss": 0.26827627420425415, + "eval_runtime": 2408.0188, + "eval_samples_per_second": 3.194, + "eval_steps_per_second": 0.399, + "step": 77500 + }, + { + "epoch": 16.98, + "learning_rate": 7.605129898723031e-06, + "loss": 0.0051, + "step": 77550 + }, + { + "epoch": 16.99, + "learning_rate": 7.577608982826949e-06, + "loss": 0.0067, + "step": 77600 + }, + { + "epoch": 17.0, + "learning_rate": 7.550088066930868e-06, + "loss": 0.0043, + "step": 77650 + }, + { + "epoch": 17.01, + "learning_rate": 7.522567151034787e-06, + "loss": 0.0042, + "step": 77700 + }, + { + "epoch": 17.02, + "learning_rate": 7.495046235138705e-06, + "loss": 0.0042, + "step": 77750 + }, + { + "epoch": 17.03, + "learning_rate": 7.467525319242624e-06, + "loss": 0.0039, + "step": 77800 + }, + { + "epoch": 17.05, + "learning_rate": 7.440004403346544e-06, + "loss": 0.0038, + "step": 77850 + }, + { + "epoch": 17.06, + "learning_rate": 7.412483487450463e-06, + "loss": 0.0041, + "step": 77900 + }, + { + "epoch": 17.07, + "learning_rate": 7.384962571554381e-06, + "loss": 0.0036, + "step": 77950 + }, + { + "epoch": 17.08, + "learning_rate": 7.3574416556583005e-06, + "loss": 0.0037, + "step": 78000 + }, + { + "epoch": 17.08, + "eval_acc": 0.6625065001738811, + "eval_cer": 0.03422951043334951, + "eval_loss": 0.26783066987991333, + "eval_runtime": 2413.0451, + "eval_samples_per_second": 3.188, + "eval_steps_per_second": 0.399, + "step": 78000 + }, + { + "epoch": 17.09, + "learning_rate": 7.329920739762219e-06, + "loss": 0.0043, + "step": 78050 + }, + { + "epoch": 17.1, + "learning_rate": 7.302399823866139e-06, + "loss": 0.0033, + "step": 78100 + }, + { + "epoch": 17.11, + "learning_rate": 7.274878907970058e-06, + "loss": 0.0029, + "step": 78150 + }, + { + "epoch": 17.12, + "learning_rate": 7.247357992073976e-06, + "loss": 0.0036, + "step": 78200 + }, + { + "epoch": 17.13, + "learning_rate": 7.219837076177896e-06, + "loss": 0.0033, + "step": 78250 + }, + { + "epoch": 17.14, + "learning_rate": 7.1923161602818145e-06, + "loss": 0.0037, + "step": 78300 + }, + { + "epoch": 17.15, + "learning_rate": 7.164795244385733e-06, + "loss": 0.0034, + "step": 78350 + }, + { + "epoch": 17.17, + "learning_rate": 7.137274328489653e-06, + "loss": 0.0031, + "step": 78400 + }, + { + "epoch": 17.18, + "learning_rate": 7.109753412593571e-06, + "loss": 0.0034, + "step": 78450 + }, + { + "epoch": 17.19, + "learning_rate": 7.08223249669749e-06, + "loss": 0.0038, + "step": 78500 + }, + { + "epoch": 17.19, + "eval_acc": 0.6656266249784677, + "eval_cer": 0.03411323118330416, + "eval_loss": 0.2689207196235657, + "eval_runtime": 2411.1236, + "eval_samples_per_second": 3.19, + "eval_steps_per_second": 0.399, + "step": 78500 + }, + { + "epoch": 17.2, + "learning_rate": 7.05471158080141e-06, + "loss": 0.0038, + "step": 78550 + }, + { + "epoch": 17.21, + "learning_rate": 7.0271906649053286e-06, + "loss": 0.0037, + "step": 78600 + }, + { + "epoch": 17.22, + "learning_rate": 6.999669749009248e-06, + "loss": 0.004, + "step": 78650 + }, + { + "epoch": 17.23, + "learning_rate": 6.972148833113166e-06, + "loss": 0.0038, + "step": 78700 + }, + { + "epoch": 17.24, + "learning_rate": 6.944627917217085e-06, + "loss": 0.0032, + "step": 78750 + }, + { + "epoch": 17.25, + "learning_rate": 6.917107001321005e-06, + "loss": 0.004, + "step": 78800 + }, + { + "epoch": 17.26, + "learning_rate": 6.889586085424924e-06, + "loss": 0.003, + "step": 78850 + }, + { + "epoch": 17.28, + "learning_rate": 6.862065169528842e-06, + "loss": 0.004, + "step": 78900 + }, + { + "epoch": 17.29, + "learning_rate": 6.8345442536327614e-06, + "loss": 0.0035, + "step": 78950 + }, + { + "epoch": 17.3, + "learning_rate": 6.80702333773668e-06, + "loss": 0.004, + "step": 79000 + }, + { + "epoch": 17.3, + "eval_acc": 0.6641965677763655, + "eval_cer": 0.033806253963184436, + "eval_loss": 0.26872220635414124, + "eval_runtime": 2412.878, + "eval_samples_per_second": 3.188, + "eval_steps_per_second": 0.399, + "step": 79000 + }, + { + "epoch": 17.31, + "learning_rate": 6.779502421840598e-06, + "loss": 0.0033, + "step": 79050 + }, + { + "epoch": 17.32, + "learning_rate": 6.751981505944519e-06, + "loss": 0.0033, + "step": 79100 + }, + { + "epoch": 17.33, + "learning_rate": 6.724460590048437e-06, + "loss": 0.0038, + "step": 79150 + }, + { + "epoch": 17.34, + "learning_rate": 6.696939674152356e-06, + "loss": 0.004, + "step": 79200 + }, + { + "epoch": 17.35, + "learning_rate": 6.6694187582562754e-06, + "loss": 0.004, + "step": 79250 + }, + { + "epoch": 17.36, + "learning_rate": 6.6418978423601935e-06, + "loss": 0.0039, + "step": 79300 + }, + { + "epoch": 17.37, + "learning_rate": 6.614376926464114e-06, + "loss": 0.0043, + "step": 79350 + }, + { + "epoch": 17.38, + "learning_rate": 6.587406428885953e-06, + "loss": 0.0041, + "step": 79400 + }, + { + "epoch": 17.4, + "learning_rate": 6.559885512989873e-06, + "loss": 0.0035, + "step": 79450 + }, + { + "epoch": 17.41, + "learning_rate": 6.532364597093792e-06, + "loss": 0.004, + "step": 79500 + }, + { + "epoch": 17.41, + "eval_acc": 0.664846593777321, + "eval_cer": 0.03358609858309858, + "eval_loss": 0.26849421858787537, + "eval_runtime": 2409.5547, + "eval_samples_per_second": 3.192, + "eval_steps_per_second": 0.399, + "step": 79500 + }, + { + "epoch": 17.42, + "learning_rate": 6.50484368119771e-06, + "loss": 0.0034, + "step": 79550 + }, + { + "epoch": 17.43, + "learning_rate": 6.47732276530163e-06, + "loss": 0.0036, + "step": 79600 + }, + { + "epoch": 17.44, + "learning_rate": 6.449801849405548e-06, + "loss": 0.0037, + "step": 79650 + }, + { + "epoch": 17.45, + "learning_rate": 6.422280933509467e-06, + "loss": 0.0031, + "step": 79700 + }, + { + "epoch": 17.46, + "learning_rate": 6.394760017613387e-06, + "loss": 0.0035, + "step": 79750 + }, + { + "epoch": 17.47, + "learning_rate": 6.367239101717305e-06, + "loss": 0.0039, + "step": 79800 + }, + { + "epoch": 17.48, + "learning_rate": 6.339718185821225e-06, + "loss": 0.0036, + "step": 79850 + }, + { + "epoch": 17.49, + "learning_rate": 6.312197269925143e-06, + "loss": 0.004, + "step": 79900 + }, + { + "epoch": 17.51, + "learning_rate": 6.284676354029062e-06, + "loss": 0.0037, + "step": 79950 + }, + { + "epoch": 17.52, + "learning_rate": 6.257155438132982e-06, + "loss": 0.0037, + "step": 80000 + }, + { + "epoch": 17.52, + "eval_acc": 0.6618564741729256, + "eval_cer": 0.03385431605320318, + "eval_loss": 0.26905256509780884, + "eval_runtime": 2409.5418, + "eval_samples_per_second": 3.192, + "eval_steps_per_second": 0.399, + "step": 80000 + }, + { + "epoch": 17.53, + "learning_rate": 6.229634522236901e-06, + "loss": 0.0041, + "step": 80050 + }, + { + "epoch": 17.54, + "learning_rate": 6.20211360634082e-06, + "loss": 0.0036, + "step": 80100 + }, + { + "epoch": 17.55, + "learning_rate": 6.174592690444738e-06, + "loss": 0.0035, + "step": 80150 + }, + { + "epoch": 17.56, + "learning_rate": 6.147071774548657e-06, + "loss": 0.0041, + "step": 80200 + }, + { + "epoch": 17.57, + "learning_rate": 6.119550858652576e-06, + "loss": 0.0037, + "step": 80250 + }, + { + "epoch": 17.58, + "learning_rate": 6.092029942756495e-06, + "loss": 0.0029, + "step": 80300 + }, + { + "epoch": 17.59, + "learning_rate": 6.064509026860414e-06, + "loss": 0.0034, + "step": 80350 + }, + { + "epoch": 17.6, + "learning_rate": 6.036988110964333e-06, + "loss": 0.0034, + "step": 80400 + }, + { + "epoch": 17.61, + "learning_rate": 6.0094671950682526e-06, + "loss": 0.0039, + "step": 80450 + }, + { + "epoch": 17.63, + "learning_rate": 5.981946279172171e-06, + "loss": 0.0036, + "step": 80500 + }, + { + "epoch": 17.63, + "eval_acc": 0.6644565781767476, + "eval_cer": 0.03345586582304779, + "eval_loss": 0.26846277713775635, + "eval_runtime": 2410.0084, + "eval_samples_per_second": 3.192, + "eval_steps_per_second": 0.399, + "step": 80500 + }, + { + "epoch": 17.64, + "learning_rate": 5.95442536327609e-06, + "loss": 0.0041, + "step": 80550 + }, + { + "epoch": 17.65, + "learning_rate": 5.926904447380009e-06, + "loss": 0.0033, + "step": 80600 + }, + { + "epoch": 17.66, + "learning_rate": 5.899383531483928e-06, + "loss": 0.0029, + "step": 80650 + }, + { + "epoch": 17.67, + "learning_rate": 5.871862615587847e-06, + "loss": 0.0047, + "step": 80700 + }, + { + "epoch": 17.68, + "learning_rate": 5.844341699691766e-06, + "loss": 0.0035, + "step": 80750 + }, + { + "epoch": 17.69, + "learning_rate": 5.8168207837956854e-06, + "loss": 0.0035, + "step": 80800 + }, + { + "epoch": 17.7, + "learning_rate": 5.7892998678996035e-06, + "loss": 0.0034, + "step": 80850 + }, + { + "epoch": 17.71, + "learning_rate": 5.761778952003523e-06, + "loss": 0.0045, + "step": 80900 + }, + { + "epoch": 17.72, + "learning_rate": 5.734258036107442e-06, + "loss": 0.0033, + "step": 80950 + }, + { + "epoch": 17.73, + "learning_rate": 5.706737120211361e-06, + "loss": 0.0041, + "step": 81000 + }, + { + "epoch": 17.73, + "eval_acc": 0.666016640579041, + "eval_cer": 0.03374578875316086, + "eval_loss": 0.26800793409347534, + "eval_runtime": 2410.4526, + "eval_samples_per_second": 3.191, + "eval_steps_per_second": 0.399, + "step": 81000 + }, + { + "epoch": 17.75, + "learning_rate": 5.67921620431528e-06, + "loss": 0.0033, + "step": 81050 + }, + { + "epoch": 17.76, + "learning_rate": 5.651695288419199e-06, + "loss": 0.0044, + "step": 81100 + }, + { + "epoch": 17.77, + "learning_rate": 5.624174372523118e-06, + "loss": 0.0035, + "step": 81150 + }, + { + "epoch": 17.78, + "learning_rate": 5.596653456627036e-06, + "loss": 0.0031, + "step": 81200 + }, + { + "epoch": 17.79, + "learning_rate": 5.569132540730956e-06, + "loss": 0.0039, + "step": 81250 + }, + { + "epoch": 17.8, + "learning_rate": 5.541611624834875e-06, + "loss": 0.0043, + "step": 81300 + }, + { + "epoch": 17.81, + "learning_rate": 5.514090708938794e-06, + "loss": 0.0035, + "step": 81350 + }, + { + "epoch": 17.82, + "learning_rate": 5.486569793042713e-06, + "loss": 0.0036, + "step": 81400 + }, + { + "epoch": 17.83, + "learning_rate": 5.4590488771466315e-06, + "loss": 0.004, + "step": 81450 + }, + { + "epoch": 17.84, + "learning_rate": 5.431527961250551e-06, + "loss": 0.0033, + "step": 81500 + }, + { + "epoch": 17.84, + "eval_acc": 0.6654966197782766, + "eval_cer": 0.03366361808312881, + "eval_loss": 0.2681046426296234, + "eval_runtime": 2411.6457, + "eval_samples_per_second": 3.19, + "eval_steps_per_second": 0.399, + "step": 81500 + }, + { + "epoch": 17.86, + "learning_rate": 5.40400704535447e-06, + "loss": 0.0043, + "step": 81550 + }, + { + "epoch": 17.87, + "learning_rate": 5.376486129458388e-06, + "loss": 0.004, + "step": 81600 + }, + { + "epoch": 17.88, + "learning_rate": 5.348965213562308e-06, + "loss": 0.0039, + "step": 81650 + }, + { + "epoch": 17.89, + "learning_rate": 5.321444297666227e-06, + "loss": 0.0036, + "step": 81700 + }, + { + "epoch": 17.9, + "learning_rate": 5.2939233817701455e-06, + "loss": 0.0031, + "step": 81750 + }, + { + "epoch": 17.91, + "learning_rate": 5.266402465874064e-06, + "loss": 0.0033, + "step": 81800 + }, + { + "epoch": 17.92, + "learning_rate": 5.238881549977984e-06, + "loss": 0.0036, + "step": 81850 + }, + { + "epoch": 17.93, + "learning_rate": 5.211360634081903e-06, + "loss": 0.0036, + "step": 81900 + }, + { + "epoch": 17.94, + "learning_rate": 5.183839718185821e-06, + "loss": 0.0044, + "step": 81950 + }, + { + "epoch": 17.95, + "learning_rate": 5.156318802289741e-06, + "loss": 0.0031, + "step": 82000 + }, + { + "epoch": 17.95, + "eval_acc": 0.6632865313750277, + "eval_cer": 0.03349772635306411, + "eval_loss": 0.2679232358932495, + "eval_runtime": 2407.004, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 82000 + }, + { + "epoch": 17.96, + "learning_rate": 5.1287978863936595e-06, + "loss": 0.0033, + "step": 82050 + }, + { + "epoch": 17.98, + "learning_rate": 5.101276970497578e-06, + "loss": 0.009, + "step": 82100 + }, + { + "epoch": 17.99, + "learning_rate": 5.073756054601497e-06, + "loss": 0.0048, + "step": 82150 + }, + { + "epoch": 18.0, + "learning_rate": 5.046235138705416e-06, + "loss": 0.0032, + "step": 82200 + }, + { + "epoch": 18.01, + "learning_rate": 5.018714222809336e-06, + "loss": 0.0031, + "step": 82250 + }, + { + "epoch": 18.02, + "learning_rate": 4.991193306913254e-06, + "loss": 0.0027, + "step": 82300 + }, + { + "epoch": 18.03, + "learning_rate": 4.9636723910171735e-06, + "loss": 0.0028, + "step": 82350 + }, + { + "epoch": 18.04, + "learning_rate": 4.936151475121092e-06, + "loss": 0.0027, + "step": 82400 + }, + { + "epoch": 18.05, + "learning_rate": 4.908630559225011e-06, + "loss": 0.0028, + "step": 82450 + }, + { + "epoch": 18.06, + "learning_rate": 4.88110964332893e-06, + "loss": 0.0025, + "step": 82500 + }, + { + "epoch": 18.06, + "eval_acc": 0.6674466977811432, + "eval_cer": 0.03362175755311249, + "eval_loss": 0.26769447326660156, + "eval_runtime": 2413.9918, + "eval_samples_per_second": 3.186, + "eval_steps_per_second": 0.399, + "step": 82500 + }, + { + "epoch": 18.07, + "learning_rate": 4.853588727432849e-06, + "loss": 0.0026, + "step": 82550 + }, + { + "epoch": 18.09, + "learning_rate": 4.826067811536769e-06, + "loss": 0.0023, + "step": 82600 + }, + { + "epoch": 18.1, + "learning_rate": 4.798546895640687e-06, + "loss": 0.0025, + "step": 82650 + }, + { + "epoch": 18.11, + "learning_rate": 4.771025979744606e-06, + "loss": 0.0024, + "step": 82700 + }, + { + "epoch": 18.12, + "learning_rate": 4.743505063848525e-06, + "loss": 0.0024, + "step": 82750 + }, + { + "epoch": 18.13, + "learning_rate": 4.715984147952444e-06, + "loss": 0.0024, + "step": 82800 + }, + { + "epoch": 18.14, + "learning_rate": 4.688463232056363e-06, + "loss": 0.0026, + "step": 82850 + }, + { + "epoch": 18.15, + "learning_rate": 4.660942316160282e-06, + "loss": 0.0025, + "step": 82900 + }, + { + "epoch": 18.16, + "learning_rate": 4.6334214002642015e-06, + "loss": 0.0023, + "step": 82950 + }, + { + "epoch": 18.17, + "learning_rate": 4.6059004843681195e-06, + "loss": 0.003, + "step": 83000 + }, + { + "epoch": 18.17, + "eval_acc": 0.6671866873807609, + "eval_cer": 0.03333338501300002, + "eval_loss": 0.2686313986778259, + "eval_runtime": 2407.7179, + "eval_samples_per_second": 3.195, + "eval_steps_per_second": 0.4, + "step": 83000 + }, + { + "epoch": 18.18, + "learning_rate": 4.578379568472039e-06, + "loss": 0.0028, + "step": 83050 + }, + { + "epoch": 18.19, + "learning_rate": 4.550858652575958e-06, + "loss": 0.0027, + "step": 83100 + }, + { + "epoch": 18.21, + "learning_rate": 4.523337736679877e-06, + "loss": 0.0072, + "step": 83150 + }, + { + "epoch": 18.22, + "learning_rate": 4.496367239101718e-06, + "loss": 0.0022, + "step": 83200 + }, + { + "epoch": 18.23, + "learning_rate": 4.468846323205637e-06, + "loss": 0.0026, + "step": 83250 + }, + { + "epoch": 18.24, + "learning_rate": 4.4413254073095555e-06, + "loss": 0.0024, + "step": 83300 + }, + { + "epoch": 18.25, + "learning_rate": 4.413804491413474e-06, + "loss": 0.0024, + "step": 83350 + }, + { + "epoch": 18.26, + "learning_rate": 4.386283575517393e-06, + "loss": 0.0028, + "step": 83400 + }, + { + "epoch": 18.27, + "learning_rate": 4.358762659621313e-06, + "loss": 0.0026, + "step": 83450 + }, + { + "epoch": 18.28, + "learning_rate": 4.331241743725231e-06, + "loss": 0.0025, + "step": 83500 + }, + { + "epoch": 18.28, + "eval_acc": 0.667316692580952, + "eval_cer": 0.03348532323305928, + "eval_loss": 0.26833412051200867, + "eval_runtime": 2412.3066, + "eval_samples_per_second": 3.189, + "eval_steps_per_second": 0.399, + "step": 83500 + }, + { + "epoch": 18.29, + "learning_rate": 4.303720827829151e-06, + "loss": 0.0025, + "step": 83550 + }, + { + "epoch": 18.3, + "learning_rate": 4.2761999119330695e-06, + "loss": 0.0029, + "step": 83600 + }, + { + "epoch": 18.32, + "learning_rate": 4.248678996036988e-06, + "loss": 0.0025, + "step": 83650 + }, + { + "epoch": 18.33, + "learning_rate": 4.221158080140907e-06, + "loss": 0.0029, + "step": 83700 + }, + { + "epoch": 18.34, + "learning_rate": 4.193637164244826e-06, + "loss": 0.0026, + "step": 83750 + }, + { + "epoch": 18.35, + "learning_rate": 4.166116248348746e-06, + "loss": 0.0028, + "step": 83800 + }, + { + "epoch": 18.36, + "learning_rate": 4.138595332452664e-06, + "loss": 0.0028, + "step": 83850 + }, + { + "epoch": 18.37, + "learning_rate": 4.1110744165565835e-06, + "loss": 0.0023, + "step": 83900 + }, + { + "epoch": 18.38, + "learning_rate": 4.083553500660502e-06, + "loss": 0.0027, + "step": 83950 + }, + { + "epoch": 18.39, + "learning_rate": 4.056032584764421e-06, + "loss": 0.0028, + "step": 84000 + }, + { + "epoch": 18.39, + "eval_acc": 0.6664066561796143, + "eval_cer": 0.03336904398301393, + "eval_loss": 0.26911476254463196, + "eval_runtime": 2403.7066, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 84000 + }, + { + "epoch": 18.4, + "learning_rate": 4.02851166886834e-06, + "loss": 0.0032, + "step": 84050 + }, + { + "epoch": 18.41, + "learning_rate": 4.000990752972259e-06, + "loss": 0.0026, + "step": 84100 + }, + { + "epoch": 18.42, + "learning_rate": 3.973469837076179e-06, + "loss": 0.0029, + "step": 84150 + }, + { + "epoch": 18.44, + "learning_rate": 3.945948921180097e-06, + "loss": 0.0028, + "step": 84200 + }, + { + "epoch": 18.45, + "learning_rate": 3.918428005284016e-06, + "loss": 0.0023, + "step": 84250 + }, + { + "epoch": 18.46, + "learning_rate": 3.890907089387935e-06, + "loss": 0.0021, + "step": 84300 + }, + { + "epoch": 18.47, + "learning_rate": 3.863386173491854e-06, + "loss": 0.0028, + "step": 84350 + }, + { + "epoch": 18.48, + "learning_rate": 3.835865257595773e-06, + "loss": 0.0025, + "step": 84400 + }, + { + "epoch": 18.49, + "learning_rate": 3.808344341699692e-06, + "loss": 0.0027, + "step": 84450 + }, + { + "epoch": 18.5, + "learning_rate": 3.780823425803611e-06, + "loss": 0.0024, + "step": 84500 + }, + { + "epoch": 18.5, + "eval_acc": 0.6657566301786587, + "eval_cer": 0.03334578813300486, + "eval_loss": 0.2689560651779175, + "eval_runtime": 2404.1767, + "eval_samples_per_second": 3.199, + "eval_steps_per_second": 0.4, + "step": 84500 + }, + { + "epoch": 18.51, + "learning_rate": 3.7533025099075295e-06, + "loss": 0.0024, + "step": 84550 + }, + { + "epoch": 18.52, + "learning_rate": 3.725781594011449e-06, + "loss": 0.003, + "step": 84600 + }, + { + "epoch": 18.53, + "learning_rate": 3.698260678115368e-06, + "loss": 0.0031, + "step": 84650 + }, + { + "epoch": 18.55, + "learning_rate": 3.670739762219287e-06, + "loss": 0.0024, + "step": 84700 + }, + { + "epoch": 18.56, + "learning_rate": 3.6432188463232054e-06, + "loss": 0.0025, + "step": 84750 + }, + { + "epoch": 18.57, + "learning_rate": 3.6156979304271247e-06, + "loss": 0.0023, + "step": 84800 + }, + { + "epoch": 18.58, + "learning_rate": 3.588177014531044e-06, + "loss": 0.0027, + "step": 84850 + }, + { + "epoch": 18.59, + "learning_rate": 3.5606560986349624e-06, + "loss": 0.0022, + "step": 84900 + }, + { + "epoch": 18.6, + "learning_rate": 3.5331351827388817e-06, + "loss": 0.0029, + "step": 84950 + }, + { + "epoch": 18.61, + "learning_rate": 3.5056142668428006e-06, + "loss": 0.0027, + "step": 85000 + }, + { + "epoch": 18.61, + "eval_acc": 0.6675767029813342, + "eval_cer": 0.033201601862948626, + "eval_loss": 0.2690303921699524, + "eval_runtime": 2408.5181, + "eval_samples_per_second": 3.194, + "eval_steps_per_second": 0.399, + "step": 85000 + }, + { + "epoch": 18.62, + "learning_rate": 3.47809335094672e-06, + "loss": 0.0028, + "step": 85050 + }, + { + "epoch": 18.63, + "learning_rate": 3.4505724350506383e-06, + "loss": 0.0031, + "step": 85100 + }, + { + "epoch": 18.64, + "learning_rate": 3.4230515191545576e-06, + "loss": 0.0026, + "step": 85150 + }, + { + "epoch": 18.65, + "learning_rate": 3.395530603258477e-06, + "loss": 0.0028, + "step": 85200 + }, + { + "epoch": 18.67, + "learning_rate": 3.3680096873623957e-06, + "loss": 0.0029, + "step": 85250 + }, + { + "epoch": 18.68, + "learning_rate": 3.3404887714663146e-06, + "loss": 0.0028, + "step": 85300 + }, + { + "epoch": 18.69, + "learning_rate": 3.3129678555702334e-06, + "loss": 0.0026, + "step": 85350 + }, + { + "epoch": 18.7, + "learning_rate": 3.2854469396741527e-06, + "loss": 0.0025, + "step": 85400 + }, + { + "epoch": 18.71, + "learning_rate": 3.257926023778071e-06, + "loss": 0.0028, + "step": 85450 + }, + { + "epoch": 18.72, + "learning_rate": 3.2304051078819904e-06, + "loss": 0.003, + "step": 85500 + }, + { + "epoch": 18.72, + "eval_acc": 0.6682267289822897, + "eval_cer": 0.033145787822926855, + "eval_loss": 0.26845839619636536, + "eval_runtime": 2403.9703, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 85500 + }, + { + "epoch": 18.73, + "learning_rate": 3.2028841919859097e-06, + "loss": 0.0025, + "step": 85550 + }, + { + "epoch": 18.74, + "learning_rate": 3.1753632760898286e-06, + "loss": 0.0029, + "step": 85600 + }, + { + "epoch": 18.75, + "learning_rate": 3.147842360193747e-06, + "loss": 0.0032, + "step": 85650 + }, + { + "epoch": 18.76, + "learning_rate": 3.1203214442976663e-06, + "loss": 0.0023, + "step": 85700 + }, + { + "epoch": 18.77, + "learning_rate": 3.092800528401585e-06, + "loss": 0.0028, + "step": 85750 + }, + { + "epoch": 18.79, + "learning_rate": 3.0652796125055045e-06, + "loss": 0.0028, + "step": 85800 + }, + { + "epoch": 18.8, + "learning_rate": 3.0377586966094233e-06, + "loss": 0.0028, + "step": 85850 + }, + { + "epoch": 18.81, + "learning_rate": 3.010237780713342e-06, + "loss": 0.0027, + "step": 85900 + }, + { + "epoch": 18.82, + "learning_rate": 2.982716864817261e-06, + "loss": 0.0027, + "step": 85950 + }, + { + "epoch": 18.83, + "learning_rate": 2.9551959489211803e-06, + "loss": 0.0023, + "step": 86000 + }, + { + "epoch": 18.83, + "eval_acc": 0.6674466977811432, + "eval_cer": 0.033398501393025415, + "eval_loss": 0.2689669132232666, + "eval_runtime": 2404.0292, + "eval_samples_per_second": 3.2, + "eval_steps_per_second": 0.4, + "step": 86000 + }, + { + "epoch": 18.84, + "learning_rate": 2.927675033025099e-06, + "loss": 0.003, + "step": 86050 + }, + { + "epoch": 18.85, + "learning_rate": 2.9001541171290185e-06, + "loss": 0.0025, + "step": 86100 + }, + { + "epoch": 18.86, + "learning_rate": 2.8726332012329373e-06, + "loss": 0.0024, + "step": 86150 + }, + { + "epoch": 18.87, + "learning_rate": 2.845112285336856e-06, + "loss": 0.0022, + "step": 86200 + }, + { + "epoch": 18.88, + "learning_rate": 2.817591369440775e-06, + "loss": 0.0028, + "step": 86250 + }, + { + "epoch": 18.9, + "learning_rate": 2.790070453544694e-06, + "loss": 0.0033, + "step": 86300 + }, + { + "epoch": 18.91, + "learning_rate": 2.762549537648613e-06, + "loss": 0.0027, + "step": 86350 + }, + { + "epoch": 18.92, + "learning_rate": 2.735579040070454e-06, + "loss": 0.0025, + "step": 86400 + }, + { + "epoch": 18.93, + "learning_rate": 2.708058124174373e-06, + "loss": 0.0025, + "step": 86450 + }, + { + "epoch": 18.94, + "learning_rate": 2.6805372082782913e-06, + "loss": 0.0032, + "step": 86500 + }, + { + "epoch": 18.94, + "eval_acc": 0.6691367653836275, + "eval_cer": 0.03319850108294742, + "eval_loss": 0.2692432105541229, + "eval_runtime": 2405.0684, + "eval_samples_per_second": 3.198, + "eval_steps_per_second": 0.4, + "step": 86500 + }, + { + "epoch": 18.95, + "learning_rate": 2.6530162923822106e-06, + "loss": 0.0023, + "step": 86550 + }, + { + "epoch": 18.96, + "learning_rate": 2.6254953764861294e-06, + "loss": 0.0025, + "step": 86600 + }, + { + "epoch": 18.97, + "learning_rate": 2.5979744605900487e-06, + "loss": 0.0025, + "step": 86650 + }, + { + "epoch": 18.98, + "learning_rate": 2.5704535446939676e-06, + "loss": 0.0028, + "step": 86700 + }, + { + "epoch": 18.99, + "learning_rate": 2.5429326287978864e-06, + "loss": 0.0023, + "step": 86750 + }, + { + "epoch": 19.0, + "learning_rate": 2.5154117129018053e-06, + "loss": 0.0028, + "step": 86800 + }, + { + "epoch": 19.02, + "learning_rate": 2.4878907970057246e-06, + "loss": 0.0019, + "step": 86850 + }, + { + "epoch": 19.03, + "learning_rate": 2.4603698811096434e-06, + "loss": 0.0019, + "step": 86900 + }, + { + "epoch": 19.04, + "learning_rate": 2.4328489652135623e-06, + "loss": 0.0025, + "step": 86950 + }, + { + "epoch": 19.05, + "learning_rate": 2.4053280493174816e-06, + "loss": 0.0016, + "step": 87000 + }, + { + "epoch": 19.05, + "eval_acc": 0.6690067601834364, + "eval_cer": 0.03287291918282044, + "eval_loss": 0.2694932222366333, + "eval_runtime": 2407.3344, + "eval_samples_per_second": 3.195, + "eval_steps_per_second": 0.4, + "step": 87000 + }, + { + "epoch": 19.06, + "learning_rate": 2.3778071334214004e-06, + "loss": 0.0018, + "step": 87050 + }, + { + "epoch": 19.07, + "learning_rate": 2.3502862175253193e-06, + "loss": 0.0017, + "step": 87100 + }, + { + "epoch": 19.08, + "learning_rate": 2.322765301629238e-06, + "loss": 0.0021, + "step": 87150 + }, + { + "epoch": 19.09, + "learning_rate": 2.2952443857331574e-06, + "loss": 0.0021, + "step": 87200 + }, + { + "epoch": 19.1, + "learning_rate": 2.2677234698370763e-06, + "loss": 0.0022, + "step": 87250 + }, + { + "epoch": 19.11, + "learning_rate": 2.2402025539409956e-06, + "loss": 0.0021, + "step": 87300 + }, + { + "epoch": 19.13, + "learning_rate": 2.2126816380449144e-06, + "loss": 0.0023, + "step": 87350 + }, + { + "epoch": 19.14, + "learning_rate": 2.185160722148833e-06, + "loss": 0.0018, + "step": 87400 + }, + { + "epoch": 19.15, + "learning_rate": 2.157639806252752e-06, + "loss": 0.0017, + "step": 87450 + }, + { + "epoch": 19.16, + "learning_rate": 2.130118890356671e-06, + "loss": 0.0022, + "step": 87500 + }, + { + "epoch": 19.16, + "eval_acc": 0.6675767029813342, + "eval_cer": 0.03311012885291295, + "eval_loss": 0.26990965008735657, + "eval_runtime": 2408.5917, + "eval_samples_per_second": 3.194, + "eval_steps_per_second": 0.399, + "step": 87500 + }, + { + "epoch": 19.17, + "learning_rate": 2.1025979744605903e-06, + "loss": 0.0021, + "step": 87550 + }, + { + "epoch": 19.18, + "learning_rate": 2.075077058564509e-06, + "loss": 0.0023, + "step": 87600 + }, + { + "epoch": 19.19, + "learning_rate": 2.047556142668428e-06, + "loss": 0.0021, + "step": 87650 + }, + { + "epoch": 19.2, + "learning_rate": 2.020035226772347e-06, + "loss": 0.0022, + "step": 87700 + }, + { + "epoch": 19.21, + "learning_rate": 1.9925143108762658e-06, + "loss": 0.0022, + "step": 87750 + }, + { + "epoch": 19.22, + "learning_rate": 1.964993394980185e-06, + "loss": 0.0022, + "step": 87800 + }, + { + "epoch": 19.23, + "learning_rate": 1.937472479084104e-06, + "loss": 0.0022, + "step": 87850 + }, + { + "epoch": 19.25, + "learning_rate": 1.909951563188023e-06, + "loss": 0.0019, + "step": 87900 + }, + { + "epoch": 19.26, + "learning_rate": 1.8824306472919418e-06, + "loss": 0.002, + "step": 87950 + }, + { + "epoch": 19.27, + "learning_rate": 1.8549097313958611e-06, + "loss": 0.002, + "step": 88000 + }, + { + "epoch": 19.27, + "eval_acc": 0.6703068121853475, + "eval_cer": 0.03311322963291416, + "eval_loss": 0.2691878378391266, + "eval_runtime": 2407.6924, + "eval_samples_per_second": 3.195, + "eval_steps_per_second": 0.4, + "step": 88000 + }, + { + "epoch": 19.28, + "learning_rate": 1.8273888154997798e-06, + "loss": 0.0023, + "step": 88050 + }, + { + "epoch": 19.29, + "learning_rate": 1.799867899603699e-06, + "loss": 0.0023, + "step": 88100 + }, + { + "epoch": 19.3, + "learning_rate": 1.772346983707618e-06, + "loss": 0.0022, + "step": 88150 + }, + { + "epoch": 19.31, + "learning_rate": 1.7448260678115368e-06, + "loss": 0.0019, + "step": 88200 + }, + { + "epoch": 19.32, + "learning_rate": 1.7173051519154558e-06, + "loss": 0.0021, + "step": 88250 + }, + { + "epoch": 19.33, + "learning_rate": 1.6897842360193747e-06, + "loss": 0.0019, + "step": 88300 + }, + { + "epoch": 19.34, + "learning_rate": 1.6622633201232938e-06, + "loss": 0.0022, + "step": 88350 + }, + { + "epoch": 19.36, + "learning_rate": 1.6347424042272126e-06, + "loss": 0.0019, + "step": 88400 + }, + { + "epoch": 19.37, + "learning_rate": 1.607221488331132e-06, + "loss": 0.0023, + "step": 88450 + }, + { + "epoch": 19.38, + "learning_rate": 1.5797005724350506e-06, + "loss": 0.0024, + "step": 88500 + }, + { + "epoch": 19.38, + "eval_acc": 0.6691367653836275, + "eval_cer": 0.033103927292910534, + "eval_loss": 0.26934462785720825, + "eval_runtime": 2406.3405, + "eval_samples_per_second": 3.197, + "eval_steps_per_second": 0.4, + "step": 88500 + }, + { + "epoch": 19.39, + "learning_rate": 1.5521796565389696e-06, + "loss": 0.002, + "step": 88550 + }, + { + "epoch": 19.4, + "learning_rate": 1.5246587406428887e-06, + "loss": 0.0019, + "step": 88600 + }, + { + "epoch": 19.41, + "learning_rate": 1.4971378247468076e-06, + "loss": 0.0021, + "step": 88650 + }, + { + "epoch": 19.42, + "learning_rate": 1.4696169088507267e-06, + "loss": 0.0017, + "step": 88700 + }, + { + "epoch": 19.43, + "learning_rate": 1.4420959929546457e-06, + "loss": 0.0019, + "step": 88750 + }, + { + "epoch": 19.44, + "learning_rate": 1.415125495376486e-06, + "loss": 0.0025, + "step": 88800 + }, + { + "epoch": 19.45, + "learning_rate": 1.3876045794804052e-06, + "loss": 0.0018, + "step": 88850 + }, + { + "epoch": 19.46, + "learning_rate": 1.3600836635843242e-06, + "loss": 0.0018, + "step": 88900 + }, + { + "epoch": 19.48, + "learning_rate": 1.332562747688243e-06, + "loss": 0.0016, + "step": 88950 + }, + { + "epoch": 19.49, + "learning_rate": 1.3050418317921622e-06, + "loss": 0.0024, + "step": 89000 + }, + { + "epoch": 19.49, + "eval_acc": 0.6687467497830543, + "eval_cer": 0.032975244922860344, + "eval_loss": 0.2692321538925171, + "eval_runtime": 2407.041, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 89000 + }, + { + "epoch": 19.5, + "learning_rate": 1.277520915896081e-06, + "loss": 0.0018, + "step": 89050 + }, + { + "epoch": 19.51, + "learning_rate": 1.25e-06, + "loss": 0.0021, + "step": 89100 + }, + { + "epoch": 19.52, + "learning_rate": 1.222479084103919e-06, + "loss": 0.002, + "step": 89150 + }, + { + "epoch": 19.53, + "learning_rate": 1.194958168207838e-06, + "loss": 0.0021, + "step": 89200 + }, + { + "epoch": 19.54, + "learning_rate": 1.167437252311757e-06, + "loss": 0.0017, + "step": 89250 + }, + { + "epoch": 19.55, + "learning_rate": 1.139916336415676e-06, + "loss": 0.0019, + "step": 89300 + }, + { + "epoch": 19.56, + "learning_rate": 1.112395420519595e-06, + "loss": 0.0021, + "step": 89350 + }, + { + "epoch": 19.57, + "learning_rate": 1.084874504623514e-06, + "loss": 0.002, + "step": 89400 + }, + { + "epoch": 19.59, + "learning_rate": 1.057353588727433e-06, + "loss": 0.0024, + "step": 89450 + }, + { + "epoch": 19.6, + "learning_rate": 1.0298326728313518e-06, + "loss": 0.0022, + "step": 89500 + }, + { + "epoch": 19.6, + "eval_acc": 0.6693967757840098, + "eval_cer": 0.032951989072851275, + "eval_loss": 0.2692181169986725, + "eval_runtime": 2408.7909, + "eval_samples_per_second": 3.193, + "eval_steps_per_second": 0.399, + "step": 89500 + }, + { + "epoch": 19.61, + "learning_rate": 1.002311756935271e-06, + "loss": 0.0021, + "step": 89550 + }, + { + "epoch": 19.62, + "learning_rate": 9.747908410391898e-07, + "loss": 0.0021, + "step": 89600 + }, + { + "epoch": 19.63, + "learning_rate": 9.472699251431087e-07, + "loss": 0.002, + "step": 89650 + }, + { + "epoch": 19.64, + "learning_rate": 9.197490092470277e-07, + "loss": 0.0024, + "step": 89700 + }, + { + "epoch": 19.65, + "learning_rate": 8.922280933509468e-07, + "loss": 0.0018, + "step": 89750 + }, + { + "epoch": 19.66, + "learning_rate": 8.647071774548657e-07, + "loss": 0.0021, + "step": 89800 + }, + { + "epoch": 19.67, + "learning_rate": 8.371862615587847e-07, + "loss": 0.002, + "step": 89850 + }, + { + "epoch": 19.68, + "learning_rate": 8.096653456627038e-07, + "loss": 0.0022, + "step": 89900 + }, + { + "epoch": 19.69, + "learning_rate": 7.821444297666227e-07, + "loss": 0.0022, + "step": 89950 + }, + { + "epoch": 19.71, + "learning_rate": 7.546235138705416e-07, + "loss": 0.002, + "step": 90000 + }, + { + "epoch": 19.71, + "eval_acc": 0.6679667185819076, + "eval_cer": 0.03304036130288574, + "eval_loss": 0.2694588303565979, + "eval_runtime": 2407.5846, + "eval_samples_per_second": 3.195, + "eval_steps_per_second": 0.4, + "step": 90000 + }, + { + "epoch": 19.72, + "learning_rate": 7.271025979744607e-07, + "loss": 0.002, + "step": 90050 + }, + { + "epoch": 19.73, + "learning_rate": 6.995816820783795e-07, + "loss": 0.0026, + "step": 90100 + }, + { + "epoch": 19.74, + "learning_rate": 6.720607661822985e-07, + "loss": 0.0021, + "step": 90150 + }, + { + "epoch": 19.75, + "learning_rate": 6.450902686041392e-07, + "loss": 0.0055, + "step": 90200 + }, + { + "epoch": 19.76, + "learning_rate": 6.175693527080582e-07, + "loss": 0.0018, + "step": 90250 + }, + { + "epoch": 19.77, + "learning_rate": 5.900484368119771e-07, + "loss": 0.002, + "step": 90300 + }, + { + "epoch": 19.78, + "learning_rate": 5.625275209158961e-07, + "loss": 0.0019, + "step": 90350 + }, + { + "epoch": 19.79, + "learning_rate": 5.350066050198152e-07, + "loss": 0.0024, + "step": 90400 + }, + { + "epoch": 19.8, + "learning_rate": 5.07485689123734e-07, + "loss": 0.0024, + "step": 90450 + }, + { + "epoch": 19.81, + "learning_rate": 4.79964773227653e-07, + "loss": 0.0025, + "step": 90500 + }, + { + "epoch": 19.81, + "eval_acc": 0.6691367653836275, + "eval_cer": 0.03304036130288574, + "eval_loss": 0.26927751302719116, + "eval_runtime": 2407.5852, + "eval_samples_per_second": 3.195, + "eval_steps_per_second": 0.4, + "step": 90500 + }, + { + "epoch": 19.83, + "learning_rate": 4.52443857331572e-07, + "loss": 0.0025, + "step": 90550 + }, + { + "epoch": 19.84, + "learning_rate": 4.2492294143549103e-07, + "loss": 0.0022, + "step": 90600 + }, + { + "epoch": 19.85, + "learning_rate": 3.9740202553941e-07, + "loss": 0.0022, + "step": 90650 + }, + { + "epoch": 19.86, + "learning_rate": 3.6988110964332896e-07, + "loss": 0.0019, + "step": 90700 + }, + { + "epoch": 19.87, + "learning_rate": 3.4236019374724793e-07, + "loss": 0.0018, + "step": 90750 + }, + { + "epoch": 19.88, + "learning_rate": 3.148392778511669e-07, + "loss": 0.0026, + "step": 90800 + }, + { + "epoch": 19.89, + "learning_rate": 2.8731836195508586e-07, + "loss": 0.0019, + "step": 90850 + }, + { + "epoch": 19.9, + "learning_rate": 2.597974460590049e-07, + "loss": 0.002, + "step": 90900 + }, + { + "epoch": 19.91, + "learning_rate": 2.3227653016292382e-07, + "loss": 0.0019, + "step": 90950 + }, + { + "epoch": 19.92, + "learning_rate": 2.0475561426684281e-07, + "loss": 0.0021, + "step": 91000 + }, + { + "epoch": 19.92, + "eval_acc": 0.669656786184392, + "eval_cer": 0.03293958595284644, + "eval_loss": 0.26924172043800354, + "eval_runtime": 2406.6768, + "eval_samples_per_second": 3.196, + "eval_steps_per_second": 0.4, + "step": 91000 + } + ], + "logging_steps": 50, + "max_steps": 91340, + "num_train_epochs": 20, + "save_steps": 500, + "total_flos": 2.178949606505743e+21, + "trial_name": null, + "trial_params": null +}