{ "best_metric": 6.997288959521361, "best_model_checkpoint": "./checkpoint-18500", "epoch": 1.0, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025, "grad_norm": 10.477725982666016, "learning_rate": 1.0499999999999999e-07, "loss": 1.4117, "step": 25 }, { "epoch": 0.005, "grad_norm": 9.907259941101074, "learning_rate": 2.1437499999999999e-07, "loss": 1.4142, "step": 50 }, { "epoch": 0.0075, "grad_norm": 7.737701416015625, "learning_rate": 3.2374999999999997e-07, "loss": 1.2755, "step": 75 }, { "epoch": 0.01, "grad_norm": 5.850353240966797, "learning_rate": 4.33125e-07, "loss": 0.8498, "step": 100 }, { "epoch": 0.0125, "grad_norm": 5.063548564910889, "learning_rate": 5.425e-07, "loss": 0.5289, "step": 125 }, { "epoch": 0.015, "grad_norm": 4.789053916931152, "learning_rate": 6.518749999999999e-07, "loss": 0.5181, "step": 150 }, { "epoch": 0.0175, "grad_norm": 4.7433295249938965, "learning_rate": 7.612499999999999e-07, "loss": 0.3765, "step": 175 }, { "epoch": 0.02, "grad_norm": 4.427416801452637, "learning_rate": 8.706249999999999e-07, "loss": 0.3585, "step": 200 }, { "epoch": 0.0225, "grad_norm": 4.6719818115234375, "learning_rate": 9.8e-07, "loss": 0.3219, "step": 225 }, { "epoch": 0.025, "grad_norm": 3.8680384159088135, "learning_rate": 1.0893749999999998e-06, "loss": 0.3408, "step": 250 }, { "epoch": 0.0275, "grad_norm": 4.6198883056640625, "learning_rate": 1.19875e-06, "loss": 0.2898, "step": 275 }, { "epoch": 0.03, "grad_norm": 3.788504123687744, "learning_rate": 1.3081249999999999e-06, "loss": 0.2834, "step": 300 }, { "epoch": 0.0325, "grad_norm": 3.9427692890167236, "learning_rate": 1.4175e-06, "loss": 0.2643, "step": 325 }, { "epoch": 0.035, "grad_norm": 3.7088139057159424, "learning_rate": 1.5268749999999997e-06, "loss": 0.2827, "step": 350 }, { "epoch": 0.0375, "grad_norm": 3.7030911445617676, "learning_rate": 1.6362499999999998e-06, "loss": 0.274, "step": 375 }, { "epoch": 0.04, "grad_norm": 3.848259925842285, "learning_rate": 1.745625e-06, "loss": 0.2569, "step": 400 }, { "epoch": 0.0425, "grad_norm": 4.581095218658447, "learning_rate": 1.8549999999999998e-06, "loss": 0.2376, "step": 425 }, { "epoch": 0.045, "grad_norm": 3.210390567779541, "learning_rate": 1.9643749999999997e-06, "loss": 0.2212, "step": 450 }, { "epoch": 0.0475, "grad_norm": 6.222968578338623, "learning_rate": 2.07375e-06, "loss": 0.2237, "step": 475 }, { "epoch": 0.05, "grad_norm": 4.780581951141357, "learning_rate": 2.183125e-06, "loss": 0.2854, "step": 500 }, { "epoch": 0.05, "eval_loss": 0.3763061463832855, "eval_runtime": 710.1849, "eval_samples_per_second": 2.963, "eval_steps_per_second": 0.37, "eval_wer": 24.98364027297373, "step": 500 }, { "epoch": 0.0525, "grad_norm": 4.559229850769043, "learning_rate": 2.2925e-06, "loss": 0.2996, "step": 525 }, { "epoch": 0.055, "grad_norm": 5.154053211212158, "learning_rate": 2.401875e-06, "loss": 0.279, "step": 550 }, { "epoch": 0.0575, "grad_norm": 4.185644626617432, "learning_rate": 2.5112499999999995e-06, "loss": 0.3062, "step": 575 }, { "epoch": 0.06, "grad_norm": 4.019911289215088, "learning_rate": 2.6206249999999996e-06, "loss": 0.2767, "step": 600 }, { "epoch": 0.0625, "grad_norm": 4.4241814613342285, "learning_rate": 2.7299999999999997e-06, "loss": 0.2678, "step": 625 }, { "epoch": 0.065, "grad_norm": 3.515200614929199, "learning_rate": 2.839375e-06, "loss": 0.2363, "step": 650 }, { "epoch": 0.0675, "grad_norm": 3.769805908203125, "learning_rate": 2.94875e-06, "loss": 0.2189, "step": 675 }, { "epoch": 0.07, "grad_norm": 2.9042553901672363, "learning_rate": 3.0581249999999996e-06, "loss": 0.1915, "step": 700 }, { "epoch": 0.0725, "grad_norm": 3.3644585609436035, "learning_rate": 3.1674999999999997e-06, "loss": 0.173, "step": 725 }, { "epoch": 0.075, "grad_norm": 2.877382278442383, "learning_rate": 3.276875e-06, "loss": 0.1761, "step": 750 }, { "epoch": 0.0775, "grad_norm": 3.1041688919067383, "learning_rate": 3.38625e-06, "loss": 0.1808, "step": 775 }, { "epoch": 0.08, "grad_norm": 2.6400063037872314, "learning_rate": 3.495625e-06, "loss": 0.1803, "step": 800 }, { "epoch": 0.0825, "grad_norm": 2.7639315128326416, "learning_rate": 3.6049999999999994e-06, "loss": 0.1761, "step": 825 }, { "epoch": 0.085, "grad_norm": 3.150437831878662, "learning_rate": 3.7143749999999995e-06, "loss": 0.1568, "step": 850 }, { "epoch": 0.0875, "grad_norm": 2.7653350830078125, "learning_rate": 3.82375e-06, "loss": 0.1607, "step": 875 }, { "epoch": 0.09, "grad_norm": 2.9058663845062256, "learning_rate": 3.933125e-06, "loss": 0.1582, "step": 900 }, { "epoch": 0.0925, "grad_norm": 3.995867967605591, "learning_rate": 4.0425e-06, "loss": 0.1511, "step": 925 }, { "epoch": 0.095, "grad_norm": 2.890871047973633, "learning_rate": 4.151874999999999e-06, "loss": 0.1434, "step": 950 }, { "epoch": 0.0975, "grad_norm": 3.846221923828125, "learning_rate": 4.261249999999999e-06, "loss": 0.148, "step": 975 }, { "epoch": 0.1, "grad_norm": 2.5118014812469482, "learning_rate": 4.370624999999999e-06, "loss": 0.1425, "step": 1000 }, { "epoch": 0.1, "eval_loss": 0.3326057493686676, "eval_runtime": 708.044, "eval_samples_per_second": 2.972, "eval_steps_per_second": 0.371, "eval_wer": 19.865382817612414, "step": 1000 }, { "epoch": 0.1025, "grad_norm": 1.8984228372573853, "learning_rate": 4.363333333333333e-06, "loss": 0.1399, "step": 1025 }, { "epoch": 0.105, "grad_norm": 3.2076616287231445, "learning_rate": 4.351180555555555e-06, "loss": 0.1296, "step": 1050 }, { "epoch": 0.1075, "grad_norm": 2.5135371685028076, "learning_rate": 4.3390277777777776e-06, "loss": 0.1367, "step": 1075 }, { "epoch": 0.11, "grad_norm": 2.203226327896118, "learning_rate": 4.326875e-06, "loss": 0.14, "step": 1100 }, { "epoch": 0.1125, "grad_norm": 2.7522339820861816, "learning_rate": 4.3147222222222215e-06, "loss": 0.1353, "step": 1125 }, { "epoch": 0.115, "grad_norm": 3.1133899688720703, "learning_rate": 4.302569444444444e-06, "loss": 0.1279, "step": 1150 }, { "epoch": 0.1175, "grad_norm": 2.4988021850585938, "learning_rate": 4.290416666666666e-06, "loss": 0.1299, "step": 1175 }, { "epoch": 0.12, "grad_norm": 3.556793689727783, "learning_rate": 4.278263888888889e-06, "loss": 0.1354, "step": 1200 }, { "epoch": 0.1225, "grad_norm": 2.6451408863067627, "learning_rate": 4.266111111111111e-06, "loss": 0.1162, "step": 1225 }, { "epoch": 0.125, "grad_norm": 2.8439857959747314, "learning_rate": 4.2539583333333335e-06, "loss": 0.1196, "step": 1250 }, { "epoch": 0.1275, "grad_norm": 2.9119701385498047, "learning_rate": 4.241805555555555e-06, "loss": 0.1204, "step": 1275 }, { "epoch": 0.13, "grad_norm": 2.476182699203491, "learning_rate": 4.2296527777777775e-06, "loss": 0.1392, "step": 1300 }, { "epoch": 0.1325, "grad_norm": 2.926199436187744, "learning_rate": 4.2175e-06, "loss": 0.1266, "step": 1325 }, { "epoch": 0.135, "grad_norm": 3.0363776683807373, "learning_rate": 4.205347222222221e-06, "loss": 0.1226, "step": 1350 }, { "epoch": 0.1375, "grad_norm": 2.5452053546905518, "learning_rate": 4.193194444444444e-06, "loss": 0.1271, "step": 1375 }, { "epoch": 0.14, "grad_norm": 3.2663893699645996, "learning_rate": 4.181041666666666e-06, "loss": 0.1239, "step": 1400 }, { "epoch": 0.1425, "grad_norm": 2.63163423538208, "learning_rate": 4.168888888888889e-06, "loss": 0.1339, "step": 1425 }, { "epoch": 0.145, "grad_norm": 3.97284197807312, "learning_rate": 4.156736111111111e-06, "loss": 0.1583, "step": 1450 }, { "epoch": 0.1475, "grad_norm": 3.9999539852142334, "learning_rate": 4.144583333333333e-06, "loss": 0.1879, "step": 1475 }, { "epoch": 0.15, "grad_norm": 3.2094714641571045, "learning_rate": 4.132430555555555e-06, "loss": 0.2196, "step": 1500 }, { "epoch": 0.15, "eval_loss": 0.28015321493148804, "eval_runtime": 706.4935, "eval_samples_per_second": 2.978, "eval_steps_per_second": 0.372, "eval_wer": 16.24754604094606, "step": 1500 }, { "epoch": 0.1525, "grad_norm": 3.753072738647461, "learning_rate": 4.120277777777777e-06, "loss": 0.2047, "step": 1525 }, { "epoch": 0.155, "grad_norm": 3.337786912918091, "learning_rate": 4.108125e-06, "loss": 0.179, "step": 1550 }, { "epoch": 0.1575, "grad_norm": 4.403894424438477, "learning_rate": 4.095972222222221e-06, "loss": 0.1901, "step": 1575 }, { "epoch": 0.16, "grad_norm": 4.327981948852539, "learning_rate": 4.083819444444444e-06, "loss": 0.2483, "step": 1600 }, { "epoch": 0.1625, "grad_norm": 2.880566120147705, "learning_rate": 4.071666666666666e-06, "loss": 0.1599, "step": 1625 }, { "epoch": 0.165, "grad_norm": 2.6572518348693848, "learning_rate": 4.0595138888888885e-06, "loss": 0.1288, "step": 1650 }, { "epoch": 0.1675, "grad_norm": 2.644718647003174, "learning_rate": 4.047361111111111e-06, "loss": 0.1199, "step": 1675 }, { "epoch": 0.17, "grad_norm": 2.4643406867980957, "learning_rate": 4.035208333333333e-06, "loss": 0.1102, "step": 1700 }, { "epoch": 0.1725, "grad_norm": 2.1531741619110107, "learning_rate": 4.023055555555556e-06, "loss": 0.1076, "step": 1725 }, { "epoch": 0.175, "grad_norm": 2.2584221363067627, "learning_rate": 4.010902777777777e-06, "loss": 0.1022, "step": 1750 }, { "epoch": 0.1775, "grad_norm": 3.2955706119537354, "learning_rate": 3.99875e-06, "loss": 0.1394, "step": 1775 }, { "epoch": 0.18, "grad_norm": 2.7437069416046143, "learning_rate": 3.986597222222222e-06, "loss": 0.1682, "step": 1800 }, { "epoch": 0.1825, "grad_norm": 3.333926200866699, "learning_rate": 3.974444444444444e-06, "loss": 0.1786, "step": 1825 }, { "epoch": 0.185, "grad_norm": 4.198625564575195, "learning_rate": 3.962291666666666e-06, "loss": 0.1854, "step": 1850 }, { "epoch": 0.1875, "grad_norm": 3.8604891300201416, "learning_rate": 3.950138888888888e-06, "loss": 0.1749, "step": 1875 }, { "epoch": 0.19, "grad_norm": 3.369309902191162, "learning_rate": 3.937986111111111e-06, "loss": 0.1682, "step": 1900 }, { "epoch": 0.1925, "grad_norm": 2.824620485305786, "learning_rate": 3.925833333333333e-06, "loss": 0.1818, "step": 1925 }, { "epoch": 0.195, "grad_norm": 5.417325019836426, "learning_rate": 3.9136805555555556e-06, "loss": 0.5527, "step": 1950 }, { "epoch": 0.1975, "grad_norm": 3.8870439529418945, "learning_rate": 3.901527777777777e-06, "loss": 0.3351, "step": 1975 }, { "epoch": 0.2, "grad_norm": 3.868849754333496, "learning_rate": 3.8893749999999995e-06, "loss": 0.2338, "step": 2000 }, { "epoch": 0.2, "eval_loss": 0.2536047399044037, "eval_runtime": 708.8501, "eval_samples_per_second": 2.968, "eval_steps_per_second": 0.371, "eval_wer": 14.611573338319156, "step": 2000 }, { "epoch": 0.2025, "grad_norm": 3.378929615020752, "learning_rate": 3.877222222222222e-06, "loss": 0.2232, "step": 2025 }, { "epoch": 0.205, "grad_norm": 3.124642848968506, "learning_rate": 3.865069444444444e-06, "loss": 0.2075, "step": 2050 }, { "epoch": 0.2075, "grad_norm": 3.0724921226501465, "learning_rate": 3.852916666666667e-06, "loss": 0.1672, "step": 2075 }, { "epoch": 0.21, "grad_norm": 2.526949882507324, "learning_rate": 3.840763888888889e-06, "loss": 0.1289, "step": 2100 }, { "epoch": 0.2125, "grad_norm": 2.116128444671631, "learning_rate": 3.828611111111111e-06, "loss": 0.103, "step": 2125 }, { "epoch": 0.215, "grad_norm": 2.4315547943115234, "learning_rate": 3.816458333333333e-06, "loss": 0.1147, "step": 2150 }, { "epoch": 0.2175, "grad_norm": 2.3774735927581787, "learning_rate": 3.804305555555555e-06, "loss": 0.0938, "step": 2175 }, { "epoch": 0.22, "grad_norm": 2.0546915531158447, "learning_rate": 3.7921527777777774e-06, "loss": 0.0893, "step": 2200 }, { "epoch": 0.2225, "grad_norm": 1.9727087020874023, "learning_rate": 3.78e-06, "loss": 0.0905, "step": 2225 }, { "epoch": 0.225, "grad_norm": 2.453181266784668, "learning_rate": 3.7678472222222218e-06, "loss": 0.0901, "step": 2250 }, { "epoch": 0.2275, "grad_norm": 2.917994499206543, "learning_rate": 3.755694444444444e-06, "loss": 0.0974, "step": 2275 }, { "epoch": 0.23, "grad_norm": 2.1703708171844482, "learning_rate": 3.7435416666666666e-06, "loss": 0.0964, "step": 2300 }, { "epoch": 0.2325, "grad_norm": 1.5881738662719727, "learning_rate": 3.7313888888888886e-06, "loss": 0.0968, "step": 2325 }, { "epoch": 0.235, "grad_norm": 2.321181297302246, "learning_rate": 3.719236111111111e-06, "loss": 0.096, "step": 2350 }, { "epoch": 0.2375, "grad_norm": 1.99794340133667, "learning_rate": 3.7070833333333333e-06, "loss": 0.095, "step": 2375 }, { "epoch": 0.24, "grad_norm": 1.7585641145706177, "learning_rate": 3.694930555555555e-06, "loss": 0.096, "step": 2400 }, { "epoch": 0.2425, "grad_norm": 4.207826614379883, "learning_rate": 3.6827777777777773e-06, "loss": 0.136, "step": 2425 }, { "epoch": 0.245, "grad_norm": 3.926739454269409, "learning_rate": 3.6706249999999997e-06, "loss": 0.1441, "step": 2450 }, { "epoch": 0.2475, "grad_norm": 3.350090503692627, "learning_rate": 3.6584722222222217e-06, "loss": 0.1481, "step": 2475 }, { "epoch": 0.25, "grad_norm": 2.7415804862976074, "learning_rate": 3.646319444444444e-06, "loss": 0.1383, "step": 2500 }, { "epoch": 0.25, "eval_loss": 0.2451382428407669, "eval_runtime": 707.0274, "eval_samples_per_second": 2.976, "eval_steps_per_second": 0.372, "eval_wer": 12.896139104421799, "step": 2500 }, { "epoch": 0.2525, "grad_norm": 3.6080198287963867, "learning_rate": 3.6341666666666665e-06, "loss": 0.1621, "step": 2525 }, { "epoch": 0.255, "grad_norm": 3.2124452590942383, "learning_rate": 3.6220138888888884e-06, "loss": 0.1701, "step": 2550 }, { "epoch": 0.2575, "grad_norm": 5.213758945465088, "learning_rate": 3.609861111111111e-06, "loss": 0.1651, "step": 2575 }, { "epoch": 0.26, "grad_norm": 3.1016101837158203, "learning_rate": 3.5977083333333332e-06, "loss": 0.1704, "step": 2600 }, { "epoch": 0.2625, "grad_norm": 3.539231777191162, "learning_rate": 3.585555555555555e-06, "loss": 0.1491, "step": 2625 }, { "epoch": 0.265, "grad_norm": 3.615736246109009, "learning_rate": 3.5734027777777776e-06, "loss": 0.1419, "step": 2650 }, { "epoch": 0.2675, "grad_norm": 3.6625783443450928, "learning_rate": 3.5612499999999996e-06, "loss": 0.1549, "step": 2675 }, { "epoch": 0.27, "grad_norm": 4.053696155548096, "learning_rate": 3.5490972222222215e-06, "loss": 0.1552, "step": 2700 }, { "epoch": 0.2725, "grad_norm": 3.5233747959136963, "learning_rate": 3.536944444444444e-06, "loss": 0.1405, "step": 2725 }, { "epoch": 0.275, "grad_norm": 3.158508777618408, "learning_rate": 3.5247916666666663e-06, "loss": 0.1454, "step": 2750 }, { "epoch": 0.2775, "grad_norm": 2.932004690170288, "learning_rate": 3.5126388888888883e-06, "loss": 0.1274, "step": 2775 }, { "epoch": 0.28, "grad_norm": 3.1219630241394043, "learning_rate": 3.5004861111111107e-06, "loss": 0.1362, "step": 2800 }, { "epoch": 0.2825, "grad_norm": 3.800915479660034, "learning_rate": 3.488333333333333e-06, "loss": 0.1485, "step": 2825 }, { "epoch": 0.285, "grad_norm": 4.207260608673096, "learning_rate": 3.4761805555555555e-06, "loss": 0.1374, "step": 2850 }, { "epoch": 0.2875, "grad_norm": 3.761274576187134, "learning_rate": 3.4640277777777775e-06, "loss": 0.1486, "step": 2875 }, { "epoch": 0.29, "grad_norm": 3.5679919719696045, "learning_rate": 3.451875e-06, "loss": 0.129, "step": 2900 }, { "epoch": 0.2925, "grad_norm": 1.9846292734146118, "learning_rate": 3.4397222222222223e-06, "loss": 0.1128, "step": 2925 }, { "epoch": 0.295, "grad_norm": 1.9838203191757202, "learning_rate": 3.4275694444444443e-06, "loss": 0.0963, "step": 2950 }, { "epoch": 0.2975, "grad_norm": 2.4575395584106445, "learning_rate": 3.4154166666666662e-06, "loss": 0.0905, "step": 2975 }, { "epoch": 0.3, "grad_norm": 2.5287511348724365, "learning_rate": 3.4032638888888886e-06, "loss": 0.0848, "step": 3000 }, { "epoch": 0.3, "eval_loss": 0.22799938917160034, "eval_runtime": 706.3086, "eval_samples_per_second": 2.979, "eval_steps_per_second": 0.372, "eval_wer": 12.24642423109283, "step": 3000 }, { "epoch": 0.3025, "grad_norm": 2.036485195159912, "learning_rate": 3.3911111111111106e-06, "loss": 0.1027, "step": 3025 }, { "epoch": 0.305, "grad_norm": 2.429426670074463, "learning_rate": 3.378958333333333e-06, "loss": 0.0888, "step": 3050 }, { "epoch": 0.3075, "grad_norm": 3.2264842987060547, "learning_rate": 3.3668055555555554e-06, "loss": 0.0891, "step": 3075 }, { "epoch": 0.31, "grad_norm": 2.1697287559509277, "learning_rate": 3.3546527777777774e-06, "loss": 0.0942, "step": 3100 }, { "epoch": 0.3125, "grad_norm": 1.8250914812088013, "learning_rate": 3.3424999999999998e-06, "loss": 0.0778, "step": 3125 }, { "epoch": 0.315, "grad_norm": 2.361642837524414, "learning_rate": 3.330347222222222e-06, "loss": 0.071, "step": 3150 }, { "epoch": 0.3175, "grad_norm": 2.525662422180176, "learning_rate": 3.318194444444444e-06, "loss": 0.0812, "step": 3175 }, { "epoch": 0.32, "grad_norm": 2.92868971824646, "learning_rate": 3.3060416666666665e-06, "loss": 0.0867, "step": 3200 }, { "epoch": 0.3225, "grad_norm": 3.01084303855896, "learning_rate": 3.293888888888889e-06, "loss": 0.1004, "step": 3225 }, { "epoch": 0.325, "grad_norm": 2.5622479915618896, "learning_rate": 3.2817361111111105e-06, "loss": 0.1369, "step": 3250 }, { "epoch": 0.3275, "grad_norm": 3.024960994720459, "learning_rate": 3.269583333333333e-06, "loss": 0.1223, "step": 3275 }, { "epoch": 0.33, "grad_norm": 2.302908182144165, "learning_rate": 3.2574305555555553e-06, "loss": 0.1227, "step": 3300 }, { "epoch": 0.3325, "grad_norm": 2.6310644149780273, "learning_rate": 3.2452777777777772e-06, "loss": 0.116, "step": 3325 }, { "epoch": 0.335, "grad_norm": 3.9306952953338623, "learning_rate": 3.2331249999999996e-06, "loss": 0.1295, "step": 3350 }, { "epoch": 0.3375, "grad_norm": 3.0914199352264404, "learning_rate": 3.220972222222222e-06, "loss": 0.1377, "step": 3375 }, { "epoch": 0.34, "grad_norm": 2.895296812057495, "learning_rate": 3.208819444444444e-06, "loss": 0.1095, "step": 3400 }, { "epoch": 0.3425, "grad_norm": 2.693249464035034, "learning_rate": 3.1966666666666664e-06, "loss": 0.0994, "step": 3425 }, { "epoch": 0.345, "grad_norm": 2.2948951721191406, "learning_rate": 3.184513888888889e-06, "loss": 0.089, "step": 3450 }, { "epoch": 0.3475, "grad_norm": 3.0244202613830566, "learning_rate": 3.172361111111111e-06, "loss": 0.0792, "step": 3475 }, { "epoch": 0.35, "grad_norm": 2.2885923385620117, "learning_rate": 3.160208333333333e-06, "loss": 0.0854, "step": 3500 }, { "epoch": 0.35, "eval_loss": 0.21524131298065186, "eval_runtime": 710.795, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.37, "eval_wer": 11.414415256614003, "step": 3500 }, { "epoch": 0.3525, "grad_norm": 2.1606826782226562, "learning_rate": 3.148055555555555e-06, "loss": 0.0792, "step": 3525 }, { "epoch": 0.355, "grad_norm": 2.569694757461548, "learning_rate": 3.135902777777777e-06, "loss": 0.0741, "step": 3550 }, { "epoch": 0.3575, "grad_norm": 2.1960206031799316, "learning_rate": 3.1237499999999995e-06, "loss": 0.0898, "step": 3575 }, { "epoch": 0.36, "grad_norm": 1.5436307191848755, "learning_rate": 3.111597222222222e-06, "loss": 0.0866, "step": 3600 }, { "epoch": 0.3625, "grad_norm": 1.547991394996643, "learning_rate": 3.0994444444444443e-06, "loss": 0.0773, "step": 3625 }, { "epoch": 0.365, "grad_norm": 1.775875449180603, "learning_rate": 3.0872916666666663e-06, "loss": 0.0781, "step": 3650 }, { "epoch": 0.3675, "grad_norm": 2.008694887161255, "learning_rate": 3.0751388888888887e-06, "loss": 0.0776, "step": 3675 }, { "epoch": 0.37, "grad_norm": 2.060523509979248, "learning_rate": 3.062986111111111e-06, "loss": 0.0705, "step": 3700 }, { "epoch": 0.3725, "grad_norm": 1.7352454662322998, "learning_rate": 3.050833333333333e-06, "loss": 0.0766, "step": 3725 }, { "epoch": 0.375, "grad_norm": 2.0210118293762207, "learning_rate": 3.0386805555555555e-06, "loss": 0.0738, "step": 3750 }, { "epoch": 0.3775, "grad_norm": 2.9827768802642822, "learning_rate": 3.026527777777778e-06, "loss": 0.0805, "step": 3775 }, { "epoch": 0.38, "grad_norm": 1.917271614074707, "learning_rate": 3.0143749999999994e-06, "loss": 0.0809, "step": 3800 }, { "epoch": 0.3825, "grad_norm": 1.8139671087265015, "learning_rate": 3.002222222222222e-06, "loss": 0.0793, "step": 3825 }, { "epoch": 0.385, "grad_norm": 1.2721606492996216, "learning_rate": 2.990069444444444e-06, "loss": 0.076, "step": 3850 }, { "epoch": 0.3875, "grad_norm": 1.770393967628479, "learning_rate": 2.977916666666666e-06, "loss": 0.0852, "step": 3875 }, { "epoch": 0.39, "grad_norm": 3.8949978351593018, "learning_rate": 2.9657638888888886e-06, "loss": 0.1024, "step": 3900 }, { "epoch": 0.3925, "grad_norm": 3.057736873626709, "learning_rate": 2.953611111111111e-06, "loss": 0.132, "step": 3925 }, { "epoch": 0.395, "grad_norm": 2.3417954444885254, "learning_rate": 2.941458333333333e-06, "loss": 0.1353, "step": 3950 }, { "epoch": 0.3975, "grad_norm": 3.2658793926239014, "learning_rate": 2.9293055555555553e-06, "loss": 0.1273, "step": 3975 }, { "epoch": 0.4, "grad_norm": 2.675097942352295, "learning_rate": 2.9171527777777777e-06, "loss": 0.1304, "step": 4000 }, { "epoch": 0.4, "eval_loss": 0.20972196757793427, "eval_runtime": 712.1718, "eval_samples_per_second": 2.954, "eval_steps_per_second": 0.369, "eval_wer": 11.143311208750116, "step": 4000 }, { "epoch": 0.4025, "grad_norm": 2.3427741527557373, "learning_rate": 2.9049999999999997e-06, "loss": 0.1199, "step": 4025 }, { "epoch": 0.405, "grad_norm": 2.558414936065674, "learning_rate": 2.892847222222222e-06, "loss": 0.0967, "step": 4050 }, { "epoch": 0.4075, "grad_norm": 2.3467886447906494, "learning_rate": 2.8806944444444445e-06, "loss": 0.0753, "step": 4075 }, { "epoch": 0.41, "grad_norm": 1.8797414302825928, "learning_rate": 2.868541666666666e-06, "loss": 0.0722, "step": 4100 }, { "epoch": 0.4125, "grad_norm": 1.9732948541641235, "learning_rate": 2.8563888888888885e-06, "loss": 0.079, "step": 4125 }, { "epoch": 0.415, "grad_norm": 1.6624417304992676, "learning_rate": 2.844236111111111e-06, "loss": 0.0759, "step": 4150 }, { "epoch": 0.4175, "grad_norm": 2.2147104740142822, "learning_rate": 2.832083333333333e-06, "loss": 0.0785, "step": 4175 }, { "epoch": 0.42, "grad_norm": 2.9194717407226562, "learning_rate": 2.8199305555555552e-06, "loss": 0.082, "step": 4200 }, { "epoch": 0.4225, "grad_norm": 1.4472960233688354, "learning_rate": 2.8077777777777776e-06, "loss": 0.0648, "step": 4225 }, { "epoch": 0.425, "grad_norm": 2.260117292404175, "learning_rate": 2.795625e-06, "loss": 0.0704, "step": 4250 }, { "epoch": 0.4275, "grad_norm": 1.4469400644302368, "learning_rate": 2.783472222222222e-06, "loss": 0.0596, "step": 4275 }, { "epoch": 0.43, "grad_norm": 1.5367531776428223, "learning_rate": 2.7713194444444444e-06, "loss": 0.0634, "step": 4300 }, { "epoch": 0.4325, "grad_norm": 1.6302293539047241, "learning_rate": 2.759166666666667e-06, "loss": 0.0632, "step": 4325 }, { "epoch": 0.435, "grad_norm": 2.5782358646392822, "learning_rate": 2.7470138888888888e-06, "loss": 0.0726, "step": 4350 }, { "epoch": 0.4375, "grad_norm": 2.604379415512085, "learning_rate": 2.7348611111111107e-06, "loss": 0.1113, "step": 4375 }, { "epoch": 0.44, "grad_norm": 2.614187479019165, "learning_rate": 2.7227083333333327e-06, "loss": 0.117, "step": 4400 }, { "epoch": 0.4425, "grad_norm": 3.8513214588165283, "learning_rate": 2.710555555555555e-06, "loss": 0.1195, "step": 4425 }, { "epoch": 0.445, "grad_norm": 3.170259475708008, "learning_rate": 2.6984027777777775e-06, "loss": 0.1101, "step": 4450 }, { "epoch": 0.4475, "grad_norm": 2.086357593536377, "learning_rate": 2.68625e-06, "loss": 0.1257, "step": 4475 }, { "epoch": 0.45, "grad_norm": 2.893723249435425, "learning_rate": 2.674097222222222e-06, "loss": 0.1328, "step": 4500 }, { "epoch": 0.45, "eval_loss": 0.2054819017648697, "eval_runtime": 697.6383, "eval_samples_per_second": 3.016, "eval_steps_per_second": 0.377, "eval_wer": 10.601103113022342, "step": 4500 }, { "epoch": 0.4525, "grad_norm": 1.4827159643173218, "learning_rate": 2.6619444444444443e-06, "loss": 0.0913, "step": 4525 }, { "epoch": 0.455, "grad_norm": 2.4595947265625, "learning_rate": 2.6497916666666667e-06, "loss": 0.0732, "step": 4550 }, { "epoch": 0.4575, "grad_norm": 2.2387216091156006, "learning_rate": 2.6376388888888886e-06, "loss": 0.0683, "step": 4575 }, { "epoch": 0.46, "grad_norm": 1.948084831237793, "learning_rate": 2.625486111111111e-06, "loss": 0.0583, "step": 4600 }, { "epoch": 0.4625, "grad_norm": 1.7423083782196045, "learning_rate": 2.6133333333333334e-06, "loss": 0.0671, "step": 4625 }, { "epoch": 0.465, "grad_norm": 3.097956657409668, "learning_rate": 2.601180555555555e-06, "loss": 0.0634, "step": 4650 }, { "epoch": 0.4675, "grad_norm": 1.8214906454086304, "learning_rate": 2.5890277777777774e-06, "loss": 0.06, "step": 4675 }, { "epoch": 0.47, "grad_norm": 1.5531638860702515, "learning_rate": 2.5768749999999998e-06, "loss": 0.0613, "step": 4700 }, { "epoch": 0.4725, "grad_norm": 2.4331068992614746, "learning_rate": 2.5647222222222218e-06, "loss": 0.0671, "step": 4725 }, { "epoch": 0.475, "grad_norm": 1.6229099035263062, "learning_rate": 2.552569444444444e-06, "loss": 0.0692, "step": 4750 }, { "epoch": 0.4775, "grad_norm": 1.9549888372421265, "learning_rate": 2.5404166666666666e-06, "loss": 0.067, "step": 4775 }, { "epoch": 0.48, "grad_norm": 1.8940644264221191, "learning_rate": 2.5282638888888885e-06, "loss": 0.0628, "step": 4800 }, { "epoch": 0.4825, "grad_norm": 2.394017457962036, "learning_rate": 2.516111111111111e-06, "loss": 0.062, "step": 4825 }, { "epoch": 0.485, "grad_norm": 1.7159110307693481, "learning_rate": 2.5039583333333333e-06, "loss": 0.0683, "step": 4850 }, { "epoch": 0.4875, "grad_norm": 2.217200994491577, "learning_rate": 2.4918055555555553e-06, "loss": 0.072, "step": 4875 }, { "epoch": 0.49, "grad_norm": 2.1954920291900635, "learning_rate": 2.4796527777777777e-06, "loss": 0.0648, "step": 4900 }, { "epoch": 0.4925, "grad_norm": 1.6776762008666992, "learning_rate": 2.4674999999999997e-06, "loss": 0.0651, "step": 4925 }, { "epoch": 0.495, "grad_norm": 1.6577339172363281, "learning_rate": 2.4553472222222216e-06, "loss": 0.0793, "step": 4950 }, { "epoch": 0.4975, "grad_norm": 1.3751758337020874, "learning_rate": 2.443194444444444e-06, "loss": 0.0801, "step": 4975 }, { "epoch": 0.5, "grad_norm": 1.7591667175292969, "learning_rate": 2.4310416666666664e-06, "loss": 0.0737, "step": 5000 }, { "epoch": 0.5, "eval_loss": 0.2079373449087143, "eval_runtime": 741.2693, "eval_samples_per_second": 2.838, "eval_steps_per_second": 0.355, "eval_wer": 10.535664204917266, "step": 5000 }, { "epoch": 0.5025, "grad_norm": 1.534965991973877, "learning_rate": 2.4188888888888884e-06, "loss": 0.0951, "step": 5025 }, { "epoch": 0.505, "grad_norm": 2.0740888118743896, "learning_rate": 2.406736111111111e-06, "loss": 0.0923, "step": 5050 }, { "epoch": 0.5075, "grad_norm": 2.0447919368743896, "learning_rate": 2.394583333333333e-06, "loss": 0.0912, "step": 5075 }, { "epoch": 0.51, "grad_norm": 2.5050876140594482, "learning_rate": 2.3824305555555556e-06, "loss": 0.091, "step": 5100 }, { "epoch": 0.5125, "grad_norm": 2.5950522422790527, "learning_rate": 2.3702777777777776e-06, "loss": 0.1221, "step": 5125 }, { "epoch": 0.515, "grad_norm": 2.09688401222229, "learning_rate": 2.358125e-06, "loss": 0.1183, "step": 5150 }, { "epoch": 0.5175, "grad_norm": 1.8842414617538452, "learning_rate": 2.3459722222222224e-06, "loss": 0.103, "step": 5175 }, { "epoch": 0.52, "grad_norm": 2.3348171710968018, "learning_rate": 2.3338194444444443e-06, "loss": 0.0837, "step": 5200 }, { "epoch": 0.5225, "grad_norm": 2.318730354309082, "learning_rate": 2.3216666666666663e-06, "loss": 0.0742, "step": 5225 }, { "epoch": 0.525, "grad_norm": 2.2568023204803467, "learning_rate": 2.3095138888888887e-06, "loss": 0.0683, "step": 5250 }, { "epoch": 0.5275, "grad_norm": 2.0094385147094727, "learning_rate": 2.2973611111111107e-06, "loss": 0.0721, "step": 5275 }, { "epoch": 0.53, "grad_norm": 1.421152949333191, "learning_rate": 2.285208333333333e-06, "loss": 0.0589, "step": 5300 }, { "epoch": 0.5325, "grad_norm": 2.1582698822021484, "learning_rate": 2.2730555555555555e-06, "loss": 0.0586, "step": 5325 }, { "epoch": 0.535, "grad_norm": 1.6149888038635254, "learning_rate": 2.2609027777777775e-06, "loss": 0.0672, "step": 5350 }, { "epoch": 0.5375, "grad_norm": 1.0912553071975708, "learning_rate": 2.24875e-06, "loss": 0.0622, "step": 5375 }, { "epoch": 0.54, "grad_norm": 1.4000557661056519, "learning_rate": 2.2365972222222223e-06, "loss": 0.0582, "step": 5400 }, { "epoch": 0.5425, "grad_norm": 2.2583179473876953, "learning_rate": 2.2244444444444442e-06, "loss": 0.0736, "step": 5425 }, { "epoch": 0.545, "grad_norm": 1.2943270206451416, "learning_rate": 2.2122916666666666e-06, "loss": 0.0798, "step": 5450 }, { "epoch": 0.5475, "grad_norm": 2.3791120052337646, "learning_rate": 2.200138888888889e-06, "loss": 0.0652, "step": 5475 }, { "epoch": 0.55, "grad_norm": 2.3950374126434326, "learning_rate": 2.1879861111111106e-06, "loss": 0.0804, "step": 5500 }, { "epoch": 0.55, "eval_loss": 0.21333837509155273, "eval_runtime": 736.1139, "eval_samples_per_second": 2.858, "eval_steps_per_second": 0.357, "eval_wer": 10.114985509956062, "step": 5500 }, { "epoch": 0.5525, "grad_norm": 2.716646194458008, "learning_rate": 2.1758333333333334e-06, "loss": 0.1312, "step": 5525 }, { "epoch": 0.555, "grad_norm": 3.002995014190674, "learning_rate": 2.1636805555555554e-06, "loss": 0.1307, "step": 5550 }, { "epoch": 0.5575, "grad_norm": 3.199950695037842, "learning_rate": 2.1515277777777773e-06, "loss": 0.1318, "step": 5575 }, { "epoch": 0.56, "grad_norm": 2.7231669425964355, "learning_rate": 2.1393749999999997e-06, "loss": 0.1124, "step": 5600 }, { "epoch": 0.5625, "grad_norm": 3.109504222869873, "learning_rate": 2.127222222222222e-06, "loss": 0.099, "step": 5625 }, { "epoch": 0.565, "grad_norm": 2.460961103439331, "learning_rate": 2.115069444444444e-06, "loss": 0.108, "step": 5650 }, { "epoch": 0.5675, "grad_norm": 2.3244142532348633, "learning_rate": 2.1029166666666665e-06, "loss": 0.1019, "step": 5675 }, { "epoch": 0.57, "grad_norm": 2.6423542499542236, "learning_rate": 2.0907638888888885e-06, "loss": 0.1013, "step": 5700 }, { "epoch": 0.5725, "grad_norm": 2.9905734062194824, "learning_rate": 2.078611111111111e-06, "loss": 0.1023, "step": 5725 }, { "epoch": 0.575, "grad_norm": 2.482689142227173, "learning_rate": 2.0664583333333333e-06, "loss": 0.1054, "step": 5750 }, { "epoch": 0.5775, "grad_norm": 3.2029247283935547, "learning_rate": 2.0543055555555553e-06, "loss": 0.0979, "step": 5775 }, { "epoch": 0.58, "grad_norm": 2.660365343093872, "learning_rate": 2.0421527777777776e-06, "loss": 0.1061, "step": 5800 }, { "epoch": 0.5825, "grad_norm": 1.6689801216125488, "learning_rate": 2.03e-06, "loss": 0.0835, "step": 5825 }, { "epoch": 0.585, "grad_norm": 2.385451078414917, "learning_rate": 2.017847222222222e-06, "loss": 0.0804, "step": 5850 }, { "epoch": 0.5875, "grad_norm": 1.6894644498825073, "learning_rate": 2.0056944444444444e-06, "loss": 0.0645, "step": 5875 }, { "epoch": 0.59, "grad_norm": 1.8109933137893677, "learning_rate": 1.9935416666666664e-06, "loss": 0.0636, "step": 5900 }, { "epoch": 0.5925, "grad_norm": 1.9173272848129272, "learning_rate": 1.981388888888889e-06, "loss": 0.0655, "step": 5925 }, { "epoch": 0.595, "grad_norm": 1.292048454284668, "learning_rate": 1.969236111111111e-06, "loss": 0.0685, "step": 5950 }, { "epoch": 0.5975, "grad_norm": 1.9021435976028442, "learning_rate": 1.957083333333333e-06, "loss": 0.0827, "step": 5975 }, { "epoch": 0.6, "grad_norm": 1.9284971952438354, "learning_rate": 1.944930555555555e-06, "loss": 0.0964, "step": 6000 }, { "epoch": 0.6, "eval_loss": 0.19876334071159363, "eval_runtime": 737.2676, "eval_samples_per_second": 2.854, "eval_steps_per_second": 0.357, "eval_wer": 9.460596428905301, "step": 6000 }, { "epoch": 0.6025, "grad_norm": 2.5324220657348633, "learning_rate": 1.9327777777777775e-06, "loss": 0.0984, "step": 6025 }, { "epoch": 0.605, "grad_norm": 3.8770506381988525, "learning_rate": 1.920625e-06, "loss": 0.1188, "step": 6050 }, { "epoch": 0.6075, "grad_norm": 2.608128070831299, "learning_rate": 1.908472222222222e-06, "loss": 0.1205, "step": 6075 }, { "epoch": 0.61, "grad_norm": 3.6785027980804443, "learning_rate": 1.8968055555555555e-06, "loss": 0.124, "step": 6100 }, { "epoch": 0.6125, "grad_norm": 3.3710997104644775, "learning_rate": 1.8846527777777775e-06, "loss": 0.0956, "step": 6125 }, { "epoch": 0.615, "grad_norm": 1.9446475505828857, "learning_rate": 1.8724999999999999e-06, "loss": 0.0695, "step": 6150 }, { "epoch": 0.6175, "grad_norm": 1.7988864183425903, "learning_rate": 1.860347222222222e-06, "loss": 0.0593, "step": 6175 }, { "epoch": 0.62, "grad_norm": 2.8231289386749268, "learning_rate": 1.8481944444444442e-06, "loss": 0.0618, "step": 6200 }, { "epoch": 0.6225, "grad_norm": 1.2713103294372559, "learning_rate": 1.8360416666666666e-06, "loss": 0.0589, "step": 6225 }, { "epoch": 0.625, "grad_norm": 1.4379982948303223, "learning_rate": 1.8238888888888886e-06, "loss": 0.0653, "step": 6250 }, { "epoch": 0.6275, "grad_norm": 1.9029752016067505, "learning_rate": 1.8117361111111108e-06, "loss": 0.0719, "step": 6275 }, { "epoch": 0.63, "grad_norm": 1.8491439819335938, "learning_rate": 1.7995833333333332e-06, "loss": 0.0755, "step": 6300 }, { "epoch": 0.6325, "grad_norm": 2.654759645462036, "learning_rate": 1.7874305555555554e-06, "loss": 0.1089, "step": 6325 }, { "epoch": 0.635, "grad_norm": 2.6172823905944824, "learning_rate": 1.7752777777777778e-06, "loss": 0.1134, "step": 6350 }, { "epoch": 0.6375, "grad_norm": 2.702260732650757, "learning_rate": 1.763125e-06, "loss": 0.0968, "step": 6375 }, { "epoch": 0.64, "grad_norm": 4.263683319091797, "learning_rate": 1.750972222222222e-06, "loss": 0.1098, "step": 6400 }, { "epoch": 0.6425, "grad_norm": 2.830216646194458, "learning_rate": 1.7388194444444443e-06, "loss": 0.112, "step": 6425 }, { "epoch": 0.645, "grad_norm": 2.4576544761657715, "learning_rate": 1.7266666666666665e-06, "loss": 0.1098, "step": 6450 }, { "epoch": 0.6475, "grad_norm": 2.05035400390625, "learning_rate": 1.7145138888888887e-06, "loss": 0.08, "step": 6475 }, { "epoch": 0.65, "grad_norm": 3.8243772983551025, "learning_rate": 1.702361111111111e-06, "loss": 0.0811, "step": 6500 }, { "epoch": 0.65, "eval_loss": 0.2018933892250061, "eval_runtime": 748.1968, "eval_samples_per_second": 2.812, "eval_steps_per_second": 0.352, "eval_wer": 9.493315882957837, "step": 6500 }, { "epoch": 0.6525, "grad_norm": 2.500188112258911, "learning_rate": 1.690208333333333e-06, "loss": 0.0931, "step": 6525 }, { "epoch": 0.655, "grad_norm": 3.491649866104126, "learning_rate": 1.6780555555555553e-06, "loss": 0.0994, "step": 6550 }, { "epoch": 0.6575, "grad_norm": 3.9422085285186768, "learning_rate": 1.6659027777777777e-06, "loss": 0.1092, "step": 6575 }, { "epoch": 0.66, "grad_norm": 1.6561498641967773, "learning_rate": 1.6537499999999998e-06, "loss": 0.0993, "step": 6600 }, { "epoch": 0.6625, "grad_norm": 2.729180335998535, "learning_rate": 1.6415972222222222e-06, "loss": 0.0961, "step": 6625 }, { "epoch": 0.665, "grad_norm": 2.735954999923706, "learning_rate": 1.6294444444444444e-06, "loss": 0.0693, "step": 6650 }, { "epoch": 0.6675, "grad_norm": 2.0468339920043945, "learning_rate": 1.6172916666666664e-06, "loss": 0.0656, "step": 6675 }, { "epoch": 0.67, "grad_norm": 2.0588066577911377, "learning_rate": 1.6051388888888888e-06, "loss": 0.0634, "step": 6700 }, { "epoch": 0.6725, "grad_norm": 1.9771475791931152, "learning_rate": 1.592986111111111e-06, "loss": 0.0549, "step": 6725 }, { "epoch": 0.675, "grad_norm": 1.8498605489730835, "learning_rate": 1.5808333333333332e-06, "loss": 0.05, "step": 6750 }, { "epoch": 0.6775, "grad_norm": 1.4784091711044312, "learning_rate": 1.5686805555555556e-06, "loss": 0.0597, "step": 6775 }, { "epoch": 0.68, "grad_norm": 2.249415636062622, "learning_rate": 1.5565277777777777e-06, "loss": 0.0623, "step": 6800 }, { "epoch": 0.6825, "grad_norm": 2.035832405090332, "learning_rate": 1.5443749999999997e-06, "loss": 0.0661, "step": 6825 }, { "epoch": 0.685, "grad_norm": 1.8243813514709473, "learning_rate": 1.5322222222222221e-06, "loss": 0.0657, "step": 6850 }, { "epoch": 0.6875, "grad_norm": 1.8485840559005737, "learning_rate": 1.5200694444444443e-06, "loss": 0.064, "step": 6875 }, { "epoch": 0.69, "grad_norm": 1.0504404306411743, "learning_rate": 1.5079166666666665e-06, "loss": 0.0566, "step": 6900 }, { "epoch": 0.6925, "grad_norm": 1.517008900642395, "learning_rate": 1.4957638888888889e-06, "loss": 0.055, "step": 6925 }, { "epoch": 0.695, "grad_norm": 2.008049488067627, "learning_rate": 1.4836111111111109e-06, "loss": 0.0488, "step": 6950 }, { "epoch": 0.6975, "grad_norm": 2.667545795440674, "learning_rate": 1.471458333333333e-06, "loss": 0.0618, "step": 6975 }, { "epoch": 0.7, "grad_norm": 2.0049033164978027, "learning_rate": 1.4593055555555554e-06, "loss": 0.0677, "step": 7000 }, { "epoch": 0.7, "eval_loss": 0.1916087418794632, "eval_runtime": 931.2949, "eval_samples_per_second": 2.259, "eval_steps_per_second": 0.282, "eval_wer": 8.923062540899318, "step": 7000 }, { "epoch": 0.7025, "grad_norm": 1.7208157777786255, "learning_rate": 1.4471527777777776e-06, "loss": 0.058, "step": 7025 }, { "epoch": 0.705, "grad_norm": 1.9330742359161377, "learning_rate": 1.435e-06, "loss": 0.0561, "step": 7050 }, { "epoch": 0.7075, "grad_norm": 1.479172945022583, "learning_rate": 1.4228472222222222e-06, "loss": 0.0541, "step": 7075 }, { "epoch": 0.71, "grad_norm": 2.0435240268707275, "learning_rate": 1.4106944444444442e-06, "loss": 0.0661, "step": 7100 }, { "epoch": 0.7125, "grad_norm": 2.289618492126465, "learning_rate": 1.3985416666666666e-06, "loss": 0.086, "step": 7125 }, { "epoch": 0.715, "grad_norm": 1.9679251909255981, "learning_rate": 1.3863888888888888e-06, "loss": 0.0932, "step": 7150 }, { "epoch": 0.7175, "grad_norm": 2.483372211456299, "learning_rate": 1.374236111111111e-06, "loss": 0.1006, "step": 7175 }, { "epoch": 0.72, "grad_norm": 2.003504991531372, "learning_rate": 1.3620833333333334e-06, "loss": 0.1038, "step": 7200 }, { "epoch": 0.7225, "grad_norm": 3.6193089485168457, "learning_rate": 1.3499305555555555e-06, "loss": 0.1124, "step": 7225 }, { "epoch": 0.725, "grad_norm": 2.1037306785583496, "learning_rate": 1.3377777777777775e-06, "loss": 0.0895, "step": 7250 }, { "epoch": 0.7275, "grad_norm": 2.3373475074768066, "learning_rate": 1.325625e-06, "loss": 0.0561, "step": 7275 }, { "epoch": 0.73, "grad_norm": 1.7390323877334595, "learning_rate": 1.313472222222222e-06, "loss": 0.0585, "step": 7300 }, { "epoch": 0.7325, "grad_norm": 2.1054818630218506, "learning_rate": 1.3013194444444443e-06, "loss": 0.056, "step": 7325 }, { "epoch": 0.735, "grad_norm": 1.6178271770477295, "learning_rate": 1.2891666666666667e-06, "loss": 0.0478, "step": 7350 }, { "epoch": 0.7375, "grad_norm": 1.6325420141220093, "learning_rate": 1.2770138888888887e-06, "loss": 0.0584, "step": 7375 }, { "epoch": 0.74, "grad_norm": 1.3367409706115723, "learning_rate": 1.2648611111111108e-06, "loss": 0.0592, "step": 7400 }, { "epoch": 0.7425, "grad_norm": 1.820987343788147, "learning_rate": 1.2527083333333332e-06, "loss": 0.0721, "step": 7425 }, { "epoch": 0.745, "grad_norm": 2.2737059593200684, "learning_rate": 1.2405555555555554e-06, "loss": 0.0648, "step": 7450 }, { "epoch": 0.7475, "grad_norm": 1.8018834590911865, "learning_rate": 1.2284027777777778e-06, "loss": 0.0624, "step": 7475 }, { "epoch": 0.75, "grad_norm": 2.9225475788116455, "learning_rate": 1.21625e-06, "loss": 0.1114, "step": 7500 }, { "epoch": 0.75, "eval_loss": 0.20291149616241455, "eval_runtime": 791.1262, "eval_samples_per_second": 2.659, "eval_steps_per_second": 0.332, "eval_wer": 9.325044404973356, "step": 7500 }, { "epoch": 0.7525, "grad_norm": 2.9410438537597656, "learning_rate": 1.204097222222222e-06, "loss": 0.1191, "step": 7525 }, { "epoch": 0.755, "grad_norm": 2.8533949851989746, "learning_rate": 1.1919444444444444e-06, "loss": 0.0953, "step": 7550 }, { "epoch": 0.7575, "grad_norm": 2.5898656845092773, "learning_rate": 1.1797916666666666e-06, "loss": 0.0944, "step": 7575 }, { "epoch": 0.76, "grad_norm": 3.2420718669891357, "learning_rate": 1.1676388888888887e-06, "loss": 0.1152, "step": 7600 }, { "epoch": 0.7625, "grad_norm": 3.174917459487915, "learning_rate": 1.1554861111111111e-06, "loss": 0.103, "step": 7625 }, { "epoch": 0.765, "grad_norm": 4.257075786590576, "learning_rate": 1.1433333333333331e-06, "loss": 0.0895, "step": 7650 }, { "epoch": 0.7675, "grad_norm": 2.4197463989257812, "learning_rate": 1.1311805555555553e-06, "loss": 0.1022, "step": 7675 }, { "epoch": 0.77, "grad_norm": 3.049309253692627, "learning_rate": 1.1190277777777777e-06, "loss": 0.1037, "step": 7700 }, { "epoch": 0.7725, "grad_norm": 2.6753711700439453, "learning_rate": 1.1068749999999999e-06, "loss": 0.1022, "step": 7725 }, { "epoch": 0.775, "grad_norm": 1.661902904510498, "learning_rate": 1.0947222222222223e-06, "loss": 0.0865, "step": 7750 }, { "epoch": 0.7775, "grad_norm": 1.6167248487472534, "learning_rate": 1.0825694444444443e-06, "loss": 0.0691, "step": 7775 }, { "epoch": 0.78, "grad_norm": 2.3439841270446777, "learning_rate": 1.0704166666666667e-06, "loss": 0.0658, "step": 7800 }, { "epoch": 0.7825, "grad_norm": 1.208755373954773, "learning_rate": 1.0582638888888888e-06, "loss": 0.0615, "step": 7825 }, { "epoch": 0.785, "grad_norm": 1.3961541652679443, "learning_rate": 1.046111111111111e-06, "loss": 0.0639, "step": 7850 }, { "epoch": 0.7875, "grad_norm": 2.2298717498779297, "learning_rate": 1.0339583333333332e-06, "loss": 0.0569, "step": 7875 }, { "epoch": 0.79, "grad_norm": 1.792493224143982, "learning_rate": 1.0218055555555554e-06, "loss": 0.0577, "step": 7900 }, { "epoch": 0.7925, "grad_norm": 2.541646957397461, "learning_rate": 1.0096527777777776e-06, "loss": 0.0864, "step": 7925 }, { "epoch": 0.795, "grad_norm": 3.885256052017212, "learning_rate": 9.975e-07, "loss": 0.0981, "step": 7950 }, { "epoch": 0.7975, "grad_norm": 3.558501720428467, "learning_rate": 9.853472222222222e-07, "loss": 0.1503, "step": 7975 }, { "epoch": 0.8, "grad_norm": 2.8580429553985596, "learning_rate": 9.731944444444443e-07, "loss": 0.1142, "step": 8000 }, { "epoch": 0.8, "eval_loss": 0.18952301144599915, "eval_runtime": 875.6834, "eval_samples_per_second": 2.403, "eval_steps_per_second": 0.3, "eval_wer": 8.997849864447977, "step": 8000 }, { "epoch": 0.8025, "grad_norm": 1.9699584245681763, "learning_rate": 9.610416666666665e-07, "loss": 0.0986, "step": 8025 }, { "epoch": 0.805, "grad_norm": 2.1994383335113525, "learning_rate": 9.488888888888887e-07, "loss": 0.0918, "step": 8050 }, { "epoch": 0.8075, "grad_norm": 3.011695384979248, "learning_rate": 9.36736111111111e-07, "loss": 0.1067, "step": 8075 }, { "epoch": 0.81, "grad_norm": 3.6199569702148438, "learning_rate": 9.245833333333333e-07, "loss": 0.1596, "step": 8100 }, { "epoch": 0.8125, "grad_norm": 4.223966121673584, "learning_rate": 9.124305555555555e-07, "loss": 0.1043, "step": 8125 }, { "epoch": 0.815, "grad_norm": 2.444288492202759, "learning_rate": 9.002777777777777e-07, "loss": 0.0882, "step": 8150 }, { "epoch": 0.8175, "grad_norm": 2.6045594215393066, "learning_rate": 8.88125e-07, "loss": 0.0922, "step": 8175 }, { "epoch": 0.82, "grad_norm": 1.9436824321746826, "learning_rate": 8.759722222222222e-07, "loss": 0.0869, "step": 8200 }, { "epoch": 0.8225, "grad_norm": 2.2735321521759033, "learning_rate": 8.638194444444444e-07, "loss": 0.0691, "step": 8225 }, { "epoch": 0.825, "grad_norm": 2.0563557147979736, "learning_rate": 8.516666666666665e-07, "loss": 0.0572, "step": 8250 }, { "epoch": 0.8275, "grad_norm": 2.353659152984619, "learning_rate": 8.395138888888888e-07, "loss": 0.0599, "step": 8275 }, { "epoch": 0.83, "grad_norm": 1.769374132156372, "learning_rate": 8.273611111111111e-07, "loss": 0.0556, "step": 8300 }, { "epoch": 0.8325, "grad_norm": 1.9082075357437134, "learning_rate": 8.152083333333332e-07, "loss": 0.056, "step": 8325 }, { "epoch": 0.835, "grad_norm": 1.9177392721176147, "learning_rate": 8.030555555555555e-07, "loss": 0.0564, "step": 8350 }, { "epoch": 0.8375, "grad_norm": 1.5918048620224, "learning_rate": 7.909027777777778e-07, "loss": 0.051, "step": 8375 }, { "epoch": 0.84, "grad_norm": 1.6221609115600586, "learning_rate": 7.787499999999998e-07, "loss": 0.0535, "step": 8400 }, { "epoch": 0.8425, "grad_norm": 2.2959134578704834, "learning_rate": 7.665972222222221e-07, "loss": 0.0496, "step": 8425 }, { "epoch": 0.845, "grad_norm": 1.5135602951049805, "learning_rate": 7.544444444444444e-07, "loss": 0.0541, "step": 8450 }, { "epoch": 0.8475, "grad_norm": 2.2790639400482178, "learning_rate": 7.422916666666666e-07, "loss": 0.059, "step": 8475 }, { "epoch": 0.85, "grad_norm": 1.3687411546707153, "learning_rate": 7.301388888888888e-07, "loss": 0.0466, "step": 8500 }, { "epoch": 0.85, "eval_loss": 0.19359956681728363, "eval_runtime": 907.833, "eval_samples_per_second": 2.318, "eval_steps_per_second": 0.29, "eval_wer": 8.85762363279424, "step": 8500 }, { "epoch": 0.8525, "grad_norm": 1.4170043468475342, "learning_rate": 7.179861111111111e-07, "loss": 0.0526, "step": 8525 }, { "epoch": 0.855, "grad_norm": 1.928134799003601, "learning_rate": 7.058333333333333e-07, "loss": 0.0621, "step": 8550 }, { "epoch": 0.8575, "grad_norm": 2.6064600944519043, "learning_rate": 6.936805555555555e-07, "loss": 0.0911, "step": 8575 }, { "epoch": 0.86, "grad_norm": 3.145573854446411, "learning_rate": 6.815277777777777e-07, "loss": 0.1084, "step": 8600 }, { "epoch": 0.8625, "grad_norm": 2.989288330078125, "learning_rate": 6.693749999999999e-07, "loss": 0.0855, "step": 8625 }, { "epoch": 0.865, "grad_norm": 2.2333595752716064, "learning_rate": 6.572222222222222e-07, "loss": 0.0791, "step": 8650 }, { "epoch": 0.8675, "grad_norm": 3.1057262420654297, "learning_rate": 6.455555555555555e-07, "loss": 0.0888, "step": 8675 }, { "epoch": 0.87, "grad_norm": 3.1091811656951904, "learning_rate": 6.334027777777778e-07, "loss": 0.0782, "step": 8700 }, { "epoch": 0.8725, "grad_norm": 1.896377444267273, "learning_rate": 6.212499999999999e-07, "loss": 0.0524, "step": 8725 }, { "epoch": 0.875, "grad_norm": 1.7059510946273804, "learning_rate": 6.090972222222222e-07, "loss": 0.0519, "step": 8750 }, { "epoch": 0.8775, "grad_norm": 2.008694648742676, "learning_rate": 5.969444444444445e-07, "loss": 0.0463, "step": 8775 }, { "epoch": 0.88, "grad_norm": 1.6565593481063843, "learning_rate": 5.847916666666665e-07, "loss": 0.0472, "step": 8800 }, { "epoch": 0.8825, "grad_norm": 1.9553823471069336, "learning_rate": 5.726388888888888e-07, "loss": 0.0537, "step": 8825 }, { "epoch": 0.885, "grad_norm": 1.8511940240859985, "learning_rate": 5.604861111111111e-07, "loss": 0.052, "step": 8850 }, { "epoch": 0.8875, "grad_norm": 2.0512773990631104, "learning_rate": 5.483333333333332e-07, "loss": 0.047, "step": 8875 }, { "epoch": 0.89, "grad_norm": 2.1243960857391357, "learning_rate": 5.361805555555555e-07, "loss": 0.0691, "step": 8900 }, { "epoch": 0.8925, "grad_norm": 1.5564519166946411, "learning_rate": 5.240277777777777e-07, "loss": 0.0658, "step": 8925 }, { "epoch": 0.895, "grad_norm": 1.2789862155914307, "learning_rate": 5.11875e-07, "loss": 0.0599, "step": 8950 }, { "epoch": 0.8975, "grad_norm": 1.5436575412750244, "learning_rate": 4.997222222222222e-07, "loss": 0.0518, "step": 8975 }, { "epoch": 0.9, "grad_norm": 1.5844892263412476, "learning_rate": 4.875694444444444e-07, "loss": 0.0664, "step": 9000 }, { "epoch": 0.9, "eval_loss": 0.1876339614391327, "eval_runtime": 913.4782, "eval_samples_per_second": 2.303, "eval_steps_per_second": 0.288, "eval_wer": 8.969804618117228, "step": 9000 }, { "epoch": 0.9025, "grad_norm": 2.977297782897949, "learning_rate": 4.754166666666666e-07, "loss": 0.0695, "step": 9025 }, { "epoch": 0.905, "grad_norm": 2.295393705368042, "learning_rate": 4.6326388888888887e-07, "loss": 0.0746, "step": 9050 }, { "epoch": 0.9075, "grad_norm": 3.3525807857513428, "learning_rate": 4.5111111111111106e-07, "loss": 0.0877, "step": 9075 }, { "epoch": 0.91, "grad_norm": 2.6071155071258545, "learning_rate": 4.389583333333333e-07, "loss": 0.0975, "step": 9100 }, { "epoch": 0.9125, "grad_norm": 1.9555648565292358, "learning_rate": 4.2680555555555553e-07, "loss": 0.0892, "step": 9125 }, { "epoch": 0.915, "grad_norm": 2.0066351890563965, "learning_rate": 4.1465277777777777e-07, "loss": 0.0977, "step": 9150 }, { "epoch": 0.9175, "grad_norm": 2.748582124710083, "learning_rate": 4.0249999999999996e-07, "loss": 0.0951, "step": 9175 }, { "epoch": 0.92, "grad_norm": 2.9122252464294434, "learning_rate": 3.9034722222222214e-07, "loss": 0.0941, "step": 9200 }, { "epoch": 0.9225, "grad_norm": 3.365471601486206, "learning_rate": 3.7819444444444444e-07, "loss": 0.0973, "step": 9225 }, { "epoch": 0.925, "grad_norm": 3.4445292949676514, "learning_rate": 3.660416666666666e-07, "loss": 0.103, "step": 9250 }, { "epoch": 0.9275, "grad_norm": 2.2794156074523926, "learning_rate": 3.5388888888888886e-07, "loss": 0.0916, "step": 9275 }, { "epoch": 0.93, "grad_norm": 2.6229422092437744, "learning_rate": 3.417361111111111e-07, "loss": 0.0939, "step": 9300 }, { "epoch": 0.9325, "grad_norm": 3.1410930156707764, "learning_rate": 3.2958333333333334e-07, "loss": 0.0847, "step": 9325 }, { "epoch": 0.935, "grad_norm": 2.6069798469543457, "learning_rate": 3.174305555555555e-07, "loss": 0.081, "step": 9350 }, { "epoch": 0.9375, "grad_norm": 3.345421552658081, "learning_rate": 3.052777777777777e-07, "loss": 0.0665, "step": 9375 }, { "epoch": 0.94, "grad_norm": 1.0620834827423096, "learning_rate": 2.93125e-07, "loss": 0.0815, "step": 9400 }, { "epoch": 0.9425, "grad_norm": 3.5785439014434814, "learning_rate": 2.809722222222222e-07, "loss": 0.0828, "step": 9425 }, { "epoch": 0.945, "grad_norm": 2.2039730548858643, "learning_rate": 2.688194444444444e-07, "loss": 0.0766, "step": 9450 }, { "epoch": 0.9475, "grad_norm": 2.28944730758667, "learning_rate": 2.5666666666666666e-07, "loss": 0.0885, "step": 9475 }, { "epoch": 0.95, "grad_norm": 2.7810850143432617, "learning_rate": 2.445138888888889e-07, "loss": 0.0759, "step": 9500 }, { "epoch": 0.95, "eval_loss": 0.1826871782541275, "eval_runtime": 896.8387, "eval_samples_per_second": 2.346, "eval_steps_per_second": 0.293, "eval_wer": 8.820229971019913, "step": 9500 }, { "epoch": 0.9525, "grad_norm": 1.9420568943023682, "learning_rate": 2.3236111111111109e-07, "loss": 0.0562, "step": 9525 }, { "epoch": 0.955, "grad_norm": 1.5195990800857544, "learning_rate": 2.2020833333333332e-07, "loss": 0.0494, "step": 9550 }, { "epoch": 0.9575, "grad_norm": 3.816654920578003, "learning_rate": 2.080555555555555e-07, "loss": 0.0601, "step": 9575 }, { "epoch": 0.96, "grad_norm": 1.4153363704681396, "learning_rate": 1.9590277777777775e-07, "loss": 0.0568, "step": 9600 }, { "epoch": 0.9625, "grad_norm": 1.35164213180542, "learning_rate": 1.8375e-07, "loss": 0.0505, "step": 9625 }, { "epoch": 0.965, "grad_norm": 2.035141706466675, "learning_rate": 1.715972222222222e-07, "loss": 0.052, "step": 9650 }, { "epoch": 0.9675, "grad_norm": 2.966444969177246, "learning_rate": 1.5944444444444444e-07, "loss": 0.0596, "step": 9675 }, { "epoch": 0.97, "grad_norm": 2.619551658630371, "learning_rate": 1.4729166666666665e-07, "loss": 0.085, "step": 9700 }, { "epoch": 0.9725, "grad_norm": 3.322847604751587, "learning_rate": 1.351388888888889e-07, "loss": 0.0827, "step": 9725 }, { "epoch": 0.975, "grad_norm": 2.4450652599334717, "learning_rate": 1.229861111111111e-07, "loss": 0.083, "step": 9750 }, { "epoch": 0.9775, "grad_norm": 1.8799556493759155, "learning_rate": 1.1083333333333333e-07, "loss": 0.083, "step": 9775 }, { "epoch": 0.98, "grad_norm": 1.9747653007507324, "learning_rate": 9.868055555555554e-08, "loss": 0.0797, "step": 9800 }, { "epoch": 0.9825, "grad_norm": 1.730596899986267, "learning_rate": 8.652777777777776e-08, "loss": 0.0785, "step": 9825 }, { "epoch": 0.985, "grad_norm": 1.8359075784683228, "learning_rate": 7.4375e-08, "loss": 0.0628, "step": 9850 }, { "epoch": 0.9875, "grad_norm": 1.883272409439087, "learning_rate": 6.222222222222221e-08, "loss": 0.0569, "step": 9875 }, { "epoch": 0.99, "grad_norm": 1.6352424621582031, "learning_rate": 5.006944444444444e-08, "loss": 0.0662, "step": 9900 }, { "epoch": 0.9925, "grad_norm": 3.131132125854492, "learning_rate": 3.791666666666666e-08, "loss": 0.0536, "step": 9925 }, { "epoch": 0.995, "grad_norm": 1.6641780138015747, "learning_rate": 2.5763888888888887e-08, "loss": 0.058, "step": 9950 }, { "epoch": 0.9975, "grad_norm": 1.125093698501587, "learning_rate": 1.361111111111111e-08, "loss": 0.0565, "step": 9975 }, { "epoch": 1.0, "grad_norm": 1.5283586978912354, "learning_rate": 1.4583333333333332e-09, "loss": 0.0555, "step": 10000 }, { "epoch": 1.0, "eval_loss": 0.1833505630493164, "eval_runtime": 892.3712, "eval_samples_per_second": 2.358, "eval_steps_per_second": 0.295, "eval_wer": 8.642610077591849, "step": 10000 }, { "epoch": 0.50125, "grad_norm": 1.4627561569213867, "learning_rate": 2.2975657894736842e-06, "loss": 0.0814, "step": 10025 }, { "epoch": 0.5025, "grad_norm": 2.185753583908081, "learning_rate": 2.2918092105263158e-06, "loss": 0.0876, "step": 10050 }, { "epoch": 0.50375, "grad_norm": 0.9880486726760864, "learning_rate": 2.2860526315789473e-06, "loss": 0.0793, "step": 10075 }, { "epoch": 0.505, "grad_norm": 3.606826066970825, "learning_rate": 2.2802960526315788e-06, "loss": 0.0738, "step": 10100 }, { "epoch": 0.50625, "grad_norm": 2.34165620803833, "learning_rate": 2.2745394736842103e-06, "loss": 0.0815, "step": 10125 }, { "epoch": 0.5075, "grad_norm": 2.599682092666626, "learning_rate": 2.2687828947368418e-06, "loss": 0.1344, "step": 10150 }, { "epoch": 0.50875, "grad_norm": 3.036947727203369, "learning_rate": 2.2630263157894737e-06, "loss": 0.1211, "step": 10175 }, { "epoch": 0.51, "grad_norm": 3.8418281078338623, "learning_rate": 2.2572697368421048e-06, "loss": 0.0942, "step": 10200 }, { "epoch": 0.51125, "grad_norm": 2.113969087600708, "learning_rate": 2.2515131578947367e-06, "loss": 0.098, "step": 10225 }, { "epoch": 0.5125, "grad_norm": 3.1699957847595215, "learning_rate": 2.2457565789473682e-06, "loss": 0.0902, "step": 10250 }, { "epoch": 0.51375, "grad_norm": 2.7608108520507812, "learning_rate": 2.2399999999999997e-06, "loss": 0.0814, "step": 10275 }, { "epoch": 0.515, "grad_norm": 3.569058656692505, "learning_rate": 2.2342434210526312e-06, "loss": 0.1012, "step": 10300 }, { "epoch": 0.51625, "grad_norm": 1.851914882659912, "learning_rate": 2.228486842105263e-06, "loss": 0.0845, "step": 10325 }, { "epoch": 0.5175, "grad_norm": 1.3573824167251587, "learning_rate": 2.2227302631578947e-06, "loss": 0.0782, "step": 10350 }, { "epoch": 0.51875, "grad_norm": 1.2335045337677002, "learning_rate": 2.216973684210526e-06, "loss": 0.0521, "step": 10375 }, { "epoch": 0.52, "grad_norm": 1.1534448862075806, "learning_rate": 2.211217105263158e-06, "loss": 0.0508, "step": 10400 }, { "epoch": 0.52125, "grad_norm": 1.4138410091400146, "learning_rate": 2.205460526315789e-06, "loss": 0.0503, "step": 10425 }, { "epoch": 0.5225, "grad_norm": 1.6200624704360962, "learning_rate": 2.199703947368421e-06, "loss": 0.0585, "step": 10450 }, { "epoch": 0.52375, "grad_norm": 1.5052680969238281, "learning_rate": 2.193947368421052e-06, "loss": 0.0568, "step": 10475 }, { "epoch": 0.525, "grad_norm": 2.1464121341705322, "learning_rate": 2.188190789473684e-06, "loss": 0.0603, "step": 10500 }, { "epoch": 0.525, "eval_loss": 0.18723848462104797, "eval_runtime": 710.2414, "eval_samples_per_second": 2.962, "eval_steps_per_second": 0.37, "eval_wer": 9.334392820416939, "step": 10500 }, { "epoch": 0.52625, "grad_norm": 1.2822283506393433, "learning_rate": 2.1824342105263156e-06, "loss": 0.0594, "step": 10525 }, { "epoch": 0.5275, "grad_norm": 1.7284327745437622, "learning_rate": 2.176677631578947e-06, "loss": 0.061, "step": 10550 }, { "epoch": 0.52875, "grad_norm": 1.000122308731079, "learning_rate": 2.1709210526315786e-06, "loss": 0.0595, "step": 10575 }, { "epoch": 0.53, "grad_norm": 1.9401737451553345, "learning_rate": 2.1651644736842106e-06, "loss": 0.0646, "step": 10600 }, { "epoch": 0.53125, "grad_norm": 2.531494617462158, "learning_rate": 2.159407894736842e-06, "loss": 0.0591, "step": 10625 }, { "epoch": 0.5325, "grad_norm": 1.7631025314331055, "learning_rate": 2.1536513157894736e-06, "loss": 0.0586, "step": 10650 }, { "epoch": 0.53375, "grad_norm": 2.3051741123199463, "learning_rate": 2.147894736842105e-06, "loss": 0.069, "step": 10675 }, { "epoch": 0.535, "grad_norm": 3.6795778274536133, "learning_rate": 2.1421381578947366e-06, "loss": 0.0914, "step": 10700 }, { "epoch": 0.53625, "grad_norm": 2.9934849739074707, "learning_rate": 2.136381578947368e-06, "loss": 0.091, "step": 10725 }, { "epoch": 0.5375, "grad_norm": 2.177293300628662, "learning_rate": 2.1306249999999996e-06, "loss": 0.0928, "step": 10750 }, { "epoch": 0.53875, "grad_norm": 2.365135669708252, "learning_rate": 2.124868421052631e-06, "loss": 0.0793, "step": 10775 }, { "epoch": 0.54, "grad_norm": 2.412344455718994, "learning_rate": 2.119111842105263e-06, "loss": 0.0932, "step": 10800 }, { "epoch": 0.54125, "grad_norm": 2.7734503746032715, "learning_rate": 2.1133552631578946e-06, "loss": 0.0968, "step": 10825 }, { "epoch": 0.5425, "grad_norm": 2.8189616203308105, "learning_rate": 2.107598684210526e-06, "loss": 0.0897, "step": 10850 }, { "epoch": 0.54375, "grad_norm": 2.111459255218506, "learning_rate": 2.101842105263158e-06, "loss": 0.0984, "step": 10875 }, { "epoch": 0.545, "grad_norm": 3.80507493019104, "learning_rate": 2.0960855263157895e-06, "loss": 0.0744, "step": 10900 }, { "epoch": 0.54625, "grad_norm": 1.7927796840667725, "learning_rate": 2.090328947368421e-06, "loss": 0.071, "step": 10925 }, { "epoch": 0.5475, "grad_norm": 2.8072099685668945, "learning_rate": 2.0845723684210525e-06, "loss": 0.0672, "step": 10950 }, { "epoch": 0.54875, "grad_norm": 3.3706977367401123, "learning_rate": 2.078815789473684e-06, "loss": 0.0655, "step": 10975 }, { "epoch": 0.55, "grad_norm": 2.1349880695343018, "learning_rate": 2.0730592105263155e-06, "loss": 0.0727, "step": 11000 }, { "epoch": 0.55, "eval_loss": 0.18381889164447784, "eval_runtime": 708.1011, "eval_samples_per_second": 2.971, "eval_steps_per_second": 0.371, "eval_wer": 9.362438066747686, "step": 11000 }, { "epoch": 0.55125, "grad_norm": 2.3166699409484863, "learning_rate": 2.067302631578947e-06, "loss": 0.0734, "step": 11025 }, { "epoch": 0.5525, "grad_norm": 4.275175094604492, "learning_rate": 2.061546052631579e-06, "loss": 0.0817, "step": 11050 }, { "epoch": 0.55375, "grad_norm": 3.2193310260772705, "learning_rate": 2.0557894736842105e-06, "loss": 0.0963, "step": 11075 }, { "epoch": 0.555, "grad_norm": 3.10903000831604, "learning_rate": 2.050032894736842e-06, "loss": 0.0956, "step": 11100 }, { "epoch": 0.55625, "grad_norm": 2.998255729675293, "learning_rate": 2.0442763157894735e-06, "loss": 0.0799, "step": 11125 }, { "epoch": 0.5575, "grad_norm": 1.82035231590271, "learning_rate": 2.038519736842105e-06, "loss": 0.054, "step": 11150 }, { "epoch": 0.55875, "grad_norm": 2.447342872619629, "learning_rate": 2.0327631578947365e-06, "loss": 0.0444, "step": 11175 }, { "epoch": 0.56, "grad_norm": 1.5837682485580444, "learning_rate": 2.0270065789473684e-06, "loss": 0.0527, "step": 11200 }, { "epoch": 0.56125, "grad_norm": 2.164294958114624, "learning_rate": 2.02125e-06, "loss": 0.0396, "step": 11225 }, { "epoch": 0.5625, "grad_norm": 1.8219966888427734, "learning_rate": 2.0154934210526314e-06, "loss": 0.0407, "step": 11250 }, { "epoch": 0.56375, "grad_norm": 1.4640647172927856, "learning_rate": 2.009736842105263e-06, "loss": 0.0352, "step": 11275 }, { "epoch": 0.565, "grad_norm": 1.7748467922210693, "learning_rate": 2.0039802631578944e-06, "loss": 0.0466, "step": 11300 }, { "epoch": 0.56625, "grad_norm": 1.62334406375885, "learning_rate": 1.9982236842105264e-06, "loss": 0.0547, "step": 11325 }, { "epoch": 0.5675, "grad_norm": 1.711351752281189, "learning_rate": 1.992467105263158e-06, "loss": 0.0507, "step": 11350 }, { "epoch": 0.56875, "grad_norm": 1.8084019422531128, "learning_rate": 1.9867105263157894e-06, "loss": 0.055, "step": 11375 }, { "epoch": 0.57, "grad_norm": 1.749001383781433, "learning_rate": 1.980953947368421e-06, "loss": 0.0591, "step": 11400 }, { "epoch": 0.57125, "grad_norm": 1.7468432188034058, "learning_rate": 1.9751973684210524e-06, "loss": 0.056, "step": 11425 }, { "epoch": 0.5725, "grad_norm": 1.9768993854522705, "learning_rate": 1.969440789473684e-06, "loss": 0.0565, "step": 11450 }, { "epoch": 0.57375, "grad_norm": 1.7782701253890991, "learning_rate": 1.9636842105263154e-06, "loss": 0.0585, "step": 11475 }, { "epoch": 0.575, "grad_norm": 1.6111152172088623, "learning_rate": 1.957927631578947e-06, "loss": 0.0523, "step": 11500 }, { "epoch": 0.575, "eval_loss": 0.20218180119991302, "eval_runtime": 711.0961, "eval_samples_per_second": 2.959, "eval_steps_per_second": 0.37, "eval_wer": 8.890343086846778, "step": 11500 }, { "epoch": 0.57625, "grad_norm": 1.4439009428024292, "learning_rate": 1.952171052631579e-06, "loss": 0.0549, "step": 11525 }, { "epoch": 0.5775, "grad_norm": 1.8588802814483643, "learning_rate": 1.9464144736842103e-06, "loss": 0.0629, "step": 11550 }, { "epoch": 0.57875, "grad_norm": 1.5505831241607666, "learning_rate": 1.940657894736842e-06, "loss": 0.0546, "step": 11575 }, { "epoch": 0.58, "grad_norm": 1.7028990983963013, "learning_rate": 1.9349013157894738e-06, "loss": 0.0468, "step": 11600 }, { "epoch": 0.58125, "grad_norm": 1.779950499534607, "learning_rate": 1.9291447368421053e-06, "loss": 0.0421, "step": 11625 }, { "epoch": 0.5825, "grad_norm": 0.975740909576416, "learning_rate": 1.923388157894737e-06, "loss": 0.048, "step": 11650 }, { "epoch": 0.58375, "grad_norm": 2.127458333969116, "learning_rate": 1.9176315789473683e-06, "loss": 0.0531, "step": 11675 }, { "epoch": 0.585, "grad_norm": 2.019409656524658, "learning_rate": 1.911875e-06, "loss": 0.0505, "step": 11700 }, { "epoch": 0.58625, "grad_norm": 2.5619285106658936, "learning_rate": 1.9061184210526313e-06, "loss": 0.0537, "step": 11725 }, { "epoch": 0.5875, "grad_norm": 1.2684053182601929, "learning_rate": 1.900361842105263e-06, "loss": 0.0525, "step": 11750 }, { "epoch": 0.58875, "grad_norm": 2.3113842010498047, "learning_rate": 1.8946052631578945e-06, "loss": 0.0644, "step": 11775 }, { "epoch": 0.59, "grad_norm": 2.3800573348999023, "learning_rate": 1.888848684210526e-06, "loss": 0.0773, "step": 11800 }, { "epoch": 0.59125, "grad_norm": 2.133885383605957, "learning_rate": 1.8830921052631575e-06, "loss": 0.0932, "step": 11825 }, { "epoch": 0.5925, "grad_norm": 2.8818199634552, "learning_rate": 1.8773355263157893e-06, "loss": 0.1037, "step": 11850 }, { "epoch": 0.59375, "grad_norm": 2.8853447437286377, "learning_rate": 1.871578947368421e-06, "loss": 0.105, "step": 11875 }, { "epoch": 0.595, "grad_norm": 2.3003904819488525, "learning_rate": 1.8658223684210525e-06, "loss": 0.0839, "step": 11900 }, { "epoch": 0.59625, "grad_norm": 2.091970443725586, "learning_rate": 1.8600657894736842e-06, "loss": 0.0863, "step": 11925 }, { "epoch": 0.5975, "grad_norm": 2.0606272220611572, "learning_rate": 1.8543092105263157e-06, "loss": 0.0725, "step": 11950 }, { "epoch": 0.59875, "grad_norm": 2.6063759326934814, "learning_rate": 1.8485526315789472e-06, "loss": 0.076, "step": 11975 }, { "epoch": 0.6, "grad_norm": 4.2756500244140625, "learning_rate": 1.8427960526315787e-06, "loss": 0.0719, "step": 12000 }, { "epoch": 0.6, "eval_loss": 0.18398858606815338, "eval_runtime": 707.6122, "eval_samples_per_second": 2.973, "eval_steps_per_second": 0.372, "eval_wer": 9.007198279891558, "step": 12000 }, { "epoch": 0.60125, "grad_norm": 1.7154215574264526, "learning_rate": 1.8370394736842104e-06, "loss": 0.0794, "step": 12025 }, { "epoch": 0.6025, "grad_norm": 2.567319393157959, "learning_rate": 1.831282894736842e-06, "loss": 0.0644, "step": 12050 }, { "epoch": 0.60375, "grad_norm": 2.0420191287994385, "learning_rate": 1.8255263157894734e-06, "loss": 0.0586, "step": 12075 }, { "epoch": 0.605, "grad_norm": 1.4417909383773804, "learning_rate": 1.819769736842105e-06, "loss": 0.0609, "step": 12100 }, { "epoch": 0.60625, "grad_norm": 2.08272123336792, "learning_rate": 1.8140131578947367e-06, "loss": 0.0553, "step": 12125 }, { "epoch": 0.6075, "grad_norm": 1.9581290483474731, "learning_rate": 1.8082565789473682e-06, "loss": 0.0545, "step": 12150 }, { "epoch": 0.60875, "grad_norm": 1.2744098901748657, "learning_rate": 1.8024999999999997e-06, "loss": 0.0462, "step": 12175 }, { "epoch": 0.61, "grad_norm": 1.2340375185012817, "learning_rate": 1.7967434210526314e-06, "loss": 0.0548, "step": 12200 }, { "epoch": 0.61125, "grad_norm": 3.383394956588745, "learning_rate": 1.7909868421052631e-06, "loss": 0.0457, "step": 12225 }, { "epoch": 0.6125, "grad_norm": 1.444368600845337, "learning_rate": 1.7852302631578946e-06, "loss": 0.0555, "step": 12250 }, { "epoch": 0.61375, "grad_norm": 2.78483510017395, "learning_rate": 1.7794736842105261e-06, "loss": 0.0587, "step": 12275 }, { "epoch": 0.615, "grad_norm": 2.3910155296325684, "learning_rate": 1.7737171052631578e-06, "loss": 0.0616, "step": 12300 }, { "epoch": 0.61625, "grad_norm": 0.8650920391082764, "learning_rate": 1.7679605263157894e-06, "loss": 0.0569, "step": 12325 }, { "epoch": 0.6175, "grad_norm": 1.3172664642333984, "learning_rate": 1.7622039473684209e-06, "loss": 0.0586, "step": 12350 }, { "epoch": 0.61875, "grad_norm": 1.5684549808502197, "learning_rate": 1.7564473684210526e-06, "loss": 0.0547, "step": 12375 }, { "epoch": 0.62, "grad_norm": 1.466041922569275, "learning_rate": 1.750690789473684e-06, "loss": 0.0605, "step": 12400 }, { "epoch": 0.62125, "grad_norm": 1.2134391069412231, "learning_rate": 1.7449342105263156e-06, "loss": 0.0463, "step": 12425 }, { "epoch": 0.6225, "grad_norm": 2.5045247077941895, "learning_rate": 1.739177631578947e-06, "loss": 0.0546, "step": 12450 }, { "epoch": 0.62375, "grad_norm": 1.6241180896759033, "learning_rate": 1.7334210526315788e-06, "loss": 0.0589, "step": 12475 }, { "epoch": 0.625, "grad_norm": 2.421562433242798, "learning_rate": 1.7276644736842103e-06, "loss": 0.0505, "step": 12500 }, { "epoch": 0.625, "eval_loss": 0.1859678477048874, "eval_runtime": 710.1897, "eval_samples_per_second": 2.963, "eval_steps_per_second": 0.37, "eval_wer": 8.563148546321399, "step": 12500 }, { "epoch": 0.62625, "grad_norm": 1.6244548559188843, "learning_rate": 1.7219078947368418e-06, "loss": 0.0649, "step": 12525 }, { "epoch": 0.6275, "grad_norm": 2.513892889022827, "learning_rate": 1.7161513157894738e-06, "loss": 0.0649, "step": 12550 }, { "epoch": 0.62875, "grad_norm": 2.4743428230285645, "learning_rate": 1.7103947368421053e-06, "loss": 0.0663, "step": 12575 }, { "epoch": 0.63, "grad_norm": 3.1427981853485107, "learning_rate": 1.7046381578947368e-06, "loss": 0.0749, "step": 12600 }, { "epoch": 0.63125, "grad_norm": 2.6378421783447266, "learning_rate": 1.6988815789473683e-06, "loss": 0.0866, "step": 12625 }, { "epoch": 0.6325, "grad_norm": 1.720384955406189, "learning_rate": 1.693125e-06, "loss": 0.0923, "step": 12650 }, { "epoch": 0.63375, "grad_norm": 2.1166465282440186, "learning_rate": 1.6873684210526315e-06, "loss": 0.0796, "step": 12675 }, { "epoch": 0.635, "grad_norm": 3.424236297607422, "learning_rate": 1.681611842105263e-06, "loss": 0.0863, "step": 12700 }, { "epoch": 0.63625, "grad_norm": 2.5471181869506836, "learning_rate": 1.6758552631578945e-06, "loss": 0.0807, "step": 12725 }, { "epoch": 0.6375, "grad_norm": 1.8772817850112915, "learning_rate": 1.6700986842105262e-06, "loss": 0.069, "step": 12750 }, { "epoch": 0.63875, "grad_norm": 2.452240467071533, "learning_rate": 1.6643421052631577e-06, "loss": 0.077, "step": 12775 }, { "epoch": 0.64, "grad_norm": 1.8149327039718628, "learning_rate": 1.6585855263157892e-06, "loss": 0.0499, "step": 12800 }, { "epoch": 0.64125, "grad_norm": 1.941320538520813, "learning_rate": 1.6528289473684207e-06, "loss": 0.0594, "step": 12825 }, { "epoch": 0.6425, "grad_norm": 1.69181489944458, "learning_rate": 1.6470723684210525e-06, "loss": 0.0502, "step": 12850 }, { "epoch": 0.64375, "grad_norm": 0.5586689114570618, "learning_rate": 1.641315789473684e-06, "loss": 0.0529, "step": 12875 }, { "epoch": 0.645, "grad_norm": 1.7759673595428467, "learning_rate": 1.6355592105263157e-06, "loss": 0.0555, "step": 12900 }, { "epoch": 0.64625, "grad_norm": 2.413004159927368, "learning_rate": 1.6298026315789474e-06, "loss": 0.0769, "step": 12925 }, { "epoch": 0.6475, "grad_norm": 2.904203176498413, "learning_rate": 1.624046052631579e-06, "loss": 0.0746, "step": 12950 }, { "epoch": 0.64875, "grad_norm": 2.8384039402008057, "learning_rate": 1.6182894736842104e-06, "loss": 0.0807, "step": 12975 }, { "epoch": 0.65, "grad_norm": 2.031804323196411, "learning_rate": 1.612532894736842e-06, "loss": 0.0678, "step": 13000 }, { "epoch": 0.65, "eval_loss": 0.18515139818191528, "eval_runtime": 707.7982, "eval_samples_per_second": 2.973, "eval_steps_per_second": 0.372, "eval_wer": 8.12377302047303, "step": 13000 }, { "epoch": 0.65125, "grad_norm": 2.3889620304107666, "learning_rate": 1.6067763157894736e-06, "loss": 0.0835, "step": 13025 }, { "epoch": 0.6525, "grad_norm": 4.161228656768799, "learning_rate": 1.6010197368421051e-06, "loss": 0.0979, "step": 13050 }, { "epoch": 0.65375, "grad_norm": 2.2576959133148193, "learning_rate": 1.5952631578947366e-06, "loss": 0.0757, "step": 13075 }, { "epoch": 0.655, "grad_norm": 2.2937376499176025, "learning_rate": 1.5895065789473682e-06, "loss": 0.0929, "step": 13100 }, { "epoch": 0.65625, "grad_norm": 2.472297191619873, "learning_rate": 1.5837499999999999e-06, "loss": 0.0872, "step": 13125 }, { "epoch": 0.6575, "grad_norm": 3.056525945663452, "learning_rate": 1.5779934210526314e-06, "loss": 0.0841, "step": 13150 }, { "epoch": 0.65875, "grad_norm": 2.3019163608551025, "learning_rate": 1.5722368421052629e-06, "loss": 0.082, "step": 13175 }, { "epoch": 0.66, "grad_norm": 2.3816184997558594, "learning_rate": 1.5664802631578946e-06, "loss": 0.0813, "step": 13200 }, { "epoch": 0.66125, "grad_norm": 2.625826358795166, "learning_rate": 1.5607236842105263e-06, "loss": 0.0732, "step": 13225 }, { "epoch": 0.6625, "grad_norm": 2.82149338722229, "learning_rate": 1.5549671052631578e-06, "loss": 0.0842, "step": 13250 }, { "epoch": 0.66375, "grad_norm": 3.182973623275757, "learning_rate": 1.5492105263157893e-06, "loss": 0.0928, "step": 13275 }, { "epoch": 0.665, "grad_norm": 2.779914140701294, "learning_rate": 1.543453947368421e-06, "loss": 0.0872, "step": 13300 }, { "epoch": 0.66625, "grad_norm": 1.7561771869659424, "learning_rate": 1.5376973684210526e-06, "loss": 0.0757, "step": 13325 }, { "epoch": 0.6675, "grad_norm": 2.4447529315948486, "learning_rate": 1.531940789473684e-06, "loss": 0.075, "step": 13350 }, { "epoch": 0.66875, "grad_norm": 1.7688288688659668, "learning_rate": 1.5261842105263156e-06, "loss": 0.0782, "step": 13375 }, { "epoch": 0.67, "grad_norm": 2.298830509185791, "learning_rate": 1.5204276315789473e-06, "loss": 0.0665, "step": 13400 }, { "epoch": 0.67125, "grad_norm": 1.16276216506958, "learning_rate": 1.5146710526315788e-06, "loss": 0.0563, "step": 13425 }, { "epoch": 0.6725, "grad_norm": 1.7327269315719604, "learning_rate": 1.5089144736842103e-06, "loss": 0.0583, "step": 13450 }, { "epoch": 0.67375, "grad_norm": 1.8701986074447632, "learning_rate": 1.503157894736842e-06, "loss": 0.052, "step": 13475 }, { "epoch": 0.675, "grad_norm": 2.23791766166687, "learning_rate": 1.4974013157894735e-06, "loss": 0.0586, "step": 13500 }, { "epoch": 0.675, "eval_loss": 0.1887647807598114, "eval_runtime": 709.3443, "eval_samples_per_second": 2.966, "eval_steps_per_second": 0.371, "eval_wer": 8.764139478358418, "step": 13500 }, { "epoch": 0.67625, "grad_norm": 1.300486445426941, "learning_rate": 1.491644736842105e-06, "loss": 0.0546, "step": 13525 }, { "epoch": 0.6775, "grad_norm": 2.624974012374878, "learning_rate": 1.4858881578947365e-06, "loss": 0.0689, "step": 13550 }, { "epoch": 0.67875, "grad_norm": 3.1279780864715576, "learning_rate": 1.4801315789473685e-06, "loss": 0.1001, "step": 13575 }, { "epoch": 0.68, "grad_norm": 3.4166171550750732, "learning_rate": 1.474375e-06, "loss": 0.1019, "step": 13600 }, { "epoch": 0.68125, "grad_norm": 2.749969005584717, "learning_rate": 1.4686184210526315e-06, "loss": 0.099, "step": 13625 }, { "epoch": 0.6825, "grad_norm": 4.281369686126709, "learning_rate": 1.4628618421052632e-06, "loss": 0.0841, "step": 13650 }, { "epoch": 0.68375, "grad_norm": 3.0503993034362793, "learning_rate": 1.4571052631578947e-06, "loss": 0.0971, "step": 13675 }, { "epoch": 0.685, "grad_norm": 3.777414083480835, "learning_rate": 1.4513486842105262e-06, "loss": 0.0912, "step": 13700 }, { "epoch": 0.68625, "grad_norm": 2.426450490951538, "learning_rate": 1.4455921052631577e-06, "loss": 0.0814, "step": 13725 }, { "epoch": 0.6875, "grad_norm": 2.190294027328491, "learning_rate": 1.4398355263157894e-06, "loss": 0.081, "step": 13750 }, { "epoch": 0.68875, "grad_norm": 2.2566046714782715, "learning_rate": 1.434078947368421e-06, "loss": 0.0748, "step": 13775 }, { "epoch": 0.69, "grad_norm": 3.511096239089966, "learning_rate": 1.4283223684210524e-06, "loss": 0.0856, "step": 13800 }, { "epoch": 0.69125, "grad_norm": 1.93797767162323, "learning_rate": 1.422565789473684e-06, "loss": 0.0791, "step": 13825 }, { "epoch": 0.6925, "grad_norm": 2.7521746158599854, "learning_rate": 1.4168092105263157e-06, "loss": 0.0756, "step": 13850 }, { "epoch": 0.69375, "grad_norm": 2.9946696758270264, "learning_rate": 1.4110526315789472e-06, "loss": 0.0774, "step": 13875 }, { "epoch": 0.695, "grad_norm": 2.7771806716918945, "learning_rate": 1.4052960526315787e-06, "loss": 0.0746, "step": 13900 }, { "epoch": 0.69625, "grad_norm": 3.62237548828125, "learning_rate": 1.3995394736842106e-06, "loss": 0.094, "step": 13925 }, { "epoch": 0.6975, "grad_norm": 2.18898344039917, "learning_rate": 1.3937828947368421e-06, "loss": 0.0905, "step": 13950 }, { "epoch": 0.69875, "grad_norm": 3.8469576835632324, "learning_rate": 1.3880263157894736e-06, "loss": 0.0935, "step": 13975 }, { "epoch": 0.7, "grad_norm": 2.3852436542510986, "learning_rate": 1.3822697368421051e-06, "loss": 0.0818, "step": 14000 }, { "epoch": 0.7, "eval_loss": 0.1821947991847992, "eval_runtime": 707.0234, "eval_samples_per_second": 2.976, "eval_steps_per_second": 0.372, "eval_wer": 8.254650836683181, "step": 14000 }, { "epoch": 0.70125, "grad_norm": 3.8027591705322266, "learning_rate": 1.3767434210526314e-06, "loss": 0.1071, "step": 14025 }, { "epoch": 0.7025, "grad_norm": 5.558282852172852, "learning_rate": 1.3709868421052631e-06, "loss": 0.1747, "step": 14050 }, { "epoch": 0.70375, "grad_norm": 3.226308584213257, "learning_rate": 1.3652302631578946e-06, "loss": 0.1869, "step": 14075 }, { "epoch": 0.705, "grad_norm": 3.2964353561401367, "learning_rate": 1.3594736842105261e-06, "loss": 0.1834, "step": 14100 }, { "epoch": 0.70625, "grad_norm": 3.5981297492980957, "learning_rate": 1.3537171052631576e-06, "loss": 0.1835, "step": 14125 }, { "epoch": 0.7075, "grad_norm": 4.036584377288818, "learning_rate": 1.3479605263157894e-06, "loss": 0.1821, "step": 14150 }, { "epoch": 0.70875, "grad_norm": 3.1639156341552734, "learning_rate": 1.3422039473684209e-06, "loss": 0.1418, "step": 14175 }, { "epoch": 0.71, "grad_norm": 1.7206283807754517, "learning_rate": 1.3364473684210526e-06, "loss": 0.1215, "step": 14200 }, { "epoch": 0.71125, "grad_norm": 1.677225112915039, "learning_rate": 1.3306907894736843e-06, "loss": 0.1062, "step": 14225 }, { "epoch": 0.7125, "grad_norm": 3.1991124153137207, "learning_rate": 1.3249342105263158e-06, "loss": 0.0836, "step": 14250 }, { "epoch": 0.71375, "grad_norm": 1.7974333763122559, "learning_rate": 1.3191776315789473e-06, "loss": 0.0759, "step": 14275 }, { "epoch": 0.715, "grad_norm": 3.726328134536743, "learning_rate": 1.3134210526315788e-06, "loss": 0.0898, "step": 14300 }, { "epoch": 0.71625, "grad_norm": 1.6985043287277222, "learning_rate": 1.3076644736842105e-06, "loss": 0.0893, "step": 14325 }, { "epoch": 0.7175, "grad_norm": 1.7952812910079956, "learning_rate": 1.301907894736842e-06, "loss": 0.066, "step": 14350 }, { "epoch": 0.71875, "grad_norm": 1.50443696975708, "learning_rate": 1.2961513157894735e-06, "loss": 0.0582, "step": 14375 }, { "epoch": 0.72, "grad_norm": 2.0075905323028564, "learning_rate": 1.290394736842105e-06, "loss": 0.0647, "step": 14400 }, { "epoch": 0.72125, "grad_norm": 2.2095861434936523, "learning_rate": 1.2846381578947368e-06, "loss": 0.0628, "step": 14425 }, { "epoch": 0.7225, "grad_norm": 2.0118868350982666, "learning_rate": 1.2788815789473683e-06, "loss": 0.0611, "step": 14450 }, { "epoch": 0.72375, "grad_norm": 3.3328588008880615, "learning_rate": 1.2731249999999998e-06, "loss": 0.0714, "step": 14475 }, { "epoch": 0.725, "grad_norm": 1.2586523294448853, "learning_rate": 1.2673684210526313e-06, "loss": 0.0583, "step": 14500 }, { "epoch": 0.725, "eval_loss": 0.13493549823760986, "eval_runtime": 706.0016, "eval_samples_per_second": 2.98, "eval_steps_per_second": 0.373, "eval_wer": 7.876040011218098, "step": 14500 }, { "epoch": 0.72625, "grad_norm": 3.8481645584106445, "learning_rate": 1.261611842105263e-06, "loss": 0.0592, "step": 14525 }, { "epoch": 0.7275, "grad_norm": 2.096135377883911, "learning_rate": 1.2558552631578947e-06, "loss": 0.0534, "step": 14550 }, { "epoch": 0.72875, "grad_norm": 1.6991406679153442, "learning_rate": 1.2500986842105262e-06, "loss": 0.0472, "step": 14575 }, { "epoch": 0.73, "grad_norm": 1.1058001518249512, "learning_rate": 1.244342105263158e-06, "loss": 0.0436, "step": 14600 }, { "epoch": 0.73125, "grad_norm": 1.507546305656433, "learning_rate": 1.2385855263157894e-06, "loss": 0.0509, "step": 14625 }, { "epoch": 0.7325, "grad_norm": 1.7745000123977661, "learning_rate": 1.232828947368421e-06, "loss": 0.0486, "step": 14650 }, { "epoch": 0.73375, "grad_norm": 1.6939218044281006, "learning_rate": 1.2270723684210525e-06, "loss": 0.0524, "step": 14675 }, { "epoch": 0.735, "grad_norm": 2.7204201221466064, "learning_rate": 1.2213157894736842e-06, "loss": 0.0781, "step": 14700 }, { "epoch": 0.73625, "grad_norm": 3.627716302871704, "learning_rate": 1.2155592105263157e-06, "loss": 0.0911, "step": 14725 }, { "epoch": 0.7375, "grad_norm": 2.6972947120666504, "learning_rate": 1.2098026315789472e-06, "loss": 0.0781, "step": 14750 }, { "epoch": 0.73875, "grad_norm": 2.0274033546447754, "learning_rate": 1.2040460526315787e-06, "loss": 0.0681, "step": 14775 }, { "epoch": 0.74, "grad_norm": 2.9205024242401123, "learning_rate": 1.1982894736842104e-06, "loss": 0.0633, "step": 14800 }, { "epoch": 0.74125, "grad_norm": 2.2337961196899414, "learning_rate": 1.192532894736842e-06, "loss": 0.0787, "step": 14825 }, { "epoch": 0.7425, "grad_norm": 1.7139707803726196, "learning_rate": 1.1867763157894734e-06, "loss": 0.0769, "step": 14850 }, { "epoch": 0.74375, "grad_norm": 2.301008462905884, "learning_rate": 1.1810197368421054e-06, "loss": 0.0585, "step": 14875 }, { "epoch": 0.745, "grad_norm": 2.3375415802001953, "learning_rate": 1.1752631578947369e-06, "loss": 0.0517, "step": 14900 }, { "epoch": 0.74625, "grad_norm": 2.461021900177002, "learning_rate": 1.1695065789473684e-06, "loss": 0.0584, "step": 14925 }, { "epoch": 0.7475, "grad_norm": 2.318147897720337, "learning_rate": 1.1637499999999999e-06, "loss": 0.05, "step": 14950 }, { "epoch": 0.74875, "grad_norm": 1.8461962938308716, "learning_rate": 1.1579934210526316e-06, "loss": 0.0472, "step": 14975 }, { "epoch": 0.75, "grad_norm": 2.298811674118042, "learning_rate": 1.152236842105263e-06, "loss": 0.0516, "step": 15000 }, { "epoch": 0.75, "eval_loss": 0.1431730091571808, "eval_runtime": 701.5357, "eval_samples_per_second": 2.999, "eval_steps_per_second": 0.375, "eval_wer": 7.838646349443769, "step": 15000 }, { "epoch": 0.75125, "grad_norm": 3.0281970500946045, "learning_rate": 1.1464802631578946e-06, "loss": 0.0801, "step": 15025 }, { "epoch": 0.7525, "grad_norm": 2.353001356124878, "learning_rate": 1.140723684210526e-06, "loss": 0.1058, "step": 15050 }, { "epoch": 0.75375, "grad_norm": 3.8827781677246094, "learning_rate": 1.1349671052631578e-06, "loss": 0.1098, "step": 15075 }, { "epoch": 0.755, "grad_norm": 3.6072864532470703, "learning_rate": 1.1292105263157893e-06, "loss": 0.1215, "step": 15100 }, { "epoch": 0.75625, "grad_norm": 3.3091938495635986, "learning_rate": 1.1234539473684208e-06, "loss": 0.1242, "step": 15125 }, { "epoch": 0.7575, "grad_norm": 2.6782429218292236, "learning_rate": 1.1176973684210526e-06, "loss": 0.1441, "step": 15150 }, { "epoch": 0.75875, "grad_norm": 3.580221652984619, "learning_rate": 1.111940789473684e-06, "loss": 0.1064, "step": 15175 }, { "epoch": 0.76, "grad_norm": 1.4896968603134155, "learning_rate": 1.1061842105263156e-06, "loss": 0.077, "step": 15200 }, { "epoch": 0.76125, "grad_norm": 2.086003303527832, "learning_rate": 1.1004276315789473e-06, "loss": 0.0651, "step": 15225 }, { "epoch": 0.7625, "grad_norm": 1.9408849477767944, "learning_rate": 1.094671052631579e-06, "loss": 0.0594, "step": 15250 }, { "epoch": 0.76375, "grad_norm": 1.6820451021194458, "learning_rate": 1.0889144736842103e-06, "loss": 0.048, "step": 15275 }, { "epoch": 0.765, "grad_norm": 1.6949853897094727, "learning_rate": 1.083157894736842e-06, "loss": 0.0511, "step": 15300 }, { "epoch": 0.76625, "grad_norm": 1.7007721662521362, "learning_rate": 1.0774013157894737e-06, "loss": 0.063, "step": 15325 }, { "epoch": 0.7675, "grad_norm": 2.4390954971313477, "learning_rate": 1.0716447368421052e-06, "loss": 0.0748, "step": 15350 }, { "epoch": 0.76875, "grad_norm": 2.7969071865081787, "learning_rate": 1.0658881578947367e-06, "loss": 0.0737, "step": 15375 }, { "epoch": 0.77, "grad_norm": 2.2695958614349365, "learning_rate": 1.0601315789473682e-06, "loss": 0.0842, "step": 15400 }, { "epoch": 0.77125, "grad_norm": 2.307033061981201, "learning_rate": 1.054375e-06, "loss": 0.0974, "step": 15425 }, { "epoch": 0.7725, "grad_norm": 2.258702278137207, "learning_rate": 1.0486184210526315e-06, "loss": 0.0811, "step": 15450 }, { "epoch": 0.77375, "grad_norm": 2.347611665725708, "learning_rate": 1.0428618421052632e-06, "loss": 0.0788, "step": 15475 }, { "epoch": 0.775, "grad_norm": 2.832035541534424, "learning_rate": 1.0371052631578947e-06, "loss": 0.0721, "step": 15500 }, { "epoch": 0.775, "eval_loss": 0.14393839240074158, "eval_runtime": 713.1729, "eval_samples_per_second": 2.95, "eval_steps_per_second": 0.369, "eval_wer": 7.79657847994765, "step": 15500 }, { "epoch": 0.77625, "grad_norm": 1.697174310684204, "learning_rate": 1.0313486842105262e-06, "loss": 0.0558, "step": 15525 }, { "epoch": 0.7775, "grad_norm": 2.2344281673431396, "learning_rate": 1.0255921052631577e-06, "loss": 0.0551, "step": 15550 }, { "epoch": 0.77875, "grad_norm": 1.4713681936264038, "learning_rate": 1.0198355263157894e-06, "loss": 0.0637, "step": 15575 }, { "epoch": 0.78, "grad_norm": 2.4913697242736816, "learning_rate": 1.014078947368421e-06, "loss": 0.0733, "step": 15600 }, { "epoch": 0.78125, "grad_norm": 2.8358447551727295, "learning_rate": 1.0083223684210524e-06, "loss": 0.1286, "step": 15625 }, { "epoch": 0.7825, "grad_norm": 1.840749740600586, "learning_rate": 1.0025657894736842e-06, "loss": 0.107, "step": 15650 }, { "epoch": 0.78375, "grad_norm": 2.2246243953704834, "learning_rate": 9.968092105263157e-07, "loss": 0.0651, "step": 15675 }, { "epoch": 0.785, "grad_norm": 2.0589306354522705, "learning_rate": 9.910526315789474e-07, "loss": 0.0598, "step": 15700 }, { "epoch": 0.78625, "grad_norm": 1.2811946868896484, "learning_rate": 9.852960526315789e-07, "loss": 0.0486, "step": 15725 }, { "epoch": 0.7875, "grad_norm": 1.0992165803909302, "learning_rate": 9.795394736842104e-07, "loss": 0.0578, "step": 15750 }, { "epoch": 0.78875, "grad_norm": 1.471144437789917, "learning_rate": 9.737828947368419e-07, "loss": 0.043, "step": 15775 }, { "epoch": 0.79, "grad_norm": 2.463345766067505, "learning_rate": 9.680263157894736e-07, "loss": 0.0662, "step": 15800 }, { "epoch": 0.79125, "grad_norm": 1.2828388214111328, "learning_rate": 9.622697368421051e-07, "loss": 0.063, "step": 15825 }, { "epoch": 0.7925, "grad_norm": 1.633901834487915, "learning_rate": 9.565131578947368e-07, "loss": 0.0492, "step": 15850 }, { "epoch": 0.79375, "grad_norm": 1.09126615524292, "learning_rate": 9.507565789473683e-07, "loss": 0.0495, "step": 15875 }, { "epoch": 0.795, "grad_norm": 1.1589980125427246, "learning_rate": 9.45e-07, "loss": 0.0443, "step": 15900 }, { "epoch": 0.79625, "grad_norm": 2.09328556060791, "learning_rate": 9.392434210526315e-07, "loss": 0.0619, "step": 15925 }, { "epoch": 0.7975, "grad_norm": 1.5910670757293701, "learning_rate": 9.334868421052631e-07, "loss": 0.0507, "step": 15950 }, { "epoch": 0.79875, "grad_norm": 1.148314118385315, "learning_rate": 9.277302631578947e-07, "loss": 0.0599, "step": 15975 }, { "epoch": 0.8, "grad_norm": 2.303746461868286, "learning_rate": 9.219736842105263e-07, "loss": 0.0697, "step": 16000 }, { "epoch": 0.8, "eval_loss": 0.1345137059688568, "eval_runtime": 709.5423, "eval_samples_per_second": 2.965, "eval_steps_per_second": 0.371, "eval_wer": 7.647003832850332, "step": 16000 }, { "epoch": 0.80125, "grad_norm": 1.8277431726455688, "learning_rate": 9.162171052631578e-07, "loss": 0.0642, "step": 16025 }, { "epoch": 0.8025, "grad_norm": 2.3583319187164307, "learning_rate": 9.104605263157894e-07, "loss": 0.0814, "step": 16050 }, { "epoch": 0.80375, "grad_norm": 2.4556453227996826, "learning_rate": 9.047039473684209e-07, "loss": 0.0792, "step": 16075 }, { "epoch": 0.805, "grad_norm": 1.3626729249954224, "learning_rate": 8.989473684210525e-07, "loss": 0.0731, "step": 16100 }, { "epoch": 0.80625, "grad_norm": 1.86356782913208, "learning_rate": 8.93190789473684e-07, "loss": 0.0683, "step": 16125 }, { "epoch": 0.8075, "grad_norm": 1.8756046295166016, "learning_rate": 8.874342105263158e-07, "loss": 0.0539, "step": 16150 }, { "epoch": 0.80875, "grad_norm": 1.686668038368225, "learning_rate": 8.816776315789474e-07, "loss": 0.0527, "step": 16175 }, { "epoch": 0.81, "grad_norm": 1.3087671995162964, "learning_rate": 8.759210526315789e-07, "loss": 0.0528, "step": 16200 }, { "epoch": 0.81125, "grad_norm": 1.7911397218704224, "learning_rate": 8.701644736842105e-07, "loss": 0.0433, "step": 16225 }, { "epoch": 0.8125, "grad_norm": 1.8399356603622437, "learning_rate": 8.64407894736842e-07, "loss": 0.0496, "step": 16250 }, { "epoch": 0.81375, "grad_norm": 2.1597609519958496, "learning_rate": 8.586513157894736e-07, "loss": 0.0485, "step": 16275 }, { "epoch": 0.815, "grad_norm": 1.462746262550354, "learning_rate": 8.528947368421051e-07, "loss": 0.045, "step": 16300 }, { "epoch": 0.81625, "grad_norm": 1.374795913696289, "learning_rate": 8.471381578947368e-07, "loss": 0.0493, "step": 16325 }, { "epoch": 0.8175, "grad_norm": 1.0946956872940063, "learning_rate": 8.413815789473683e-07, "loss": 0.0421, "step": 16350 }, { "epoch": 0.81875, "grad_norm": 0.6962540149688721, "learning_rate": 8.356249999999999e-07, "loss": 0.0299, "step": 16375 }, { "epoch": 0.82, "grad_norm": 2.319697856903076, "learning_rate": 8.298684210526316e-07, "loss": 0.0349, "step": 16400 }, { "epoch": 0.82125, "grad_norm": 0.8546643257141113, "learning_rate": 8.241118421052631e-07, "loss": 0.0451, "step": 16425 }, { "epoch": 0.8225, "grad_norm": 1.4677948951721191, "learning_rate": 8.183552631578947e-07, "loss": 0.0541, "step": 16450 }, { "epoch": 0.82375, "grad_norm": 1.544314980506897, "learning_rate": 8.125986842105262e-07, "loss": 0.0372, "step": 16475 }, { "epoch": 0.825, "grad_norm": 1.7578595876693726, "learning_rate": 8.068421052631579e-07, "loss": 0.0459, "step": 16500 }, { "epoch": 0.825, "eval_loss": 0.13805165886878967, "eval_runtime": 709.8129, "eval_samples_per_second": 2.964, "eval_steps_per_second": 0.371, "eval_wer": 7.488080770309432, "step": 16500 }, { "epoch": 0.82625, "grad_norm": 2.394965171813965, "learning_rate": 8.010855263157894e-07, "loss": 0.0439, "step": 16525 }, { "epoch": 0.8275, "grad_norm": 1.3294695615768433, "learning_rate": 7.95328947368421e-07, "loss": 0.0456, "step": 16550 }, { "epoch": 0.82875, "grad_norm": 0.9564753770828247, "learning_rate": 7.895723684210525e-07, "loss": 0.0454, "step": 16575 }, { "epoch": 0.83, "grad_norm": 1.0442456007003784, "learning_rate": 7.838157894736841e-07, "loss": 0.0486, "step": 16600 }, { "epoch": 0.83125, "grad_norm": 2.8311030864715576, "learning_rate": 7.780592105263156e-07, "loss": 0.0502, "step": 16625 }, { "epoch": 0.8325, "grad_norm": 2.2450034618377686, "learning_rate": 7.723026315789474e-07, "loss": 0.071, "step": 16650 }, { "epoch": 0.83375, "grad_norm": 2.755880832672119, "learning_rate": 7.667763157894736e-07, "loss": 0.0783, "step": 16675 }, { "epoch": 0.835, "grad_norm": 3.4958858489990234, "learning_rate": 7.610197368421051e-07, "loss": 0.0773, "step": 16700 }, { "epoch": 0.83625, "grad_norm": 2.808371067047119, "learning_rate": 7.552631578947367e-07, "loss": 0.1004, "step": 16725 }, { "epoch": 0.8375, "grad_norm": 2.6730117797851562, "learning_rate": 7.495065789473683e-07, "loss": 0.0713, "step": 16750 }, { "epoch": 0.83875, "grad_norm": 4.398132801055908, "learning_rate": 7.4375e-07, "loss": 0.0818, "step": 16775 }, { "epoch": 0.84, "grad_norm": 1.7946457862854004, "learning_rate": 7.379934210526316e-07, "loss": 0.0603, "step": 16800 }, { "epoch": 0.84125, "grad_norm": 1.8804703950881958, "learning_rate": 7.322368421052631e-07, "loss": 0.052, "step": 16825 }, { "epoch": 0.8425, "grad_norm": 1.8179138898849487, "learning_rate": 7.264802631578947e-07, "loss": 0.055, "step": 16850 }, { "epoch": 0.84375, "grad_norm": 2.0781452655792236, "learning_rate": 7.207236842105262e-07, "loss": 0.0494, "step": 16875 }, { "epoch": 0.845, "grad_norm": 1.1545528173446655, "learning_rate": 7.149671052631578e-07, "loss": 0.0444, "step": 16900 }, { "epoch": 0.84625, "grad_norm": 2.279444932937622, "learning_rate": 7.092105263157893e-07, "loss": 0.0514, "step": 16925 }, { "epoch": 0.8475, "grad_norm": 2.1791815757751465, "learning_rate": 7.03453947368421e-07, "loss": 0.053, "step": 16950 }, { "epoch": 0.84875, "grad_norm": 2.6994404792785645, "learning_rate": 6.976973684210525e-07, "loss": 0.0625, "step": 16975 }, { "epoch": 0.85, "grad_norm": 1.9181281328201294, "learning_rate": 6.919407894736842e-07, "loss": 0.0533, "step": 17000 }, { "epoch": 0.85, "eval_loss": 0.14220376312732697, "eval_runtime": 703.1087, "eval_samples_per_second": 2.992, "eval_steps_per_second": 0.374, "eval_wer": 7.287089838272413, "step": 17000 }, { "epoch": 0.85125, "grad_norm": 1.6095223426818848, "learning_rate": 6.861842105263158e-07, "loss": 0.063, "step": 17025 }, { "epoch": 0.8525, "grad_norm": 1.123022437095642, "learning_rate": 6.804276315789473e-07, "loss": 0.063, "step": 17050 }, { "epoch": 0.85375, "grad_norm": 3.293515205383301, "learning_rate": 6.746710526315789e-07, "loss": 0.0646, "step": 17075 }, { "epoch": 0.855, "grad_norm": 4.111965656280518, "learning_rate": 6.689144736842104e-07, "loss": 0.1174, "step": 17100 }, { "epoch": 0.85625, "grad_norm": 2.747833251953125, "learning_rate": 6.631578947368421e-07, "loss": 0.1526, "step": 17125 }, { "epoch": 0.8575, "grad_norm": 4.171908855438232, "learning_rate": 6.574013157894736e-07, "loss": 0.1611, "step": 17150 }, { "epoch": 0.85875, "grad_norm": 3.9168457984924316, "learning_rate": 6.516447368421052e-07, "loss": 0.1999, "step": 17175 }, { "epoch": 0.86, "grad_norm": 4.75758695602417, "learning_rate": 6.458881578947367e-07, "loss": 0.2584, "step": 17200 }, { "epoch": 0.86125, "grad_norm": 6.345437049865723, "learning_rate": 6.401315789473683e-07, "loss": 0.2406, "step": 17225 }, { "epoch": 0.8625, "grad_norm": 5.058291912078857, "learning_rate": 6.343749999999999e-07, "loss": 0.3128, "step": 17250 }, { "epoch": 0.86375, "grad_norm": 4.156762599945068, "learning_rate": 6.286184210526316e-07, "loss": 0.2145, "step": 17275 }, { "epoch": 0.865, "grad_norm": 1.7109938859939575, "learning_rate": 6.228618421052632e-07, "loss": 0.1099, "step": 17300 }, { "epoch": 0.86625, "grad_norm": 3.0488481521606445, "learning_rate": 6.171052631578947e-07, "loss": 0.097, "step": 17325 }, { "epoch": 0.8675, "grad_norm": 1.8164464235305786, "learning_rate": 6.113486842105263e-07, "loss": 0.0944, "step": 17350 }, { "epoch": 0.86875, "grad_norm": 3.457158088684082, "learning_rate": 6.055921052631578e-07, "loss": 0.1137, "step": 17375 }, { "epoch": 0.87, "grad_norm": 4.14601993560791, "learning_rate": 5.998355263157894e-07, "loss": 0.1102, "step": 17400 }, { "epoch": 0.87125, "grad_norm": 2.3553032875061035, "learning_rate": 5.940789473684209e-07, "loss": 0.0929, "step": 17425 }, { "epoch": 0.8725, "grad_norm": 3.2448887825012207, "learning_rate": 5.883223684210526e-07, "loss": 0.0633, "step": 17450 }, { "epoch": 0.87375, "grad_norm": 1.5024784803390503, "learning_rate": 5.825657894736841e-07, "loss": 0.0475, "step": 17475 }, { "epoch": 0.875, "grad_norm": 1.114351749420166, "learning_rate": 5.768092105263158e-07, "loss": 0.0449, "step": 17500 }, { "epoch": 0.875, "eval_loss": 0.142613023519516, "eval_runtime": 704.7777, "eval_samples_per_second": 2.985, "eval_steps_per_second": 0.373, "eval_wer": 7.721791156398991, "step": 17500 }, { "epoch": 0.87625, "grad_norm": 1.9207721948623657, "learning_rate": 5.710526315789474e-07, "loss": 0.0452, "step": 17525 }, { "epoch": 0.8775, "grad_norm": 1.6070079803466797, "learning_rate": 5.652960526315789e-07, "loss": 0.0528, "step": 17550 }, { "epoch": 0.87875, "grad_norm": 1.528111219406128, "learning_rate": 5.595394736842105e-07, "loss": 0.0473, "step": 17575 }, { "epoch": 0.88, "grad_norm": 1.870785117149353, "learning_rate": 5.53782894736842e-07, "loss": 0.0723, "step": 17600 }, { "epoch": 0.88125, "grad_norm": 2.147519826889038, "learning_rate": 5.480263157894737e-07, "loss": 0.0702, "step": 17625 }, { "epoch": 0.8825, "grad_norm": 2.0077731609344482, "learning_rate": 5.422697368421052e-07, "loss": 0.068, "step": 17650 }, { "epoch": 0.88375, "grad_norm": 1.970958948135376, "learning_rate": 5.365131578947368e-07, "loss": 0.0723, "step": 17675 }, { "epoch": 0.885, "grad_norm": 2.7377431392669678, "learning_rate": 5.307565789473683e-07, "loss": 0.0762, "step": 17700 }, { "epoch": 0.88625, "grad_norm": 3.1114962100982666, "learning_rate": 5.25e-07, "loss": 0.0648, "step": 17725 }, { "epoch": 0.8875, "grad_norm": 1.5071254968643188, "learning_rate": 5.192434210526316e-07, "loss": 0.072, "step": 17750 }, { "epoch": 0.88875, "grad_norm": 1.5089877843856812, "learning_rate": 5.134868421052631e-07, "loss": 0.0488, "step": 17775 }, { "epoch": 0.89, "grad_norm": 1.5960254669189453, "learning_rate": 5.077302631578947e-07, "loss": 0.043, "step": 17800 }, { "epoch": 0.89125, "grad_norm": 1.259710431098938, "learning_rate": 5.019736842105263e-07, "loss": 0.0519, "step": 17825 }, { "epoch": 0.8925, "grad_norm": 1.2747613191604614, "learning_rate": 4.962171052631579e-07, "loss": 0.0479, "step": 17850 }, { "epoch": 0.89375, "grad_norm": 2.354058027267456, "learning_rate": 4.904605263157894e-07, "loss": 0.0499, "step": 17875 }, { "epoch": 0.895, "grad_norm": 2.0054726600646973, "learning_rate": 4.84703947368421e-07, "loss": 0.0458, "step": 17900 }, { "epoch": 0.89625, "grad_norm": 1.4367984533309937, "learning_rate": 4.789473684210526e-07, "loss": 0.0414, "step": 17925 }, { "epoch": 0.8975, "grad_norm": 1.6699531078338623, "learning_rate": 4.731907894736842e-07, "loss": 0.0432, "step": 17950 }, { "epoch": 0.89875, "grad_norm": 1.3422558307647705, "learning_rate": 4.6743421052631575e-07, "loss": 0.048, "step": 17975 }, { "epoch": 0.9, "grad_norm": 1.52657151222229, "learning_rate": 4.6167763157894736e-07, "loss": 0.0424, "step": 18000 }, { "epoch": 0.9, "eval_loss": 0.14172810316085815, "eval_runtime": 712.3572, "eval_samples_per_second": 2.954, "eval_steps_per_second": 0.369, "eval_wer": 7.436664485369731, "step": 18000 }, { "epoch": 0.90125, "grad_norm": 1.3574419021606445, "learning_rate": 4.559210526315789e-07, "loss": 0.052, "step": 18025 }, { "epoch": 0.9025, "grad_norm": 2.5409414768218994, "learning_rate": 4.501644736842105e-07, "loss": 0.0573, "step": 18050 }, { "epoch": 0.90375, "grad_norm": 1.808009386062622, "learning_rate": 4.4440789473684204e-07, "loss": 0.054, "step": 18075 }, { "epoch": 0.905, "grad_norm": 3.3824169635772705, "learning_rate": 4.3865131578947365e-07, "loss": 0.0633, "step": 18100 }, { "epoch": 0.90625, "grad_norm": 1.5174132585525513, "learning_rate": 4.328947368421052e-07, "loss": 0.0754, "step": 18125 }, { "epoch": 0.9075, "grad_norm": 2.60347056388855, "learning_rate": 4.2713815789473677e-07, "loss": 0.0848, "step": 18150 }, { "epoch": 0.90875, "grad_norm": 2.771503210067749, "learning_rate": 4.2138157894736843e-07, "loss": 0.0798, "step": 18175 }, { "epoch": 0.91, "grad_norm": 3.0102124214172363, "learning_rate": 4.15625e-07, "loss": 0.0689, "step": 18200 }, { "epoch": 0.91125, "grad_norm": 0.9840712547302246, "learning_rate": 4.0986842105263155e-07, "loss": 0.0681, "step": 18225 }, { "epoch": 0.9125, "grad_norm": 1.9731576442718506, "learning_rate": 4.0411184210526316e-07, "loss": 0.0588, "step": 18250 }, { "epoch": 0.91375, "grad_norm": 1.3282792568206787, "learning_rate": 3.983552631578947e-07, "loss": 0.0529, "step": 18275 }, { "epoch": 0.915, "grad_norm": 1.1621119976043701, "learning_rate": 3.925986842105263e-07, "loss": 0.0634, "step": 18300 }, { "epoch": 0.91625, "grad_norm": 2.3993144035339355, "learning_rate": 3.8684210526315784e-07, "loss": 0.0623, "step": 18325 }, { "epoch": 0.9175, "grad_norm": 2.014002799987793, "learning_rate": 3.8108552631578945e-07, "loss": 0.0564, "step": 18350 }, { "epoch": 0.91875, "grad_norm": 1.2708508968353271, "learning_rate": 3.75328947368421e-07, "loss": 0.0521, "step": 18375 }, { "epoch": 0.92, "grad_norm": 2.212245225906372, "learning_rate": 3.6957236842105257e-07, "loss": 0.0566, "step": 18400 }, { "epoch": 0.92125, "grad_norm": 2.540544271469116, "learning_rate": 3.638157894736842e-07, "loss": 0.0612, "step": 18425 }, { "epoch": 0.9225, "grad_norm": 3.0146989822387695, "learning_rate": 3.580592105263158e-07, "loss": 0.0782, "step": 18450 }, { "epoch": 0.92375, "grad_norm": 1.8940297365188599, "learning_rate": 3.5230263157894735e-07, "loss": 0.082, "step": 18475 }, { "epoch": 0.925, "grad_norm": 2.1947951316833496, "learning_rate": 3.465460526315789e-07, "loss": 0.0714, "step": 18500 }, { "epoch": 0.925, "eval_loss": 0.13368327915668488, "eval_runtime": 706.5821, "eval_samples_per_second": 2.978, "eval_steps_per_second": 0.372, "eval_wer": 6.997288959521361, "step": 18500 }, { "epoch": 0.92625, "grad_norm": 2.6375064849853516, "learning_rate": 3.407894736842105e-07, "loss": 0.0886, "step": 18525 }, { "epoch": 0.9275, "grad_norm": 2.767137289047241, "learning_rate": 3.350328947368421e-07, "loss": 0.0856, "step": 18550 }, { "epoch": 0.92875, "grad_norm": 1.3164273500442505, "learning_rate": 3.2927631578947364e-07, "loss": 0.0508, "step": 18575 }, { "epoch": 0.93, "grad_norm": 1.2430734634399414, "learning_rate": 3.2351973684210525e-07, "loss": 0.0481, "step": 18600 }, { "epoch": 0.93125, "grad_norm": 2.4302713871002197, "learning_rate": 3.177631578947368e-07, "loss": 0.0566, "step": 18625 }, { "epoch": 0.9325, "grad_norm": 1.2310426235198975, "learning_rate": 3.1200657894736837e-07, "loss": 0.0533, "step": 18650 }, { "epoch": 0.93375, "grad_norm": 0.9681397080421448, "learning_rate": 3.0625e-07, "loss": 0.0429, "step": 18675 }, { "epoch": 0.935, "grad_norm": 1.7636661529541016, "learning_rate": 3.0049342105263154e-07, "loss": 0.0516, "step": 18700 }, { "epoch": 0.93625, "grad_norm": 1.7715063095092773, "learning_rate": 2.9473684210526315e-07, "loss": 0.0555, "step": 18725 }, { "epoch": 0.9375, "grad_norm": 1.9749398231506348, "learning_rate": 2.889802631578947e-07, "loss": 0.0604, "step": 18750 }, { "epoch": 0.93875, "grad_norm": 2.3333206176757812, "learning_rate": 2.832236842105263e-07, "loss": 0.0671, "step": 18775 }, { "epoch": 0.94, "grad_norm": 1.9402034282684326, "learning_rate": 2.774671052631579e-07, "loss": 0.0719, "step": 18800 }, { "epoch": 0.94125, "grad_norm": 3.3181869983673096, "learning_rate": 2.7171052631578944e-07, "loss": 0.0804, "step": 18825 }, { "epoch": 0.9425, "grad_norm": 2.6769981384277344, "learning_rate": 2.6595394736842105e-07, "loss": 0.0677, "step": 18850 }, { "epoch": 0.94375, "grad_norm": 2.763998508453369, "learning_rate": 2.601973684210526e-07, "loss": 0.2073, "step": 18875 }, { "epoch": 0.945, "grad_norm": 1.6679767370224, "learning_rate": 2.5444078947368417e-07, "loss": 0.2077, "step": 18900 }, { "epoch": 0.94625, "grad_norm": 1.71970534324646, "learning_rate": 2.486842105263158e-07, "loss": 0.1115, "step": 18925 }, { "epoch": 0.9475, "grad_norm": 2.557680368423462, "learning_rate": 2.4292763157894734e-07, "loss": 0.0638, "step": 18950 }, { "epoch": 0.94875, "grad_norm": 2.582639455795288, "learning_rate": 2.3717105263157892e-07, "loss": 0.0605, "step": 18975 }, { "epoch": 0.95, "grad_norm": 2.88093638420105, "learning_rate": 2.314144736842105e-07, "loss": 0.0573, "step": 19000 }, { "epoch": 0.95, "eval_loss": 0.14317533373832703, "eval_runtime": 707.1971, "eval_samples_per_second": 2.975, "eval_steps_per_second": 0.372, "eval_wer": 7.665700663737496, "step": 19000 }, { "epoch": 0.95125, "grad_norm": 2.9497323036193848, "learning_rate": 2.2565789473684207e-07, "loss": 0.0664, "step": 19025 }, { "epoch": 0.9525, "grad_norm": 4.702762603759766, "learning_rate": 2.1990131578947368e-07, "loss": 0.0845, "step": 19050 }, { "epoch": 0.95375, "grad_norm": 3.3599348068237305, "learning_rate": 2.1414473684210524e-07, "loss": 0.0813, "step": 19075 }, { "epoch": 0.955, "grad_norm": 2.3196516036987305, "learning_rate": 2.0838815789473682e-07, "loss": 0.0792, "step": 19100 }, { "epoch": 0.95625, "grad_norm": 2.351522445678711, "learning_rate": 2.026315789473684e-07, "loss": 0.0809, "step": 19125 }, { "epoch": 0.9575, "grad_norm": 3.470804214477539, "learning_rate": 1.9710526315789472e-07, "loss": 0.0915, "step": 19150 }, { "epoch": 0.95875, "grad_norm": 1.6629875898361206, "learning_rate": 1.913486842105263e-07, "loss": 0.0727, "step": 19175 }, { "epoch": 0.96, "grad_norm": 1.8704760074615479, "learning_rate": 1.8559210526315789e-07, "loss": 0.0573, "step": 19200 }, { "epoch": 0.96125, "grad_norm": 1.8056179285049438, "learning_rate": 1.7983552631578944e-07, "loss": 0.0512, "step": 19225 }, { "epoch": 0.9625, "grad_norm": 2.044466733932495, "learning_rate": 1.7407894736842103e-07, "loss": 0.0544, "step": 19250 }, { "epoch": 0.96375, "grad_norm": 0.7206035256385803, "learning_rate": 1.6832236842105262e-07, "loss": 0.0436, "step": 19275 }, { "epoch": 0.965, "grad_norm": 1.413913369178772, "learning_rate": 1.625657894736842e-07, "loss": 0.0404, "step": 19300 }, { "epoch": 0.96625, "grad_norm": 1.5140485763549805, "learning_rate": 1.5680921052631579e-07, "loss": 0.0489, "step": 19325 }, { "epoch": 0.9675, "grad_norm": 1.1080636978149414, "learning_rate": 1.5105263157894734e-07, "loss": 0.0351, "step": 19350 }, { "epoch": 0.96875, "grad_norm": 1.465155005455017, "learning_rate": 1.4529605263157893e-07, "loss": 0.0466, "step": 19375 }, { "epoch": 0.97, "grad_norm": 1.5429915189743042, "learning_rate": 1.3953947368421052e-07, "loss": 0.0443, "step": 19400 }, { "epoch": 0.97125, "grad_norm": 1.7784292697906494, "learning_rate": 1.337828947368421e-07, "loss": 0.0459, "step": 19425 }, { "epoch": 0.9725, "grad_norm": 0.8602461218833923, "learning_rate": 1.2802631578947366e-07, "loss": 0.0398, "step": 19450 }, { "epoch": 0.97375, "grad_norm": 0.9731587767601013, "learning_rate": 1.2226973684210524e-07, "loss": 0.0475, "step": 19475 }, { "epoch": 0.975, "grad_norm": 1.2531239986419678, "learning_rate": 1.1651315789473683e-07, "loss": 0.0441, "step": 19500 }, { "epoch": 0.975, "eval_loss": 0.1408335119485855, "eval_runtime": 707.4132, "eval_samples_per_second": 2.974, "eval_steps_per_second": 0.372, "eval_wer": 7.100121529400767, "step": 19500 }, { "epoch": 0.97625, "grad_norm": 2.7600419521331787, "learning_rate": 1.1075657894736842e-07, "loss": 0.0505, "step": 19525 }, { "epoch": 0.9775, "grad_norm": 1.777706265449524, "learning_rate": 1.0499999999999999e-07, "loss": 0.0522, "step": 19550 }, { "epoch": 0.97875, "grad_norm": 3.6460354328155518, "learning_rate": 9.924342105263157e-08, "loss": 0.0672, "step": 19575 }, { "epoch": 0.98, "grad_norm": 1.5261070728302002, "learning_rate": 9.348684210526314e-08, "loss": 0.0783, "step": 19600 }, { "epoch": 0.98125, "grad_norm": 3.3365206718444824, "learning_rate": 8.773026315789472e-08, "loss": 0.0688, "step": 19625 }, { "epoch": 0.9825, "grad_norm": 2.7939553260803223, "learning_rate": 8.197368421052632e-08, "loss": 0.0871, "step": 19650 }, { "epoch": 0.98375, "grad_norm": 1.5764423608779907, "learning_rate": 7.621710526315789e-08, "loss": 0.066, "step": 19675 }, { "epoch": 0.985, "grad_norm": 1.764454960823059, "learning_rate": 7.046052631578947e-08, "loss": 0.0565, "step": 19700 }, { "epoch": 0.98625, "grad_norm": 2.962538957595825, "learning_rate": 6.470394736842104e-08, "loss": 0.0713, "step": 19725 }, { "epoch": 0.9875, "grad_norm": 1.3702257871627808, "learning_rate": 5.894736842105263e-08, "loss": 0.0658, "step": 19750 }, { "epoch": 0.98875, "grad_norm": 2.7916741371154785, "learning_rate": 5.319078947368421e-08, "loss": 0.0708, "step": 19775 }, { "epoch": 0.99, "grad_norm": 3.484490394592285, "learning_rate": 4.766447368421052e-08, "loss": 0.1234, "step": 19800 }, { "epoch": 0.99125, "grad_norm": 3.010957717895508, "learning_rate": 4.1907894736842107e-08, "loss": 0.1044, "step": 19825 }, { "epoch": 0.9925, "grad_norm": 3.347371816635132, "learning_rate": 3.615131578947368e-08, "loss": 0.0744, "step": 19850 }, { "epoch": 0.99375, "grad_norm": 2.686807870864868, "learning_rate": 3.0394736842105264e-08, "loss": 0.0474, "step": 19875 }, { "epoch": 0.995, "grad_norm": 1.5409128665924072, "learning_rate": 2.463815789473684e-08, "loss": 0.0496, "step": 19900 }, { "epoch": 0.99625, "grad_norm": 1.4984127283096313, "learning_rate": 1.8881578947368418e-08, "loss": 0.0456, "step": 19925 }, { "epoch": 0.9975, "grad_norm": 1.1075857877731323, "learning_rate": 1.3124999999999998e-08, "loss": 0.0583, "step": 19950 }, { "epoch": 0.99875, "grad_norm": 1.4185248613357544, "learning_rate": 7.368421052631579e-09, "loss": 0.056, "step": 19975 }, { "epoch": 1.0, "grad_norm": 1.3521877527236938, "learning_rate": 1.6118421052631579e-09, "loss": 0.0453, "step": 20000 }, { "epoch": 1.0, "eval_loss": 0.140655517578125, "eval_runtime": 704.514, "eval_samples_per_second": 2.986, "eval_steps_per_second": 0.373, "eval_wer": 7.100121529400767, "step": 20000 }, { "epoch": 1.0, "step": 20000, "total_flos": 1.0871994580992e+21, "train_loss": 0.036449428302049634, "train_runtime": 47722.2934, "train_samples_per_second": 6.705, "train_steps_per_second": 0.419 } ], "logging_steps": 25, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0871994580992e+21, "train_batch_size": 16, "trial_name": null, "trial_params": null }