diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6391 @@ +{ + "best_metric": 5.539509738576612, + "best_model_checkpoint": "./training/results/checkpoint-20000", + "epoch": 79.13669064748201, + "eval_steps": 1000, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08992805755395683, + "grad_norm": 12.73649787902832, + "learning_rate": 1.2500000000000002e-07, + "loss": 3.2522, + "step": 25 + }, + { + "epoch": 0.17985611510791366, + "grad_norm": 12.000336647033691, + "learning_rate": 2.5000000000000004e-07, + "loss": 3.0617, + "step": 50 + }, + { + "epoch": 0.2697841726618705, + "grad_norm": 10.76065444946289, + "learning_rate": 3.75e-07, + "loss": 2.7165, + "step": 75 + }, + { + "epoch": 0.3597122302158273, + "grad_norm": 8.36201286315918, + "learning_rate": 5.000000000000001e-07, + "loss": 2.2607, + "step": 100 + }, + { + "epoch": 0.44964028776978415, + "grad_norm": 7.234769344329834, + "learning_rate": 6.25e-07, + "loss": 1.8433, + "step": 125 + }, + { + "epoch": 0.539568345323741, + "grad_norm": 6.549698829650879, + "learning_rate": 7.5e-07, + "loss": 1.5515, + "step": 150 + }, + { + "epoch": 0.6294964028776978, + "grad_norm": 7.549570083618164, + "learning_rate": 8.75e-07, + "loss": 1.3346, + "step": 175 + }, + { + "epoch": 0.7194244604316546, + "grad_norm": 5.8322930335998535, + "learning_rate": 1.0000000000000002e-06, + "loss": 1.0572, + "step": 200 + }, + { + "epoch": 0.8093525179856115, + "grad_norm": 3.925255537033081, + "learning_rate": 1.125e-06, + "loss": 0.6348, + "step": 225 + }, + { + "epoch": 0.8992805755395683, + "grad_norm": 3.1902644634246826, + "learning_rate": 1.25e-06, + "loss": 0.4882, + "step": 250 + }, + { + "epoch": 0.9892086330935251, + "grad_norm": 3.355315923690796, + "learning_rate": 1.3750000000000002e-06, + "loss": 0.4032, + "step": 275 + }, + { + "epoch": 1.079136690647482, + "grad_norm": 3.4707915782928467, + "learning_rate": 1.5e-06, + "loss": 0.3355, + "step": 300 + }, + { + "epoch": 1.169064748201439, + "grad_norm": 3.261484384536743, + "learning_rate": 1.6250000000000001e-06, + "loss": 0.2896, + "step": 325 + }, + { + "epoch": 1.2589928057553956, + "grad_norm": 3.3107025623321533, + "learning_rate": 1.75e-06, + "loss": 0.2685, + "step": 350 + }, + { + "epoch": 1.3489208633093526, + "grad_norm": 2.6028969287872314, + "learning_rate": 1.8750000000000003e-06, + "loss": 0.2365, + "step": 375 + }, + { + "epoch": 1.4388489208633093, + "grad_norm": 3.380187749862671, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.2333, + "step": 400 + }, + { + "epoch": 1.5287769784172662, + "grad_norm": 3.0845112800598145, + "learning_rate": 2.125e-06, + "loss": 0.2191, + "step": 425 + }, + { + "epoch": 1.6187050359712232, + "grad_norm": 3.15523099899292, + "learning_rate": 2.25e-06, + "loss": 0.1949, + "step": 450 + }, + { + "epoch": 1.70863309352518, + "grad_norm": 2.5198237895965576, + "learning_rate": 2.375e-06, + "loss": 0.1756, + "step": 475 + }, + { + "epoch": 1.7985611510791366, + "grad_norm": 2.7945399284362793, + "learning_rate": 2.5e-06, + "loss": 0.1748, + "step": 500 + }, + { + "epoch": 1.8884892086330936, + "grad_norm": 3.299269199371338, + "learning_rate": 2.6250000000000003e-06, + "loss": 0.1711, + "step": 525 + }, + { + "epoch": 1.9784172661870505, + "grad_norm": 2.3727056980133057, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.1495, + "step": 550 + }, + { + 
"epoch": 2.068345323741007, + "grad_norm": 2.1909244060516357, + "learning_rate": 2.875e-06, + "loss": 0.1196, + "step": 575 + }, + { + "epoch": 2.158273381294964, + "grad_norm": 2.45758318901062, + "learning_rate": 3e-06, + "loss": 0.1023, + "step": 600 + }, + { + "epoch": 2.2482014388489207, + "grad_norm": 2.009880542755127, + "learning_rate": 3.125e-06, + "loss": 0.1019, + "step": 625 + }, + { + "epoch": 2.338129496402878, + "grad_norm": 2.2170872688293457, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.0948, + "step": 650 + }, + { + "epoch": 2.4280575539568345, + "grad_norm": 1.9289822578430176, + "learning_rate": 3.3750000000000003e-06, + "loss": 0.0934, + "step": 675 + }, + { + "epoch": 2.5179856115107913, + "grad_norm": 2.0615289211273193, + "learning_rate": 3.5e-06, + "loss": 0.0935, + "step": 700 + }, + { + "epoch": 2.6079136690647484, + "grad_norm": 2.231041193008423, + "learning_rate": 3.625e-06, + "loss": 0.0923, + "step": 725 + }, + { + "epoch": 2.697841726618705, + "grad_norm": 1.953312873840332, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.0844, + "step": 750 + }, + { + "epoch": 2.787769784172662, + "grad_norm": 2.1245667934417725, + "learning_rate": 3.875e-06, + "loss": 0.0831, + "step": 775 + }, + { + "epoch": 2.8776978417266186, + "grad_norm": 1.8499614000320435, + "learning_rate": 4.000000000000001e-06, + "loss": 0.0841, + "step": 800 + }, + { + "epoch": 2.9676258992805753, + "grad_norm": 2.0503857135772705, + "learning_rate": 4.125e-06, + "loss": 0.0854, + "step": 825 + }, + { + "epoch": 3.0575539568345325, + "grad_norm": 2.0084242820739746, + "learning_rate": 4.25e-06, + "loss": 0.0621, + "step": 850 + }, + { + "epoch": 3.147482014388489, + "grad_norm": 1.3122639656066895, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.0434, + "step": 875 + }, + { + "epoch": 3.237410071942446, + "grad_norm": 1.3615615367889404, + "learning_rate": 4.5e-06, + "loss": 0.0416, + "step": 900 + }, + { + "epoch": 3.327338129496403, + "grad_norm": 1.533996343612671, + "learning_rate": 4.625000000000001e-06, + "loss": 0.0451, + "step": 925 + }, + { + "epoch": 3.41726618705036, + "grad_norm": 1.573549509048462, + "learning_rate": 4.75e-06, + "loss": 0.0404, + "step": 950 + }, + { + "epoch": 3.5071942446043165, + "grad_norm": 1.4288333654403687, + "learning_rate": 4.875e-06, + "loss": 0.044, + "step": 975 + }, + { + "epoch": 3.597122302158273, + "grad_norm": 1.5075387954711914, + "learning_rate": 5e-06, + "loss": 0.0479, + "step": 1000 + }, + { + "epoch": 3.597122302158273, + "eval_loss": 0.10350359231233597, + "eval_runtime": 1344.3937, + "eval_samples_per_second": 1.653, + "eval_steps_per_second": 0.103, + "eval_wer": 20.29178701029401, + "step": 1000 + }, + { + "epoch": 3.68705035971223, + "grad_norm": 1.842606782913208, + "learning_rate": 4.998737373737374e-06, + "loss": 0.0467, + "step": 1025 + }, + { + "epoch": 3.776978417266187, + "grad_norm": 1.495784044265747, + "learning_rate": 4.997474747474748e-06, + "loss": 0.0437, + "step": 1050 + }, + { + "epoch": 3.866906474820144, + "grad_norm": 2.054900646209717, + "learning_rate": 4.9962121212121216e-06, + "loss": 0.0497, + "step": 1075 + }, + { + "epoch": 3.956834532374101, + "grad_norm": 1.438658356666565, + "learning_rate": 4.9949494949494956e-06, + "loss": 0.0398, + "step": 1100 + }, + { + "epoch": 4.046762589928058, + "grad_norm": 1.3041224479675293, + "learning_rate": 4.993686868686869e-06, + "loss": 0.0293, + "step": 1125 + }, + { + "epoch": 4.136690647482014, + "grad_norm": 1.2206145524978638, + 
"learning_rate": 4.992424242424243e-06, + "loss": 0.0227, + "step": 1150 + }, + { + "epoch": 4.226618705035971, + "grad_norm": 1.2926621437072754, + "learning_rate": 4.991161616161617e-06, + "loss": 0.0231, + "step": 1175 + }, + { + "epoch": 4.316546762589928, + "grad_norm": 1.4683257341384888, + "learning_rate": 4.98989898989899e-06, + "loss": 0.023, + "step": 1200 + }, + { + "epoch": 4.406474820143885, + "grad_norm": 1.3095593452453613, + "learning_rate": 4.988636363636364e-06, + "loss": 0.0226, + "step": 1225 + }, + { + "epoch": 4.496402877697841, + "grad_norm": 0.7059262990951538, + "learning_rate": 4.987373737373738e-06, + "loss": 0.0225, + "step": 1250 + }, + { + "epoch": 4.586330935251799, + "grad_norm": 1.1493045091629028, + "learning_rate": 4.986111111111112e-06, + "loss": 0.022, + "step": 1275 + }, + { + "epoch": 4.676258992805756, + "grad_norm": 1.9609806537628174, + "learning_rate": 4.984848484848485e-06, + "loss": 0.0232, + "step": 1300 + }, + { + "epoch": 4.766187050359712, + "grad_norm": 1.5463200807571411, + "learning_rate": 4.983585858585859e-06, + "loss": 0.0206, + "step": 1325 + }, + { + "epoch": 4.856115107913669, + "grad_norm": 0.858127772808075, + "learning_rate": 4.982323232323233e-06, + "loss": 0.0222, + "step": 1350 + }, + { + "epoch": 4.946043165467626, + "grad_norm": 0.8384924530982971, + "learning_rate": 4.981060606060606e-06, + "loss": 0.0201, + "step": 1375 + }, + { + "epoch": 5.0359712230215825, + "grad_norm": 0.9966625571250916, + "learning_rate": 4.97979797979798e-06, + "loss": 0.0173, + "step": 1400 + }, + { + "epoch": 5.125899280575539, + "grad_norm": 0.6609445214271545, + "learning_rate": 4.978535353535353e-06, + "loss": 0.0113, + "step": 1425 + }, + { + "epoch": 5.215827338129497, + "grad_norm": 0.82105952501297, + "learning_rate": 4.977272727272728e-06, + "loss": 0.012, + "step": 1450 + }, + { + "epoch": 5.305755395683454, + "grad_norm": 1.0994760990142822, + "learning_rate": 4.976010101010101e-06, + "loss": 0.0118, + "step": 1475 + }, + { + "epoch": 5.39568345323741, + "grad_norm": 0.4543660283088684, + "learning_rate": 4.974747474747475e-06, + "loss": 0.0112, + "step": 1500 + }, + { + "epoch": 5.485611510791367, + "grad_norm": 3.425143241882324, + "learning_rate": 4.973484848484849e-06, + "loss": 0.0113, + "step": 1525 + }, + { + "epoch": 5.575539568345324, + "grad_norm": 0.7691114544868469, + "learning_rate": 4.9722222222222224e-06, + "loss": 0.0114, + "step": 1550 + }, + { + "epoch": 5.66546762589928, + "grad_norm": 0.5446438789367676, + "learning_rate": 4.9709595959595964e-06, + "loss": 0.0121, + "step": 1575 + }, + { + "epoch": 5.755395683453237, + "grad_norm": 0.7232896089553833, + "learning_rate": 4.9696969696969696e-06, + "loss": 0.0118, + "step": 1600 + }, + { + "epoch": 5.845323741007194, + "grad_norm": 1.3025506734848022, + "learning_rate": 4.968434343434344e-06, + "loss": 0.0135, + "step": 1625 + }, + { + "epoch": 5.935251798561151, + "grad_norm": 1.2080421447753906, + "learning_rate": 4.9671717171717176e-06, + "loss": 0.0126, + "step": 1650 + }, + { + "epoch": 6.025179856115108, + "grad_norm": 0.4218277633190155, + "learning_rate": 4.9659090909090916e-06, + "loss": 0.0094, + "step": 1675 + }, + { + "epoch": 6.115107913669065, + "grad_norm": 0.5942659378051758, + "learning_rate": 4.964646464646465e-06, + "loss": 0.0071, + "step": 1700 + }, + { + "epoch": 6.205035971223022, + "grad_norm": 0.31671133637428284, + "learning_rate": 4.963383838383839e-06, + "loss": 0.008, + "step": 1725 + }, + { + "epoch": 6.294964028776978, + "grad_norm": 
0.3538670539855957, + "learning_rate": 4.962121212121213e-06, + "loss": 0.0066, + "step": 1750 + }, + { + "epoch": 6.384892086330935, + "grad_norm": 0.8252100348472595, + "learning_rate": 4.960858585858586e-06, + "loss": 0.006, + "step": 1775 + }, + { + "epoch": 6.474820143884892, + "grad_norm": 0.9238548278808594, + "learning_rate": 4.95959595959596e-06, + "loss": 0.0074, + "step": 1800 + }, + { + "epoch": 6.564748201438849, + "grad_norm": 1.1760324239730835, + "learning_rate": 4.958333333333334e-06, + "loss": 0.0066, + "step": 1825 + }, + { + "epoch": 6.654676258992806, + "grad_norm": 0.3382113575935364, + "learning_rate": 4.957070707070708e-06, + "loss": 0.0103, + "step": 1850 + }, + { + "epoch": 6.744604316546763, + "grad_norm": 0.9418781399726868, + "learning_rate": 4.955808080808081e-06, + "loss": 0.0092, + "step": 1875 + }, + { + "epoch": 6.83453237410072, + "grad_norm": 0.7677399516105652, + "learning_rate": 4.954545454545455e-06, + "loss": 0.009, + "step": 1900 + }, + { + "epoch": 6.924460431654676, + "grad_norm": 0.32002565264701843, + "learning_rate": 4.953282828282829e-06, + "loss": 0.0075, + "step": 1925 + }, + { + "epoch": 7.014388489208633, + "grad_norm": 1.0049771070480347, + "learning_rate": 4.952020202020202e-06, + "loss": 0.0071, + "step": 1950 + }, + { + "epoch": 7.10431654676259, + "grad_norm": 0.513941764831543, + "learning_rate": 4.950757575757576e-06, + "loss": 0.0043, + "step": 1975 + }, + { + "epoch": 7.194244604316546, + "grad_norm": 0.8406050205230713, + "learning_rate": 4.94949494949495e-06, + "loss": 0.005, + "step": 2000 + }, + { + "epoch": 7.194244604316546, + "eval_loss": 0.09395472705364227, + "eval_runtime": 1340.6412, + "eval_samples_per_second": 1.657, + "eval_steps_per_second": 0.104, + "eval_wer": 10.419906687402799, + "step": 2000 + }, + { + "epoch": 7.284172661870503, + "grad_norm": 0.47227388620376587, + "learning_rate": 4.948232323232323e-06, + "loss": 0.005, + "step": 2025 + }, + { + "epoch": 7.374100719424461, + "grad_norm": 0.2972259819507599, + "learning_rate": 4.946969696969697e-06, + "loss": 0.0047, + "step": 2050 + }, + { + "epoch": 7.4640287769784175, + "grad_norm": 0.580878496170044, + "learning_rate": 4.945707070707071e-06, + "loss": 0.0047, + "step": 2075 + }, + { + "epoch": 7.553956834532374, + "grad_norm": 0.0858689397573471, + "learning_rate": 4.944444444444445e-06, + "loss": 0.0047, + "step": 2100 + }, + { + "epoch": 7.643884892086331, + "grad_norm": 0.9921578168869019, + "learning_rate": 4.9431818181818184e-06, + "loss": 0.0049, + "step": 2125 + }, + { + "epoch": 7.733812949640288, + "grad_norm": 0.3222315311431885, + "learning_rate": 4.9419191919191924e-06, + "loss": 0.0039, + "step": 2150 + }, + { + "epoch": 7.823741007194244, + "grad_norm": 0.2401006668806076, + "learning_rate": 4.940656565656566e-06, + "loss": 0.0045, + "step": 2175 + }, + { + "epoch": 7.913669064748201, + "grad_norm": 0.26786544919013977, + "learning_rate": 4.93939393939394e-06, + "loss": 0.0037, + "step": 2200 + }, + { + "epoch": 8.003597122302159, + "grad_norm": 1.120921015739441, + "learning_rate": 4.938131313131314e-06, + "loss": 0.0048, + "step": 2225 + }, + { + "epoch": 8.093525179856115, + "grad_norm": 0.7425853610038757, + "learning_rate": 4.936868686868687e-06, + "loss": 0.0036, + "step": 2250 + }, + { + "epoch": 8.183453237410072, + "grad_norm": 0.19618873298168182, + "learning_rate": 4.935606060606061e-06, + "loss": 0.0038, + "step": 2275 + }, + { + "epoch": 8.273381294964029, + "grad_norm": 0.41672375798225403, + "learning_rate": 
4.934343434343435e-06, + "loss": 0.003, + "step": 2300 + }, + { + "epoch": 8.363309352517986, + "grad_norm": 0.3363110423088074, + "learning_rate": 4.933080808080809e-06, + "loss": 0.0031, + "step": 2325 + }, + { + "epoch": 8.453237410071942, + "grad_norm": 0.8529962301254272, + "learning_rate": 4.931818181818182e-06, + "loss": 0.0034, + "step": 2350 + }, + { + "epoch": 8.543165467625899, + "grad_norm": 0.15698625147342682, + "learning_rate": 4.930555555555556e-06, + "loss": 0.0033, + "step": 2375 + }, + { + "epoch": 8.633093525179856, + "grad_norm": 0.19619868695735931, + "learning_rate": 4.92929292929293e-06, + "loss": 0.004, + "step": 2400 + }, + { + "epoch": 8.723021582733812, + "grad_norm": 0.2903304994106293, + "learning_rate": 4.928030303030303e-06, + "loss": 0.0034, + "step": 2425 + }, + { + "epoch": 8.81294964028777, + "grad_norm": 0.5127314329147339, + "learning_rate": 4.926767676767677e-06, + "loss": 0.0035, + "step": 2450 + }, + { + "epoch": 8.902877697841726, + "grad_norm": 1.0652037858963013, + "learning_rate": 4.925505050505051e-06, + "loss": 0.0045, + "step": 2475 + }, + { + "epoch": 8.992805755395683, + "grad_norm": 0.9570706486701965, + "learning_rate": 4.924242424242425e-06, + "loss": 0.0042, + "step": 2500 + }, + { + "epoch": 9.082733812949641, + "grad_norm": 0.5939081907272339, + "learning_rate": 4.922979797979798e-06, + "loss": 0.0032, + "step": 2525 + }, + { + "epoch": 9.172661870503598, + "grad_norm": 0.25739356875419617, + "learning_rate": 4.921717171717172e-06, + "loss": 0.0038, + "step": 2550 + }, + { + "epoch": 9.262589928057555, + "grad_norm": 0.17940430343151093, + "learning_rate": 4.920454545454546e-06, + "loss": 0.0029, + "step": 2575 + }, + { + "epoch": 9.352517985611511, + "grad_norm": 0.33168259263038635, + "learning_rate": 4.919191919191919e-06, + "loss": 0.0028, + "step": 2600 + }, + { + "epoch": 9.442446043165468, + "grad_norm": 0.20831653475761414, + "learning_rate": 4.917929292929293e-06, + "loss": 0.002, + "step": 2625 + }, + { + "epoch": 9.532374100719425, + "grad_norm": 0.19978338479995728, + "learning_rate": 4.9166666666666665e-06, + "loss": 0.0025, + "step": 2650 + }, + { + "epoch": 9.622302158273381, + "grad_norm": 0.23154591023921967, + "learning_rate": 4.915404040404041e-06, + "loss": 0.0033, + "step": 2675 + }, + { + "epoch": 9.712230215827338, + "grad_norm": 0.7622235417366028, + "learning_rate": 4.9141414141414145e-06, + "loss": 0.0039, + "step": 2700 + }, + { + "epoch": 9.802158273381295, + "grad_norm": 0.23092857003211975, + "learning_rate": 4.9128787878787885e-06, + "loss": 0.0044, + "step": 2725 + }, + { + "epoch": 9.892086330935252, + "grad_norm": 0.5034282207489014, + "learning_rate": 4.9116161616161625e-06, + "loss": 0.0035, + "step": 2750 + }, + { + "epoch": 9.982014388489208, + "grad_norm": 0.2582780122756958, + "learning_rate": 4.910353535353536e-06, + "loss": 0.0033, + "step": 2775 + }, + { + "epoch": 10.071942446043165, + "grad_norm": 0.4610576033592224, + "learning_rate": 4.90909090909091e-06, + "loss": 0.0037, + "step": 2800 + }, + { + "epoch": 10.161870503597122, + "grad_norm": 0.217066690325737, + "learning_rate": 4.907828282828283e-06, + "loss": 0.0028, + "step": 2825 + }, + { + "epoch": 10.251798561151078, + "grad_norm": 0.05713683366775513, + "learning_rate": 4.906565656565658e-06, + "loss": 0.003, + "step": 2850 + }, + { + "epoch": 10.341726618705035, + "grad_norm": 0.5356289148330688, + "learning_rate": 4.905303030303031e-06, + "loss": 0.0018, + "step": 2875 + }, + { + "epoch": 10.431654676258994, + "grad_norm": 
0.37969082593917847, + "learning_rate": 4.904040404040405e-06, + "loss": 0.0022, + "step": 2900 + }, + { + "epoch": 10.52158273381295, + "grad_norm": 1.078008770942688, + "learning_rate": 4.902777777777778e-06, + "loss": 0.0032, + "step": 2925 + }, + { + "epoch": 10.611510791366907, + "grad_norm": 0.26670244336128235, + "learning_rate": 4.901515151515152e-06, + "loss": 0.0027, + "step": 2950 + }, + { + "epoch": 10.701438848920864, + "grad_norm": 0.673686683177948, + "learning_rate": 4.900252525252526e-06, + "loss": 0.0029, + "step": 2975 + }, + { + "epoch": 10.79136690647482, + "grad_norm": 0.37779000401496887, + "learning_rate": 4.898989898989899e-06, + "loss": 0.0022, + "step": 3000 + }, + { + "epoch": 10.79136690647482, + "eval_loss": 0.10011211037635803, + "eval_runtime": 1344.035, + "eval_samples_per_second": 1.653, + "eval_steps_per_second": 0.103, + "eval_wer": 9.049840776123824, + "step": 3000 + }, + { + "epoch": 10.881294964028777, + "grad_norm": 0.09616148471832275, + "learning_rate": 4.897727272727273e-06, + "loss": 0.0041, + "step": 3025 + }, + { + "epoch": 10.971223021582734, + "grad_norm": 0.8408087491989136, + "learning_rate": 4.896464646464647e-06, + "loss": 0.0046, + "step": 3050 + }, + { + "epoch": 11.06115107913669, + "grad_norm": 0.1868293583393097, + "learning_rate": 4.895202020202021e-06, + "loss": 0.0027, + "step": 3075 + }, + { + "epoch": 11.151079136690647, + "grad_norm": 0.19219942390918732, + "learning_rate": 4.893939393939394e-06, + "loss": 0.0024, + "step": 3100 + }, + { + "epoch": 11.241007194244604, + "grad_norm": 3.7455391883850098, + "learning_rate": 4.892676767676768e-06, + "loss": 0.0027, + "step": 3125 + }, + { + "epoch": 11.33093525179856, + "grad_norm": 0.2693164348602295, + "learning_rate": 4.891414141414142e-06, + "loss": 0.002, + "step": 3150 + }, + { + "epoch": 11.420863309352518, + "grad_norm": 0.8100782632827759, + "learning_rate": 4.890151515151515e-06, + "loss": 0.0033, + "step": 3175 + }, + { + "epoch": 11.510791366906474, + "grad_norm": 0.30300647020339966, + "learning_rate": 4.888888888888889e-06, + "loss": 0.0025, + "step": 3200 + }, + { + "epoch": 11.600719424460431, + "grad_norm": 0.49988773465156555, + "learning_rate": 4.887626262626263e-06, + "loss": 0.002, + "step": 3225 + }, + { + "epoch": 11.690647482014388, + "grad_norm": 0.2162599414587021, + "learning_rate": 4.8863636363636365e-06, + "loss": 0.0024, + "step": 3250 + }, + { + "epoch": 11.780575539568346, + "grad_norm": 2.3612468242645264, + "learning_rate": 4.8851010101010105e-06, + "loss": 0.0045, + "step": 3275 + }, + { + "epoch": 11.870503597122303, + "grad_norm": 0.4287119209766388, + "learning_rate": 4.883838383838384e-06, + "loss": 0.0051, + "step": 3300 + }, + { + "epoch": 11.96043165467626, + "grad_norm": 0.46471118927001953, + "learning_rate": 4.8825757575757585e-06, + "loss": 0.0036, + "step": 3325 + }, + { + "epoch": 12.050359712230216, + "grad_norm": 0.4310344159603119, + "learning_rate": 4.881313131313132e-06, + "loss": 0.0031, + "step": 3350 + }, + { + "epoch": 12.140287769784173, + "grad_norm": 0.8054510951042175, + "learning_rate": 4.880050505050506e-06, + "loss": 0.0036, + "step": 3375 + }, + { + "epoch": 12.23021582733813, + "grad_norm": 0.5783084630966187, + "learning_rate": 4.878787878787879e-06, + "loss": 0.0023, + "step": 3400 + }, + { + "epoch": 12.320143884892087, + "grad_norm": 0.1537202149629593, + "learning_rate": 4.877525252525253e-06, + "loss": 0.0031, + "step": 3425 + }, + { + "epoch": 12.410071942446043, + "grad_norm": 0.25773826241493225, + 
"learning_rate": 4.876262626262627e-06, + "loss": 0.0029, + "step": 3450 + }, + { + "epoch": 12.5, + "grad_norm": 1.0221893787384033, + "learning_rate": 4.875e-06, + "loss": 0.003, + "step": 3475 + }, + { + "epoch": 12.589928057553957, + "grad_norm": 0.2363336831331253, + "learning_rate": 4.873737373737374e-06, + "loss": 0.0036, + "step": 3500 + }, + { + "epoch": 12.679856115107913, + "grad_norm": 0.9339852333068848, + "learning_rate": 4.872474747474748e-06, + "loss": 0.004, + "step": 3525 + }, + { + "epoch": 12.76978417266187, + "grad_norm": 0.6633305549621582, + "learning_rate": 4.871212121212122e-06, + "loss": 0.0032, + "step": 3550 + }, + { + "epoch": 12.859712230215827, + "grad_norm": 0.7261077761650085, + "learning_rate": 4.869949494949495e-06, + "loss": 0.0028, + "step": 3575 + }, + { + "epoch": 12.949640287769784, + "grad_norm": 0.6666585803031921, + "learning_rate": 4.868686868686869e-06, + "loss": 0.0031, + "step": 3600 + }, + { + "epoch": 13.03956834532374, + "grad_norm": 0.42198774218559265, + "learning_rate": 4.867424242424243e-06, + "loss": 0.0023, + "step": 3625 + }, + { + "epoch": 13.129496402877697, + "grad_norm": 0.1100483238697052, + "learning_rate": 4.866161616161616e-06, + "loss": 0.002, + "step": 3650 + }, + { + "epoch": 13.219424460431656, + "grad_norm": 0.5182665586471558, + "learning_rate": 4.86489898989899e-06, + "loss": 0.003, + "step": 3675 + }, + { + "epoch": 13.309352517985612, + "grad_norm": 0.10821045190095901, + "learning_rate": 4.863636363636364e-06, + "loss": 0.0024, + "step": 3700 + }, + { + "epoch": 13.399280575539569, + "grad_norm": 0.302943617105484, + "learning_rate": 4.862373737373738e-06, + "loss": 0.0022, + "step": 3725 + }, + { + "epoch": 13.489208633093526, + "grad_norm": 0.34953269362449646, + "learning_rate": 4.861111111111111e-06, + "loss": 0.0024, + "step": 3750 + }, + { + "epoch": 13.579136690647482, + "grad_norm": 0.3864242732524872, + "learning_rate": 4.859848484848485e-06, + "loss": 0.0025, + "step": 3775 + }, + { + "epoch": 13.66906474820144, + "grad_norm": 0.23528048396110535, + "learning_rate": 4.858585858585859e-06, + "loss": 0.0028, + "step": 3800 + }, + { + "epoch": 13.758992805755396, + "grad_norm": 0.31728431582450867, + "learning_rate": 4.8573232323232325e-06, + "loss": 0.0041, + "step": 3825 + }, + { + "epoch": 13.848920863309353, + "grad_norm": 0.5803298950195312, + "learning_rate": 4.8560606060606065e-06, + "loss": 0.0028, + "step": 3850 + }, + { + "epoch": 13.93884892086331, + "grad_norm": 0.30145183205604553, + "learning_rate": 4.85479797979798e-06, + "loss": 0.0022, + "step": 3875 + }, + { + "epoch": 14.028776978417266, + "grad_norm": 0.43851757049560547, + "learning_rate": 4.8535353535353545e-06, + "loss": 0.0024, + "step": 3900 + }, + { + "epoch": 14.118705035971223, + "grad_norm": 0.7910506725311279, + "learning_rate": 4.852272727272728e-06, + "loss": 0.0033, + "step": 3925 + }, + { + "epoch": 14.20863309352518, + "grad_norm": 0.3168434500694275, + "learning_rate": 4.851010101010102e-06, + "loss": 0.0028, + "step": 3950 + }, + { + "epoch": 14.298561151079136, + "grad_norm": 0.7242361307144165, + "learning_rate": 4.849747474747475e-06, + "loss": 0.0031, + "step": 3975 + }, + { + "epoch": 14.388489208633093, + "grad_norm": 0.7368125319480896, + "learning_rate": 4.848484848484849e-06, + "loss": 0.0027, + "step": 4000 + }, + { + "epoch": 14.388489208633093, + "eval_loss": 0.09274967014789581, + "eval_runtime": 1343.7242, + "eval_samples_per_second": 1.654, + "eval_steps_per_second": 0.103, + "eval_wer": 9.375694290157742, + 
"step": 4000 + }, + { + "epoch": 14.47841726618705, + "grad_norm": 0.420599102973938, + "learning_rate": 4.847222222222223e-06, + "loss": 0.0028, + "step": 4025 + }, + { + "epoch": 14.568345323741006, + "grad_norm": 0.3025602698326111, + "learning_rate": 4.845959595959596e-06, + "loss": 0.0028, + "step": 4050 + }, + { + "epoch": 14.658273381294965, + "grad_norm": 0.7078948020935059, + "learning_rate": 4.84469696969697e-06, + "loss": 0.003, + "step": 4075 + }, + { + "epoch": 14.748201438848922, + "grad_norm": 0.5534040331840515, + "learning_rate": 4.843434343434344e-06, + "loss": 0.0031, + "step": 4100 + }, + { + "epoch": 14.838129496402878, + "grad_norm": 0.28715190291404724, + "learning_rate": 4.842171717171718e-06, + "loss": 0.0028, + "step": 4125 + }, + { + "epoch": 14.928057553956835, + "grad_norm": 0.5861944556236267, + "learning_rate": 4.840909090909091e-06, + "loss": 0.0028, + "step": 4150 + }, + { + "epoch": 15.017985611510792, + "grad_norm": 0.102662093937397, + "learning_rate": 4.839646464646465e-06, + "loss": 0.0057, + "step": 4175 + }, + { + "epoch": 15.107913669064748, + "grad_norm": 0.15230265259742737, + "learning_rate": 4.838383838383839e-06, + "loss": 0.0023, + "step": 4200 + }, + { + "epoch": 15.197841726618705, + "grad_norm": 0.12530238926410675, + "learning_rate": 4.837121212121212e-06, + "loss": 0.0017, + "step": 4225 + }, + { + "epoch": 15.287769784172662, + "grad_norm": 0.09885858744382858, + "learning_rate": 4.835858585858586e-06, + "loss": 0.0022, + "step": 4250 + }, + { + "epoch": 15.377697841726619, + "grad_norm": 0.1105910986661911, + "learning_rate": 4.83459595959596e-06, + "loss": 0.0026, + "step": 4275 + }, + { + "epoch": 15.467625899280575, + "grad_norm": 0.3952260911464691, + "learning_rate": 4.833333333333333e-06, + "loss": 0.0021, + "step": 4300 + }, + { + "epoch": 15.557553956834532, + "grad_norm": 0.6049605011940002, + "learning_rate": 4.832070707070707e-06, + "loss": 0.0021, + "step": 4325 + }, + { + "epoch": 15.647482014388489, + "grad_norm": 0.7125779986381531, + "learning_rate": 4.830808080808081e-06, + "loss": 0.0015, + "step": 4350 + }, + { + "epoch": 15.737410071942445, + "grad_norm": 0.16274645924568176, + "learning_rate": 4.829545454545455e-06, + "loss": 0.0019, + "step": 4375 + }, + { + "epoch": 15.827338129496402, + "grad_norm": 0.6492106318473816, + "learning_rate": 4.8282828282828285e-06, + "loss": 0.0019, + "step": 4400 + }, + { + "epoch": 15.917266187050359, + "grad_norm": 0.9411545991897583, + "learning_rate": 4.8270202020202025e-06, + "loss": 0.003, + "step": 4425 + }, + { + "epoch": 16.007194244604317, + "grad_norm": 0.03323192521929741, + "learning_rate": 4.8257575757575765e-06, + "loss": 0.0018, + "step": 4450 + }, + { + "epoch": 16.097122302158272, + "grad_norm": 0.1154596135020256, + "learning_rate": 4.82449494949495e-06, + "loss": 0.0015, + "step": 4475 + }, + { + "epoch": 16.18705035971223, + "grad_norm": 0.41669028997421265, + "learning_rate": 4.823232323232324e-06, + "loss": 0.0016, + "step": 4500 + }, + { + "epoch": 16.276978417266186, + "grad_norm": 0.25636962056159973, + "learning_rate": 4.821969696969697e-06, + "loss": 0.0014, + "step": 4525 + }, + { + "epoch": 16.366906474820144, + "grad_norm": 3.250777244567871, + "learning_rate": 4.820707070707072e-06, + "loss": 0.0027, + "step": 4550 + }, + { + "epoch": 16.4568345323741, + "grad_norm": 1.1029988527297974, + "learning_rate": 4.819444444444445e-06, + "loss": 0.0028, + "step": 4575 + }, + { + "epoch": 16.546762589928058, + "grad_norm": 0.3530588150024414, + 
"learning_rate": 4.818181818181819e-06, + "loss": 0.0015, + "step": 4600 + }, + { + "epoch": 16.636690647482013, + "grad_norm": 0.0861181914806366, + "learning_rate": 4.816919191919192e-06, + "loss": 0.0023, + "step": 4625 + }, + { + "epoch": 16.72661870503597, + "grad_norm": 0.44006574153900146, + "learning_rate": 4.815656565656566e-06, + "loss": 0.0021, + "step": 4650 + }, + { + "epoch": 16.81654676258993, + "grad_norm": 0.9688239097595215, + "learning_rate": 4.81439393939394e-06, + "loss": 0.0014, + "step": 4675 + }, + { + "epoch": 16.906474820143885, + "grad_norm": 0.848913311958313, + "learning_rate": 4.813131313131313e-06, + "loss": 0.0021, + "step": 4700 + }, + { + "epoch": 16.996402877697843, + "grad_norm": 0.14554986357688904, + "learning_rate": 4.811868686868687e-06, + "loss": 0.0013, + "step": 4725 + }, + { + "epoch": 17.086330935251798, + "grad_norm": 0.31808871030807495, + "learning_rate": 4.810606060606061e-06, + "loss": 0.0019, + "step": 4750 + }, + { + "epoch": 17.176258992805757, + "grad_norm": 0.2081349641084671, + "learning_rate": 4.809343434343435e-06, + "loss": 0.0018, + "step": 4775 + }, + { + "epoch": 17.26618705035971, + "grad_norm": 0.0817071720957756, + "learning_rate": 4.808080808080808e-06, + "loss": 0.0011, + "step": 4800 + }, + { + "epoch": 17.35611510791367, + "grad_norm": 0.148326575756073, + "learning_rate": 4.806818181818182e-06, + "loss": 0.0011, + "step": 4825 + }, + { + "epoch": 17.446043165467625, + "grad_norm": 1.1114903688430786, + "learning_rate": 4.805555555555556e-06, + "loss": 0.0012, + "step": 4850 + }, + { + "epoch": 17.535971223021583, + "grad_norm": 0.5132379531860352, + "learning_rate": 4.804292929292929e-06, + "loss": 0.0015, + "step": 4875 + }, + { + "epoch": 17.62589928057554, + "grad_norm": 0.5439797043800354, + "learning_rate": 4.803030303030303e-06, + "loss": 0.0019, + "step": 4900 + }, + { + "epoch": 17.715827338129497, + "grad_norm": 0.4897061586380005, + "learning_rate": 4.801767676767677e-06, + "loss": 0.0022, + "step": 4925 + }, + { + "epoch": 17.805755395683452, + "grad_norm": 0.13605351746082306, + "learning_rate": 4.800505050505051e-06, + "loss": 0.0017, + "step": 4950 + }, + { + "epoch": 17.89568345323741, + "grad_norm": 0.6285837888717651, + "learning_rate": 4.7992424242424245e-06, + "loss": 0.0014, + "step": 4975 + }, + { + "epoch": 17.985611510791365, + "grad_norm": 0.04884183779358864, + "learning_rate": 4.7979797979797985e-06, + "loss": 0.0011, + "step": 5000 + }, + { + "epoch": 17.985611510791365, + "eval_loss": 0.09266538918018341, + "eval_runtime": 1344.6458, + "eval_samples_per_second": 1.652, + "eval_steps_per_second": 0.103, + "eval_wer": 8.835073687328741, + "step": 5000 + }, + { + "epoch": 18.075539568345324, + "grad_norm": 0.036710768938064575, + "learning_rate": 4.7967171717171725e-06, + "loss": 0.0024, + "step": 5025 + }, + { + "epoch": 18.165467625899282, + "grad_norm": 0.41920551657676697, + "learning_rate": 4.795454545454546e-06, + "loss": 0.0011, + "step": 5050 + }, + { + "epoch": 18.255395683453237, + "grad_norm": 0.2354598492383957, + "learning_rate": 4.79419191919192e-06, + "loss": 0.0018, + "step": 5075 + }, + { + "epoch": 18.345323741007196, + "grad_norm": 0.4095918536186218, + "learning_rate": 4.792929292929293e-06, + "loss": 0.0015, + "step": 5100 + }, + { + "epoch": 18.43525179856115, + "grad_norm": 0.03964778780937195, + "learning_rate": 4.791666666666668e-06, + "loss": 0.0019, + "step": 5125 + }, + { + "epoch": 18.52517985611511, + "grad_norm": 0.9322590827941895, + "learning_rate": 
4.790404040404041e-06, + "loss": 0.0014, + "step": 5150 + }, + { + "epoch": 18.615107913669064, + "grad_norm": 0.11062884330749512, + "learning_rate": 4.789141414141415e-06, + "loss": 0.0015, + "step": 5175 + }, + { + "epoch": 18.705035971223023, + "grad_norm": 0.4186955690383911, + "learning_rate": 4.787878787878788e-06, + "loss": 0.0013, + "step": 5200 + }, + { + "epoch": 18.794964028776977, + "grad_norm": 0.40554943680763245, + "learning_rate": 4.786616161616162e-06, + "loss": 0.0017, + "step": 5225 + }, + { + "epoch": 18.884892086330936, + "grad_norm": 0.4156556725502014, + "learning_rate": 4.785353535353536e-06, + "loss": 0.0016, + "step": 5250 + }, + { + "epoch": 18.97482014388489, + "grad_norm": 0.8705348968505859, + "learning_rate": 4.784090909090909e-06, + "loss": 0.003, + "step": 5275 + }, + { + "epoch": 19.06474820143885, + "grad_norm": 0.47541674971580505, + "learning_rate": 4.782828282828283e-06, + "loss": 0.0026, + "step": 5300 + }, + { + "epoch": 19.154676258992804, + "grad_norm": 0.3221082389354706, + "learning_rate": 4.781565656565657e-06, + "loss": 0.0014, + "step": 5325 + }, + { + "epoch": 19.244604316546763, + "grad_norm": 0.26767319440841675, + "learning_rate": 4.78030303030303e-06, + "loss": 0.0015, + "step": 5350 + }, + { + "epoch": 19.334532374100718, + "grad_norm": 0.41984379291534424, + "learning_rate": 4.779040404040404e-06, + "loss": 0.0026, + "step": 5375 + }, + { + "epoch": 19.424460431654676, + "grad_norm": 0.6067033410072327, + "learning_rate": 4.777777777777778e-06, + "loss": 0.0031, + "step": 5400 + }, + { + "epoch": 19.514388489208635, + "grad_norm": 0.23113247752189636, + "learning_rate": 4.776515151515152e-06, + "loss": 0.0027, + "step": 5425 + }, + { + "epoch": 19.60431654676259, + "grad_norm": 0.7052062153816223, + "learning_rate": 4.775252525252525e-06, + "loss": 0.0038, + "step": 5450 + }, + { + "epoch": 19.694244604316548, + "grad_norm": 1.4232673645019531, + "learning_rate": 4.773989898989899e-06, + "loss": 0.0024, + "step": 5475 + }, + { + "epoch": 19.784172661870503, + "grad_norm": 0.12078073620796204, + "learning_rate": 4.772727272727273e-06, + "loss": 0.0014, + "step": 5500 + }, + { + "epoch": 19.87410071942446, + "grad_norm": 1.296155333518982, + "learning_rate": 4.7714646464646465e-06, + "loss": 0.0028, + "step": 5525 + }, + { + "epoch": 19.964028776978417, + "grad_norm": 0.4774380922317505, + "learning_rate": 4.7702020202020205e-06, + "loss": 0.0039, + "step": 5550 + }, + { + "epoch": 20.053956834532375, + "grad_norm": 0.7243533134460449, + "learning_rate": 4.768939393939394e-06, + "loss": 0.0038, + "step": 5575 + }, + { + "epoch": 20.14388489208633, + "grad_norm": 0.03761635348200798, + "learning_rate": 4.7676767676767685e-06, + "loss": 0.0028, + "step": 5600 + }, + { + "epoch": 20.23381294964029, + "grad_norm": 0.3167934715747833, + "learning_rate": 4.766414141414142e-06, + "loss": 0.0023, + "step": 5625 + }, + { + "epoch": 20.323741007194243, + "grad_norm": 0.08072912693023682, + "learning_rate": 4.765151515151516e-06, + "loss": 0.0021, + "step": 5650 + }, + { + "epoch": 20.413669064748202, + "grad_norm": 0.0809144377708435, + "learning_rate": 4.763888888888889e-06, + "loss": 0.0033, + "step": 5675 + }, + { + "epoch": 20.503597122302157, + "grad_norm": 0.021725259721279144, + "learning_rate": 4.762626262626263e-06, + "loss": 0.0022, + "step": 5700 + }, + { + "epoch": 20.593525179856115, + "grad_norm": 0.79271399974823, + "learning_rate": 4.761363636363637e-06, + "loss": 0.0015, + "step": 5725 + }, + { + "epoch": 20.68345323741007, + 
"grad_norm": 0.10382846742868423, + "learning_rate": 4.76010101010101e-06, + "loss": 0.0019, + "step": 5750 + }, + { + "epoch": 20.77338129496403, + "grad_norm": 0.03259812295436859, + "learning_rate": 4.758838383838385e-06, + "loss": 0.002, + "step": 5775 + }, + { + "epoch": 20.863309352517987, + "grad_norm": 0.6223962306976318, + "learning_rate": 4.757575757575758e-06, + "loss": 0.0036, + "step": 5800 + }, + { + "epoch": 20.953237410071942, + "grad_norm": 1.0351557731628418, + "learning_rate": 4.756313131313132e-06, + "loss": 0.0022, + "step": 5825 + }, + { + "epoch": 21.0431654676259, + "grad_norm": 0.8662335276603699, + "learning_rate": 4.755050505050505e-06, + "loss": 0.0028, + "step": 5850 + }, + { + "epoch": 21.133093525179856, + "grad_norm": 0.13104894757270813, + "learning_rate": 4.753787878787879e-06, + "loss": 0.0028, + "step": 5875 + }, + { + "epoch": 21.223021582733814, + "grad_norm": 0.8010006546974182, + "learning_rate": 4.752525252525253e-06, + "loss": 0.0021, + "step": 5900 + }, + { + "epoch": 21.31294964028777, + "grad_norm": 0.7761834263801575, + "learning_rate": 4.751262626262626e-06, + "loss": 0.0035, + "step": 5925 + }, + { + "epoch": 21.402877697841728, + "grad_norm": 0.05642890930175781, + "learning_rate": 4.75e-06, + "loss": 0.0015, + "step": 5950 + }, + { + "epoch": 21.492805755395683, + "grad_norm": 0.2215975672006607, + "learning_rate": 4.748737373737374e-06, + "loss": 0.0011, + "step": 5975 + }, + { + "epoch": 21.58273381294964, + "grad_norm": 0.5649552345275879, + "learning_rate": 4.747474747474748e-06, + "loss": 0.0017, + "step": 6000 + }, + { + "epoch": 21.58273381294964, + "eval_loss": 0.08750007301568985, + "eval_runtime": 1349.1716, + "eval_samples_per_second": 1.647, + "eval_steps_per_second": 0.103, + "eval_wer": 7.657557579797082, + "step": 6000 + }, + { + "epoch": 21.672661870503596, + "grad_norm": 0.3567905128002167, + "learning_rate": 4.746212121212121e-06, + "loss": 0.0023, + "step": 6025 + }, + { + "epoch": 21.762589928057555, + "grad_norm": 0.7165196537971497, + "learning_rate": 4.744949494949495e-06, + "loss": 0.0019, + "step": 6050 + }, + { + "epoch": 21.85251798561151, + "grad_norm": 0.9009844660758972, + "learning_rate": 4.743686868686869e-06, + "loss": 0.0022, + "step": 6075 + }, + { + "epoch": 21.942446043165468, + "grad_norm": 0.7037338614463806, + "learning_rate": 4.7424242424242426e-06, + "loss": 0.0026, + "step": 6100 + }, + { + "epoch": 22.032374100719423, + "grad_norm": 0.2905846834182739, + "learning_rate": 4.7411616161616166e-06, + "loss": 0.002, + "step": 6125 + }, + { + "epoch": 22.12230215827338, + "grad_norm": 0.7335506677627563, + "learning_rate": 4.7398989898989905e-06, + "loss": 0.0019, + "step": 6150 + }, + { + "epoch": 22.21223021582734, + "grad_norm": 0.3520030677318573, + "learning_rate": 4.7386363636363645e-06, + "loss": 0.0016, + "step": 6175 + }, + { + "epoch": 22.302158273381295, + "grad_norm": 0.3580196797847748, + "learning_rate": 4.737373737373738e-06, + "loss": 0.0014, + "step": 6200 + }, + { + "epoch": 22.392086330935253, + "grad_norm": 0.19062575697898865, + "learning_rate": 4.736111111111112e-06, + "loss": 0.002, + "step": 6225 + }, + { + "epoch": 22.48201438848921, + "grad_norm": 0.6567767858505249, + "learning_rate": 4.734848484848486e-06, + "loss": 0.0021, + "step": 6250 + }, + { + "epoch": 22.571942446043167, + "grad_norm": 0.24819691479206085, + "learning_rate": 4.733585858585859e-06, + "loss": 0.0019, + "step": 6275 + }, + { + "epoch": 22.66187050359712, + "grad_norm": 0.47786185145378113, + 
"learning_rate": 4.732323232323233e-06, + "loss": 0.0014, + "step": 6300 + }, + { + "epoch": 22.75179856115108, + "grad_norm": 0.05066821351647377, + "learning_rate": 4.731060606060606e-06, + "loss": 0.0018, + "step": 6325 + }, + { + "epoch": 22.841726618705035, + "grad_norm": 0.33751770853996277, + "learning_rate": 4.72979797979798e-06, + "loss": 0.0028, + "step": 6350 + }, + { + "epoch": 22.931654676258994, + "grad_norm": 0.03158155083656311, + "learning_rate": 4.728535353535354e-06, + "loss": 0.0013, + "step": 6375 + }, + { + "epoch": 23.02158273381295, + "grad_norm": 0.05814801901578903, + "learning_rate": 4.727272727272728e-06, + "loss": 0.0021, + "step": 6400 + }, + { + "epoch": 23.111510791366907, + "grad_norm": 0.031183883547782898, + "learning_rate": 4.726010101010101e-06, + "loss": 0.0011, + "step": 6425 + }, + { + "epoch": 23.201438848920862, + "grad_norm": 0.539813756942749, + "learning_rate": 4.724747474747475e-06, + "loss": 0.0009, + "step": 6450 + }, + { + "epoch": 23.29136690647482, + "grad_norm": 0.14558178186416626, + "learning_rate": 4.723484848484849e-06, + "loss": 0.0018, + "step": 6475 + }, + { + "epoch": 23.381294964028775, + "grad_norm": 0.10804769396781921, + "learning_rate": 4.722222222222222e-06, + "loss": 0.0013, + "step": 6500 + }, + { + "epoch": 23.471223021582734, + "grad_norm": 0.3211396038532257, + "learning_rate": 4.720959595959596e-06, + "loss": 0.0015, + "step": 6525 + }, + { + "epoch": 23.56115107913669, + "grad_norm": 0.16721013188362122, + "learning_rate": 4.71969696969697e-06, + "loss": 0.0027, + "step": 6550 + }, + { + "epoch": 23.651079136690647, + "grad_norm": 0.3473891019821167, + "learning_rate": 4.7184343434343434e-06, + "loss": 0.0014, + "step": 6575 + }, + { + "epoch": 23.741007194244606, + "grad_norm": 0.04464249685406685, + "learning_rate": 4.717171717171717e-06, + "loss": 0.0013, + "step": 6600 + }, + { + "epoch": 23.83093525179856, + "grad_norm": 0.21577273309230804, + "learning_rate": 4.715909090909091e-06, + "loss": 0.0025, + "step": 6625 + }, + { + "epoch": 23.92086330935252, + "grad_norm": 1.0553650856018066, + "learning_rate": 4.714646464646465e-06, + "loss": 0.0012, + "step": 6650 + }, + { + "epoch": 24.010791366906474, + "grad_norm": 0.015737203881144524, + "learning_rate": 4.7133838383838386e-06, + "loss": 0.0018, + "step": 6675 + }, + { + "epoch": 24.100719424460433, + "grad_norm": 0.08808793127536774, + "learning_rate": 4.7121212121212126e-06, + "loss": 0.0008, + "step": 6700 + }, + { + "epoch": 24.190647482014388, + "grad_norm": 0.01893734373152256, + "learning_rate": 4.7108585858585866e-06, + "loss": 0.0008, + "step": 6725 + }, + { + "epoch": 24.280575539568346, + "grad_norm": 0.032726775854825974, + "learning_rate": 4.70959595959596e-06, + "loss": 0.0011, + "step": 6750 + }, + { + "epoch": 24.3705035971223, + "grad_norm": 1.2210007905960083, + "learning_rate": 4.708333333333334e-06, + "loss": 0.0014, + "step": 6775 + }, + { + "epoch": 24.46043165467626, + "grad_norm": 0.21317902207374573, + "learning_rate": 4.707070707070707e-06, + "loss": 0.0008, + "step": 6800 + }, + { + "epoch": 24.550359712230215, + "grad_norm": 0.02254541404545307, + "learning_rate": 4.705808080808082e-06, + "loss": 0.0008, + "step": 6825 + }, + { + "epoch": 24.640287769784173, + "grad_norm": 0.19283901154994965, + "learning_rate": 4.704545454545455e-06, + "loss": 0.0006, + "step": 6850 + }, + { + "epoch": 24.730215827338128, + "grad_norm": 0.1615646928548813, + "learning_rate": 4.703282828282829e-06, + "loss": 0.0011, + "step": 6875 + }, + { + "epoch": 
24.820143884892087, + "grad_norm": 0.04525255784392357, + "learning_rate": 4.702020202020202e-06, + "loss": 0.0006, + "step": 6900 + }, + { + "epoch": 24.91007194244604, + "grad_norm": 0.17892493307590485, + "learning_rate": 4.700757575757576e-06, + "loss": 0.0011, + "step": 6925 + }, + { + "epoch": 25.0, + "grad_norm": 1.5881894826889038, + "learning_rate": 4.69949494949495e-06, + "loss": 0.0009, + "step": 6950 + }, + { + "epoch": 25.08992805755396, + "grad_norm": 0.028072576969861984, + "learning_rate": 4.698232323232323e-06, + "loss": 0.001, + "step": 6975 + }, + { + "epoch": 25.179856115107913, + "grad_norm": 0.034753262996673584, + "learning_rate": 4.696969696969698e-06, + "loss": 0.001, + "step": 7000 + }, + { + "epoch": 25.179856115107913, + "eval_loss": 0.08996064960956573, + "eval_runtime": 1372.6865, + "eval_samples_per_second": 1.619, + "eval_steps_per_second": 0.101, + "eval_wer": 6.591127897504258, + "step": 7000 + }, + { + "epoch": 25.269784172661872, + "grad_norm": 0.047846052795648575, + "learning_rate": 4.695707070707071e-06, + "loss": 0.0017, + "step": 7025 + }, + { + "epoch": 25.359712230215827, + "grad_norm": 0.08721514046192169, + "learning_rate": 4.694444444444445e-06, + "loss": 0.0012, + "step": 7050 + }, + { + "epoch": 25.449640287769785, + "grad_norm": 0.488505095243454, + "learning_rate": 4.693181818181818e-06, + "loss": 0.001, + "step": 7075 + }, + { + "epoch": 25.53956834532374, + "grad_norm": 0.3541705012321472, + "learning_rate": 4.691919191919192e-06, + "loss": 0.0009, + "step": 7100 + }, + { + "epoch": 25.6294964028777, + "grad_norm": 1.2867228984832764, + "learning_rate": 4.690656565656566e-06, + "loss": 0.0009, + "step": 7125 + }, + { + "epoch": 25.719424460431654, + "grad_norm": 0.06602492183446884, + "learning_rate": 4.6893939393939394e-06, + "loss": 0.001, + "step": 7150 + }, + { + "epoch": 25.809352517985612, + "grad_norm": 0.03555336222052574, + "learning_rate": 4.6881313131313134e-06, + "loss": 0.0016, + "step": 7175 + }, + { + "epoch": 25.899280575539567, + "grad_norm": 0.1011524349451065, + "learning_rate": 4.6868686868686874e-06, + "loss": 0.0028, + "step": 7200 + }, + { + "epoch": 25.989208633093526, + "grad_norm": 0.14894358813762665, + "learning_rate": 4.6856060606060614e-06, + "loss": 0.0026, + "step": 7225 + }, + { + "epoch": 26.07913669064748, + "grad_norm": 0.944786787033081, + "learning_rate": 4.684343434343435e-06, + "loss": 0.0014, + "step": 7250 + }, + { + "epoch": 26.16906474820144, + "grad_norm": 0.4678920805454254, + "learning_rate": 4.683080808080809e-06, + "loss": 0.0016, + "step": 7275 + }, + { + "epoch": 26.258992805755394, + "grad_norm": 0.0241763386875391, + "learning_rate": 4.681818181818183e-06, + "loss": 0.0018, + "step": 7300 + }, + { + "epoch": 26.348920863309353, + "grad_norm": 0.1959693878889084, + "learning_rate": 4.680555555555556e-06, + "loss": 0.0014, + "step": 7325 + }, + { + "epoch": 26.43884892086331, + "grad_norm": 0.05353585258126259, + "learning_rate": 4.67929292929293e-06, + "loss": 0.001, + "step": 7350 + }, + { + "epoch": 26.528776978417266, + "grad_norm": 0.022708551958203316, + "learning_rate": 4.678030303030303e-06, + "loss": 0.0008, + "step": 7375 + }, + { + "epoch": 26.618705035971225, + "grad_norm": 0.28148502111434937, + "learning_rate": 4.676767676767677e-06, + "loss": 0.0012, + "step": 7400 + }, + { + "epoch": 26.70863309352518, + "grad_norm": 0.0556604228913784, + "learning_rate": 4.675505050505051e-06, + "loss": 0.0018, + "step": 7425 + }, + { + "epoch": 26.798561151079138, + "grad_norm": 
0.03789166733622551, + "learning_rate": 4.674242424242425e-06, + "loss": 0.0008, + "step": 7450 + }, + { + "epoch": 26.888489208633093, + "grad_norm": 0.18029791116714478, + "learning_rate": 4.672979797979799e-06, + "loss": 0.001, + "step": 7475 + }, + { + "epoch": 26.97841726618705, + "grad_norm": 0.27599871158599854, + "learning_rate": 4.671717171717172e-06, + "loss": 0.0008, + "step": 7500 + }, + { + "epoch": 27.068345323741006, + "grad_norm": 0.4067777693271637, + "learning_rate": 4.670454545454546e-06, + "loss": 0.0017, + "step": 7525 + }, + { + "epoch": 27.158273381294965, + "grad_norm": 0.36876606941223145, + "learning_rate": 4.669191919191919e-06, + "loss": 0.0011, + "step": 7550 + }, + { + "epoch": 27.24820143884892, + "grad_norm": 0.2605381906032562, + "learning_rate": 4.667929292929293e-06, + "loss": 0.0014, + "step": 7575 + }, + { + "epoch": 27.33812949640288, + "grad_norm": 0.02853270247578621, + "learning_rate": 4.666666666666667e-06, + "loss": 0.0008, + "step": 7600 + }, + { + "epoch": 27.428057553956833, + "grad_norm": 0.055020011961460114, + "learning_rate": 4.66540404040404e-06, + "loss": 0.0009, + "step": 7625 + }, + { + "epoch": 27.51798561151079, + "grad_norm": 0.30874237418174744, + "learning_rate": 4.664141414141414e-06, + "loss": 0.0018, + "step": 7650 + }, + { + "epoch": 27.607913669064747, + "grad_norm": 0.09795974940061569, + "learning_rate": 4.662878787878788e-06, + "loss": 0.0014, + "step": 7675 + }, + { + "epoch": 27.697841726618705, + "grad_norm": 0.04705384001135826, + "learning_rate": 4.661616161616162e-06, + "loss": 0.0015, + "step": 7700 + }, + { + "epoch": 27.78776978417266, + "grad_norm": 0.058379877358675, + "learning_rate": 4.6603535353535355e-06, + "loss": 0.0008, + "step": 7725 + }, + { + "epoch": 27.87769784172662, + "grad_norm": 0.047014497220516205, + "learning_rate": 4.6590909090909095e-06, + "loss": 0.0016, + "step": 7750 + }, + { + "epoch": 27.967625899280577, + "grad_norm": 0.6353835463523865, + "learning_rate": 4.6578282828282835e-06, + "loss": 0.0012, + "step": 7775 + }, + { + "epoch": 28.057553956834532, + "grad_norm": 0.13249577581882477, + "learning_rate": 4.656565656565657e-06, + "loss": 0.0007, + "step": 7800 + }, + { + "epoch": 28.14748201438849, + "grad_norm": 0.16413046419620514, + "learning_rate": 4.655303030303031e-06, + "loss": 0.0009, + "step": 7825 + }, + { + "epoch": 28.237410071942445, + "grad_norm": 0.21356362104415894, + "learning_rate": 4.654040404040405e-06, + "loss": 0.0007, + "step": 7850 + }, + { + "epoch": 28.327338129496404, + "grad_norm": 0.0190277099609375, + "learning_rate": 4.652777777777779e-06, + "loss": 0.0007, + "step": 7875 + }, + { + "epoch": 28.41726618705036, + "grad_norm": 0.12108524143695831, + "learning_rate": 4.651515151515152e-06, + "loss": 0.0009, + "step": 7900 + }, + { + "epoch": 28.507194244604317, + "grad_norm": 0.026057908311486244, + "learning_rate": 4.650252525252526e-06, + "loss": 0.0007, + "step": 7925 + }, + { + "epoch": 28.597122302158272, + "grad_norm": 0.09515079110860825, + "learning_rate": 4.6489898989899e-06, + "loss": 0.0008, + "step": 7950 + }, + { + "epoch": 28.68705035971223, + "grad_norm": 0.48142778873443604, + "learning_rate": 4.647727272727273e-06, + "loss": 0.0007, + "step": 7975 + }, + { + "epoch": 28.776978417266186, + "grad_norm": 0.46795013546943665, + "learning_rate": 4.646464646464647e-06, + "loss": 0.0014, + "step": 8000 + }, + { + "epoch": 28.776978417266186, + "eval_loss": 0.09178629517555237, + "eval_runtime": 1347.1747, + "eval_samples_per_second": 1.649, + 
"eval_steps_per_second": 0.103, + "eval_wer": 7.139154262015849, + "step": 8000 + }, + { + "epoch": 28.866906474820144, + "grad_norm": 0.5243809223175049, + "learning_rate": 4.64520202020202e-06, + "loss": 0.0007, + "step": 8025 + }, + { + "epoch": 28.9568345323741, + "grad_norm": 0.3461306095123291, + "learning_rate": 4.643939393939395e-06, + "loss": 0.001, + "step": 8050 + }, + { + "epoch": 29.046762589928058, + "grad_norm": 0.2795426845550537, + "learning_rate": 4.642676767676768e-06, + "loss": 0.0014, + "step": 8075 + }, + { + "epoch": 29.136690647482013, + "grad_norm": 0.05419691279530525, + "learning_rate": 4.641414141414142e-06, + "loss": 0.0014, + "step": 8100 + }, + { + "epoch": 29.22661870503597, + "grad_norm": 0.08857329189777374, + "learning_rate": 4.640151515151515e-06, + "loss": 0.0016, + "step": 8125 + }, + { + "epoch": 29.31654676258993, + "grad_norm": 0.05129173770546913, + "learning_rate": 4.638888888888889e-06, + "loss": 0.0011, + "step": 8150 + }, + { + "epoch": 29.406474820143885, + "grad_norm": 1.0032382011413574, + "learning_rate": 4.637626262626263e-06, + "loss": 0.0023, + "step": 8175 + }, + { + "epoch": 29.496402877697843, + "grad_norm": 0.4335207939147949, + "learning_rate": 4.636363636363636e-06, + "loss": 0.0028, + "step": 8200 + }, + { + "epoch": 29.586330935251798, + "grad_norm": 0.15561847388744354, + "learning_rate": 4.63510101010101e-06, + "loss": 0.0028, + "step": 8225 + }, + { + "epoch": 29.676258992805757, + "grad_norm": 0.24305035173892975, + "learning_rate": 4.633838383838384e-06, + "loss": 0.0024, + "step": 8250 + }, + { + "epoch": 29.76618705035971, + "grad_norm": 1.3689900636672974, + "learning_rate": 4.632575757575758e-06, + "loss": 0.0036, + "step": 8275 + }, + { + "epoch": 29.85611510791367, + "grad_norm": 0.6511125564575195, + "learning_rate": 4.6313131313131315e-06, + "loss": 0.0025, + "step": 8300 + }, + { + "epoch": 29.946043165467625, + "grad_norm": 0.8534782528877258, + "learning_rate": 4.6300505050505055e-06, + "loss": 0.0029, + "step": 8325 + }, + { + "epoch": 30.035971223021583, + "grad_norm": 0.3412608504295349, + "learning_rate": 4.6287878787878795e-06, + "loss": 0.0028, + "step": 8350 + }, + { + "epoch": 30.12589928057554, + "grad_norm": 0.16232311725616455, + "learning_rate": 4.627525252525253e-06, + "loss": 0.0023, + "step": 8375 + }, + { + "epoch": 30.215827338129497, + "grad_norm": 0.08357956260442734, + "learning_rate": 4.626262626262627e-06, + "loss": 0.0019, + "step": 8400 + }, + { + "epoch": 30.305755395683452, + "grad_norm": 0.412728488445282, + "learning_rate": 4.625000000000001e-06, + "loss": 0.0015, + "step": 8425 + }, + { + "epoch": 30.39568345323741, + "grad_norm": 0.9784059524536133, + "learning_rate": 4.623737373737375e-06, + "loss": 0.0025, + "step": 8450 + }, + { + "epoch": 30.485611510791365, + "grad_norm": 0.38275232911109924, + "learning_rate": 4.622474747474748e-06, + "loss": 0.0016, + "step": 8475 + }, + { + "epoch": 30.575539568345324, + "grad_norm": 0.3518912196159363, + "learning_rate": 4.621212121212122e-06, + "loss": 0.0024, + "step": 8500 + }, + { + "epoch": 30.665467625899282, + "grad_norm": 0.8633609414100647, + "learning_rate": 4.619949494949496e-06, + "loss": 0.0022, + "step": 8525 + }, + { + "epoch": 30.755395683453237, + "grad_norm": 0.23257087171077728, + "learning_rate": 4.618686868686869e-06, + "loss": 0.0016, + "step": 8550 + }, + { + "epoch": 30.845323741007196, + "grad_norm": 1.2157853841781616, + "learning_rate": 4.617424242424243e-06, + "loss": 0.0013, + "step": 8575 + }, + { + "epoch": 
30.93525179856115, + "grad_norm": 0.6692176461219788, + "learning_rate": 4.616161616161616e-06, + "loss": 0.0025, + "step": 8600 + }, + { + "epoch": 31.02517985611511, + "grad_norm": 0.08320923149585724, + "learning_rate": 4.61489898989899e-06, + "loss": 0.0015, + "step": 8625 + }, + { + "epoch": 31.115107913669064, + "grad_norm": 0.03867033123970032, + "learning_rate": 4.613636363636364e-06, + "loss": 0.0011, + "step": 8650 + }, + { + "epoch": 31.205035971223023, + "grad_norm": 0.37571918964385986, + "learning_rate": 4.612373737373737e-06, + "loss": 0.002, + "step": 8675 + }, + { + "epoch": 31.294964028776977, + "grad_norm": 0.023200325667858124, + "learning_rate": 4.611111111111112e-06, + "loss": 0.0017, + "step": 8700 + }, + { + "epoch": 31.384892086330936, + "grad_norm": 0.025962859392166138, + "learning_rate": 4.609848484848485e-06, + "loss": 0.0025, + "step": 8725 + }, + { + "epoch": 31.47482014388489, + "grad_norm": 0.07832462340593338, + "learning_rate": 4.608585858585859e-06, + "loss": 0.002, + "step": 8750 + }, + { + "epoch": 31.56474820143885, + "grad_norm": 0.5365622043609619, + "learning_rate": 4.607323232323232e-06, + "loss": 0.0019, + "step": 8775 + }, + { + "epoch": 31.654676258992804, + "grad_norm": 0.042796701192855835, + "learning_rate": 4.606060606060606e-06, + "loss": 0.0012, + "step": 8800 + }, + { + "epoch": 31.744604316546763, + "grad_norm": 0.2298709750175476, + "learning_rate": 4.60479797979798e-06, + "loss": 0.0015, + "step": 8825 + }, + { + "epoch": 31.834532374100718, + "grad_norm": 0.7432539463043213, + "learning_rate": 4.6035353535353535e-06, + "loss": 0.002, + "step": 8850 + }, + { + "epoch": 31.924460431654676, + "grad_norm": 0.05896187201142311, + "learning_rate": 4.6022727272727275e-06, + "loss": 0.0017, + "step": 8875 + }, + { + "epoch": 32.014388489208635, + "grad_norm": 0.6994006633758545, + "learning_rate": 4.6010101010101015e-06, + "loss": 0.0019, + "step": 8900 + }, + { + "epoch": 32.10431654676259, + "grad_norm": 0.6547738909721375, + "learning_rate": 4.5997474747474755e-06, + "loss": 0.0016, + "step": 8925 + }, + { + "epoch": 32.194244604316545, + "grad_norm": 0.13888348639011383, + "learning_rate": 4.598484848484849e-06, + "loss": 0.0014, + "step": 8950 + }, + { + "epoch": 32.28417266187051, + "grad_norm": 0.09715843945741653, + "learning_rate": 4.597222222222223e-06, + "loss": 0.001, + "step": 8975 + }, + { + "epoch": 32.37410071942446, + "grad_norm": 0.05904947221279144, + "learning_rate": 4.595959595959597e-06, + "loss": 0.0014, + "step": 9000 + }, + { + "epoch": 32.37410071942446, + "eval_loss": 0.08943528681993484, + "eval_runtime": 1353.2653, + "eval_samples_per_second": 1.642, + "eval_steps_per_second": 0.103, + "eval_wer": 6.739243131156039, + "step": 9000 + }, + { + "epoch": 32.46402877697842, + "grad_norm": 1.000013828277588, + "learning_rate": 4.59469696969697e-06, + "loss": 0.0012, + "step": 9025 + }, + { + "epoch": 32.55395683453237, + "grad_norm": 0.031857941299676895, + "learning_rate": 4.593434343434344e-06, + "loss": 0.0011, + "step": 9050 + }, + { + "epoch": 32.643884892086334, + "grad_norm": 0.18854251503944397, + "learning_rate": 4.592171717171717e-06, + "loss": 0.0011, + "step": 9075 + }, + { + "epoch": 32.73381294964029, + "grad_norm": 0.06311248987913132, + "learning_rate": 4.590909090909092e-06, + "loss": 0.0009, + "step": 9100 + }, + { + "epoch": 32.82374100719424, + "grad_norm": 0.02462015673518181, + "learning_rate": 4.589646464646465e-06, + "loss": 0.0023, + "step": 9125 + }, + { + "epoch": 32.9136690647482, + 
"grad_norm": 0.5756279826164246, + "learning_rate": 4.588383838383839e-06, + "loss": 0.0014, + "step": 9150 + }, + { + "epoch": 33.00359712230216, + "grad_norm": 0.39254868030548096, + "learning_rate": 4.587121212121213e-06, + "loss": 0.0012, + "step": 9175 + }, + { + "epoch": 33.093525179856115, + "grad_norm": 0.05750317871570587, + "learning_rate": 4.585858585858586e-06, + "loss": 0.0016, + "step": 9200 + }, + { + "epoch": 33.18345323741007, + "grad_norm": 0.456665962934494, + "learning_rate": 4.58459595959596e-06, + "loss": 0.0005, + "step": 9225 + }, + { + "epoch": 33.273381294964025, + "grad_norm": 0.05247064307332039, + "learning_rate": 4.583333333333333e-06, + "loss": 0.0007, + "step": 9250 + }, + { + "epoch": 33.36330935251799, + "grad_norm": 0.1745249629020691, + "learning_rate": 4.582070707070708e-06, + "loss": 0.0011, + "step": 9275 + }, + { + "epoch": 33.45323741007194, + "grad_norm": 0.1702817678451538, + "learning_rate": 4.580808080808081e-06, + "loss": 0.0011, + "step": 9300 + }, + { + "epoch": 33.5431654676259, + "grad_norm": 0.5600733757019043, + "learning_rate": 4.579545454545455e-06, + "loss": 0.0017, + "step": 9325 + }, + { + "epoch": 33.63309352517986, + "grad_norm": 0.042534805834293365, + "learning_rate": 4.578282828282828e-06, + "loss": 0.002, + "step": 9350 + }, + { + "epoch": 33.723021582733814, + "grad_norm": 0.025305964052677155, + "learning_rate": 4.577020202020202e-06, + "loss": 0.0014, + "step": 9375 + }, + { + "epoch": 33.81294964028777, + "grad_norm": 0.05213531106710434, + "learning_rate": 4.575757575757576e-06, + "loss": 0.001, + "step": 9400 + }, + { + "epoch": 33.902877697841724, + "grad_norm": 0.02446218766272068, + "learning_rate": 4.5744949494949495e-06, + "loss": 0.0006, + "step": 9425 + }, + { + "epoch": 33.992805755395686, + "grad_norm": 0.009959193877875805, + "learning_rate": 4.5732323232323235e-06, + "loss": 0.0009, + "step": 9450 + }, + { + "epoch": 34.08273381294964, + "grad_norm": 0.4287709891796112, + "learning_rate": 4.5719696969696975e-06, + "loss": 0.0007, + "step": 9475 + }, + { + "epoch": 34.172661870503596, + "grad_norm": 0.011952442117035389, + "learning_rate": 4.5707070707070715e-06, + "loss": 0.0004, + "step": 9500 + }, + { + "epoch": 34.26258992805755, + "grad_norm": 0.1948029100894928, + "learning_rate": 4.569444444444445e-06, + "loss": 0.0007, + "step": 9525 + }, + { + "epoch": 34.35251798561151, + "grad_norm": 0.03538801521062851, + "learning_rate": 4.568181818181819e-06, + "loss": 0.0007, + "step": 9550 + }, + { + "epoch": 34.44244604316547, + "grad_norm": 0.03204001113772392, + "learning_rate": 4.566919191919193e-06, + "loss": 0.0006, + "step": 9575 + }, + { + "epoch": 34.53237410071942, + "grad_norm": 0.12747210264205933, + "learning_rate": 4.565656565656566e-06, + "loss": 0.0008, + "step": 9600 + }, + { + "epoch": 34.62230215827338, + "grad_norm": 0.009002352133393288, + "learning_rate": 4.56439393939394e-06, + "loss": 0.0003, + "step": 9625 + }, + { + "epoch": 34.71223021582734, + "grad_norm": 0.057965803891420364, + "learning_rate": 4.563131313131314e-06, + "loss": 0.0009, + "step": 9650 + }, + { + "epoch": 34.802158273381295, + "grad_norm": 0.07385562360286713, + "learning_rate": 4.561868686868687e-06, + "loss": 0.0007, + "step": 9675 + }, + { + "epoch": 34.89208633093525, + "grad_norm": 0.010685013607144356, + "learning_rate": 4.560606060606061e-06, + "loss": 0.0008, + "step": 9700 + }, + { + "epoch": 34.98201438848921, + "grad_norm": 0.038797181099653244, + "learning_rate": 4.559343434343435e-06, + "loss": 0.0003, + 
"step": 9725 + }, + { + "epoch": 35.07194244604317, + "grad_norm": 0.016779489815235138, + "learning_rate": 4.558080808080809e-06, + "loss": 0.0011, + "step": 9750 + }, + { + "epoch": 35.16187050359712, + "grad_norm": 0.01562959887087345, + "learning_rate": 4.556818181818182e-06, + "loss": 0.0007, + "step": 9775 + }, + { + "epoch": 35.25179856115108, + "grad_norm": 0.025731824338436127, + "learning_rate": 4.555555555555556e-06, + "loss": 0.0005, + "step": 9800 + }, + { + "epoch": 35.34172661870504, + "grad_norm": 0.0950327217578888, + "learning_rate": 4.554292929292929e-06, + "loss": 0.0014, + "step": 9825 + }, + { + "epoch": 35.431654676258994, + "grad_norm": 0.015813730657100677, + "learning_rate": 4.553030303030303e-06, + "loss": 0.0011, + "step": 9850 + }, + { + "epoch": 35.52158273381295, + "grad_norm": 0.07395196706056595, + "learning_rate": 4.551767676767677e-06, + "loss": 0.0006, + "step": 9875 + }, + { + "epoch": 35.611510791366904, + "grad_norm": 0.3815157413482666, + "learning_rate": 4.55050505050505e-06, + "loss": 0.001, + "step": 9900 + }, + { + "epoch": 35.701438848920866, + "grad_norm": 0.028180675581097603, + "learning_rate": 4.549242424242424e-06, + "loss": 0.0007, + "step": 9925 + }, + { + "epoch": 35.79136690647482, + "grad_norm": 0.022708212956786156, + "learning_rate": 4.547979797979798e-06, + "loss": 0.0007, + "step": 9950 + }, + { + "epoch": 35.881294964028775, + "grad_norm": 0.37191152572631836, + "learning_rate": 4.546717171717172e-06, + "loss": 0.0006, + "step": 9975 + }, + { + "epoch": 35.97122302158273, + "grad_norm": 0.045804716646671295, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.0005, + "step": 10000 + }, + { + "epoch": 35.97122302158273, + "eval_loss": 0.08962783217430115, + "eval_runtime": 1349.7416, + "eval_samples_per_second": 1.646, + "eval_steps_per_second": 0.103, + "eval_wer": 6.25786862178775, + "step": 10000 + }, + { + "epoch": 36.06115107913669, + "grad_norm": 0.016676392406225204, + "learning_rate": 4.5441919191919195e-06, + "loss": 0.0003, + "step": 10025 + }, + { + "epoch": 36.15107913669065, + "grad_norm": 0.15673214197158813, + "learning_rate": 4.5429292929292935e-06, + "loss": 0.0009, + "step": 10050 + }, + { + "epoch": 36.2410071942446, + "grad_norm": 0.032344311475753784, + "learning_rate": 4.541666666666667e-06, + "loss": 0.0015, + "step": 10075 + }, + { + "epoch": 36.330935251798564, + "grad_norm": 0.5042840242385864, + "learning_rate": 4.540404040404041e-06, + "loss": 0.0014, + "step": 10100 + }, + { + "epoch": 36.42086330935252, + "grad_norm": 0.02287839725613594, + "learning_rate": 4.539141414141415e-06, + "loss": 0.0013, + "step": 10125 + }, + { + "epoch": 36.510791366906474, + "grad_norm": 0.30796897411346436, + "learning_rate": 4.537878787878789e-06, + "loss": 0.0025, + "step": 10150 + }, + { + "epoch": 36.60071942446043, + "grad_norm": 0.11940345168113708, + "learning_rate": 4.536616161616162e-06, + "loss": 0.0009, + "step": 10175 + }, + { + "epoch": 36.69064748201439, + "grad_norm": 0.12890297174453735, + "learning_rate": 4.535353535353536e-06, + "loss": 0.001, + "step": 10200 + }, + { + "epoch": 36.780575539568346, + "grad_norm": 0.016430262476205826, + "learning_rate": 4.53409090909091e-06, + "loss": 0.0012, + "step": 10225 + }, + { + "epoch": 36.8705035971223, + "grad_norm": 0.08656007796525955, + "learning_rate": 4.532828282828283e-06, + "loss": 0.0015, + "step": 10250 + }, + { + "epoch": 36.960431654676256, + "grad_norm": 0.0869501456618309, + "learning_rate": 4.531565656565657e-06, + "loss": 0.0018, + "step": 
10275 + }, + { + "epoch": 37.05035971223022, + "grad_norm": 0.4101605713367462, + "learning_rate": 4.53030303030303e-06, + "loss": 0.0015, + "step": 10300 + }, + { + "epoch": 37.14028776978417, + "grad_norm": 0.0797925516963005, + "learning_rate": 4.529040404040405e-06, + "loss": 0.0007, + "step": 10325 + }, + { + "epoch": 37.23021582733813, + "grad_norm": 0.025322135537862778, + "learning_rate": 4.527777777777778e-06, + "loss": 0.0006, + "step": 10350 + }, + { + "epoch": 37.32014388489208, + "grad_norm": 0.059909917414188385, + "learning_rate": 4.526515151515152e-06, + "loss": 0.0012, + "step": 10375 + }, + { + "epoch": 37.410071942446045, + "grad_norm": 0.062007270753383636, + "learning_rate": 4.525252525252526e-06, + "loss": 0.0012, + "step": 10400 + }, + { + "epoch": 37.5, + "grad_norm": 0.35286614298820496, + "learning_rate": 4.523989898989899e-06, + "loss": 0.0016, + "step": 10425 + }, + { + "epoch": 37.589928057553955, + "grad_norm": 0.1300862431526184, + "learning_rate": 4.522727272727273e-06, + "loss": 0.0006, + "step": 10450 + }, + { + "epoch": 37.67985611510792, + "grad_norm": 0.13838863372802734, + "learning_rate": 4.521464646464646e-06, + "loss": 0.0006, + "step": 10475 + }, + { + "epoch": 37.76978417266187, + "grad_norm": 0.6767460703849792, + "learning_rate": 4.520202020202021e-06, + "loss": 0.0006, + "step": 10500 + }, + { + "epoch": 37.85971223021583, + "grad_norm": 0.03494667634367943, + "learning_rate": 4.518939393939394e-06, + "loss": 0.0013, + "step": 10525 + }, + { + "epoch": 37.94964028776978, + "grad_norm": 0.14763426780700684, + "learning_rate": 4.517676767676768e-06, + "loss": 0.0022, + "step": 10550 + }, + { + "epoch": 38.039568345323744, + "grad_norm": 0.15873517096042633, + "learning_rate": 4.5164141414141415e-06, + "loss": 0.0019, + "step": 10575 + }, + { + "epoch": 38.1294964028777, + "grad_norm": 0.048420246690511703, + "learning_rate": 4.5151515151515155e-06, + "loss": 0.001, + "step": 10600 + }, + { + "epoch": 38.219424460431654, + "grad_norm": 0.038138266652822495, + "learning_rate": 4.5138888888888895e-06, + "loss": 0.0004, + "step": 10625 + }, + { + "epoch": 38.30935251798561, + "grad_norm": 0.024455932900309563, + "learning_rate": 4.512626262626263e-06, + "loss": 0.0007, + "step": 10650 + }, + { + "epoch": 38.39928057553957, + "grad_norm": 0.29704517126083374, + "learning_rate": 4.511363636363637e-06, + "loss": 0.0012, + "step": 10675 + }, + { + "epoch": 38.489208633093526, + "grad_norm": 0.23077060282230377, + "learning_rate": 4.510101010101011e-06, + "loss": 0.0006, + "step": 10700 + }, + { + "epoch": 38.57913669064748, + "grad_norm": 0.04493401572108269, + "learning_rate": 4.508838383838384e-06, + "loss": 0.0007, + "step": 10725 + }, + { + "epoch": 38.669064748201436, + "grad_norm": 0.01225815899670124, + "learning_rate": 4.507575757575758e-06, + "loss": 0.0004, + "step": 10750 + }, + { + "epoch": 38.7589928057554, + "grad_norm": 0.19539327919483185, + "learning_rate": 4.506313131313132e-06, + "loss": 0.0012, + "step": 10775 + }, + { + "epoch": 38.84892086330935, + "grad_norm": 0.4501245319843292, + "learning_rate": 4.505050505050506e-06, + "loss": 0.0016, + "step": 10800 + }, + { + "epoch": 38.93884892086331, + "grad_norm": 0.955757200717926, + "learning_rate": 4.503787878787879e-06, + "loss": 0.002, + "step": 10825 + }, + { + "epoch": 39.02877697841727, + "grad_norm": 0.4927741587162018, + "learning_rate": 4.502525252525253e-06, + "loss": 0.0009, + "step": 10850 + }, + { + "epoch": 39.118705035971225, + "grad_norm": 0.5250554084777832, + 
"learning_rate": 4.501262626262627e-06, + "loss": 0.0018, + "step": 10875 + }, + { + "epoch": 39.20863309352518, + "grad_norm": 0.5786688327789307, + "learning_rate": 4.5e-06, + "loss": 0.0013, + "step": 10900 + }, + { + "epoch": 39.298561151079134, + "grad_norm": 0.015845810994505882, + "learning_rate": 4.498737373737374e-06, + "loss": 0.0009, + "step": 10925 + }, + { + "epoch": 39.388489208633096, + "grad_norm": 0.01820209249854088, + "learning_rate": 4.497474747474747e-06, + "loss": 0.001, + "step": 10950 + }, + { + "epoch": 39.47841726618705, + "grad_norm": 0.026294970884919167, + "learning_rate": 4.496212121212122e-06, + "loss": 0.0018, + "step": 10975 + }, + { + "epoch": 39.568345323741006, + "grad_norm": 0.4651360511779785, + "learning_rate": 4.494949494949495e-06, + "loss": 0.0016, + "step": 11000 + }, + { + "epoch": 39.568345323741006, + "eval_loss": 0.09019309282302856, + "eval_runtime": 1345.7556, + "eval_samples_per_second": 1.651, + "eval_steps_per_second": 0.103, + "eval_wer": 6.331926238613642, + "step": 11000 + }, + { + "epoch": 39.65827338129496, + "grad_norm": 0.29995694756507874, + "learning_rate": 4.493686868686869e-06, + "loss": 0.0015, + "step": 11025 + }, + { + "epoch": 39.74820143884892, + "grad_norm": 0.3291122019290924, + "learning_rate": 4.492424242424242e-06, + "loss": 0.0015, + "step": 11050 + }, + { + "epoch": 39.83812949640288, + "grad_norm": 0.1785033792257309, + "learning_rate": 4.491161616161616e-06, + "loss": 0.0006, + "step": 11075 + }, + { + "epoch": 39.92805755395683, + "grad_norm": 0.020028244704008102, + "learning_rate": 4.48989898989899e-06, + "loss": 0.002, + "step": 11100 + }, + { + "epoch": 40.01798561151079, + "grad_norm": 0.08107150346040726, + "learning_rate": 4.4886363636363636e-06, + "loss": 0.0014, + "step": 11125 + }, + { + "epoch": 40.10791366906475, + "grad_norm": 0.012092849239706993, + "learning_rate": 4.4873737373737375e-06, + "loss": 0.0018, + "step": 11150 + }, + { + "epoch": 40.197841726618705, + "grad_norm": 0.163823664188385, + "learning_rate": 4.4861111111111115e-06, + "loss": 0.0014, + "step": 11175 + }, + { + "epoch": 40.28776978417266, + "grad_norm": 0.07797440141439438, + "learning_rate": 4.4848484848484855e-06, + "loss": 0.0022, + "step": 11200 + }, + { + "epoch": 40.37769784172662, + "grad_norm": 0.07735186815261841, + "learning_rate": 4.483585858585859e-06, + "loss": 0.0018, + "step": 11225 + }, + { + "epoch": 40.46762589928058, + "grad_norm": 0.3801431953907013, + "learning_rate": 4.482323232323233e-06, + "loss": 0.0013, + "step": 11250 + }, + { + "epoch": 40.55755395683453, + "grad_norm": 0.02574390545487404, + "learning_rate": 4.481060606060607e-06, + "loss": 0.0008, + "step": 11275 + }, + { + "epoch": 40.64748201438849, + "grad_norm": 0.06015799939632416, + "learning_rate": 4.47979797979798e-06, + "loss": 0.0007, + "step": 11300 + }, + { + "epoch": 40.73741007194245, + "grad_norm": 0.011081011034548283, + "learning_rate": 4.478535353535354e-06, + "loss": 0.0009, + "step": 11325 + }, + { + "epoch": 40.827338129496404, + "grad_norm": 0.14023222029209137, + "learning_rate": 4.477272727272728e-06, + "loss": 0.0009, + "step": 11350 + }, + { + "epoch": 40.91726618705036, + "grad_norm": 1.1734967231750488, + "learning_rate": 4.476010101010102e-06, + "loss": 0.0034, + "step": 11375 + }, + { + "epoch": 41.007194244604314, + "grad_norm": 0.018789170309901237, + "learning_rate": 4.474747474747475e-06, + "loss": 0.0012, + "step": 11400 + }, + { + "epoch": 41.097122302158276, + "grad_norm": 0.5469329953193665, + "learning_rate": 
4.473484848484849e-06, + "loss": 0.0012, + "step": 11425 + }, + { + "epoch": 41.18705035971223, + "grad_norm": 1.0320335626602173, + "learning_rate": 4.472222222222223e-06, + "loss": 0.0022, + "step": 11450 + }, + { + "epoch": 41.276978417266186, + "grad_norm": 0.13018514215946198, + "learning_rate": 4.470959595959596e-06, + "loss": 0.001, + "step": 11475 + }, + { + "epoch": 41.36690647482014, + "grad_norm": 0.764275848865509, + "learning_rate": 4.46969696969697e-06, + "loss": 0.0017, + "step": 11500 + }, + { + "epoch": 41.4568345323741, + "grad_norm": 0.037678878754377365, + "learning_rate": 4.468434343434343e-06, + "loss": 0.0012, + "step": 11525 + }, + { + "epoch": 41.54676258992806, + "grad_norm": 0.0776861384510994, + "learning_rate": 4.467171717171718e-06, + "loss": 0.0012, + "step": 11550 + }, + { + "epoch": 41.63669064748201, + "grad_norm": 0.1435922086238861, + "learning_rate": 4.465909090909091e-06, + "loss": 0.0014, + "step": 11575 + }, + { + "epoch": 41.726618705035975, + "grad_norm": 0.2661900520324707, + "learning_rate": 4.464646464646465e-06, + "loss": 0.0014, + "step": 11600 + }, + { + "epoch": 41.81654676258993, + "grad_norm": 0.014804186299443245, + "learning_rate": 4.463383838383838e-06, + "loss": 0.0013, + "step": 11625 + }, + { + "epoch": 41.906474820143885, + "grad_norm": 0.5918655395507812, + "learning_rate": 4.462121212121212e-06, + "loss": 0.001, + "step": 11650 + }, + { + "epoch": 41.99640287769784, + "grad_norm": 0.2970104217529297, + "learning_rate": 4.460858585858586e-06, + "loss": 0.0014, + "step": 11675 + }, + { + "epoch": 42.0863309352518, + "grad_norm": 0.24786308407783508, + "learning_rate": 4.4595959595959596e-06, + "loss": 0.0005, + "step": 11700 + }, + { + "epoch": 42.17625899280576, + "grad_norm": 0.39591023325920105, + "learning_rate": 4.4583333333333336e-06, + "loss": 0.0012, + "step": 11725 + }, + { + "epoch": 42.26618705035971, + "grad_norm": 0.014619703404605389, + "learning_rate": 4.4570707070707076e-06, + "loss": 0.0009, + "step": 11750 + }, + { + "epoch": 42.356115107913666, + "grad_norm": 0.014031196013092995, + "learning_rate": 4.4558080808080816e-06, + "loss": 0.0005, + "step": 11775 + }, + { + "epoch": 42.44604316546763, + "grad_norm": 0.0157134011387825, + "learning_rate": 4.454545454545455e-06, + "loss": 0.0005, + "step": 11800 + }, + { + "epoch": 42.53597122302158, + "grad_norm": 0.5443057417869568, + "learning_rate": 4.453282828282829e-06, + "loss": 0.0005, + "step": 11825 + }, + { + "epoch": 42.62589928057554, + "grad_norm": 0.17728668451309204, + "learning_rate": 4.452020202020203e-06, + "loss": 0.001, + "step": 11850 + }, + { + "epoch": 42.71582733812949, + "grad_norm": 0.06720776110887527, + "learning_rate": 4.450757575757576e-06, + "loss": 0.0008, + "step": 11875 + }, + { + "epoch": 42.805755395683455, + "grad_norm": 0.020302429795265198, + "learning_rate": 4.44949494949495e-06, + "loss": 0.0005, + "step": 11900 + }, + { + "epoch": 42.89568345323741, + "grad_norm": 0.02236667089164257, + "learning_rate": 4.448232323232324e-06, + "loss": 0.0008, + "step": 11925 + }, + { + "epoch": 42.985611510791365, + "grad_norm": 0.3039033114910126, + "learning_rate": 4.446969696969697e-06, + "loss": 0.0007, + "step": 11950 + }, + { + "epoch": 43.07553956834533, + "grad_norm": 0.019936522468924522, + "learning_rate": 4.445707070707071e-06, + "loss": 0.0004, + "step": 11975 + }, + { + "epoch": 43.16546762589928, + "grad_norm": 0.006646598689258099, + "learning_rate": 4.444444444444444e-06, + "loss": 0.0007, + "step": 12000 + }, + { + "epoch": 
43.16546762589928, + "eval_loss": 0.09005734324455261, + "eval_runtime": 1349.9657, + "eval_samples_per_second": 1.646, + "eval_steps_per_second": 0.103, + "eval_wer": 6.2208398133748055, + "step": 12000 + }, + { + "epoch": 43.25539568345324, + "grad_norm": 0.06663926690816879, + "learning_rate": 4.443181818181819e-06, + "loss": 0.0003, + "step": 12025 + }, + { + "epoch": 43.34532374100719, + "grad_norm": 0.7015880346298218, + "learning_rate": 4.441919191919192e-06, + "loss": 0.0013, + "step": 12050 + }, + { + "epoch": 43.435251798561154, + "grad_norm": 0.09495950490236282, + "learning_rate": 4.440656565656566e-06, + "loss": 0.0009, + "step": 12075 + }, + { + "epoch": 43.52517985611511, + "grad_norm": 0.010513260029256344, + "learning_rate": 4.43939393939394e-06, + "loss": 0.0007, + "step": 12100 + }, + { + "epoch": 43.615107913669064, + "grad_norm": 0.08924310654401779, + "learning_rate": 4.438131313131313e-06, + "loss": 0.0004, + "step": 12125 + }, + { + "epoch": 43.70503597122302, + "grad_norm": 0.015554459765553474, + "learning_rate": 4.436868686868687e-06, + "loss": 0.0005, + "step": 12150 + }, + { + "epoch": 43.79496402877698, + "grad_norm": 0.02140822261571884, + "learning_rate": 4.4356060606060604e-06, + "loss": 0.0012, + "step": 12175 + }, + { + "epoch": 43.884892086330936, + "grad_norm": 0.2149767279624939, + "learning_rate": 4.434343434343435e-06, + "loss": 0.0005, + "step": 12200 + }, + { + "epoch": 43.97482014388489, + "grad_norm": 0.009459302760660648, + "learning_rate": 4.4330808080808084e-06, + "loss": 0.0012, + "step": 12225 + }, + { + "epoch": 44.064748201438846, + "grad_norm": 0.05037049949169159, + "learning_rate": 4.4318181818181824e-06, + "loss": 0.0004, + "step": 12250 + }, + { + "epoch": 44.15467625899281, + "grad_norm": 0.006279121618717909, + "learning_rate": 4.430555555555556e-06, + "loss": 0.0006, + "step": 12275 + }, + { + "epoch": 44.24460431654676, + "grad_norm": 0.03591470420360565, + "learning_rate": 4.42929292929293e-06, + "loss": 0.0006, + "step": 12300 + }, + { + "epoch": 44.33453237410072, + "grad_norm": 0.013430873863399029, + "learning_rate": 4.428030303030304e-06, + "loss": 0.0015, + "step": 12325 + }, + { + "epoch": 44.42446043165468, + "grad_norm": 0.01713446155190468, + "learning_rate": 4.426767676767677e-06, + "loss": 0.0011, + "step": 12350 + }, + { + "epoch": 44.514388489208635, + "grad_norm": 0.6338793039321899, + "learning_rate": 4.425505050505051e-06, + "loss": 0.0023, + "step": 12375 + }, + { + "epoch": 44.60431654676259, + "grad_norm": 0.19725088775157928, + "learning_rate": 4.424242424242425e-06, + "loss": 0.0015, + "step": 12400 + }, + { + "epoch": 44.694244604316545, + "grad_norm": 0.034790072590112686, + "learning_rate": 4.422979797979799e-06, + "loss": 0.0011, + "step": 12425 + }, + { + "epoch": 44.78417266187051, + "grad_norm": 2.0450031757354736, + "learning_rate": 4.421717171717172e-06, + "loss": 0.0012, + "step": 12450 + }, + { + "epoch": 44.87410071942446, + "grad_norm": 0.25726571679115295, + "learning_rate": 4.420454545454546e-06, + "loss": 0.0008, + "step": 12475 + }, + { + "epoch": 44.96402877697842, + "grad_norm": 0.14911916851997375, + "learning_rate": 4.41919191919192e-06, + "loss": 0.002, + "step": 12500 + }, + { + "epoch": 45.05395683453237, + "grad_norm": 0.5396764278411865, + "learning_rate": 4.417929292929293e-06, + "loss": 0.0018, + "step": 12525 + }, + { + "epoch": 45.143884892086334, + "grad_norm": 0.21499969065189362, + "learning_rate": 4.416666666666667e-06, + "loss": 0.0008, + "step": 12550 + }, + { + "epoch": 
45.23381294964029, + "grad_norm": 0.12975308299064636, + "learning_rate": 4.415404040404041e-06, + "loss": 0.0011, + "step": 12575 + }, + { + "epoch": 45.32374100719424, + "grad_norm": 0.03521961346268654, + "learning_rate": 4.414141414141415e-06, + "loss": 0.0009, + "step": 12600 + }, + { + "epoch": 45.4136690647482, + "grad_norm": 0.3964645564556122, + "learning_rate": 4.412878787878788e-06, + "loss": 0.0009, + "step": 12625 + }, + { + "epoch": 45.50359712230216, + "grad_norm": 0.04135512188076973, + "learning_rate": 4.411616161616162e-06, + "loss": 0.0007, + "step": 12650 + }, + { + "epoch": 45.593525179856115, + "grad_norm": 0.11724065244197845, + "learning_rate": 4.410353535353536e-06, + "loss": 0.0013, + "step": 12675 + }, + { + "epoch": 45.68345323741007, + "grad_norm": 0.3066418170928955, + "learning_rate": 4.409090909090909e-06, + "loss": 0.002, + "step": 12700 + }, + { + "epoch": 45.773381294964025, + "grad_norm": 0.020460475236177444, + "learning_rate": 4.407828282828283e-06, + "loss": 0.0004, + "step": 12725 + }, + { + "epoch": 45.86330935251799, + "grad_norm": 0.021625172346830368, + "learning_rate": 4.4065656565656565e-06, + "loss": 0.0008, + "step": 12750 + }, + { + "epoch": 45.95323741007194, + "grad_norm": 0.01973818428814411, + "learning_rate": 4.4053030303030305e-06, + "loss": 0.0005, + "step": 12775 + }, + { + "epoch": 46.0431654676259, + "grad_norm": 0.3055168092250824, + "learning_rate": 4.4040404040404044e-06, + "loss": 0.0004, + "step": 12800 + }, + { + "epoch": 46.13309352517986, + "grad_norm": 0.11869470030069351, + "learning_rate": 4.4027777777777784e-06, + "loss": 0.0012, + "step": 12825 + }, + { + "epoch": 46.223021582733814, + "grad_norm": 0.5959618091583252, + "learning_rate": 4.401515151515152e-06, + "loss": 0.0007, + "step": 12850 + }, + { + "epoch": 46.31294964028777, + "grad_norm": 0.08037717640399933, + "learning_rate": 4.400252525252526e-06, + "loss": 0.0006, + "step": 12875 + }, + { + "epoch": 46.402877697841724, + "grad_norm": 0.017363494262099266, + "learning_rate": 4.3989898989899e-06, + "loss": 0.0008, + "step": 12900 + }, + { + "epoch": 46.492805755395686, + "grad_norm": 0.028551748022437096, + "learning_rate": 4.397727272727273e-06, + "loss": 0.001, + "step": 12925 + }, + { + "epoch": 46.58273381294964, + "grad_norm": 0.08840727061033249, + "learning_rate": 4.396464646464647e-06, + "loss": 0.0007, + "step": 12950 + }, + { + "epoch": 46.672661870503596, + "grad_norm": 0.023021990433335304, + "learning_rate": 4.395202020202021e-06, + "loss": 0.0018, + "step": 12975 + }, + { + "epoch": 46.76258992805755, + "grad_norm": 0.05099537596106529, + "learning_rate": 4.393939393939394e-06, + "loss": 0.001, + "step": 13000 + }, + { + "epoch": 46.76258992805755, + "eval_loss": 0.08809197694063187, + "eval_runtime": 1348.5762, + "eval_samples_per_second": 1.648, + "eval_steps_per_second": 0.103, + "eval_wer": 6.154187958231504, + "step": 13000 + }, + { + "epoch": 46.85251798561151, + "grad_norm": 0.02734680473804474, + "learning_rate": 4.392676767676768e-06, + "loss": 0.0006, + "step": 13025 + }, + { + "epoch": 46.94244604316547, + "grad_norm": 0.012311214581131935, + "learning_rate": 4.391414141414142e-06, + "loss": 0.0004, + "step": 13050 + }, + { + "epoch": 47.03237410071942, + "grad_norm": 1.1471985578536987, + "learning_rate": 4.390151515151516e-06, + "loss": 0.0006, + "step": 13075 + }, + { + "epoch": 47.12230215827338, + "grad_norm": 0.04378161579370499, + "learning_rate": 4.388888888888889e-06, + "loss": 0.0009, + "step": 13100 + }, + { + "epoch": 
47.21223021582734, + "grad_norm": 0.014206623658537865, + "learning_rate": 4.387626262626263e-06, + "loss": 0.0006, + "step": 13125 + }, + { + "epoch": 47.302158273381295, + "grad_norm": 0.12384720891714096, + "learning_rate": 4.386363636363637e-06, + "loss": 0.0008, + "step": 13150 + }, + { + "epoch": 47.39208633093525, + "grad_norm": 0.12384091317653656, + "learning_rate": 4.38510101010101e-06, + "loss": 0.0006, + "step": 13175 + }, + { + "epoch": 47.48201438848921, + "grad_norm": 0.05459749698638916, + "learning_rate": 4.383838383838384e-06, + "loss": 0.0017, + "step": 13200 + }, + { + "epoch": 47.57194244604317, + "grad_norm": 0.06376705318689346, + "learning_rate": 4.382575757575757e-06, + "loss": 0.0012, + "step": 13225 + }, + { + "epoch": 47.66187050359712, + "grad_norm": 0.09516707807779312, + "learning_rate": 4.381313131313132e-06, + "loss": 0.0005, + "step": 13250 + }, + { + "epoch": 47.75179856115108, + "grad_norm": 0.035159386694431305, + "learning_rate": 4.380050505050505e-06, + "loss": 0.0009, + "step": 13275 + }, + { + "epoch": 47.84172661870504, + "grad_norm": 0.13273297250270844, + "learning_rate": 4.378787878787879e-06, + "loss": 0.0011, + "step": 13300 + }, + { + "epoch": 47.931654676258994, + "grad_norm": 0.6526914834976196, + "learning_rate": 4.3775252525252525e-06, + "loss": 0.0017, + "step": 13325 + }, + { + "epoch": 48.02158273381295, + "grad_norm": 0.10989696532487869, + "learning_rate": 4.3762626262626265e-06, + "loss": 0.0013, + "step": 13350 + }, + { + "epoch": 48.111510791366904, + "grad_norm": 0.12258470058441162, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.001, + "step": 13375 + }, + { + "epoch": 48.201438848920866, + "grad_norm": 0.04794065281748772, + "learning_rate": 4.373737373737374e-06, + "loss": 0.0006, + "step": 13400 + }, + { + "epoch": 48.29136690647482, + "grad_norm": 0.18742027878761292, + "learning_rate": 4.3724747474747485e-06, + "loss": 0.001, + "step": 13425 + }, + { + "epoch": 48.381294964028775, + "grad_norm": 0.047946684062480927, + "learning_rate": 4.371212121212122e-06, + "loss": 0.0008, + "step": 13450 + }, + { + "epoch": 48.47122302158273, + "grad_norm": 0.011459482833743095, + "learning_rate": 4.369949494949496e-06, + "loss": 0.0004, + "step": 13475 + }, + { + "epoch": 48.56115107913669, + "grad_norm": 0.0178390983492136, + "learning_rate": 4.368686868686869e-06, + "loss": 0.0005, + "step": 13500 + }, + { + "epoch": 48.65107913669065, + "grad_norm": 0.02639496698975563, + "learning_rate": 4.367424242424243e-06, + "loss": 0.0006, + "step": 13525 + }, + { + "epoch": 48.7410071942446, + "grad_norm": 0.9992175698280334, + "learning_rate": 4.366161616161617e-06, + "loss": 0.0006, + "step": 13550 + }, + { + "epoch": 48.830935251798564, + "grad_norm": 0.12613770365715027, + "learning_rate": 4.36489898989899e-06, + "loss": 0.0003, + "step": 13575 + }, + { + "epoch": 48.92086330935252, + "grad_norm": 0.008718474768102169, + "learning_rate": 4.363636363636364e-06, + "loss": 0.0006, + "step": 13600 + }, + { + "epoch": 49.010791366906474, + "grad_norm": 0.09226574003696442, + "learning_rate": 4.362373737373738e-06, + "loss": 0.001, + "step": 13625 + }, + { + "epoch": 49.10071942446043, + "grad_norm": 0.01371210440993309, + "learning_rate": 4.361111111111112e-06, + "loss": 0.0005, + "step": 13650 + }, + { + "epoch": 49.19064748201439, + "grad_norm": 0.8040596842765808, + "learning_rate": 4.359848484848485e-06, + "loss": 0.0014, + "step": 13675 + }, + { + "epoch": 49.280575539568346, + "grad_norm": 0.2569543123245239, + "learning_rate": 
4.358585858585859e-06, + "loss": 0.0004, + "step": 13700 + }, + { + "epoch": 49.3705035971223, + "grad_norm": 0.04654459282755852, + "learning_rate": 4.357323232323233e-06, + "loss": 0.0003, + "step": 13725 + }, + { + "epoch": 49.460431654676256, + "grad_norm": 0.03116775117814541, + "learning_rate": 4.356060606060606e-06, + "loss": 0.0006, + "step": 13750 + }, + { + "epoch": 49.55035971223022, + "grad_norm": 0.013714387081563473, + "learning_rate": 4.35479797979798e-06, + "loss": 0.0005, + "step": 13775 + }, + { + "epoch": 49.64028776978417, + "grad_norm": 0.012171006761491299, + "learning_rate": 4.353535353535353e-06, + "loss": 0.0005, + "step": 13800 + }, + { + "epoch": 49.73021582733813, + "grad_norm": 0.39719274640083313, + "learning_rate": 4.352272727272727e-06, + "loss": 0.0002, + "step": 13825 + }, + { + "epoch": 49.82014388489208, + "grad_norm": 0.009979949332773685, + "learning_rate": 4.351010101010101e-06, + "loss": 0.0002, + "step": 13850 + }, + { + "epoch": 49.910071942446045, + "grad_norm": 0.010056397877633572, + "learning_rate": 4.349747474747475e-06, + "loss": 0.0001, + "step": 13875 + }, + { + "epoch": 50.0, + "grad_norm": 1.2399721145629883, + "learning_rate": 4.348484848484849e-06, + "loss": 0.0003, + "step": 13900 + }, + { + "epoch": 50.089928057553955, + "grad_norm": 0.008993759751319885, + "learning_rate": 4.3472222222222225e-06, + "loss": 0.0003, + "step": 13925 + }, + { + "epoch": 50.17985611510792, + "grad_norm": 0.0040525756776332855, + "learning_rate": 4.3459595959595965e-06, + "loss": 0.0001, + "step": 13950 + }, + { + "epoch": 50.26978417266187, + "grad_norm": 0.037480395287275314, + "learning_rate": 4.34469696969697e-06, + "loss": 0.0006, + "step": 13975 + }, + { + "epoch": 50.35971223021583, + "grad_norm": 0.011341557838022709, + "learning_rate": 4.343434343434344e-06, + "loss": 0.0001, + "step": 14000 + }, + { + "epoch": 50.35971223021583, + "eval_loss": 0.0883052721619606, + "eval_runtime": 1347.8354, + "eval_samples_per_second": 1.649, + "eval_steps_per_second": 0.103, + "eval_wer": 6.161593719914093, + "step": 14000 + }, + { + "epoch": 50.44964028776978, + "grad_norm": 0.097772017121315, + "learning_rate": 4.342171717171718e-06, + "loss": 0.0003, + "step": 14025 + }, + { + "epoch": 50.539568345323744, + "grad_norm": 0.22011174261569977, + "learning_rate": 4.340909090909091e-06, + "loss": 0.0004, + "step": 14050 + }, + { + "epoch": 50.6294964028777, + "grad_norm": 0.004608627874404192, + "learning_rate": 4.339646464646465e-06, + "loss": 0.002, + "step": 14075 + }, + { + "epoch": 50.719424460431654, + "grad_norm": 0.02777382917702198, + "learning_rate": 4.338383838383839e-06, + "loss": 0.0009, + "step": 14100 + }, + { + "epoch": 50.80935251798561, + "grad_norm": 0.3765215277671814, + "learning_rate": 4.337121212121213e-06, + "loss": 0.0015, + "step": 14125 + }, + { + "epoch": 50.89928057553957, + "grad_norm": 0.014906881377100945, + "learning_rate": 4.335858585858586e-06, + "loss": 0.0019, + "step": 14150 + }, + { + "epoch": 50.989208633093526, + "grad_norm": 0.07598377764225006, + "learning_rate": 4.33459595959596e-06, + "loss": 0.0011, + "step": 14175 + }, + { + "epoch": 51.07913669064748, + "grad_norm": 0.04858017340302467, + "learning_rate": 4.333333333333334e-06, + "loss": 0.002, + "step": 14200 + }, + { + "epoch": 51.169064748201436, + "grad_norm": 0.00848084781318903, + "learning_rate": 4.332070707070707e-06, + "loss": 0.0015, + "step": 14225 + }, + { + "epoch": 51.2589928057554, + "grad_norm": 0.192399799823761, + "learning_rate": 
4.330808080808081e-06, + "loss": 0.0014, + "step": 14250 + }, + { + "epoch": 51.34892086330935, + "grad_norm": 0.17804254591464996, + "learning_rate": 4.329545454545455e-06, + "loss": 0.0009, + "step": 14275 + }, + { + "epoch": 51.43884892086331, + "grad_norm": 0.9404972791671753, + "learning_rate": 4.328282828282829e-06, + "loss": 0.0022, + "step": 14300 + }, + { + "epoch": 51.52877697841727, + "grad_norm": 0.06042027473449707, + "learning_rate": 4.327020202020202e-06, + "loss": 0.0009, + "step": 14325 + }, + { + "epoch": 51.618705035971225, + "grad_norm": 0.11593267321586609, + "learning_rate": 4.325757575757576e-06, + "loss": 0.001, + "step": 14350 + }, + { + "epoch": 51.70863309352518, + "grad_norm": 0.042370762676000595, + "learning_rate": 4.32449494949495e-06, + "loss": 0.0009, + "step": 14375 + }, + { + "epoch": 51.798561151079134, + "grad_norm": 0.06264758855104446, + "learning_rate": 4.323232323232323e-06, + "loss": 0.0011, + "step": 14400 + }, + { + "epoch": 51.888489208633096, + "grad_norm": 0.419005811214447, + "learning_rate": 4.321969696969697e-06, + "loss": 0.0013, + "step": 14425 + }, + { + "epoch": 51.97841726618705, + "grad_norm": 0.025492649525403976, + "learning_rate": 4.3207070707070705e-06, + "loss": 0.0008, + "step": 14450 + }, + { + "epoch": 52.068345323741006, + "grad_norm": 0.1695825606584549, + "learning_rate": 4.319444444444445e-06, + "loss": 0.001, + "step": 14475 + }, + { + "epoch": 52.15827338129496, + "grad_norm": 0.21136726438999176, + "learning_rate": 4.3181818181818185e-06, + "loss": 0.0004, + "step": 14500 + }, + { + "epoch": 52.24820143884892, + "grad_norm": 0.00583269540220499, + "learning_rate": 4.3169191919191925e-06, + "loss": 0.0003, + "step": 14525 + }, + { + "epoch": 52.33812949640288, + "grad_norm": 0.05031251907348633, + "learning_rate": 4.315656565656566e-06, + "loss": 0.0005, + "step": 14550 + }, + { + "epoch": 52.42805755395683, + "grad_norm": 1.4654878377914429, + "learning_rate": 4.31439393939394e-06, + "loss": 0.0011, + "step": 14575 + }, + { + "epoch": 52.51798561151079, + "grad_norm": 0.05035277083516121, + "learning_rate": 4.313131313131314e-06, + "loss": 0.0008, + "step": 14600 + }, + { + "epoch": 52.60791366906475, + "grad_norm": 0.3283204138278961, + "learning_rate": 4.311868686868687e-06, + "loss": 0.0024, + "step": 14625 + }, + { + "epoch": 52.697841726618705, + "grad_norm": 0.09352482855319977, + "learning_rate": 4.310606060606061e-06, + "loss": 0.0013, + "step": 14650 + }, + { + "epoch": 52.78776978417266, + "grad_norm": 0.4381198287010193, + "learning_rate": 4.309343434343435e-06, + "loss": 0.0014, + "step": 14675 + }, + { + "epoch": 52.87769784172662, + "grad_norm": 0.4195464551448822, + "learning_rate": 4.308080808080809e-06, + "loss": 0.0006, + "step": 14700 + }, + { + "epoch": 52.96762589928058, + "grad_norm": 0.037935055792331696, + "learning_rate": 4.306818181818182e-06, + "loss": 0.0005, + "step": 14725 + }, + { + "epoch": 53.05755395683453, + "grad_norm": 0.0057031637988984585, + "learning_rate": 4.305555555555556e-06, + "loss": 0.0011, + "step": 14750 + }, + { + "epoch": 53.14748201438849, + "grad_norm": 0.09235268831253052, + "learning_rate": 4.30429292929293e-06, + "loss": 0.0012, + "step": 14775 + }, + { + "epoch": 53.23741007194245, + "grad_norm": 0.4533500075340271, + "learning_rate": 4.303030303030303e-06, + "loss": 0.0013, + "step": 14800 + }, + { + "epoch": 53.327338129496404, + "grad_norm": 0.14968417584896088, + "learning_rate": 4.301767676767677e-06, + "loss": 0.0009, + "step": 14825 + }, + { + "epoch": 
53.41726618705036, + "grad_norm": 0.016032686457037926, + "learning_rate": 4.300505050505051e-06, + "loss": 0.0003, + "step": 14850 + }, + { + "epoch": 53.507194244604314, + "grad_norm": 0.04255020618438721, + "learning_rate": 4.299242424242425e-06, + "loss": 0.0002, + "step": 14875 + }, + { + "epoch": 53.597122302158276, + "grad_norm": 0.01301508117467165, + "learning_rate": 4.297979797979798e-06, + "loss": 0.0003, + "step": 14900 + }, + { + "epoch": 53.68705035971223, + "grad_norm": 0.007252383045852184, + "learning_rate": 4.296717171717172e-06, + "loss": 0.0005, + "step": 14925 + }, + { + "epoch": 53.776978417266186, + "grad_norm": 0.13183751702308655, + "learning_rate": 4.295454545454546e-06, + "loss": 0.002, + "step": 14950 + }, + { + "epoch": 53.86690647482014, + "grad_norm": 0.028183195739984512, + "learning_rate": 4.294191919191919e-06, + "loss": 0.0015, + "step": 14975 + }, + { + "epoch": 53.9568345323741, + "grad_norm": 0.1370900571346283, + "learning_rate": 4.292929292929293e-06, + "loss": 0.0007, + "step": 15000 + }, + { + "epoch": 53.9568345323741, + "eval_loss": 0.08864730596542358, + "eval_runtime": 1347.6756, + "eval_samples_per_second": 1.649, + "eval_steps_per_second": 0.103, + "eval_wer": 6.391172332074353, + "step": 15000 + }, + { + "epoch": 54.04676258992806, + "grad_norm": 0.01960013061761856, + "learning_rate": 4.2916666666666665e-06, + "loss": 0.0016, + "step": 15025 + }, + { + "epoch": 54.13669064748201, + "grad_norm": 0.13105234503746033, + "learning_rate": 4.2904040404040405e-06, + "loss": 0.0003, + "step": 15050 + }, + { + "epoch": 54.226618705035975, + "grad_norm": 2.309511423110962, + "learning_rate": 4.2891414141414145e-06, + "loss": 0.0009, + "step": 15075 + }, + { + "epoch": 54.31654676258993, + "grad_norm": 0.018184732645750046, + "learning_rate": 4.287878787878788e-06, + "loss": 0.001, + "step": 15100 + }, + { + "epoch": 54.406474820143885, + "grad_norm": 0.05596456304192543, + "learning_rate": 4.2866161616161625e-06, + "loss": 0.0012, + "step": 15125 + }, + { + "epoch": 54.49640287769784, + "grad_norm": 0.735536515712738, + "learning_rate": 4.285353535353536e-06, + "loss": 0.0014, + "step": 15150 + }, + { + "epoch": 54.5863309352518, + "grad_norm": 0.641944169998169, + "learning_rate": 4.28409090909091e-06, + "loss": 0.0017, + "step": 15175 + }, + { + "epoch": 54.67625899280576, + "grad_norm": 0.02818766050040722, + "learning_rate": 4.282828282828283e-06, + "loss": 0.0013, + "step": 15200 + }, + { + "epoch": 54.76618705035971, + "grad_norm": 0.04384085536003113, + "learning_rate": 4.281565656565657e-06, + "loss": 0.0012, + "step": 15225 + }, + { + "epoch": 54.856115107913666, + "grad_norm": 0.5741293430328369, + "learning_rate": 4.280303030303031e-06, + "loss": 0.0012, + "step": 15250 + }, + { + "epoch": 54.94604316546763, + "grad_norm": 0.5108962059020996, + "learning_rate": 4.279040404040404e-06, + "loss": 0.0013, + "step": 15275 + }, + { + "epoch": 55.03597122302158, + "grad_norm": 0.09613129496574402, + "learning_rate": 4.277777777777778e-06, + "loss": 0.0011, + "step": 15300 + }, + { + "epoch": 55.12589928057554, + "grad_norm": 0.2453729510307312, + "learning_rate": 4.276515151515152e-06, + "loss": 0.0016, + "step": 15325 + }, + { + "epoch": 55.21582733812949, + "grad_norm": 0.03533944860100746, + "learning_rate": 4.275252525252526e-06, + "loss": 0.0013, + "step": 15350 + }, + { + "epoch": 55.305755395683455, + "grad_norm": 0.02793753705918789, + "learning_rate": 4.273989898989899e-06, + "loss": 0.0011, + "step": 15375 + }, + { + "epoch": 
55.39568345323741, + "grad_norm": 0.11208122968673706, + "learning_rate": 4.272727272727273e-06, + "loss": 0.0014, + "step": 15400 + }, + { + "epoch": 55.485611510791365, + "grad_norm": 0.23727653920650482, + "learning_rate": 4.271464646464647e-06, + "loss": 0.0007, + "step": 15425 + }, + { + "epoch": 55.57553956834532, + "grad_norm": 0.1095881313085556, + "learning_rate": 4.27020202020202e-06, + "loss": 0.0006, + "step": 15450 + }, + { + "epoch": 55.66546762589928, + "grad_norm": 0.026398301124572754, + "learning_rate": 4.268939393939394e-06, + "loss": 0.0003, + "step": 15475 + }, + { + "epoch": 55.75539568345324, + "grad_norm": 0.3764269948005676, + "learning_rate": 4.267676767676767e-06, + "loss": 0.0007, + "step": 15500 + }, + { + "epoch": 55.84532374100719, + "grad_norm": 0.710081160068512, + "learning_rate": 4.266414141414142e-06, + "loss": 0.0006, + "step": 15525 + }, + { + "epoch": 55.935251798561154, + "grad_norm": 0.01405036449432373, + "learning_rate": 4.265151515151515e-06, + "loss": 0.0009, + "step": 15550 + }, + { + "epoch": 56.02517985611511, + "grad_norm": 0.011654024943709373, + "learning_rate": 4.263888888888889e-06, + "loss": 0.0011, + "step": 15575 + }, + { + "epoch": 56.115107913669064, + "grad_norm": 0.8455324172973633, + "learning_rate": 4.262626262626263e-06, + "loss": 0.0006, + "step": 15600 + }, + { + "epoch": 56.20503597122302, + "grad_norm": 0.7859840989112854, + "learning_rate": 4.2613636363636365e-06, + "loss": 0.0005, + "step": 15625 + }, + { + "epoch": 56.29496402877698, + "grad_norm": 0.012887760065495968, + "learning_rate": 4.2601010101010105e-06, + "loss": 0.0003, + "step": 15650 + }, + { + "epoch": 56.384892086330936, + "grad_norm": 0.27630236744880676, + "learning_rate": 4.258838383838384e-06, + "loss": 0.0005, + "step": 15675 + }, + { + "epoch": 56.47482014388489, + "grad_norm": 0.23494713008403778, + "learning_rate": 4.2575757575757585e-06, + "loss": 0.0003, + "step": 15700 + }, + { + "epoch": 56.564748201438846, + "grad_norm": 0.04018251597881317, + "learning_rate": 4.256313131313132e-06, + "loss": 0.0009, + "step": 15725 + }, + { + "epoch": 56.65467625899281, + "grad_norm": 0.29447436332702637, + "learning_rate": 4.255050505050506e-06, + "loss": 0.0002, + "step": 15750 + }, + { + "epoch": 56.74460431654676, + "grad_norm": 0.048734016716480255, + "learning_rate": 4.253787878787879e-06, + "loss": 0.0008, + "step": 15775 + }, + { + "epoch": 56.83453237410072, + "grad_norm": 0.00981312245130539, + "learning_rate": 4.252525252525253e-06, + "loss": 0.0003, + "step": 15800 + }, + { + "epoch": 56.92446043165468, + "grad_norm": 0.029217666015028954, + "learning_rate": 4.251262626262627e-06, + "loss": 0.0002, + "step": 15825 + }, + { + "epoch": 57.014388489208635, + "grad_norm": 0.0892946720123291, + "learning_rate": 4.25e-06, + "loss": 0.0008, + "step": 15850 + }, + { + "epoch": 57.10431654676259, + "grad_norm": 0.0070861089043319225, + "learning_rate": 4.248737373737374e-06, + "loss": 0.0003, + "step": 15875 + }, + { + "epoch": 57.194244604316545, + "grad_norm": 0.5670444965362549, + "learning_rate": 4.247474747474748e-06, + "loss": 0.0005, + "step": 15900 + }, + { + "epoch": 57.28417266187051, + "grad_norm": 0.4061719477176666, + "learning_rate": 4.246212121212122e-06, + "loss": 0.0009, + "step": 15925 + }, + { + "epoch": 57.37410071942446, + "grad_norm": 0.2658737897872925, + "learning_rate": 4.244949494949495e-06, + "loss": 0.0011, + "step": 15950 + }, + { + "epoch": 57.46402877697842, + "grad_norm": 0.06908473372459412, + "learning_rate": 
4.243686868686869e-06, + "loss": 0.0012, + "step": 15975 + }, + { + "epoch": 57.55395683453237, + "grad_norm": 0.12484970688819885, + "learning_rate": 4.242424242424243e-06, + "loss": 0.0008, + "step": 16000 + }, + { + "epoch": 57.55395683453237, + "eval_loss": 0.09175190329551697, + "eval_runtime": 1351.6711, + "eval_samples_per_second": 1.644, + "eval_steps_per_second": 0.103, + "eval_wer": 6.391172332074353, + "step": 16000 + }, + { + "epoch": 57.643884892086334, + "grad_norm": 0.10532079637050629, + "learning_rate": 4.241161616161616e-06, + "loss": 0.0009, + "step": 16025 + }, + { + "epoch": 57.73381294964029, + "grad_norm": 0.0082013588398695, + "learning_rate": 4.23989898989899e-06, + "loss": 0.0009, + "step": 16050 + }, + { + "epoch": 57.82374100719424, + "grad_norm": 0.8880343437194824, + "learning_rate": 4.238636363636364e-06, + "loss": 0.0012, + "step": 16075 + }, + { + "epoch": 57.9136690647482, + "grad_norm": 0.04694369435310364, + "learning_rate": 4.237373737373737e-06, + "loss": 0.0011, + "step": 16100 + }, + { + "epoch": 58.00359712230216, + "grad_norm": 0.4175935387611389, + "learning_rate": 4.236111111111111e-06, + "loss": 0.0007, + "step": 16125 + }, + { + "epoch": 58.093525179856115, + "grad_norm": 0.0991375669836998, + "learning_rate": 4.234848484848485e-06, + "loss": 0.0008, + "step": 16150 + }, + { + "epoch": 58.18345323741007, + "grad_norm": 0.05238619074225426, + "learning_rate": 4.233585858585859e-06, + "loss": 0.0009, + "step": 16175 + }, + { + "epoch": 58.273381294964025, + "grad_norm": 0.024060403928160667, + "learning_rate": 4.2323232323232325e-06, + "loss": 0.0005, + "step": 16200 + }, + { + "epoch": 58.36330935251799, + "grad_norm": 0.514026939868927, + "learning_rate": 4.2310606060606065e-06, + "loss": 0.0017, + "step": 16225 + }, + { + "epoch": 58.45323741007194, + "grad_norm": 0.9123257994651794, + "learning_rate": 4.22979797979798e-06, + "loss": 0.0009, + "step": 16250 + }, + { + "epoch": 58.5431654676259, + "grad_norm": 0.034488383680582047, + "learning_rate": 4.228535353535354e-06, + "loss": 0.0005, + "step": 16275 + }, + { + "epoch": 58.63309352517986, + "grad_norm": 0.08020392805337906, + "learning_rate": 4.227272727272728e-06, + "loss": 0.0021, + "step": 16300 + }, + { + "epoch": 58.723021582733814, + "grad_norm": 0.011538870632648468, + "learning_rate": 4.226010101010101e-06, + "loss": 0.001, + "step": 16325 + }, + { + "epoch": 58.81294964028777, + "grad_norm": 0.4130057692527771, + "learning_rate": 4.224747474747475e-06, + "loss": 0.0009, + "step": 16350 + }, + { + "epoch": 58.902877697841724, + "grad_norm": 0.018940504640340805, + "learning_rate": 4.223484848484849e-06, + "loss": 0.0008, + "step": 16375 + }, + { + "epoch": 58.992805755395686, + "grad_norm": 0.09760510176420212, + "learning_rate": 4.222222222222223e-06, + "loss": 0.0009, + "step": 16400 + }, + { + "epoch": 59.08273381294964, + "grad_norm": 0.6728724241256714, + "learning_rate": 4.220959595959596e-06, + "loss": 0.0015, + "step": 16425 + }, + { + "epoch": 59.172661870503596, + "grad_norm": 0.01400268916040659, + "learning_rate": 4.21969696969697e-06, + "loss": 0.0006, + "step": 16450 + }, + { + "epoch": 59.26258992805755, + "grad_norm": 0.027168823406100273, + "learning_rate": 4.218434343434344e-06, + "loss": 0.0003, + "step": 16475 + }, + { + "epoch": 59.35251798561151, + "grad_norm": 0.025733735412359238, + "learning_rate": 4.217171717171717e-06, + "loss": 0.0003, + "step": 16500 + }, + { + "epoch": 59.44244604316547, + "grad_norm": 0.012072687968611717, + "learning_rate": 
4.215909090909091e-06, + "loss": 0.0004, + "step": 16525 + }, + { + "epoch": 59.53237410071942, + "grad_norm": 0.03630650043487549, + "learning_rate": 4.214646464646465e-06, + "loss": 0.0013, + "step": 16550 + }, + { + "epoch": 59.62230215827338, + "grad_norm": 0.13875187933444977, + "learning_rate": 4.213383838383839e-06, + "loss": 0.0008, + "step": 16575 + }, + { + "epoch": 59.71223021582734, + "grad_norm": 0.06004035472869873, + "learning_rate": 4.212121212121212e-06, + "loss": 0.0004, + "step": 16600 + }, + { + "epoch": 59.802158273381295, + "grad_norm": 0.024319609627127647, + "learning_rate": 4.210858585858586e-06, + "loss": 0.0006, + "step": 16625 + }, + { + "epoch": 59.89208633093525, + "grad_norm": 0.0957476794719696, + "learning_rate": 4.20959595959596e-06, + "loss": 0.0016, + "step": 16650 + }, + { + "epoch": 59.98201438848921, + "grad_norm": 0.014447568915784359, + "learning_rate": 4.208333333333333e-06, + "loss": 0.0007, + "step": 16675 + }, + { + "epoch": 60.07194244604317, + "grad_norm": 0.0760221779346466, + "learning_rate": 4.207070707070707e-06, + "loss": 0.0007, + "step": 16700 + }, + { + "epoch": 60.16187050359712, + "grad_norm": 0.08783930540084839, + "learning_rate": 4.2058080808080806e-06, + "loss": 0.0006, + "step": 16725 + }, + { + "epoch": 60.25179856115108, + "grad_norm": 0.020011553540825844, + "learning_rate": 4.204545454545455e-06, + "loss": 0.0002, + "step": 16750 + }, + { + "epoch": 60.34172661870504, + "grad_norm": 0.004587370436638594, + "learning_rate": 4.2032828282828286e-06, + "loss": 0.0001, + "step": 16775 + }, + { + "epoch": 60.431654676258994, + "grad_norm": 0.05192629247903824, + "learning_rate": 4.2020202020202026e-06, + "loss": 0.0007, + "step": 16800 + }, + { + "epoch": 60.52158273381295, + "grad_norm": 0.0028184789698570967, + "learning_rate": 4.2007575757575766e-06, + "loss": 0.0001, + "step": 16825 + }, + { + "epoch": 60.611510791366904, + "grad_norm": 0.11263082921504974, + "learning_rate": 4.19949494949495e-06, + "loss": 0.0004, + "step": 16850 + }, + { + "epoch": 60.701438848920866, + "grad_norm": 0.020229890942573547, + "learning_rate": 4.198232323232324e-06, + "loss": 0.0002, + "step": 16875 + }, + { + "epoch": 60.79136690647482, + "grad_norm": 0.004258246161043644, + "learning_rate": 4.196969696969697e-06, + "loss": 0.0004, + "step": 16900 + }, + { + "epoch": 60.881294964028775, + "grad_norm": 0.005619137082248926, + "learning_rate": 4.195707070707072e-06, + "loss": 0.0001, + "step": 16925 + }, + { + "epoch": 60.97122302158273, + "grad_norm": 0.005032286513596773, + "learning_rate": 4.194444444444445e-06, + "loss": 0.0002, + "step": 16950 + }, + { + "epoch": 61.06115107913669, + "grad_norm": 0.02484523132443428, + "learning_rate": 4.193181818181819e-06, + "loss": 0.0003, + "step": 16975 + }, + { + "epoch": 61.15107913669065, + "grad_norm": 0.0017194038955494761, + "learning_rate": 4.191919191919192e-06, + "loss": 0.0002, + "step": 17000 + }, + { + "epoch": 61.15107913669065, + "eval_loss": 0.09027338027954102, + "eval_runtime": 1359.5537, + "eval_samples_per_second": 1.634, + "eval_steps_per_second": 0.102, + "eval_wer": 5.909797822706065, + "step": 17000 + }, + { + "epoch": 61.2410071942446, + "grad_norm": 0.0024019062984734774, + "learning_rate": 4.190656565656566e-06, + "loss": 0.0002, + "step": 17025 + }, + { + "epoch": 61.330935251798564, + "grad_norm": 0.004478455055505037, + "learning_rate": 4.18939393939394e-06, + "loss": 0.0003, + "step": 17050 + }, + { + "epoch": 61.42086330935252, + "grad_norm": 0.0044603836722671986, + 
"learning_rate": 4.188131313131313e-06, + "loss": 0.0004, + "step": 17075 + }, + { + "epoch": 61.510791366906474, + "grad_norm": 0.08818788081407547, + "learning_rate": 4.186868686868687e-06, + "loss": 0.0009, + "step": 17100 + }, + { + "epoch": 61.60071942446043, + "grad_norm": 0.0027286384720355272, + "learning_rate": 4.185606060606061e-06, + "loss": 0.0002, + "step": 17125 + }, + { + "epoch": 61.69064748201439, + "grad_norm": 0.0037345695309340954, + "learning_rate": 4.184343434343434e-06, + "loss": 0.0003, + "step": 17150 + }, + { + "epoch": 61.780575539568346, + "grad_norm": 0.014616015367209911, + "learning_rate": 4.183080808080808e-06, + "loss": 0.0004, + "step": 17175 + }, + { + "epoch": 61.8705035971223, + "grad_norm": 0.007769573014229536, + "learning_rate": 4.181818181818182e-06, + "loss": 0.0003, + "step": 17200 + }, + { + "epoch": 61.960431654676256, + "grad_norm": 0.008359814994037151, + "learning_rate": 4.180555555555556e-06, + "loss": 0.0005, + "step": 17225 + }, + { + "epoch": 62.05035971223022, + "grad_norm": 0.0051100486889481544, + "learning_rate": 4.1792929292929294e-06, + "loss": 0.0005, + "step": 17250 + }, + { + "epoch": 62.14028776978417, + "grad_norm": 0.0029563389252871275, + "learning_rate": 4.1780303030303034e-06, + "loss": 0.0001, + "step": 17275 + }, + { + "epoch": 62.23021582733813, + "grad_norm": 0.0030668089166283607, + "learning_rate": 4.1767676767676774e-06, + "loss": 0.0001, + "step": 17300 + }, + { + "epoch": 62.32014388489208, + "grad_norm": 0.02710825577378273, + "learning_rate": 4.175505050505051e-06, + "loss": 0.0006, + "step": 17325 + }, + { + "epoch": 62.410071942446045, + "grad_norm": 0.0027756947092711926, + "learning_rate": 4.1742424242424246e-06, + "loss": 0.0001, + "step": 17350 + }, + { + "epoch": 62.5, + "grad_norm": 0.09106307476758957, + "learning_rate": 4.172979797979798e-06, + "loss": 0.0003, + "step": 17375 + }, + { + "epoch": 62.589928057553955, + "grad_norm": 0.005363088101148605, + "learning_rate": 4.1717171717171726e-06, + "loss": 0.0001, + "step": 17400 + }, + { + "epoch": 62.67985611510792, + "grad_norm": 0.005525332409888506, + "learning_rate": 4.170454545454546e-06, + "loss": 0.0001, + "step": 17425 + }, + { + "epoch": 62.76978417266187, + "grad_norm": 0.007496482692658901, + "learning_rate": 4.16919191919192e-06, + "loss": 0.0001, + "step": 17450 + }, + { + "epoch": 62.85971223021583, + "grad_norm": 0.026290051639080048, + "learning_rate": 4.167929292929293e-06, + "loss": 0.0001, + "step": 17475 + }, + { + "epoch": 62.94964028776978, + "grad_norm": 0.006395560223609209, + "learning_rate": 4.166666666666667e-06, + "loss": 0.0001, + "step": 17500 + }, + { + "epoch": 63.039568345323744, + "grad_norm": 0.004197731614112854, + "learning_rate": 4.165404040404041e-06, + "loss": 0.0001, + "step": 17525 + }, + { + "epoch": 63.1294964028777, + "grad_norm": 0.002505301032215357, + "learning_rate": 4.164141414141414e-06, + "loss": 0.0, + "step": 17550 + }, + { + "epoch": 63.219424460431654, + "grad_norm": 0.0022915108129382133, + "learning_rate": 4.162878787878788e-06, + "loss": 0.0001, + "step": 17575 + }, + { + "epoch": 63.30935251798561, + "grad_norm": 0.0019390948582440615, + "learning_rate": 4.161616161616162e-06, + "loss": 0.0, + "step": 17600 + }, + { + "epoch": 63.39928057553957, + "grad_norm": 0.001307799364440143, + "learning_rate": 4.160353535353536e-06, + "loss": 0.0001, + "step": 17625 + }, + { + "epoch": 63.489208633093526, + "grad_norm": 0.0016936671454459429, + "learning_rate": 4.159090909090909e-06, + "loss": 0.0, + 
"step": 17650 + }, + { + "epoch": 63.57913669064748, + "grad_norm": 0.0017974688671529293, + "learning_rate": 4.157828282828283e-06, + "loss": 0.0, + "step": 17675 + }, + { + "epoch": 63.669064748201436, + "grad_norm": 0.0027852486819028854, + "learning_rate": 4.156565656565657e-06, + "loss": 0.0, + "step": 17700 + }, + { + "epoch": 63.7589928057554, + "grad_norm": 0.0017096559749916196, + "learning_rate": 4.15530303030303e-06, + "loss": 0.0, + "step": 17725 + }, + { + "epoch": 63.84892086330935, + "grad_norm": 0.0019876237493008375, + "learning_rate": 4.154040404040404e-06, + "loss": 0.0, + "step": 17750 + }, + { + "epoch": 63.93884892086331, + "grad_norm": 0.0011115281376987696, + "learning_rate": 4.152777777777778e-06, + "loss": 0.0, + "step": 17775 + }, + { + "epoch": 64.02877697841727, + "grad_norm": 0.0017126763705164194, + "learning_rate": 4.151515151515152e-06, + "loss": 0.0, + "step": 17800 + }, + { + "epoch": 64.11870503597122, + "grad_norm": 0.0011258955346420407, + "learning_rate": 4.1502525252525254e-06, + "loss": 0.0, + "step": 17825 + }, + { + "epoch": 64.20863309352518, + "grad_norm": 0.0015615399461239576, + "learning_rate": 4.1489898989898994e-06, + "loss": 0.0, + "step": 17850 + }, + { + "epoch": 64.29856115107914, + "grad_norm": 0.001990539487451315, + "learning_rate": 4.1477272727272734e-06, + "loss": 0.0, + "step": 17875 + }, + { + "epoch": 64.38848920863309, + "grad_norm": 0.0013739466667175293, + "learning_rate": 4.146464646464647e-06, + "loss": 0.0, + "step": 17900 + }, + { + "epoch": 64.47841726618705, + "grad_norm": 0.0017153042135760188, + "learning_rate": 4.145202020202021e-06, + "loss": 0.0, + "step": 17925 + }, + { + "epoch": 64.56834532374101, + "grad_norm": 0.0013855737634003162, + "learning_rate": 4.143939393939394e-06, + "loss": 0.0, + "step": 17950 + }, + { + "epoch": 64.65827338129496, + "grad_norm": 0.0023376569151878357, + "learning_rate": 4.142676767676769e-06, + "loss": 0.0001, + "step": 17975 + }, + { + "epoch": 64.74820143884892, + "grad_norm": 0.0007114307954907417, + "learning_rate": 4.141414141414142e-06, + "loss": 0.0, + "step": 18000 + }, + { + "epoch": 64.74820143884892, + "eval_loss": 0.09263601154088974, + "eval_runtime": 1339.2527, + "eval_samples_per_second": 1.659, + "eval_steps_per_second": 0.104, + "eval_wer": 5.658001925498037, + "step": 18000 + }, + { + "epoch": 64.83812949640287, + "grad_norm": 0.0010609790915623307, + "learning_rate": 4.140151515151516e-06, + "loss": 0.0, + "step": 18025 + }, + { + "epoch": 64.92805755395683, + "grad_norm": 0.0020956743974238634, + "learning_rate": 4.138888888888889e-06, + "loss": 0.0, + "step": 18050 + }, + { + "epoch": 65.0179856115108, + "grad_norm": 0.0013533415040001273, + "learning_rate": 4.137626262626263e-06, + "loss": 0.0001, + "step": 18075 + }, + { + "epoch": 65.10791366906474, + "grad_norm": 0.0010088173439726233, + "learning_rate": 4.136363636363637e-06, + "loss": 0.0001, + "step": 18100 + }, + { + "epoch": 65.1978417266187, + "grad_norm": 0.001570379245094955, + "learning_rate": 4.13510101010101e-06, + "loss": 0.0, + "step": 18125 + }, + { + "epoch": 65.28776978417267, + "grad_norm": 0.0016373491380363703, + "learning_rate": 4.133838383838384e-06, + "loss": 0.0, + "step": 18150 + }, + { + "epoch": 65.37769784172662, + "grad_norm": 0.0015006172470748425, + "learning_rate": 4.132575757575758e-06, + "loss": 0.0, + "step": 18175 + }, + { + "epoch": 65.46762589928058, + "grad_norm": 0.0011033018818125129, + "learning_rate": 4.131313131313132e-06, + "loss": 0.0, + "step": 18200 + }, + { + 
"epoch": 65.55755395683454, + "grad_norm": 0.0013498698826879263, + "learning_rate": 4.130050505050505e-06, + "loss": 0.0, + "step": 18225 + }, + { + "epoch": 65.64748201438849, + "grad_norm": 0.0013445069780573249, + "learning_rate": 4.128787878787879e-06, + "loss": 0.0, + "step": 18250 + }, + { + "epoch": 65.73741007194245, + "grad_norm": 0.0017605924513190985, + "learning_rate": 4.127525252525253e-06, + "loss": 0.0, + "step": 18275 + }, + { + "epoch": 65.8273381294964, + "grad_norm": 0.0018534163245931268, + "learning_rate": 4.126262626262626e-06, + "loss": 0.0, + "step": 18300 + }, + { + "epoch": 65.91726618705036, + "grad_norm": 0.000884951208718121, + "learning_rate": 4.125e-06, + "loss": 0.0, + "step": 18325 + }, + { + "epoch": 66.00719424460432, + "grad_norm": 0.0011815873440355062, + "learning_rate": 4.123737373737374e-06, + "loss": 0.0001, + "step": 18350 + }, + { + "epoch": 66.09712230215827, + "grad_norm": 0.001126173185184598, + "learning_rate": 4.1224747474747475e-06, + "loss": 0.0, + "step": 18375 + }, + { + "epoch": 66.18705035971223, + "grad_norm": 0.0011552530340850353, + "learning_rate": 4.1212121212121215e-06, + "loss": 0.0, + "step": 18400 + }, + { + "epoch": 66.27697841726619, + "grad_norm": 0.001199888065457344, + "learning_rate": 4.119949494949495e-06, + "loss": 0.0, + "step": 18425 + }, + { + "epoch": 66.36690647482014, + "grad_norm": 0.0007247981848195195, + "learning_rate": 4.1186868686868695e-06, + "loss": 0.0, + "step": 18450 + }, + { + "epoch": 66.4568345323741, + "grad_norm": 0.001124533242546022, + "learning_rate": 4.117424242424243e-06, + "loss": 0.0001, + "step": 18475 + }, + { + "epoch": 66.54676258992805, + "grad_norm": 0.0009603950311429799, + "learning_rate": 4.116161616161617e-06, + "loss": 0.0, + "step": 18500 + }, + { + "epoch": 66.63669064748201, + "grad_norm": 0.0016920759808272123, + "learning_rate": 4.114898989898991e-06, + "loss": 0.0001, + "step": 18525 + }, + { + "epoch": 66.72661870503597, + "grad_norm": 0.0007674341322854161, + "learning_rate": 4.113636363636364e-06, + "loss": 0.0, + "step": 18550 + }, + { + "epoch": 66.81654676258992, + "grad_norm": 0.000895792618393898, + "learning_rate": 4.112373737373738e-06, + "loss": 0.0, + "step": 18575 + }, + { + "epoch": 66.90647482014388, + "grad_norm": 0.0009227583650499582, + "learning_rate": 4.111111111111111e-06, + "loss": 0.0, + "step": 18600 + }, + { + "epoch": 66.99640287769785, + "grad_norm": 0.0019231617916375399, + "learning_rate": 4.109848484848486e-06, + "loss": 0.0, + "step": 18625 + }, + { + "epoch": 67.0863309352518, + "grad_norm": 0.0010071933502331376, + "learning_rate": 4.108585858585859e-06, + "loss": 0.0, + "step": 18650 + }, + { + "epoch": 67.17625899280576, + "grad_norm": 0.0009304916602559388, + "learning_rate": 4.107323232323233e-06, + "loss": 0.0002, + "step": 18675 + }, + { + "epoch": 67.26618705035972, + "grad_norm": 0.0008229652885347605, + "learning_rate": 4.106060606060606e-06, + "loss": 0.0, + "step": 18700 + }, + { + "epoch": 67.35611510791367, + "grad_norm": 0.0006714012124575675, + "learning_rate": 4.10479797979798e-06, + "loss": 0.0, + "step": 18725 + }, + { + "epoch": 67.44604316546763, + "grad_norm": 0.0009734642808325589, + "learning_rate": 4.103535353535354e-06, + "loss": 0.0, + "step": 18750 + }, + { + "epoch": 67.53597122302158, + "grad_norm": 0.0007786314818076789, + "learning_rate": 4.102272727272727e-06, + "loss": 0.0, + "step": 18775 + }, + { + "epoch": 67.62589928057554, + "grad_norm": 0.001005512080155313, + "learning_rate": 4.101010101010101e-06, + 
"loss": 0.0, + "step": 18800 + }, + { + "epoch": 67.7158273381295, + "grad_norm": 0.001331688603386283, + "learning_rate": 4.099747474747475e-06, + "loss": 0.0, + "step": 18825 + }, + { + "epoch": 67.80575539568345, + "grad_norm": 0.000987470499239862, + "learning_rate": 4.098484848484849e-06, + "loss": 0.0, + "step": 18850 + }, + { + "epoch": 67.89568345323741, + "grad_norm": 0.0008799554198049009, + "learning_rate": 4.097222222222222e-06, + "loss": 0.0001, + "step": 18875 + }, + { + "epoch": 67.98561151079137, + "grad_norm": 0.0009637974435463548, + "learning_rate": 4.095959595959596e-06, + "loss": 0.0, + "step": 18900 + }, + { + "epoch": 68.07553956834532, + "grad_norm": 0.0006672360468655825, + "learning_rate": 4.09469696969697e-06, + "loss": 0.0, + "step": 18925 + }, + { + "epoch": 68.16546762589928, + "grad_norm": 0.0008431566529907286, + "learning_rate": 4.0934343434343435e-06, + "loss": 0.0001, + "step": 18950 + }, + { + "epoch": 68.25539568345324, + "grad_norm": 0.0010287058539688587, + "learning_rate": 4.0921717171717175e-06, + "loss": 0.0, + "step": 18975 + }, + { + "epoch": 68.34532374100719, + "grad_norm": 0.0007457846077159047, + "learning_rate": 4.0909090909090915e-06, + "loss": 0.0, + "step": 19000 + }, + { + "epoch": 68.34532374100719, + "eval_loss": 0.09562169760465622, + "eval_runtime": 1339.1079, + "eval_samples_per_second": 1.659, + "eval_steps_per_second": 0.104, + "eval_wer": 5.583944308672146, + "step": 19000 + }, + { + "epoch": 68.43525179856115, + "grad_norm": 0.0009193470468744636, + "learning_rate": 4.0896464646464655e-06, + "loss": 0.0, + "step": 19025 + }, + { + "epoch": 68.5251798561151, + "grad_norm": 0.0008717461605556309, + "learning_rate": 4.088383838383839e-06, + "loss": 0.0, + "step": 19050 + }, + { + "epoch": 68.61510791366906, + "grad_norm": 0.0008119108970277011, + "learning_rate": 4.087121212121213e-06, + "loss": 0.0, + "step": 19075 + }, + { + "epoch": 68.70503597122303, + "grad_norm": 0.0010454319417476654, + "learning_rate": 4.085858585858587e-06, + "loss": 0.0001, + "step": 19100 + }, + { + "epoch": 68.79496402877697, + "grad_norm": 0.0012115614954382181, + "learning_rate": 4.08459595959596e-06, + "loss": 0.0, + "step": 19125 + }, + { + "epoch": 68.88489208633094, + "grad_norm": 0.001058676978573203, + "learning_rate": 4.083333333333334e-06, + "loss": 0.0, + "step": 19150 + }, + { + "epoch": 68.9748201438849, + "grad_norm": 0.0009722402319312096, + "learning_rate": 4.082070707070707e-06, + "loss": 0.0, + "step": 19175 + }, + { + "epoch": 69.06474820143885, + "grad_norm": 0.0006609881529584527, + "learning_rate": 4.080808080808081e-06, + "loss": 0.0, + "step": 19200 + }, + { + "epoch": 69.15467625899281, + "grad_norm": 0.0007030842243693769, + "learning_rate": 4.079545454545455e-06, + "loss": 0.0, + "step": 19225 + }, + { + "epoch": 69.24460431654676, + "grad_norm": 0.0006842823349870741, + "learning_rate": 4.078282828282829e-06, + "loss": 0.0001, + "step": 19250 + }, + { + "epoch": 69.33453237410072, + "grad_norm": 0.000651550421025604, + "learning_rate": 4.077020202020202e-06, + "loss": 0.0, + "step": 19275 + }, + { + "epoch": 69.42446043165468, + "grad_norm": 0.0006407879409380257, + "learning_rate": 4.075757575757576e-06, + "loss": 0.0001, + "step": 19300 + }, + { + "epoch": 69.51438848920863, + "grad_norm": 0.0010551882442086935, + "learning_rate": 4.07449494949495e-06, + "loss": 0.0, + "step": 19325 + }, + { + "epoch": 69.60431654676259, + "grad_norm": 0.0008015549392439425, + "learning_rate": 4.073232323232323e-06, + "loss": 0.0, + "step": 
19350 + }, + { + "epoch": 69.69424460431655, + "grad_norm": 0.0008218359434977174, + "learning_rate": 4.071969696969697e-06, + "loss": 0.0, + "step": 19375 + }, + { + "epoch": 69.7841726618705, + "grad_norm": 0.0009953822009265423, + "learning_rate": 4.070707070707071e-06, + "loss": 0.0, + "step": 19400 + }, + { + "epoch": 69.87410071942446, + "grad_norm": 0.0008482063421979547, + "learning_rate": 4.069444444444444e-06, + "loss": 0.0, + "step": 19425 + }, + { + "epoch": 69.96402877697842, + "grad_norm": 0.0008491966291330755, + "learning_rate": 4.068181818181818e-06, + "loss": 0.0, + "step": 19450 + }, + { + "epoch": 70.05395683453237, + "grad_norm": 0.000667088374029845, + "learning_rate": 4.066919191919192e-06, + "loss": 0.0001, + "step": 19475 + }, + { + "epoch": 70.14388489208633, + "grad_norm": 0.0006748430896550417, + "learning_rate": 4.065656565656566e-06, + "loss": 0.0, + "step": 19500 + }, + { + "epoch": 70.23381294964028, + "grad_norm": 0.0006421016296371818, + "learning_rate": 4.0643939393939395e-06, + "loss": 0.0, + "step": 19525 + }, + { + "epoch": 70.32374100719424, + "grad_norm": 0.0009323668200522661, + "learning_rate": 4.0631313131313135e-06, + "loss": 0.0, + "step": 19550 + }, + { + "epoch": 70.4136690647482, + "grad_norm": 0.0008588407654315233, + "learning_rate": 4.0618686868686875e-06, + "loss": 0.0, + "step": 19575 + }, + { + "epoch": 70.50359712230215, + "grad_norm": 0.0006930006784386933, + "learning_rate": 4.060606060606061e-06, + "loss": 0.0, + "step": 19600 + }, + { + "epoch": 70.59352517985612, + "grad_norm": 0.000734307337552309, + "learning_rate": 4.059343434343435e-06, + "loss": 0.0, + "step": 19625 + }, + { + "epoch": 70.68345323741008, + "grad_norm": 0.0007306214538402855, + "learning_rate": 4.058080808080808e-06, + "loss": 0.0001, + "step": 19650 + }, + { + "epoch": 70.77338129496403, + "grad_norm": 0.0005738097243010998, + "learning_rate": 4.056818181818183e-06, + "loss": 0.0, + "step": 19675 + }, + { + "epoch": 70.86330935251799, + "grad_norm": 0.00065003422787413, + "learning_rate": 4.055555555555556e-06, + "loss": 0.0, + "step": 19700 + }, + { + "epoch": 70.95323741007195, + "grad_norm": 0.0006234170868992805, + "learning_rate": 4.05429292929293e-06, + "loss": 0.0, + "step": 19725 + }, + { + "epoch": 71.0431654676259, + "grad_norm": 0.000607940077316016, + "learning_rate": 4.053030303030303e-06, + "loss": 0.0, + "step": 19750 + }, + { + "epoch": 71.13309352517986, + "grad_norm": 0.0005851531168445945, + "learning_rate": 4.051767676767677e-06, + "loss": 0.0, + "step": 19775 + }, + { + "epoch": 71.22302158273381, + "grad_norm": 0.0009296953212469816, + "learning_rate": 4.050505050505051e-06, + "loss": 0.0, + "step": 19800 + }, + { + "epoch": 71.31294964028777, + "grad_norm": 0.0006304428679868579, + "learning_rate": 4.049242424242424e-06, + "loss": 0.0001, + "step": 19825 + }, + { + "epoch": 71.40287769784173, + "grad_norm": 0.000664900871925056, + "learning_rate": 4.047979797979799e-06, + "loss": 0.0, + "step": 19850 + }, + { + "epoch": 71.49280575539568, + "grad_norm": 0.0003695714403875172, + "learning_rate": 4.046717171717172e-06, + "loss": 0.0, + "step": 19875 + }, + { + "epoch": 71.58273381294964, + "grad_norm": 0.000516809755936265, + "learning_rate": 4.045454545454546e-06, + "loss": 0.0, + "step": 19900 + }, + { + "epoch": 71.6726618705036, + "grad_norm": 0.0006113911513239145, + "learning_rate": 4.044191919191919e-06, + "loss": 0.0, + "step": 19925 + }, + { + "epoch": 71.76258992805755, + "grad_norm": 0.000814276107121259, + "learning_rate": 
4.042929292929293e-06, + "loss": 0.0, + "step": 19950 + }, + { + "epoch": 71.85251798561151, + "grad_norm": 0.0007162923575378954, + "learning_rate": 4.041666666666667e-06, + "loss": 0.0, + "step": 19975 + }, + { + "epoch": 71.94244604316546, + "grad_norm": 0.000519581779371947, + "learning_rate": 4.04040404040404e-06, + "loss": 0.0, + "step": 20000 + }, + { + "epoch": 71.94244604316546, + "eval_loss": 0.0976732075214386, + "eval_runtime": 1338.7066, + "eval_samples_per_second": 1.66, + "eval_steps_per_second": 0.104, + "eval_wer": 5.539509738576612, + "step": 20000 + }, + { + "epoch": 72.03237410071942, + "grad_norm": 0.0013573451433330774, + "learning_rate": 4.039141414141414e-06, + "loss": 0.0001, + "step": 20025 + }, + { + "epoch": 72.12230215827338, + "grad_norm": 0.0006321736145764589, + "learning_rate": 4.037878787878788e-06, + "loss": 0.0, + "step": 20050 + }, + { + "epoch": 72.21223021582733, + "grad_norm": 0.00046551282866857946, + "learning_rate": 4.036616161616162e-06, + "loss": 0.0, + "step": 20075 + }, + { + "epoch": 72.3021582733813, + "grad_norm": 0.00047266227193176746, + "learning_rate": 4.0353535353535355e-06, + "loss": 0.0, + "step": 20100 + }, + { + "epoch": 72.39208633093526, + "grad_norm": 0.0004692314541898668, + "learning_rate": 4.0340909090909095e-06, + "loss": 0.0, + "step": 20125 + }, + { + "epoch": 72.4820143884892, + "grad_norm": 0.0005892490735277534, + "learning_rate": 4.0328282828282835e-06, + "loss": 0.0, + "step": 20150 + }, + { + "epoch": 72.57194244604317, + "grad_norm": 0.0005393667961470783, + "learning_rate": 4.031565656565657e-06, + "loss": 0.0001, + "step": 20175 + }, + { + "epoch": 72.66187050359713, + "grad_norm": 0.0007663563592359424, + "learning_rate": 4.030303030303031e-06, + "loss": 0.0, + "step": 20200 + }, + { + "epoch": 72.75179856115108, + "grad_norm": 0.0005675546126440167, + "learning_rate": 4.029040404040405e-06, + "loss": 0.0, + "step": 20225 + }, + { + "epoch": 72.84172661870504, + "grad_norm": 0.0006041157757863402, + "learning_rate": 4.027777777777779e-06, + "loss": 0.0, + "step": 20250 + }, + { + "epoch": 72.93165467625899, + "grad_norm": 0.0006022896850481629, + "learning_rate": 4.026515151515152e-06, + "loss": 0.0001, + "step": 20275 + }, + { + "epoch": 73.02158273381295, + "grad_norm": 0.0005813241587020457, + "learning_rate": 4.025252525252526e-06, + "loss": 0.0, + "step": 20300 + }, + { + "epoch": 73.11151079136691, + "grad_norm": 0.0006358566461130977, + "learning_rate": 4.0239898989899e-06, + "loss": 0.0, + "step": 20325 + }, + { + "epoch": 73.20143884892086, + "grad_norm": 0.0006074347766116261, + "learning_rate": 4.022727272727273e-06, + "loss": 0.0, + "step": 20350 + }, + { + "epoch": 73.29136690647482, + "grad_norm": 0.0005062387208454311, + "learning_rate": 4.021464646464647e-06, + "loss": 0.0003, + "step": 20375 + }, + { + "epoch": 73.38129496402878, + "grad_norm": 0.0010172536130994558, + "learning_rate": 4.02020202020202e-06, + "loss": 0.0, + "step": 20400 + }, + { + "epoch": 73.47122302158273, + "grad_norm": 0.0006235135952010751, + "learning_rate": 4.018939393939394e-06, + "loss": 0.0, + "step": 20425 + }, + { + "epoch": 73.56115107913669, + "grad_norm": 0.0009783974383026361, + "learning_rate": 4.017676767676768e-06, + "loss": 0.0, + "step": 20450 + }, + { + "epoch": 73.65107913669064, + "grad_norm": 0.0005355635657906532, + "learning_rate": 4.016414141414141e-06, + "loss": 0.0, + "step": 20475 + }, + { + "epoch": 73.7410071942446, + "grad_norm": 0.0004634314973372966, + "learning_rate": 4.015151515151515e-06, + 
"loss": 0.0, + "step": 20500 + }, + { + "epoch": 73.83093525179856, + "grad_norm": 0.0005511495401151478, + "learning_rate": 4.013888888888889e-06, + "loss": 0.0, + "step": 20525 + }, + { + "epoch": 73.92086330935251, + "grad_norm": 0.0010061068460345268, + "learning_rate": 4.012626262626263e-06, + "loss": 0.0, + "step": 20550 + }, + { + "epoch": 74.01079136690647, + "grad_norm": 0.3256176710128784, + "learning_rate": 4.011363636363636e-06, + "loss": 0.0007, + "step": 20575 + }, + { + "epoch": 74.10071942446044, + "grad_norm": 0.17023605108261108, + "learning_rate": 4.01010101010101e-06, + "loss": 0.0008, + "step": 20600 + }, + { + "epoch": 74.19064748201438, + "grad_norm": 0.8051077723503113, + "learning_rate": 4.008838383838384e-06, + "loss": 0.0078, + "step": 20625 + }, + { + "epoch": 74.28057553956835, + "grad_norm": 0.4720918536186218, + "learning_rate": 4.0075757575757575e-06, + "loss": 0.0062, + "step": 20650 + }, + { + "epoch": 74.37050359712231, + "grad_norm": 0.4814521074295044, + "learning_rate": 4.0063131313131315e-06, + "loss": 0.0061, + "step": 20675 + }, + { + "epoch": 74.46043165467626, + "grad_norm": 0.7329695820808411, + "learning_rate": 4.0050505050505055e-06, + "loss": 0.0069, + "step": 20700 + }, + { + "epoch": 74.55035971223022, + "grad_norm": 0.713927686214447, + "learning_rate": 4.0037878787878795e-06, + "loss": 0.0061, + "step": 20725 + }, + { + "epoch": 74.64028776978417, + "grad_norm": 0.6485239863395691, + "learning_rate": 4.002525252525253e-06, + "loss": 0.0064, + "step": 20750 + }, + { + "epoch": 74.73021582733813, + "grad_norm": 0.8775496482849121, + "learning_rate": 4.001262626262627e-06, + "loss": 0.0048, + "step": 20775 + }, + { + "epoch": 74.82014388489209, + "grad_norm": 0.2677914798259735, + "learning_rate": 4.000000000000001e-06, + "loss": 0.004, + "step": 20800 + }, + { + "epoch": 74.91007194244604, + "grad_norm": 0.38305044174194336, + "learning_rate": 3.998737373737374e-06, + "loss": 0.0028, + "step": 20825 + }, + { + "epoch": 75.0, + "grad_norm": 0.05106651037931442, + "learning_rate": 3.997474747474748e-06, + "loss": 0.0021, + "step": 20850 + }, + { + "epoch": 75.08992805755396, + "grad_norm": 0.01168102491647005, + "learning_rate": 3.996212121212121e-06, + "loss": 0.0012, + "step": 20875 + }, + { + "epoch": 75.17985611510791, + "grad_norm": 0.22549034655094147, + "learning_rate": 3.994949494949496e-06, + "loss": 0.0015, + "step": 20900 + }, + { + "epoch": 75.26978417266187, + "grad_norm": 0.022075073793530464, + "learning_rate": 3.993686868686869e-06, + "loss": 0.0026, + "step": 20925 + }, + { + "epoch": 75.35971223021583, + "grad_norm": 0.0188248660415411, + "learning_rate": 3.992424242424243e-06, + "loss": 0.0017, + "step": 20950 + }, + { + "epoch": 75.44964028776978, + "grad_norm": 0.47026434540748596, + "learning_rate": 3.991161616161616e-06, + "loss": 0.0026, + "step": 20975 + }, + { + "epoch": 75.53956834532374, + "grad_norm": 0.2045595496892929, + "learning_rate": 3.98989898989899e-06, + "loss": 0.0019, + "step": 21000 + }, + { + "epoch": 75.53956834532374, + "eval_loss": 0.08847362548112869, + "eval_runtime": 1337.9238, + "eval_samples_per_second": 1.661, + "eval_steps_per_second": 0.104, + "eval_wer": 6.294897430200697, + "step": 21000 + }, + { + "epoch": 75.62949640287769, + "grad_norm": 0.0665188655257225, + "learning_rate": 3.988636363636364e-06, + "loss": 0.0014, + "step": 21025 + }, + { + "epoch": 75.71942446043165, + "grad_norm": 0.33609738945961, + "learning_rate": 3.987373737373737e-06, + "loss": 0.0011, + "step": 21050 + }, + { 
+ "epoch": 75.80935251798562, + "grad_norm": 0.4631134867668152, + "learning_rate": 3.986111111111112e-06, + "loss": 0.0023, + "step": 21075 + }, + { + "epoch": 75.89928057553956, + "grad_norm": 0.26408031582832336, + "learning_rate": 3.984848484848485e-06, + "loss": 0.0019, + "step": 21100 + }, + { + "epoch": 75.98920863309353, + "grad_norm": 0.3067505657672882, + "learning_rate": 3.983585858585859e-06, + "loss": 0.0021, + "step": 21125 + }, + { + "epoch": 76.07913669064749, + "grad_norm": 0.0688316822052002, + "learning_rate": 3.982323232323232e-06, + "loss": 0.0024, + "step": 21150 + }, + { + "epoch": 76.16906474820144, + "grad_norm": 1.5255663394927979, + "learning_rate": 3.981060606060606e-06, + "loss": 0.0012, + "step": 21175 + }, + { + "epoch": 76.2589928057554, + "grad_norm": 0.368730753660202, + "learning_rate": 3.97979797979798e-06, + "loss": 0.001, + "step": 21200 + }, + { + "epoch": 76.34892086330935, + "grad_norm": 0.019969308748841286, + "learning_rate": 3.9785353535353535e-06, + "loss": 0.0006, + "step": 21225 + }, + { + "epoch": 76.43884892086331, + "grad_norm": 0.070771723985672, + "learning_rate": 3.9772727272727275e-06, + "loss": 0.0004, + "step": 21250 + }, + { + "epoch": 76.52877697841727, + "grad_norm": 0.023271985352039337, + "learning_rate": 3.9760101010101015e-06, + "loss": 0.0007, + "step": 21275 + }, + { + "epoch": 76.61870503597122, + "grad_norm": 0.027517560869455338, + "learning_rate": 3.9747474747474755e-06, + "loss": 0.0004, + "step": 21300 + }, + { + "epoch": 76.70863309352518, + "grad_norm": 0.009323998354375362, + "learning_rate": 3.973484848484849e-06, + "loss": 0.0007, + "step": 21325 + }, + { + "epoch": 76.79856115107914, + "grad_norm": 0.007815494202077389, + "learning_rate": 3.972222222222223e-06, + "loss": 0.0007, + "step": 21350 + }, + { + "epoch": 76.88848920863309, + "grad_norm": 0.06828250735998154, + "learning_rate": 3.970959595959597e-06, + "loss": 0.0004, + "step": 21375 + }, + { + "epoch": 76.97841726618705, + "grad_norm": 0.4169680178165436, + "learning_rate": 3.96969696969697e-06, + "loss": 0.0007, + "step": 21400 + }, + { + "epoch": 77.06834532374101, + "grad_norm": 0.010289140976965427, + "learning_rate": 3.968434343434344e-06, + "loss": 0.0003, + "step": 21425 + }, + { + "epoch": 77.15827338129496, + "grad_norm": 0.02134793810546398, + "learning_rate": 3.967171717171717e-06, + "loss": 0.0003, + "step": 21450 + }, + { + "epoch": 77.24820143884892, + "grad_norm": 0.005463853012770414, + "learning_rate": 3.965909090909091e-06, + "loss": 0.0001, + "step": 21475 + }, + { + "epoch": 77.33812949640287, + "grad_norm": 0.0035135000944137573, + "learning_rate": 3.964646464646465e-06, + "loss": 0.0001, + "step": 21500 + }, + { + "epoch": 77.42805755395683, + "grad_norm": 0.01657390221953392, + "learning_rate": 3.963383838383839e-06, + "loss": 0.0001, + "step": 21525 + }, + { + "epoch": 77.5179856115108, + "grad_norm": 0.1767745018005371, + "learning_rate": 3.962121212121213e-06, + "loss": 0.0007, + "step": 21550 + }, + { + "epoch": 77.60791366906474, + "grad_norm": 0.016838785260915756, + "learning_rate": 3.960858585858586e-06, + "loss": 0.0001, + "step": 21575 + }, + { + "epoch": 77.6978417266187, + "grad_norm": 0.0039493367075920105, + "learning_rate": 3.95959595959596e-06, + "loss": 0.0001, + "step": 21600 + }, + { + "epoch": 77.78776978417267, + "grad_norm": 0.0031421987805515528, + "learning_rate": 3.958333333333333e-06, + "loss": 0.0003, + "step": 21625 + }, + { + "epoch": 77.87769784172662, + "grad_norm": 0.0026466776616871357, + 
"learning_rate": 3.957070707070707e-06, + "loss": 0.0003, + "step": 21650 + }, + { + "epoch": 77.96762589928058, + "grad_norm": 0.009947208687663078, + "learning_rate": 3.955808080808081e-06, + "loss": 0.0002, + "step": 21675 + }, + { + "epoch": 78.05755395683454, + "grad_norm": 0.1049116924405098, + "learning_rate": 3.954545454545454e-06, + "loss": 0.0002, + "step": 21700 + }, + { + "epoch": 78.14748201438849, + "grad_norm": 0.0023068960290402174, + "learning_rate": 3.953282828282828e-06, + "loss": 0.0001, + "step": 21725 + }, + { + "epoch": 78.23741007194245, + "grad_norm": 0.003103764960542321, + "learning_rate": 3.952020202020202e-06, + "loss": 0.0001, + "step": 21750 + }, + { + "epoch": 78.3273381294964, + "grad_norm": 0.002706879284232855, + "learning_rate": 3.950757575757576e-06, + "loss": 0.0001, + "step": 21775 + }, + { + "epoch": 78.41726618705036, + "grad_norm": 0.004320697858929634, + "learning_rate": 3.9494949494949496e-06, + "loss": 0.0001, + "step": 21800 + }, + { + "epoch": 78.50719424460432, + "grad_norm": 0.005596183240413666, + "learning_rate": 3.9482323232323236e-06, + "loss": 0.0002, + "step": 21825 + }, + { + "epoch": 78.59712230215827, + "grad_norm": 0.0037838639691472054, + "learning_rate": 3.9469696969696976e-06, + "loss": 0.0003, + "step": 21850 + }, + { + "epoch": 78.68705035971223, + "grad_norm": 0.00796448066830635, + "learning_rate": 3.945707070707071e-06, + "loss": 0.0001, + "step": 21875 + }, + { + "epoch": 78.77697841726619, + "grad_norm": 0.003022188087925315, + "learning_rate": 3.944444444444445e-06, + "loss": 0.0001, + "step": 21900 + }, + { + "epoch": 78.86690647482014, + "grad_norm": 0.0022381923627108335, + "learning_rate": 3.943181818181819e-06, + "loss": 0.0002, + "step": 21925 + }, + { + "epoch": 78.9568345323741, + "grad_norm": 0.0027954999823123217, + "learning_rate": 3.941919191919193e-06, + "loss": 0.0001, + "step": 21950 + }, + { + "epoch": 79.04676258992805, + "grad_norm": 0.0016978129278868437, + "learning_rate": 3.940656565656566e-06, + "loss": 0.0001, + "step": 21975 + }, + { + "epoch": 79.13669064748201, + "grad_norm": 0.0017409235006198287, + "learning_rate": 3.93939393939394e-06, + "loss": 0.0003, + "step": 22000 + }, + { + "epoch": 79.13669064748201, + "eval_loss": 0.0888415277004242, + "eval_runtime": 1337.7919, + "eval_samples_per_second": 1.661, + "eval_steps_per_second": 0.104, + "eval_wer": 5.598755832037325, + "step": 22000 + } + ], + "logging_steps": 25, + "max_steps": 100000, + "num_input_tokens_seen": 0, + "num_train_epochs": 360, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.777235958847242e+21, + "train_batch_size": 64, + "trial_name": null, + "trial_params": null +}