{ "best_metric": 33.472796184515516, "best_model_checkpoint": "./whisper-distil-v3/checkpoint-26000", "epoch": 1.4247356019507917, "eval_steps": 1000, "global_step": 26000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010959504630390707, "grad_norm": 5.807405471801758, "learning_rate": 3.6e-06, "loss": 8.6037, "step": 20 }, { "epoch": 0.0021919009260781414, "grad_norm": 5.726860523223877, "learning_rate": 7.6e-06, "loss": 8.4874, "step": 40 }, { "epoch": 0.003287851389117212, "grad_norm": 7.615314960479736, "learning_rate": 1.16e-05, "loss": 8.0934, "step": 60 }, { "epoch": 0.004383801852156283, "grad_norm": 7.089465618133545, "learning_rate": 1.56e-05, "loss": 7.4227, "step": 80 }, { "epoch": 0.005479752315195353, "grad_norm": 5.158086776733398, "learning_rate": 1.9600000000000002e-05, "loss": 6.3418, "step": 100 }, { "epoch": 0.006575702778234424, "grad_norm": 3.288583278656006, "learning_rate": 2.3400000000000003e-05, "loss": 5.2747, "step": 120 }, { "epoch": 0.007671653241273495, "grad_norm": 3.1715681552886963, "learning_rate": 2.7400000000000002e-05, "loss": 4.3075, "step": 140 }, { "epoch": 0.008767603704312565, "grad_norm": 3.033198833465576, "learning_rate": 3.1400000000000004e-05, "loss": 3.631, "step": 160 }, { "epoch": 0.009863554167351636, "grad_norm": 3.027251720428467, "learning_rate": 3.54e-05, "loss": 3.2186, "step": 180 }, { "epoch": 0.010959504630390707, "grad_norm": 2.9063901901245117, "learning_rate": 3.94e-05, "loss": 2.9226, "step": 200 }, { "epoch": 0.012055455093429777, "grad_norm": 3.1165690422058105, "learning_rate": 4.3400000000000005e-05, "loss": 2.8402, "step": 220 }, { "epoch": 0.013151405556468848, "grad_norm": 2.7977383136749268, "learning_rate": 4.74e-05, "loss": 2.613, "step": 240 }, { "epoch": 0.014247356019507919, "grad_norm": 3.7818286418914795, "learning_rate": 5.14e-05, "loss": 2.377, "step": 260 }, { "epoch": 0.01534330648254699, "grad_norm": 3.2088804244995117, "learning_rate": 5.5400000000000005e-05, "loss": 2.3204, "step": 280 }, { "epoch": 0.01643925694558606, "grad_norm": 3.2518157958984375, "learning_rate": 5.94e-05, "loss": 2.1812, "step": 300 }, { "epoch": 0.01753520740862513, "grad_norm": 3.725226640701294, "learning_rate": 6.340000000000001e-05, "loss": 2.158, "step": 320 }, { "epoch": 0.0186311578716642, "grad_norm": 3.5929486751556396, "learning_rate": 6.740000000000001e-05, "loss": 2.1241, "step": 340 }, { "epoch": 0.019727108334703272, "grad_norm": 4.1317572593688965, "learning_rate": 7.14e-05, "loss": 2.2284, "step": 360 }, { "epoch": 0.020823058797742343, "grad_norm": 3.4276161193847656, "learning_rate": 7.54e-05, "loss": 1.9655, "step": 380 }, { "epoch": 0.021919009260781414, "grad_norm": 3.9775540828704834, "learning_rate": 7.94e-05, "loss": 1.9407, "step": 400 }, { "epoch": 0.023014959723820484, "grad_norm": 3.67511248588562, "learning_rate": 8.34e-05, "loss": 1.9312, "step": 420 }, { "epoch": 0.024110910186859555, "grad_norm": 4.781565189361572, "learning_rate": 8.740000000000001e-05, "loss": 1.9218, "step": 440 }, { "epoch": 0.025206860649898626, "grad_norm": 5.2797698974609375, "learning_rate": 9.140000000000001e-05, "loss": 1.8729, "step": 460 }, { "epoch": 0.026302811112937696, "grad_norm": 6.1737284660339355, "learning_rate": 9.54e-05, "loss": 1.6848, "step": 480 }, { "epoch": 0.027398761575976767, "grad_norm": 4.926702976226807, "learning_rate": 9.94e-05, "loss": 1.8866, "step": 500 }, { "epoch": 0.028494712039015838, "grad_norm": 
4.043098449707031, "learning_rate": 0.0001, "loss": 1.749, "step": 520 }, { "epoch": 0.02959066250205491, "grad_norm": 4.022521495819092, "learning_rate": 0.0001, "loss": 1.7654, "step": 540 }, { "epoch": 0.03068661296509398, "grad_norm": 3.1964547634124756, "learning_rate": 0.0001, "loss": 1.7496, "step": 560 }, { "epoch": 0.03178256342813305, "grad_norm": 3.5182583332061768, "learning_rate": 0.0001, "loss": 1.7312, "step": 580 }, { "epoch": 0.03287851389117212, "grad_norm": 3.529665231704712, "learning_rate": 0.0001, "loss": 1.6307, "step": 600 }, { "epoch": 0.03397446435421119, "grad_norm": 3.329401969909668, "learning_rate": 0.0001, "loss": 1.7613, "step": 620 }, { "epoch": 0.03507041481725026, "grad_norm": 3.4481399059295654, "learning_rate": 0.0001, "loss": 1.6204, "step": 640 }, { "epoch": 0.03616636528028933, "grad_norm": 3.3551902770996094, "learning_rate": 0.0001, "loss": 1.5846, "step": 660 }, { "epoch": 0.0372623157433284, "grad_norm": 3.591031074523926, "learning_rate": 0.0001, "loss": 1.6077, "step": 680 }, { "epoch": 0.038358266206367474, "grad_norm": 3.8630764484405518, "learning_rate": 0.0001, "loss": 1.5275, "step": 700 }, { "epoch": 0.039454216669406544, "grad_norm": 3.77461838722229, "learning_rate": 0.0001, "loss": 1.5386, "step": 720 }, { "epoch": 0.040550167132445615, "grad_norm": 2.9158153533935547, "learning_rate": 0.0001, "loss": 1.5536, "step": 740 }, { "epoch": 0.041646117595484686, "grad_norm": 3.761077642440796, "learning_rate": 0.0001, "loss": 1.5607, "step": 760 }, { "epoch": 0.042742068058523756, "grad_norm": 3.5758230686187744, "learning_rate": 0.0001, "loss": 1.5145, "step": 780 }, { "epoch": 0.04383801852156283, "grad_norm": 3.01175856590271, "learning_rate": 0.0001, "loss": 1.5639, "step": 800 }, { "epoch": 0.0449339689846019, "grad_norm": 3.8395230770111084, "learning_rate": 0.0001, "loss": 1.6478, "step": 820 }, { "epoch": 0.04602991944764097, "grad_norm": 2.9240541458129883, "learning_rate": 0.0001, "loss": 1.5303, "step": 840 }, { "epoch": 0.04712586991068004, "grad_norm": 3.603835344314575, "learning_rate": 0.0001, "loss": 1.4436, "step": 860 }, { "epoch": 0.04822182037371911, "grad_norm": 3.1701183319091797, "learning_rate": 0.0001, "loss": 1.5622, "step": 880 }, { "epoch": 0.04931777083675818, "grad_norm": 4.054835796356201, "learning_rate": 0.0001, "loss": 1.4354, "step": 900 }, { "epoch": 0.05041372129979725, "grad_norm": 2.9651615619659424, "learning_rate": 0.0001, "loss": 1.4676, "step": 920 }, { "epoch": 0.05150967176283632, "grad_norm": 3.2480218410491943, "learning_rate": 0.0001, "loss": 1.3769, "step": 940 }, { "epoch": 0.05260562222587539, "grad_norm": 3.494356155395508, "learning_rate": 0.0001, "loss": 1.4928, "step": 960 }, { "epoch": 0.05370157268891446, "grad_norm": 3.394205331802368, "learning_rate": 0.0001, "loss": 1.4045, "step": 980 }, { "epoch": 0.054797523151953534, "grad_norm": 3.333587646484375, "learning_rate": 0.0001, "loss": 1.4953, "step": 1000 }, { "epoch": 0.054797523151953534, "eval_loss": 1.4313914775848389, "eval_runtime": 30911.9498, "eval_samples_per_second": 2.099, "eval_steps_per_second": 0.066, "eval_wer": 70.88587442180551, "step": 1000 }, { "epoch": 0.055893473614992605, "grad_norm": 3.2317609786987305, "learning_rate": 0.0001, "loss": 1.4376, "step": 1020 }, { "epoch": 0.056989424078031675, "grad_norm": 2.9077706336975098, "learning_rate": 0.0001, "loss": 1.4398, "step": 1040 }, { "epoch": 0.058085374541070746, "grad_norm": 3.0054707527160645, "learning_rate": 0.0001, "loss": 1.4326, "step": 1060 }, { 
"epoch": 0.05918132500410982, "grad_norm": 3.7243480682373047, "learning_rate": 0.0001, "loss": 1.4915, "step": 1080 }, { "epoch": 0.06027727546714889, "grad_norm": 2.9608402252197266, "learning_rate": 0.0001, "loss": 1.4595, "step": 1100 }, { "epoch": 0.06137322593018796, "grad_norm": 2.652829885482788, "learning_rate": 0.0001, "loss": 1.4378, "step": 1120 }, { "epoch": 0.06246917639322703, "grad_norm": 2.9017295837402344, "learning_rate": 0.0001, "loss": 1.4257, "step": 1140 }, { "epoch": 0.0635651268562661, "grad_norm": 3.0610859394073486, "learning_rate": 0.0001, "loss": 1.348, "step": 1160 }, { "epoch": 0.06466107731930516, "grad_norm": 2.701765775680542, "learning_rate": 0.0001, "loss": 1.3853, "step": 1180 }, { "epoch": 0.06575702778234424, "grad_norm": 3.83376145362854, "learning_rate": 0.0001, "loss": 1.4708, "step": 1200 }, { "epoch": 0.0668529782453833, "grad_norm": 3.178449869155884, "learning_rate": 0.0001, "loss": 1.362, "step": 1220 }, { "epoch": 0.06794892870842238, "grad_norm": 3.796205997467041, "learning_rate": 0.0001, "loss": 1.4331, "step": 1240 }, { "epoch": 0.06904487917146145, "grad_norm": 2.8163928985595703, "learning_rate": 0.0001, "loss": 1.2835, "step": 1260 }, { "epoch": 0.07014082963450052, "grad_norm": 2.698793888092041, "learning_rate": 0.0001, "loss": 1.3444, "step": 1280 }, { "epoch": 0.07123678009753959, "grad_norm": 2.584484815597534, "learning_rate": 0.0001, "loss": 1.2145, "step": 1300 }, { "epoch": 0.07233273056057866, "grad_norm": 2.696967363357544, "learning_rate": 0.0001, "loss": 1.2855, "step": 1320 }, { "epoch": 0.07342868102361773, "grad_norm": 3.382924795150757, "learning_rate": 0.0001, "loss": 1.2164, "step": 1340 }, { "epoch": 0.0745246314866568, "grad_norm": 2.8127260208129883, "learning_rate": 0.0001, "loss": 1.2873, "step": 1360 }, { "epoch": 0.07562058194969587, "grad_norm": 2.631011724472046, "learning_rate": 0.0001, "loss": 1.3759, "step": 1380 }, { "epoch": 0.07671653241273495, "grad_norm": 2.913276433944702, "learning_rate": 0.0001, "loss": 1.2688, "step": 1400 }, { "epoch": 0.07781248287577401, "grad_norm": 2.811455488204956, "learning_rate": 0.0001, "loss": 1.2179, "step": 1420 }, { "epoch": 0.07890843333881309, "grad_norm": 2.8242247104644775, "learning_rate": 0.0001, "loss": 1.142, "step": 1440 }, { "epoch": 0.08000438380185215, "grad_norm": 3.1733341217041016, "learning_rate": 0.0001, "loss": 1.2934, "step": 1460 }, { "epoch": 0.08110033426489123, "grad_norm": 2.491945743560791, "learning_rate": 0.0001, "loss": 1.3274, "step": 1480 }, { "epoch": 0.0821962847279303, "grad_norm": 2.717165470123291, "learning_rate": 0.0001, "loss": 1.2484, "step": 1500 }, { "epoch": 0.08329223519096937, "grad_norm": 2.3187918663024902, "learning_rate": 0.0001, "loss": 1.2038, "step": 1520 }, { "epoch": 0.08438818565400844, "grad_norm": 2.9296529293060303, "learning_rate": 0.0001, "loss": 1.1962, "step": 1540 }, { "epoch": 0.08548413611704751, "grad_norm": 2.5763330459594727, "learning_rate": 0.0001, "loss": 1.2122, "step": 1560 }, { "epoch": 0.08658008658008658, "grad_norm": 3.4159390926361084, "learning_rate": 0.0001, "loss": 1.2302, "step": 1580 }, { "epoch": 0.08767603704312565, "grad_norm": 2.893261432647705, "learning_rate": 0.0001, "loss": 1.2106, "step": 1600 }, { "epoch": 0.08877198750616472, "grad_norm": 2.1891727447509766, "learning_rate": 0.0001, "loss": 1.2282, "step": 1620 }, { "epoch": 0.0898679379692038, "grad_norm": 2.4100029468536377, "learning_rate": 0.0001, "loss": 1.2039, "step": 1640 }, { "epoch": 0.09096388843224286, 
"grad_norm": 2.5420494079589844, "learning_rate": 0.0001, "loss": 1.2201, "step": 1660 }, { "epoch": 0.09205983889528194, "grad_norm": 3.1885313987731934, "learning_rate": 0.0001, "loss": 1.2446, "step": 1680 }, { "epoch": 0.093155789358321, "grad_norm": 3.120586633682251, "learning_rate": 0.0001, "loss": 1.2308, "step": 1700 }, { "epoch": 0.09425173982136008, "grad_norm": 2.4548628330230713, "learning_rate": 0.0001, "loss": 1.1777, "step": 1720 }, { "epoch": 0.09534769028439914, "grad_norm": 3.101803779602051, "learning_rate": 0.0001, "loss": 1.2123, "step": 1740 }, { "epoch": 0.09644364074743822, "grad_norm": 2.536121368408203, "learning_rate": 0.0001, "loss": 1.1914, "step": 1760 }, { "epoch": 0.09753959121047728, "grad_norm": 2.3796801567077637, "learning_rate": 0.0001, "loss": 1.1848, "step": 1780 }, { "epoch": 0.09863554167351636, "grad_norm": 2.67964243888855, "learning_rate": 0.0001, "loss": 1.1973, "step": 1800 }, { "epoch": 0.09973149213655542, "grad_norm": 3.160212755203247, "learning_rate": 0.0001, "loss": 1.2472, "step": 1820 }, { "epoch": 0.1008274425995945, "grad_norm": 2.7035927772521973, "learning_rate": 0.0001, "loss": 1.1844, "step": 1840 }, { "epoch": 0.10192339306263357, "grad_norm": 2.7725090980529785, "learning_rate": 0.0001, "loss": 1.1262, "step": 1860 }, { "epoch": 0.10301934352567264, "grad_norm": 2.2705016136169434, "learning_rate": 0.0001, "loss": 1.182, "step": 1880 }, { "epoch": 0.10411529398871171, "grad_norm": 3.0717403888702393, "learning_rate": 0.0001, "loss": 1.137, "step": 1900 }, { "epoch": 0.10521124445175078, "grad_norm": 2.9270904064178467, "learning_rate": 0.0001, "loss": 1.2556, "step": 1920 }, { "epoch": 0.10630719491478985, "grad_norm": 2.4564895629882812, "learning_rate": 0.0001, "loss": 1.1812, "step": 1940 }, { "epoch": 0.10740314537782893, "grad_norm": 2.983851909637451, "learning_rate": 0.0001, "loss": 1.1445, "step": 1960 }, { "epoch": 0.10849909584086799, "grad_norm": 2.772733688354492, "learning_rate": 0.0001, "loss": 1.1968, "step": 1980 }, { "epoch": 0.10959504630390707, "grad_norm": 2.9768126010894775, "learning_rate": 0.0001, "loss": 1.0942, "step": 2000 }, { "epoch": 0.10959504630390707, "eval_loss": 1.1446514129638672, "eval_runtime": 30634.8587, "eval_samples_per_second": 2.118, "eval_steps_per_second": 0.066, "eval_wer": 61.28519240053001, "step": 2000 }, { "epoch": 0.11069099676694613, "grad_norm": 2.806312322616577, "learning_rate": 0.0001, "loss": 1.1924, "step": 2020 }, { "epoch": 0.11178694722998521, "grad_norm": 2.639443874359131, "learning_rate": 0.0001, "loss": 1.0572, "step": 2040 }, { "epoch": 0.11288289769302427, "grad_norm": 2.2005367279052734, "learning_rate": 0.0001, "loss": 1.1337, "step": 2060 }, { "epoch": 0.11397884815606335, "grad_norm": 2.4102020263671875, "learning_rate": 0.0001, "loss": 1.1297, "step": 2080 }, { "epoch": 0.11507479861910241, "grad_norm": 3.410691976547241, "learning_rate": 0.0001, "loss": 1.1354, "step": 2100 }, { "epoch": 0.11617074908214149, "grad_norm": 2.1337172985076904, "learning_rate": 0.0001, "loss": 1.1725, "step": 2120 }, { "epoch": 0.11726669954518056, "grad_norm": 2.627319097518921, "learning_rate": 0.0001, "loss": 1.1006, "step": 2140 }, { "epoch": 0.11836265000821963, "grad_norm": 2.6450726985931396, "learning_rate": 0.0001, "loss": 1.0985, "step": 2160 }, { "epoch": 0.1194586004712587, "grad_norm": 2.3205084800720215, "learning_rate": 0.0001, "loss": 1.1634, "step": 2180 }, { "epoch": 0.12055455093429777, "grad_norm": 2.51177978515625, "learning_rate": 0.0001, "loss": 1.1697, 
"step": 2200 }, { "epoch": 0.12165050139733684, "grad_norm": 2.6632323265075684, "learning_rate": 0.0001, "loss": 1.071, "step": 2220 }, { "epoch": 0.12274645186037592, "grad_norm": 2.8322274684906006, "learning_rate": 0.0001, "loss": 1.0983, "step": 2240 }, { "epoch": 0.12384240232341498, "grad_norm": 2.547708749771118, "learning_rate": 0.0001, "loss": 1.0629, "step": 2260 }, { "epoch": 0.12493835278645406, "grad_norm": 2.6638150215148926, "learning_rate": 0.0001, "loss": 1.1985, "step": 2280 }, { "epoch": 0.12603430324949313, "grad_norm": 2.980463743209839, "learning_rate": 0.0001, "loss": 1.1885, "step": 2300 }, { "epoch": 0.1271302537125322, "grad_norm": 1.9924368858337402, "learning_rate": 0.0001, "loss": 1.0971, "step": 2320 }, { "epoch": 0.12822620417557126, "grad_norm": 2.2847180366516113, "learning_rate": 0.0001, "loss": 1.1149, "step": 2340 }, { "epoch": 0.12932215463861033, "grad_norm": 2.4860479831695557, "learning_rate": 0.0001, "loss": 1.0927, "step": 2360 }, { "epoch": 0.13041810510164942, "grad_norm": 2.3988494873046875, "learning_rate": 0.0001, "loss": 1.1918, "step": 2380 }, { "epoch": 0.13151405556468848, "grad_norm": 2.5361902713775635, "learning_rate": 0.0001, "loss": 1.0603, "step": 2400 }, { "epoch": 0.13261000602772754, "grad_norm": 2.4060215950012207, "learning_rate": 0.0001, "loss": 1.056, "step": 2420 }, { "epoch": 0.1337059564907666, "grad_norm": 2.4094231128692627, "learning_rate": 0.0001, "loss": 1.0787, "step": 2440 }, { "epoch": 0.1348019069538057, "grad_norm": 2.5207912921905518, "learning_rate": 0.0001, "loss": 1.0901, "step": 2460 }, { "epoch": 0.13589785741684476, "grad_norm": 2.1340293884277344, "learning_rate": 0.0001, "loss": 1.1691, "step": 2480 }, { "epoch": 0.13699380787988383, "grad_norm": 2.312554359436035, "learning_rate": 0.0001, "loss": 0.9791, "step": 2500 }, { "epoch": 0.1380897583429229, "grad_norm": 2.2881298065185547, "learning_rate": 0.0001, "loss": 0.9998, "step": 2520 }, { "epoch": 0.13918570880596198, "grad_norm": 2.2146573066711426, "learning_rate": 0.0001, "loss": 1.094, "step": 2540 }, { "epoch": 0.14028165926900105, "grad_norm": 2.3992650508880615, "learning_rate": 0.0001, "loss": 1.0667, "step": 2560 }, { "epoch": 0.1413776097320401, "grad_norm": 2.7630209922790527, "learning_rate": 0.0001, "loss": 1.1541, "step": 2580 }, { "epoch": 0.14247356019507917, "grad_norm": 2.9216675758361816, "learning_rate": 0.0001, "loss": 1.0463, "step": 2600 }, { "epoch": 0.14356951065811827, "grad_norm": 2.366373062133789, "learning_rate": 0.0001, "loss": 1.0557, "step": 2620 }, { "epoch": 0.14466546112115733, "grad_norm": 2.7161865234375, "learning_rate": 0.0001, "loss": 1.1066, "step": 2640 }, { "epoch": 0.1457614115841964, "grad_norm": 2.046992778778076, "learning_rate": 0.0001, "loss": 0.9786, "step": 2660 }, { "epoch": 0.14685736204723546, "grad_norm": 2.6320793628692627, "learning_rate": 0.0001, "loss": 0.9564, "step": 2680 }, { "epoch": 0.14795331251027455, "grad_norm": 2.485445737838745, "learning_rate": 0.0001, "loss": 1.0283, "step": 2700 }, { "epoch": 0.1490492629733136, "grad_norm": 2.267420768737793, "learning_rate": 0.0001, "loss": 1.0092, "step": 2720 }, { "epoch": 0.15014521343635268, "grad_norm": 2.618067502975464, "learning_rate": 0.0001, "loss": 1.0369, "step": 2740 }, { "epoch": 0.15124116389939174, "grad_norm": 2.502471685409546, "learning_rate": 0.0001, "loss": 0.9982, "step": 2760 }, { "epoch": 0.15233711436243083, "grad_norm": 2.936964273452759, "learning_rate": 0.0001, "loss": 1.1122, "step": 2780 }, { "epoch": 
0.1534330648254699, "grad_norm": 2.5342159271240234, "learning_rate": 0.0001, "loss": 1.0409, "step": 2800 }, { "epoch": 0.15452901528850896, "grad_norm": 2.88598895072937, "learning_rate": 0.0001, "loss": 1.0259, "step": 2820 }, { "epoch": 0.15562496575154802, "grad_norm": 2.6327946186065674, "learning_rate": 0.0001, "loss": 0.9829, "step": 2840 }, { "epoch": 0.1567209162145871, "grad_norm": 2.4873671531677246, "learning_rate": 0.0001, "loss": 1.0472, "step": 2860 }, { "epoch": 0.15781686667762618, "grad_norm": 2.1543166637420654, "learning_rate": 0.0001, "loss": 1.0157, "step": 2880 }, { "epoch": 0.15891281714066524, "grad_norm": 1.9687381982803345, "learning_rate": 0.0001, "loss": 1.0465, "step": 2900 }, { "epoch": 0.1600087676037043, "grad_norm": 2.868544816970825, "learning_rate": 0.0001, "loss": 0.9835, "step": 2920 }, { "epoch": 0.1611047180667434, "grad_norm": 2.3211984634399414, "learning_rate": 0.0001, "loss": 1.1204, "step": 2940 }, { "epoch": 0.16220066852978246, "grad_norm": 2.631458282470703, "learning_rate": 0.0001, "loss": 1.0175, "step": 2960 }, { "epoch": 0.16329661899282152, "grad_norm": 2.7994022369384766, "learning_rate": 0.0001, "loss": 1.0828, "step": 2980 }, { "epoch": 0.1643925694558606, "grad_norm": 2.051626443862915, "learning_rate": 0.0001, "loss": 0.97, "step": 3000 }, { "epoch": 0.1643925694558606, "eval_loss": 1.0072325468063354, "eval_runtime": 30710.9249, "eval_samples_per_second": 2.113, "eval_steps_per_second": 0.066, "eval_wer": 55.08434535201816, "step": 3000 }, { "epoch": 0.16548851991889968, "grad_norm": 2.6088364124298096, "learning_rate": 0.0001, "loss": 0.9803, "step": 3020 }, { "epoch": 0.16658447038193874, "grad_norm": 2.234034299850464, "learning_rate": 0.0001, "loss": 1.0757, "step": 3040 }, { "epoch": 0.1676804208449778, "grad_norm": 2.3472328186035156, "learning_rate": 0.0001, "loss": 0.9408, "step": 3060 }, { "epoch": 0.16877637130801687, "grad_norm": 2.5871200561523438, "learning_rate": 0.0001, "loss": 0.9269, "step": 3080 }, { "epoch": 0.16987232177105596, "grad_norm": 2.0150465965270996, "learning_rate": 0.0001, "loss": 1.0547, "step": 3100 }, { "epoch": 0.17096827223409503, "grad_norm": 2.5823395252227783, "learning_rate": 0.0001, "loss": 1.0559, "step": 3120 }, { "epoch": 0.1720642226971341, "grad_norm": 2.8252885341644287, "learning_rate": 0.0001, "loss": 1.1219, "step": 3140 }, { "epoch": 0.17316017316017315, "grad_norm": 2.1086535453796387, "learning_rate": 0.0001, "loss": 1.0089, "step": 3160 }, { "epoch": 0.17425612362321224, "grad_norm": 2.2288014888763428, "learning_rate": 0.0001, "loss": 1.136, "step": 3180 }, { "epoch": 0.1753520740862513, "grad_norm": 2.6622703075408936, "learning_rate": 0.0001, "loss": 1.0395, "step": 3200 }, { "epoch": 0.17644802454929037, "grad_norm": 1.9478541612625122, "learning_rate": 0.0001, "loss": 1.0658, "step": 3220 }, { "epoch": 0.17754397501232944, "grad_norm": 2.55828857421875, "learning_rate": 0.0001, "loss": 0.9904, "step": 3240 }, { "epoch": 0.1786399254753685, "grad_norm": 2.533651828765869, "learning_rate": 0.0001, "loss": 0.9733, "step": 3260 }, { "epoch": 0.1797358759384076, "grad_norm": 1.8745101690292358, "learning_rate": 0.0001, "loss": 0.9903, "step": 3280 }, { "epoch": 0.18083182640144665, "grad_norm": 1.8459206819534302, "learning_rate": 0.0001, "loss": 0.9095, "step": 3300 }, { "epoch": 0.18192777686448572, "grad_norm": 2.6654012203216553, "learning_rate": 0.0001, "loss": 0.9854, "step": 3320 }, { "epoch": 0.18302372732752478, "grad_norm": 2.6444480419158936, "learning_rate": 
0.0001, "loss": 0.8857, "step": 3340 }, { "epoch": 0.18411967779056387, "grad_norm": 2.190462827682495, "learning_rate": 0.0001, "loss": 0.9375, "step": 3360 }, { "epoch": 0.18521562825360294, "grad_norm": 2.8208882808685303, "learning_rate": 0.0001, "loss": 0.9646, "step": 3380 }, { "epoch": 0.186311578716642, "grad_norm": 2.4978795051574707, "learning_rate": 0.0001, "loss": 0.9724, "step": 3400 }, { "epoch": 0.18740752917968106, "grad_norm": 2.4202938079833984, "learning_rate": 0.0001, "loss": 0.9659, "step": 3420 }, { "epoch": 0.18850347964272016, "grad_norm": 1.9026118516921997, "learning_rate": 0.0001, "loss": 1.0321, "step": 3440 }, { "epoch": 0.18959943010575922, "grad_norm": 2.6031651496887207, "learning_rate": 0.0001, "loss": 0.9622, "step": 3460 }, { "epoch": 0.19069538056879828, "grad_norm": 1.962509274482727, "learning_rate": 0.0001, "loss": 1.0262, "step": 3480 }, { "epoch": 0.19179133103183735, "grad_norm": 2.794633626937866, "learning_rate": 0.0001, "loss": 1.0626, "step": 3500 }, { "epoch": 0.19288728149487644, "grad_norm": 2.4276185035705566, "learning_rate": 0.0001, "loss": 0.9961, "step": 3520 }, { "epoch": 0.1939832319579155, "grad_norm": 2.0747737884521484, "learning_rate": 0.0001, "loss": 0.8945, "step": 3540 }, { "epoch": 0.19507918242095457, "grad_norm": 1.9151681661605835, "learning_rate": 0.0001, "loss": 1.0664, "step": 3560 }, { "epoch": 0.19617513288399363, "grad_norm": 2.11547589302063, "learning_rate": 0.0001, "loss": 0.9865, "step": 3580 }, { "epoch": 0.19727108334703272, "grad_norm": 2.359848737716675, "learning_rate": 0.0001, "loss": 0.95, "step": 3600 }, { "epoch": 0.19836703381007179, "grad_norm": 1.9854378700256348, "learning_rate": 0.0001, "loss": 0.9992, "step": 3620 }, { "epoch": 0.19946298427311085, "grad_norm": 2.476423978805542, "learning_rate": 0.0001, "loss": 0.9097, "step": 3640 }, { "epoch": 0.2005589347361499, "grad_norm": 2.420011281967163, "learning_rate": 0.0001, "loss": 1.0167, "step": 3660 }, { "epoch": 0.201654885199189, "grad_norm": 2.12312388420105, "learning_rate": 0.0001, "loss": 0.9298, "step": 3680 }, { "epoch": 0.20275083566222807, "grad_norm": 1.9679986238479614, "learning_rate": 0.0001, "loss": 1.0064, "step": 3700 }, { "epoch": 0.20384678612526713, "grad_norm": 2.608135461807251, "learning_rate": 0.0001, "loss": 0.9396, "step": 3720 }, { "epoch": 0.2049427365883062, "grad_norm": 2.542102098464966, "learning_rate": 0.0001, "loss": 1.0868, "step": 3740 }, { "epoch": 0.2060386870513453, "grad_norm": 2.5252091884613037, "learning_rate": 0.0001, "loss": 1.0417, "step": 3760 }, { "epoch": 0.20713463751438435, "grad_norm": 1.98774254322052, "learning_rate": 0.0001, "loss": 0.9949, "step": 3780 }, { "epoch": 0.20823058797742341, "grad_norm": 1.9502965211868286, "learning_rate": 0.0001, "loss": 0.9862, "step": 3800 }, { "epoch": 0.20932653844046248, "grad_norm": 2.2537944316864014, "learning_rate": 0.0001, "loss": 0.9087, "step": 3820 }, { "epoch": 0.21042248890350157, "grad_norm": 2.2866523265838623, "learning_rate": 0.0001, "loss": 1.0128, "step": 3840 }, { "epoch": 0.21151843936654063, "grad_norm": 2.2907001972198486, "learning_rate": 0.0001, "loss": 0.9654, "step": 3860 }, { "epoch": 0.2126143898295797, "grad_norm": 2.5648560523986816, "learning_rate": 0.0001, "loss": 1.0269, "step": 3880 }, { "epoch": 0.21371034029261876, "grad_norm": 2.198974847793579, "learning_rate": 0.0001, "loss": 0.9823, "step": 3900 }, { "epoch": 0.21480629075565785, "grad_norm": 2.1045591831207275, "learning_rate": 0.0001, "loss": 0.9139, "step": 3920 }, { 
"epoch": 0.21590224121869692, "grad_norm": 2.1462857723236084, "learning_rate": 0.0001, "loss": 0.9406, "step": 3940 }, { "epoch": 0.21699819168173598, "grad_norm": 2.3216285705566406, "learning_rate": 0.0001, "loss": 0.8597, "step": 3960 }, { "epoch": 0.21809414214477504, "grad_norm": 1.867150068283081, "learning_rate": 0.0001, "loss": 0.9776, "step": 3980 }, { "epoch": 0.21919009260781414, "grad_norm": 2.3432791233062744, "learning_rate": 0.0001, "loss": 0.9546, "step": 4000 }, { "epoch": 0.21919009260781414, "eval_loss": 0.9323587417602539, "eval_runtime": 30935.2713, "eval_samples_per_second": 2.098, "eval_steps_per_second": 0.066, "eval_wer": 63.836951720973865, "step": 4000 }, { "epoch": 0.2202860430708532, "grad_norm": 1.9426536560058594, "learning_rate": 0.0001, "loss": 0.9291, "step": 4020 }, { "epoch": 0.22138199353389226, "grad_norm": 2.693723201751709, "learning_rate": 0.0001, "loss": 0.9072, "step": 4040 }, { "epoch": 0.22247794399693133, "grad_norm": 2.237900972366333, "learning_rate": 0.0001, "loss": 0.8571, "step": 4060 }, { "epoch": 0.22357389445997042, "grad_norm": 2.739129066467285, "learning_rate": 0.0001, "loss": 0.9132, "step": 4080 }, { "epoch": 0.22466984492300948, "grad_norm": 1.886438012123108, "learning_rate": 0.0001, "loss": 0.9646, "step": 4100 }, { "epoch": 0.22576579538604855, "grad_norm": 2.3505897521972656, "learning_rate": 0.0001, "loss": 1.0479, "step": 4120 }, { "epoch": 0.2268617458490876, "grad_norm": 2.4302868843078613, "learning_rate": 0.0001, "loss": 0.9956, "step": 4140 }, { "epoch": 0.2279576963121267, "grad_norm": 2.2747528553009033, "learning_rate": 0.0001, "loss": 0.9621, "step": 4160 }, { "epoch": 0.22905364677516576, "grad_norm": 2.312248945236206, "learning_rate": 0.0001, "loss": 0.9292, "step": 4180 }, { "epoch": 0.23014959723820483, "grad_norm": 2.0439066886901855, "learning_rate": 0.0001, "loss": 0.8804, "step": 4200 }, { "epoch": 0.2312455477012439, "grad_norm": 2.615898609161377, "learning_rate": 0.0001, "loss": 0.9302, "step": 4220 }, { "epoch": 0.23234149816428298, "grad_norm": 2.306796073913574, "learning_rate": 0.0001, "loss": 1.0401, "step": 4240 }, { "epoch": 0.23343744862732205, "grad_norm": 2.4527432918548584, "learning_rate": 0.0001, "loss": 0.9195, "step": 4260 }, { "epoch": 0.2345333990903611, "grad_norm": 1.8589290380477905, "learning_rate": 0.0001, "loss": 0.9284, "step": 4280 }, { "epoch": 0.23562934955340017, "grad_norm": 1.8492025136947632, "learning_rate": 0.0001, "loss": 0.8898, "step": 4300 }, { "epoch": 0.23672530001643927, "grad_norm": 2.574871063232422, "learning_rate": 0.0001, "loss": 1.0026, "step": 4320 }, { "epoch": 0.23782125047947833, "grad_norm": 2.2600936889648438, "learning_rate": 0.0001, "loss": 1.0738, "step": 4340 }, { "epoch": 0.2389172009425174, "grad_norm": 2.35066556930542, "learning_rate": 0.0001, "loss": 0.8573, "step": 4360 }, { "epoch": 0.24001315140555646, "grad_norm": 2.165745496749878, "learning_rate": 0.0001, "loss": 0.8989, "step": 4380 }, { "epoch": 0.24110910186859555, "grad_norm": 2.1494085788726807, "learning_rate": 0.0001, "loss": 0.8292, "step": 4400 }, { "epoch": 0.2422050523316346, "grad_norm": 2.185359239578247, "learning_rate": 0.0001, "loss": 0.8954, "step": 4420 }, { "epoch": 0.24330100279467368, "grad_norm": 2.193904161453247, "learning_rate": 0.0001, "loss": 0.8944, "step": 4440 }, { "epoch": 0.24439695325771274, "grad_norm": 2.1101438999176025, "learning_rate": 0.0001, "loss": 0.9059, "step": 4460 }, { "epoch": 0.24549290372075183, "grad_norm": 2.026642084121704, 
"learning_rate": 0.0001, "loss": 0.8978, "step": 4480 }, { "epoch": 0.2465888541837909, "grad_norm": 2.0481228828430176, "learning_rate": 0.0001, "loss": 0.8835, "step": 4500 }, { "epoch": 0.24768480464682996, "grad_norm": 2.201350688934326, "learning_rate": 0.0001, "loss": 0.9519, "step": 4520 }, { "epoch": 0.24878075510986902, "grad_norm": 1.852100133895874, "learning_rate": 0.0001, "loss": 0.8458, "step": 4540 }, { "epoch": 0.24987670557290811, "grad_norm": 2.1303794384002686, "learning_rate": 0.0001, "loss": 0.9092, "step": 4560 }, { "epoch": 0.25097265603594715, "grad_norm": 2.2715415954589844, "learning_rate": 0.0001, "loss": 0.8931, "step": 4580 }, { "epoch": 0.25206860649898627, "grad_norm": 2.091785192489624, "learning_rate": 0.0001, "loss": 0.8645, "step": 4600 }, { "epoch": 0.25316455696202533, "grad_norm": 2.108103036880493, "learning_rate": 0.0001, "loss": 0.8387, "step": 4620 }, { "epoch": 0.2542605074250644, "grad_norm": 2.083848237991333, "learning_rate": 0.0001, "loss": 0.8315, "step": 4640 }, { "epoch": 0.25535645788810346, "grad_norm": 1.570475459098816, "learning_rate": 0.0001, "loss": 0.9355, "step": 4660 }, { "epoch": 0.2564524083511425, "grad_norm": 1.90199875831604, "learning_rate": 0.0001, "loss": 0.8308, "step": 4680 }, { "epoch": 0.2575483588141816, "grad_norm": 2.1952812671661377, "learning_rate": 0.0001, "loss": 0.8618, "step": 4700 }, { "epoch": 0.25864430927722065, "grad_norm": 2.0530431270599365, "learning_rate": 0.0001, "loss": 0.7951, "step": 4720 }, { "epoch": 0.2597402597402597, "grad_norm": 2.202252149581909, "learning_rate": 0.0001, "loss": 0.8858, "step": 4740 }, { "epoch": 0.26083621020329883, "grad_norm": 1.9541796445846558, "learning_rate": 0.0001, "loss": 0.8466, "step": 4760 }, { "epoch": 0.2619321606663379, "grad_norm": 1.9440534114837646, "learning_rate": 0.0001, "loss": 0.8488, "step": 4780 }, { "epoch": 0.26302811112937696, "grad_norm": 2.569821834564209, "learning_rate": 0.0001, "loss": 0.963, "step": 4800 }, { "epoch": 0.264124061592416, "grad_norm": 1.8896031379699707, "learning_rate": 0.0001, "loss": 0.837, "step": 4820 }, { "epoch": 0.2652200120554551, "grad_norm": 1.9390859603881836, "learning_rate": 0.0001, "loss": 0.8855, "step": 4840 }, { "epoch": 0.26631596251849415, "grad_norm": 2.2261974811553955, "learning_rate": 0.0001, "loss": 0.8901, "step": 4860 }, { "epoch": 0.2674119129815332, "grad_norm": 2.0486056804656982, "learning_rate": 0.0001, "loss": 0.8073, "step": 4880 }, { "epoch": 0.2685078634445723, "grad_norm": 2.292015314102173, "learning_rate": 0.0001, "loss": 0.9492, "step": 4900 }, { "epoch": 0.2696038139076114, "grad_norm": 2.0762240886688232, "learning_rate": 0.0001, "loss": 0.8528, "step": 4920 }, { "epoch": 0.27069976437065046, "grad_norm": 1.870642066001892, "learning_rate": 0.0001, "loss": 0.9482, "step": 4940 }, { "epoch": 0.27179571483368953, "grad_norm": 2.436768054962158, "learning_rate": 0.0001, "loss": 0.9299, "step": 4960 }, { "epoch": 0.2728916652967286, "grad_norm": 2.505880832672119, "learning_rate": 0.0001, "loss": 0.9259, "step": 4980 }, { "epoch": 0.27398761575976766, "grad_norm": 1.717252492904663, "learning_rate": 0.0001, "loss": 0.8134, "step": 5000 }, { "epoch": 0.27398761575976766, "eval_loss": 0.8726964592933655, "eval_runtime": 30710.3822, "eval_samples_per_second": 2.113, "eval_steps_per_second": 0.066, "eval_wer": 52.213316533880224, "step": 5000 }, { "epoch": 0.2750835662228067, "grad_norm": 2.28765869140625, "learning_rate": 0.0001, "loss": 1.0229, "step": 5020 }, { "epoch": 
0.2761795166858458, "grad_norm": 2.2264580726623535, "learning_rate": 0.0001, "loss": 0.8291, "step": 5040 }, { "epoch": 0.27727546714888485, "grad_norm": 1.9387757778167725, "learning_rate": 0.0001, "loss": 0.821, "step": 5060 }, { "epoch": 0.27837141761192397, "grad_norm": 2.8628933429718018, "learning_rate": 0.0001, "loss": 0.9521, "step": 5080 }, { "epoch": 0.27946736807496303, "grad_norm": 2.2691447734832764, "learning_rate": 0.0001, "loss": 0.8182, "step": 5100 }, { "epoch": 0.2805633185380021, "grad_norm": 1.9515260457992554, "learning_rate": 0.0001, "loss": 0.9342, "step": 5120 }, { "epoch": 0.28165926900104116, "grad_norm": 2.1714837551116943, "learning_rate": 0.0001, "loss": 0.9663, "step": 5140 }, { "epoch": 0.2827552194640802, "grad_norm": 2.0159664154052734, "learning_rate": 0.0001, "loss": 0.8294, "step": 5160 }, { "epoch": 0.2838511699271193, "grad_norm": 2.024634599685669, "learning_rate": 0.0001, "loss": 0.896, "step": 5180 }, { "epoch": 0.28494712039015835, "grad_norm": 2.0035595893859863, "learning_rate": 0.0001, "loss": 0.8446, "step": 5200 }, { "epoch": 0.2860430708531974, "grad_norm": 2.4142866134643555, "learning_rate": 0.0001, "loss": 0.8835, "step": 5220 }, { "epoch": 0.28713902131623653, "grad_norm": 2.070338010787964, "learning_rate": 0.0001, "loss": 0.8687, "step": 5240 }, { "epoch": 0.2882349717792756, "grad_norm": 1.9818578958511353, "learning_rate": 0.0001, "loss": 0.8296, "step": 5260 }, { "epoch": 0.28933092224231466, "grad_norm": 1.8923412561416626, "learning_rate": 0.0001, "loss": 0.8999, "step": 5280 }, { "epoch": 0.2904268727053537, "grad_norm": 2.200206995010376, "learning_rate": 0.0001, "loss": 0.8662, "step": 5300 }, { "epoch": 0.2915228231683928, "grad_norm": 1.982446551322937, "learning_rate": 0.0001, "loss": 0.8301, "step": 5320 }, { "epoch": 0.29261877363143185, "grad_norm": 1.934844732284546, "learning_rate": 0.0001, "loss": 0.8219, "step": 5340 }, { "epoch": 0.2937147240944709, "grad_norm": 2.2790510654449463, "learning_rate": 0.0001, "loss": 0.8666, "step": 5360 }, { "epoch": 0.29481067455751, "grad_norm": 1.771672248840332, "learning_rate": 0.0001, "loss": 0.843, "step": 5380 }, { "epoch": 0.2959066250205491, "grad_norm": 2.3459877967834473, "learning_rate": 0.0001, "loss": 0.8516, "step": 5400 }, { "epoch": 0.29700257548358816, "grad_norm": 2.156458854675293, "learning_rate": 0.0001, "loss": 0.8425, "step": 5420 }, { "epoch": 0.2980985259466272, "grad_norm": 1.9492950439453125, "learning_rate": 0.0001, "loss": 0.8445, "step": 5440 }, { "epoch": 0.2991944764096663, "grad_norm": 2.1061997413635254, "learning_rate": 0.0001, "loss": 0.8858, "step": 5460 }, { "epoch": 0.30029042687270535, "grad_norm": 2.3567299842834473, "learning_rate": 0.0001, "loss": 0.8376, "step": 5480 }, { "epoch": 0.3013863773357444, "grad_norm": 2.1302335262298584, "learning_rate": 0.0001, "loss": 0.8272, "step": 5500 }, { "epoch": 0.3024823277987835, "grad_norm": 2.2098424434661865, "learning_rate": 0.0001, "loss": 0.8742, "step": 5520 }, { "epoch": 0.30357827826182254, "grad_norm": 1.7558562755584717, "learning_rate": 0.0001, "loss": 0.8863, "step": 5540 }, { "epoch": 0.30467422872486166, "grad_norm": 1.8461397886276245, "learning_rate": 0.0001, "loss": 0.8792, "step": 5560 }, { "epoch": 0.3057701791879007, "grad_norm": 2.0006344318389893, "learning_rate": 0.0001, "loss": 0.8263, "step": 5580 }, { "epoch": 0.3068661296509398, "grad_norm": 1.6772565841674805, "learning_rate": 0.0001, "loss": 0.789, "step": 5600 }, { "epoch": 0.30796208011397885, "grad_norm": 
1.9263228178024292, "learning_rate": 0.0001, "loss": 0.842, "step": 5620 }, { "epoch": 0.3090580305770179, "grad_norm": 1.8888592720031738, "learning_rate": 0.0001, "loss": 0.8475, "step": 5640 }, { "epoch": 0.310153981040057, "grad_norm": 2.2354602813720703, "learning_rate": 0.0001, "loss": 1.0036, "step": 5660 }, { "epoch": 0.31124993150309604, "grad_norm": 1.9634332656860352, "learning_rate": 0.0001, "loss": 0.8517, "step": 5680 }, { "epoch": 0.3123458819661351, "grad_norm": 2.348825216293335, "learning_rate": 0.0001, "loss": 0.8731, "step": 5700 }, { "epoch": 0.3134418324291742, "grad_norm": 2.487741708755493, "learning_rate": 0.0001, "loss": 0.8556, "step": 5720 }, { "epoch": 0.3145377828922133, "grad_norm": 1.999516248703003, "learning_rate": 0.0001, "loss": 0.7969, "step": 5740 }, { "epoch": 0.31563373335525235, "grad_norm": 1.9654616117477417, "learning_rate": 0.0001, "loss": 0.7843, "step": 5760 }, { "epoch": 0.3167296838182914, "grad_norm": 2.1070950031280518, "learning_rate": 0.0001, "loss": 0.8399, "step": 5780 }, { "epoch": 0.3178256342813305, "grad_norm": 2.257129192352295, "learning_rate": 0.0001, "loss": 0.8224, "step": 5800 }, { "epoch": 0.31892158474436955, "grad_norm": 1.8256118297576904, "learning_rate": 0.0001, "loss": 0.794, "step": 5820 }, { "epoch": 0.3200175352074086, "grad_norm": 1.8899625539779663, "learning_rate": 0.0001, "loss": 0.8614, "step": 5840 }, { "epoch": 0.3211134856704477, "grad_norm": 2.221484661102295, "learning_rate": 0.0001, "loss": 0.765, "step": 5860 }, { "epoch": 0.3222094361334868, "grad_norm": 1.796877384185791, "learning_rate": 0.0001, "loss": 0.8359, "step": 5880 }, { "epoch": 0.32330538659652586, "grad_norm": 1.7495447397232056, "learning_rate": 0.0001, "loss": 0.8688, "step": 5900 }, { "epoch": 0.3244013370595649, "grad_norm": 2.136664628982544, "learning_rate": 0.0001, "loss": 0.9163, "step": 5920 }, { "epoch": 0.325497287522604, "grad_norm": 1.8508238792419434, "learning_rate": 0.0001, "loss": 0.7975, "step": 5940 }, { "epoch": 0.32659323798564305, "grad_norm": 2.144523859024048, "learning_rate": 0.0001, "loss": 0.7749, "step": 5960 }, { "epoch": 0.3276891884486821, "grad_norm": 2.208815336227417, "learning_rate": 0.0001, "loss": 0.8148, "step": 5980 }, { "epoch": 0.3287851389117212, "grad_norm": 2.0617401599884033, "learning_rate": 0.0001, "loss": 0.8884, "step": 6000 }, { "epoch": 0.3287851389117212, "eval_loss": 0.8316722512245178, "eval_runtime": 30850.8589, "eval_samples_per_second": 2.103, "eval_steps_per_second": 0.066, "eval_wer": 45.9960352377659, "step": 6000 }, { "epoch": 0.32988108937476024, "grad_norm": 2.0406434535980225, "learning_rate": 0.0001, "loss": 0.8504, "step": 6020 }, { "epoch": 0.33097703983779936, "grad_norm": 2.1899139881134033, "learning_rate": 0.0001, "loss": 0.7782, "step": 6040 }, { "epoch": 0.3320729903008384, "grad_norm": 2.650421380996704, "learning_rate": 0.0001, "loss": 0.7823, "step": 6060 }, { "epoch": 0.3331689407638775, "grad_norm": 2.085683584213257, "learning_rate": 0.0001, "loss": 0.754, "step": 6080 }, { "epoch": 0.33426489122691655, "grad_norm": 2.1783502101898193, "learning_rate": 0.0001, "loss": 0.8819, "step": 6100 }, { "epoch": 0.3353608416899556, "grad_norm": 2.096208333969116, "learning_rate": 0.0001, "loss": 0.8702, "step": 6120 }, { "epoch": 0.3364567921529947, "grad_norm": 2.005629062652588, "learning_rate": 0.0001, "loss": 0.8827, "step": 6140 }, { "epoch": 0.33755274261603374, "grad_norm": 2.1545634269714355, "learning_rate": 0.0001, "loss": 0.8496, "step": 6160 }, { "epoch": 
0.3386486930790728, "grad_norm": 1.8190851211547852, "learning_rate": 0.0001, "loss": 0.7622, "step": 6180 }, { "epoch": 0.3397446435421119, "grad_norm": 1.9555623531341553, "learning_rate": 0.0001, "loss": 0.8338, "step": 6200 }, { "epoch": 0.340840594005151, "grad_norm": 1.8530341386795044, "learning_rate": 0.0001, "loss": 0.8017, "step": 6220 }, { "epoch": 0.34193654446819005, "grad_norm": 1.8724114894866943, "learning_rate": 0.0001, "loss": 0.848, "step": 6240 }, { "epoch": 0.3430324949312291, "grad_norm": 1.8598796129226685, "learning_rate": 0.0001, "loss": 0.8074, "step": 6260 }, { "epoch": 0.3441284453942682, "grad_norm": 2.1442923545837402, "learning_rate": 0.0001, "loss": 0.8473, "step": 6280 }, { "epoch": 0.34522439585730724, "grad_norm": 2.3083174228668213, "learning_rate": 0.0001, "loss": 0.9016, "step": 6300 }, { "epoch": 0.3463203463203463, "grad_norm": 1.8194735050201416, "learning_rate": 0.0001, "loss": 0.8267, "step": 6320 }, { "epoch": 0.34741629678338537, "grad_norm": 2.063523054122925, "learning_rate": 0.0001, "loss": 0.7841, "step": 6340 }, { "epoch": 0.3485122472464245, "grad_norm": 2.17594051361084, "learning_rate": 0.0001, "loss": 0.8318, "step": 6360 }, { "epoch": 0.34960819770946355, "grad_norm": 1.665189504623413, "learning_rate": 0.0001, "loss": 0.7983, "step": 6380 }, { "epoch": 0.3507041481725026, "grad_norm": 2.2596445083618164, "learning_rate": 0.0001, "loss": 0.8421, "step": 6400 }, { "epoch": 0.3518000986355417, "grad_norm": 1.7096545696258545, "learning_rate": 0.0001, "loss": 0.889, "step": 6420 }, { "epoch": 0.35289604909858074, "grad_norm": 1.7475535869598389, "learning_rate": 0.0001, "loss": 0.8006, "step": 6440 }, { "epoch": 0.3539919995616198, "grad_norm": 1.8176007270812988, "learning_rate": 0.0001, "loss": 0.8632, "step": 6460 }, { "epoch": 0.35508795002465887, "grad_norm": 2.6806535720825195, "learning_rate": 0.0001, "loss": 0.8427, "step": 6480 }, { "epoch": 0.35618390048769794, "grad_norm": 2.094172477722168, "learning_rate": 0.0001, "loss": 0.7812, "step": 6500 }, { "epoch": 0.357279850950737, "grad_norm": 1.8341765403747559, "learning_rate": 0.0001, "loss": 0.8051, "step": 6520 }, { "epoch": 0.3583758014137761, "grad_norm": 2.2341349124908447, "learning_rate": 0.0001, "loss": 0.8001, "step": 6540 }, { "epoch": 0.3594717518768152, "grad_norm": 2.1017801761627197, "learning_rate": 0.0001, "loss": 0.8142, "step": 6560 }, { "epoch": 0.36056770233985425, "grad_norm": 1.9903994798660278, "learning_rate": 0.0001, "loss": 0.8117, "step": 6580 }, { "epoch": 0.3616636528028933, "grad_norm": 2.273465394973755, "learning_rate": 0.0001, "loss": 0.8864, "step": 6600 }, { "epoch": 0.3627596032659324, "grad_norm": 2.0767428874969482, "learning_rate": 0.0001, "loss": 0.7687, "step": 6620 }, { "epoch": 0.36385555372897144, "grad_norm": 2.559774398803711, "learning_rate": 0.0001, "loss": 0.8181, "step": 6640 }, { "epoch": 0.3649515041920105, "grad_norm": 2.1393582820892334, "learning_rate": 0.0001, "loss": 0.7936, "step": 6660 }, { "epoch": 0.36604745465504956, "grad_norm": 2.06675386428833, "learning_rate": 0.0001, "loss": 0.8263, "step": 6680 }, { "epoch": 0.3671434051180887, "grad_norm": 1.7674784660339355, "learning_rate": 0.0001, "loss": 0.7818, "step": 6700 }, { "epoch": 0.36823935558112775, "grad_norm": 1.765442132949829, "learning_rate": 0.0001, "loss": 0.8335, "step": 6720 }, { "epoch": 0.3693353060441668, "grad_norm": 2.044288158416748, "learning_rate": 0.0001, "loss": 0.8742, "step": 6740 }, { "epoch": 0.3704312565072059, "grad_norm": 
1.9821726083755493, "learning_rate": 0.0001, "loss": 0.928, "step": 6760 }, { "epoch": 0.37152720697024494, "grad_norm": 2.0798370838165283, "learning_rate": 0.0001, "loss": 0.7627, "step": 6780 }, { "epoch": 0.372623157433284, "grad_norm": 1.6817582845687866, "learning_rate": 0.0001, "loss": 0.7985, "step": 6800 }, { "epoch": 0.37371910789632307, "grad_norm": 1.872247576713562, "learning_rate": 0.0001, "loss": 0.8102, "step": 6820 }, { "epoch": 0.37481505835936213, "grad_norm": 1.7761516571044922, "learning_rate": 0.0001, "loss": 0.8435, "step": 6840 }, { "epoch": 0.37591100882240125, "grad_norm": 1.739585518836975, "learning_rate": 0.0001, "loss": 0.8706, "step": 6860 }, { "epoch": 0.3770069592854403, "grad_norm": 2.0503687858581543, "learning_rate": 0.0001, "loss": 0.8354, "step": 6880 }, { "epoch": 0.3781029097484794, "grad_norm": 2.283393621444702, "learning_rate": 0.0001, "loss": 0.7476, "step": 6900 }, { "epoch": 0.37919886021151844, "grad_norm": 1.801018238067627, "learning_rate": 0.0001, "loss": 0.7817, "step": 6920 }, { "epoch": 0.3802948106745575, "grad_norm": 2.5343267917633057, "learning_rate": 0.0001, "loss": 0.7628, "step": 6940 }, { "epoch": 0.38139076113759657, "grad_norm": 2.010507822036743, "learning_rate": 0.0001, "loss": 0.7931, "step": 6960 }, { "epoch": 0.38248671160063563, "grad_norm": 1.7228796482086182, "learning_rate": 0.0001, "loss": 0.7517, "step": 6980 }, { "epoch": 0.3835826620636747, "grad_norm": 1.967822551727295, "learning_rate": 0.0001, "loss": 0.804, "step": 7000 }, { "epoch": 0.3835826620636747, "eval_loss": 0.7978512644767761, "eval_runtime": 30977.7517, "eval_samples_per_second": 2.095, "eval_steps_per_second": 0.065, "eval_wer": 61.261910549759826, "step": 7000 }, { "epoch": 0.3846786125267138, "grad_norm": 1.9999229907989502, "learning_rate": 0.0001, "loss": 0.7634, "step": 7020 }, { "epoch": 0.3857745629897529, "grad_norm": 1.956128716468811, "learning_rate": 0.0001, "loss": 0.8102, "step": 7040 }, { "epoch": 0.38687051345279194, "grad_norm": 2.0134966373443604, "learning_rate": 0.0001, "loss": 0.7957, "step": 7060 }, { "epoch": 0.387966463915831, "grad_norm": 2.0373167991638184, "learning_rate": 0.0001, "loss": 0.8251, "step": 7080 }, { "epoch": 0.38906241437887007, "grad_norm": 1.7772964239120483, "learning_rate": 0.0001, "loss": 0.8128, "step": 7100 }, { "epoch": 0.39015836484190913, "grad_norm": 1.7618379592895508, "learning_rate": 0.0001, "loss": 0.8345, "step": 7120 }, { "epoch": 0.3912543153049482, "grad_norm": 2.181671380996704, "learning_rate": 0.0001, "loss": 0.8345, "step": 7140 }, { "epoch": 0.39235026576798726, "grad_norm": 1.8794726133346558, "learning_rate": 0.0001, "loss": 0.7615, "step": 7160 }, { "epoch": 0.3934462162310264, "grad_norm": 1.9297798871994019, "learning_rate": 0.0001, "loss": 0.7618, "step": 7180 }, { "epoch": 0.39454216669406544, "grad_norm": 1.9441471099853516, "learning_rate": 0.0001, "loss": 0.859, "step": 7200 }, { "epoch": 0.3956381171571045, "grad_norm": 2.2561404705047607, "learning_rate": 0.0001, "loss": 0.7877, "step": 7220 }, { "epoch": 0.39673406762014357, "grad_norm": 1.8441416025161743, "learning_rate": 0.0001, "loss": 0.7734, "step": 7240 }, { "epoch": 0.39783001808318263, "grad_norm": 1.686120867729187, "learning_rate": 0.0001, "loss": 0.7066, "step": 7260 }, { "epoch": 0.3989259685462217, "grad_norm": 1.9456263780593872, "learning_rate": 0.0001, "loss": 0.7469, "step": 7280 }, { "epoch": 0.40002191900926076, "grad_norm": 1.9112725257873535, "learning_rate": 0.0001, "loss": 0.7607, "step": 7300 }, { 
"epoch": 0.4011178694722998, "grad_norm": 2.5668513774871826, "learning_rate": 0.0001, "loss": 0.7859, "step": 7320 }, { "epoch": 0.40221381993533895, "grad_norm": 1.9502942562103271, "learning_rate": 0.0001, "loss": 0.7607, "step": 7340 }, { "epoch": 0.403309770398378, "grad_norm": 1.6973525285720825, "learning_rate": 0.0001, "loss": 0.8313, "step": 7360 }, { "epoch": 0.4044057208614171, "grad_norm": 2.3962297439575195, "learning_rate": 0.0001, "loss": 0.7806, "step": 7380 }, { "epoch": 0.40550167132445614, "grad_norm": 1.887536883354187, "learning_rate": 0.0001, "loss": 0.7524, "step": 7400 }, { "epoch": 0.4065976217874952, "grad_norm": 1.999687910079956, "learning_rate": 0.0001, "loss": 0.7349, "step": 7420 }, { "epoch": 0.40769357225053426, "grad_norm": 1.7444576025009155, "learning_rate": 0.0001, "loss": 0.8156, "step": 7440 }, { "epoch": 0.40878952271357333, "grad_norm": 1.7175132036209106, "learning_rate": 0.0001, "loss": 0.7419, "step": 7460 }, { "epoch": 0.4098854731766124, "grad_norm": 2.23638653755188, "learning_rate": 0.0001, "loss": 0.666, "step": 7480 }, { "epoch": 0.4109814236396515, "grad_norm": 2.024102210998535, "learning_rate": 0.0001, "loss": 0.7541, "step": 7500 }, { "epoch": 0.4120773741026906, "grad_norm": 2.042541265487671, "learning_rate": 0.0001, "loss": 0.7915, "step": 7520 }, { "epoch": 0.41317332456572964, "grad_norm": 1.9140897989273071, "learning_rate": 0.0001, "loss": 0.8712, "step": 7540 }, { "epoch": 0.4142692750287687, "grad_norm": 1.8435416221618652, "learning_rate": 0.0001, "loss": 0.8241, "step": 7560 }, { "epoch": 0.41536522549180777, "grad_norm": 2.027944803237915, "learning_rate": 0.0001, "loss": 0.9422, "step": 7580 }, { "epoch": 0.41646117595484683, "grad_norm": 2.07381534576416, "learning_rate": 0.0001, "loss": 0.812, "step": 7600 }, { "epoch": 0.4175571264178859, "grad_norm": 1.9762136936187744, "learning_rate": 0.0001, "loss": 0.7852, "step": 7620 }, { "epoch": 0.41865307688092496, "grad_norm": 1.8222426176071167, "learning_rate": 0.0001, "loss": 0.752, "step": 7640 }, { "epoch": 0.4197490273439641, "grad_norm": 2.0519089698791504, "learning_rate": 0.0001, "loss": 0.8031, "step": 7660 }, { "epoch": 0.42084497780700314, "grad_norm": 1.8777110576629639, "learning_rate": 0.0001, "loss": 0.8173, "step": 7680 }, { "epoch": 0.4219409282700422, "grad_norm": 2.323411703109741, "learning_rate": 0.0001, "loss": 0.8479, "step": 7700 }, { "epoch": 0.42303687873308127, "grad_norm": 1.6403400897979736, "learning_rate": 0.0001, "loss": 0.7567, "step": 7720 }, { "epoch": 0.42413282919612033, "grad_norm": 1.6627925634384155, "learning_rate": 0.0001, "loss": 0.7734, "step": 7740 }, { "epoch": 0.4252287796591594, "grad_norm": 1.8771709203720093, "learning_rate": 0.0001, "loss": 0.7652, "step": 7760 }, { "epoch": 0.42632473012219846, "grad_norm": 1.9806597232818604, "learning_rate": 0.0001, "loss": 0.7699, "step": 7780 }, { "epoch": 0.4274206805852375, "grad_norm": 2.1376988887786865, "learning_rate": 0.0001, "loss": 0.7825, "step": 7800 }, { "epoch": 0.42851663104827664, "grad_norm": 1.5566449165344238, "learning_rate": 0.0001, "loss": 0.704, "step": 7820 }, { "epoch": 0.4296125815113157, "grad_norm": 2.1835947036743164, "learning_rate": 0.0001, "loss": 0.8101, "step": 7840 }, { "epoch": 0.43070853197435477, "grad_norm": 2.055119037628174, "learning_rate": 0.0001, "loss": 0.703, "step": 7860 }, { "epoch": 0.43180448243739383, "grad_norm": 1.9324967861175537, "learning_rate": 0.0001, "loss": 0.81, "step": 7880 }, { "epoch": 0.4329004329004329, "grad_norm": 
2.1087846755981445, "learning_rate": 0.0001, "loss": 0.7676, "step": 7900 }, { "epoch": 0.43399638336347196, "grad_norm": 1.8521897792816162, "learning_rate": 0.0001, "loss": 0.7546, "step": 7920 }, { "epoch": 0.435092333826511, "grad_norm": 2.145947217941284, "learning_rate": 0.0001, "loss": 0.7992, "step": 7940 }, { "epoch": 0.4361882842895501, "grad_norm": 1.7739931344985962, "learning_rate": 0.0001, "loss": 0.7133, "step": 7960 }, { "epoch": 0.4372842347525892, "grad_norm": 1.6032921075820923, "learning_rate": 0.0001, "loss": 0.8207, "step": 7980 }, { "epoch": 0.43838018521562827, "grad_norm": 2.1895668506622314, "learning_rate": 0.0001, "loss": 0.7638, "step": 8000 }, { "epoch": 0.43838018521562827, "eval_loss": 0.770411491394043, "eval_runtime": 30675.7059, "eval_samples_per_second": 2.115, "eval_steps_per_second": 0.066, "eval_wer": 43.10069742838263, "step": 8000 }, { "epoch": 0.43947613567866733, "grad_norm": 1.9759962558746338, "learning_rate": 0.0001, "loss": 0.7792, "step": 8020 }, { "epoch": 0.4405720861417064, "grad_norm": 1.845012903213501, "learning_rate": 0.0001, "loss": 0.847, "step": 8040 }, { "epoch": 0.44166803660474546, "grad_norm": 1.9666188955307007, "learning_rate": 0.0001, "loss": 0.767, "step": 8060 }, { "epoch": 0.4427639870677845, "grad_norm": 2.1448235511779785, "learning_rate": 0.0001, "loss": 0.7924, "step": 8080 }, { "epoch": 0.4438599375308236, "grad_norm": 1.9017919301986694, "learning_rate": 0.0001, "loss": 0.7239, "step": 8100 }, { "epoch": 0.44495588799386265, "grad_norm": 1.8005828857421875, "learning_rate": 0.0001, "loss": 0.7202, "step": 8120 }, { "epoch": 0.4460518384569018, "grad_norm": 1.7341022491455078, "learning_rate": 0.0001, "loss": 0.7045, "step": 8140 }, { "epoch": 0.44714778891994084, "grad_norm": 2.094618320465088, "learning_rate": 0.0001, "loss": 0.8067, "step": 8160 }, { "epoch": 0.4482437393829799, "grad_norm": 2.0414187908172607, "learning_rate": 0.0001, "loss": 0.6888, "step": 8180 }, { "epoch": 0.44933968984601896, "grad_norm": 1.8842118978500366, "learning_rate": 0.0001, "loss": 0.7125, "step": 8200 }, { "epoch": 0.450435640309058, "grad_norm": 1.9878696203231812, "learning_rate": 0.0001, "loss": 0.723, "step": 8220 }, { "epoch": 0.4515315907720971, "grad_norm": 1.94351065158844, "learning_rate": 0.0001, "loss": 0.727, "step": 8240 }, { "epoch": 0.45262754123513615, "grad_norm": 1.900718331336975, "learning_rate": 0.0001, "loss": 0.7306, "step": 8260 }, { "epoch": 0.4537234916981752, "grad_norm": 2.5974204540252686, "learning_rate": 0.0001, "loss": 0.7968, "step": 8280 }, { "epoch": 0.45481944216121434, "grad_norm": 1.9214075803756714, "learning_rate": 0.0001, "loss": 0.7767, "step": 8300 }, { "epoch": 0.4559153926242534, "grad_norm": 2.6079931259155273, "learning_rate": 0.0001, "loss": 0.7787, "step": 8320 }, { "epoch": 0.45701134308729247, "grad_norm": 1.8398691415786743, "learning_rate": 0.0001, "loss": 0.7941, "step": 8340 }, { "epoch": 0.45810729355033153, "grad_norm": 1.740376591682434, "learning_rate": 0.0001, "loss": 0.7714, "step": 8360 }, { "epoch": 0.4592032440133706, "grad_norm": 2.109416961669922, "learning_rate": 0.0001, "loss": 0.8015, "step": 8380 }, { "epoch": 0.46029919447640966, "grad_norm": 1.9565001726150513, "learning_rate": 0.0001, "loss": 0.7473, "step": 8400 }, { "epoch": 0.4613951449394487, "grad_norm": 1.88534414768219, "learning_rate": 0.0001, "loss": 0.7828, "step": 8420 }, { "epoch": 0.4624910954024878, "grad_norm": 1.7713934183120728, "learning_rate": 0.0001, "loss": 0.7289, "step": 8440 }, { 
"epoch": 0.4635870458655269, "grad_norm": 1.9173312187194824, "learning_rate": 0.0001, "loss": 0.7478, "step": 8460 }, { "epoch": 0.46468299632856597, "grad_norm": 1.6866717338562012, "learning_rate": 0.0001, "loss": 0.8235, "step": 8480 }, { "epoch": 0.46577894679160503, "grad_norm": 1.6713476181030273, "learning_rate": 0.0001, "loss": 0.7216, "step": 8500 }, { "epoch": 0.4668748972546441, "grad_norm": 1.9601606130599976, "learning_rate": 0.0001, "loss": 0.6994, "step": 8520 }, { "epoch": 0.46797084771768316, "grad_norm": 1.7472949028015137, "learning_rate": 0.0001, "loss": 0.7694, "step": 8540 }, { "epoch": 0.4690667981807222, "grad_norm": 1.8540037870407104, "learning_rate": 0.0001, "loss": 0.7253, "step": 8560 }, { "epoch": 0.4701627486437613, "grad_norm": 2.0671746730804443, "learning_rate": 0.0001, "loss": 0.7514, "step": 8580 }, { "epoch": 0.47125869910680035, "grad_norm": 1.900918960571289, "learning_rate": 0.0001, "loss": 0.7871, "step": 8600 }, { "epoch": 0.47235464956983947, "grad_norm": 1.7465757131576538, "learning_rate": 0.0001, "loss": 0.8009, "step": 8620 }, { "epoch": 0.47345060003287853, "grad_norm": 2.3400652408599854, "learning_rate": 0.0001, "loss": 0.7741, "step": 8640 }, { "epoch": 0.4745465504959176, "grad_norm": 2.1384716033935547, "learning_rate": 0.0001, "loss": 0.7577, "step": 8660 }, { "epoch": 0.47564250095895666, "grad_norm": 2.7113006114959717, "learning_rate": 0.0001, "loss": 0.6968, "step": 8680 }, { "epoch": 0.4767384514219957, "grad_norm": 1.6666728258132935, "learning_rate": 0.0001, "loss": 0.7307, "step": 8700 }, { "epoch": 0.4778344018850348, "grad_norm": 1.8394851684570312, "learning_rate": 0.0001, "loss": 0.7353, "step": 8720 }, { "epoch": 0.47893035234807385, "grad_norm": 2.0569512844085693, "learning_rate": 0.0001, "loss": 0.814, "step": 8740 }, { "epoch": 0.4800263028111129, "grad_norm": 1.6457910537719727, "learning_rate": 0.0001, "loss": 0.7521, "step": 8760 }, { "epoch": 0.48112225327415203, "grad_norm": 2.010711908340454, "learning_rate": 0.0001, "loss": 0.7101, "step": 8780 }, { "epoch": 0.4822182037371911, "grad_norm": 2.422718048095703, "learning_rate": 0.0001, "loss": 0.7867, "step": 8800 }, { "epoch": 0.48331415420023016, "grad_norm": 1.5170652866363525, "learning_rate": 0.0001, "loss": 0.8042, "step": 8820 }, { "epoch": 0.4844101046632692, "grad_norm": 1.9751352071762085, "learning_rate": 0.0001, "loss": 0.7408, "step": 8840 }, { "epoch": 0.4855060551263083, "grad_norm": 1.8477592468261719, "learning_rate": 0.0001, "loss": 0.7675, "step": 8860 }, { "epoch": 0.48660200558934735, "grad_norm": 1.9999114274978638, "learning_rate": 0.0001, "loss": 0.745, "step": 8880 }, { "epoch": 0.4876979560523864, "grad_norm": 1.7456104755401611, "learning_rate": 0.0001, "loss": 0.7713, "step": 8900 }, { "epoch": 0.4887939065154255, "grad_norm": 1.9687026739120483, "learning_rate": 0.0001, "loss": 0.7349, "step": 8920 }, { "epoch": 0.4898898569784646, "grad_norm": 1.8585296869277954, "learning_rate": 0.0001, "loss": 0.7369, "step": 8940 }, { "epoch": 0.49098580744150366, "grad_norm": 2.7875003814697266, "learning_rate": 0.0001, "loss": 0.7002, "step": 8960 }, { "epoch": 0.4920817579045427, "grad_norm": 2.01347017288208, "learning_rate": 0.0001, "loss": 0.7598, "step": 8980 }, { "epoch": 0.4931777083675818, "grad_norm": 1.8863261938095093, "learning_rate": 0.0001, "loss": 0.7617, "step": 9000 }, { "epoch": 0.4931777083675818, "eval_loss": 0.7443549036979675, "eval_runtime": 31204.044, "eval_samples_per_second": 2.079, "eval_steps_per_second": 0.065, 
"eval_wer": 44.168238762227254, "step": 9000 }, { "epoch": 0.49427365883062085, "grad_norm": 2.0402464866638184, "learning_rate": 0.0001, "loss": 0.8118, "step": 9020 }, { "epoch": 0.4953696092936599, "grad_norm": 2.072380304336548, "learning_rate": 0.0001, "loss": 0.6703, "step": 9040 }, { "epoch": 0.496465559756699, "grad_norm": 1.9627012014389038, "learning_rate": 0.0001, "loss": 0.7547, "step": 9060 }, { "epoch": 0.49756151021973805, "grad_norm": 1.904860496520996, "learning_rate": 0.0001, "loss": 0.8141, "step": 9080 }, { "epoch": 0.49865746068277716, "grad_norm": 2.153672933578491, "learning_rate": 0.0001, "loss": 0.8167, "step": 9100 }, { "epoch": 0.49975341114581623, "grad_norm": 2.0599303245544434, "learning_rate": 0.0001, "loss": 0.8632, "step": 9120 }, { "epoch": 0.5008493616088553, "grad_norm": 1.9562146663665771, "learning_rate": 0.0001, "loss": 0.7477, "step": 9140 }, { "epoch": 0.5019453120718943, "grad_norm": 2.086508274078369, "learning_rate": 0.0001, "loss": 0.7973, "step": 9160 }, { "epoch": 0.5030412625349334, "grad_norm": 1.9192993640899658, "learning_rate": 0.0001, "loss": 0.8359, "step": 9180 }, { "epoch": 0.5041372129979725, "grad_norm": 1.9085866212844849, "learning_rate": 0.0001, "loss": 0.7942, "step": 9200 }, { "epoch": 0.5052331634610115, "grad_norm": 1.901637315750122, "learning_rate": 0.0001, "loss": 0.7608, "step": 9220 }, { "epoch": 0.5063291139240507, "grad_norm": 2.145914316177368, "learning_rate": 0.0001, "loss": 0.6919, "step": 9240 }, { "epoch": 0.5074250643870897, "grad_norm": 1.9005271196365356, "learning_rate": 0.0001, "loss": 0.7506, "step": 9260 }, { "epoch": 0.5085210148501288, "grad_norm": 1.6468952894210815, "learning_rate": 0.0001, "loss": 0.7843, "step": 9280 }, { "epoch": 0.5096169653131678, "grad_norm": 1.7703279256820679, "learning_rate": 0.0001, "loss": 0.7192, "step": 9300 }, { "epoch": 0.5107129157762069, "grad_norm": 2.0094175338745117, "learning_rate": 0.0001, "loss": 0.847, "step": 9320 }, { "epoch": 0.511808866239246, "grad_norm": 2.0970561504364014, "learning_rate": 0.0001, "loss": 0.7679, "step": 9340 }, { "epoch": 0.512904816702285, "grad_norm": 1.757664680480957, "learning_rate": 0.0001, "loss": 0.7391, "step": 9360 }, { "epoch": 0.5140007671653242, "grad_norm": 1.8297368288040161, "learning_rate": 0.0001, "loss": 0.8382, "step": 9380 }, { "epoch": 0.5150967176283632, "grad_norm": 1.9832725524902344, "learning_rate": 0.0001, "loss": 0.7226, "step": 9400 }, { "epoch": 0.5161926680914023, "grad_norm": 1.7083086967468262, "learning_rate": 0.0001, "loss": 0.7798, "step": 9420 }, { "epoch": 0.5172886185544413, "grad_norm": 1.7105575799942017, "learning_rate": 0.0001, "loss": 0.7118, "step": 9440 }, { "epoch": 0.5183845690174804, "grad_norm": 1.547608494758606, "learning_rate": 0.0001, "loss": 0.7259, "step": 9460 }, { "epoch": 0.5194805194805194, "grad_norm": 2.0215799808502197, "learning_rate": 0.0001, "loss": 0.7648, "step": 9480 }, { "epoch": 0.5205764699435586, "grad_norm": 1.5999863147735596, "learning_rate": 0.0001, "loss": 0.7863, "step": 9500 }, { "epoch": 0.5216724204065977, "grad_norm": 2.0813591480255127, "learning_rate": 0.0001, "loss": 0.758, "step": 9520 }, { "epoch": 0.5227683708696367, "grad_norm": 1.6513686180114746, "learning_rate": 0.0001, "loss": 0.7735, "step": 9540 }, { "epoch": 0.5238643213326758, "grad_norm": 1.49434232711792, "learning_rate": 0.0001, "loss": 0.6547, "step": 9560 }, { "epoch": 0.5249602717957148, "grad_norm": 1.8316184282302856, "learning_rate": 0.0001, "loss": 0.7428, "step": 9580 }, { 
"epoch": 0.5260562222587539, "grad_norm": 2.0041682720184326, "learning_rate": 0.0001, "loss": 0.7058, "step": 9600 }, { "epoch": 0.5271521727217929, "grad_norm": 1.9916651248931885, "learning_rate": 0.0001, "loss": 0.7049, "step": 9620 }, { "epoch": 0.528248123184832, "grad_norm": 1.8289718627929688, "learning_rate": 0.0001, "loss": 0.7179, "step": 9640 }, { "epoch": 0.5293440736478712, "grad_norm": 1.7447452545166016, "learning_rate": 0.0001, "loss": 0.7432, "step": 9660 }, { "epoch": 0.5304400241109102, "grad_norm": 2.375234365463257, "learning_rate": 0.0001, "loss": 0.6676, "step": 9680 }, { "epoch": 0.5315359745739493, "grad_norm": 1.683435320854187, "learning_rate": 0.0001, "loss": 0.7269, "step": 9700 }, { "epoch": 0.5326319250369883, "grad_norm": 1.6535717248916626, "learning_rate": 0.0001, "loss": 0.7315, "step": 9720 }, { "epoch": 0.5337278755000274, "grad_norm": 1.5276830196380615, "learning_rate": 0.0001, "loss": 0.7382, "step": 9740 }, { "epoch": 0.5348238259630664, "grad_norm": 1.8443965911865234, "learning_rate": 0.0001, "loss": 0.7471, "step": 9760 }, { "epoch": 0.5359197764261056, "grad_norm": 2.0346148014068604, "learning_rate": 0.0001, "loss": 0.7268, "step": 9780 }, { "epoch": 0.5370157268891446, "grad_norm": 1.750613808631897, "learning_rate": 0.0001, "loss": 0.8444, "step": 9800 }, { "epoch": 0.5381116773521837, "grad_norm": 1.9546024799346924, "learning_rate": 0.0001, "loss": 0.6968, "step": 9820 }, { "epoch": 0.5392076278152228, "grad_norm": 1.6618010997772217, "learning_rate": 0.0001, "loss": 0.7222, "step": 9840 }, { "epoch": 0.5403035782782618, "grad_norm": 1.6404950618743896, "learning_rate": 0.0001, "loss": 0.6896, "step": 9860 }, { "epoch": 0.5413995287413009, "grad_norm": 1.7741234302520752, "learning_rate": 0.0001, "loss": 0.7412, "step": 9880 }, { "epoch": 0.5424954792043399, "grad_norm": 1.8278882503509521, "learning_rate": 0.0001, "loss": 0.7385, "step": 9900 }, { "epoch": 0.5435914296673791, "grad_norm": 1.6102566719055176, "learning_rate": 0.0001, "loss": 0.7461, "step": 9920 }, { "epoch": 0.5446873801304181, "grad_norm": 1.7899205684661865, "learning_rate": 0.0001, "loss": 0.6349, "step": 9940 }, { "epoch": 0.5457833305934572, "grad_norm": 1.9663938283920288, "learning_rate": 0.0001, "loss": 0.8028, "step": 9960 }, { "epoch": 0.5468792810564962, "grad_norm": 1.841476559638977, "learning_rate": 0.0001, "loss": 0.7503, "step": 9980 }, { "epoch": 0.5479752315195353, "grad_norm": 1.9106056690216064, "learning_rate": 0.0001, "loss": 0.7097, "step": 10000 }, { "epoch": 0.5479752315195353, "eval_loss": 0.7265371084213257, "eval_runtime": 30536.1813, "eval_samples_per_second": 2.125, "eval_steps_per_second": 0.066, "eval_wer": 42.517110448415295, "step": 10000 }, { "epoch": 0.5490711819825744, "grad_norm": 1.680649995803833, "learning_rate": 0.0001, "loss": 0.6445, "step": 10020 }, { "epoch": 0.5501671324456134, "grad_norm": 2.079050064086914, "learning_rate": 0.0001, "loss": 0.7648, "step": 10040 }, { "epoch": 0.5512630829086526, "grad_norm": 1.4419294595718384, "learning_rate": 0.0001, "loss": 0.6953, "step": 10060 }, { "epoch": 0.5523590333716916, "grad_norm": 1.9906927347183228, "learning_rate": 0.0001, "loss": 0.749, "step": 10080 }, { "epoch": 0.5534549838347307, "grad_norm": 1.7384852170944214, "learning_rate": 0.0001, "loss": 0.745, "step": 10100 }, { "epoch": 0.5545509342977697, "grad_norm": 1.7342479228973389, "learning_rate": 0.0001, "loss": 0.7687, "step": 10120 }, { "epoch": 0.5556468847608088, "grad_norm": 1.887969970703125, "learning_rate": 
0.0001, "loss": 0.7662, "step": 10140 }, { "epoch": 0.5567428352238479, "grad_norm": 1.6345020532608032, "learning_rate": 0.0001, "loss": 0.7843, "step": 10160 }, { "epoch": 0.5578387856868869, "grad_norm": 1.5596251487731934, "learning_rate": 0.0001, "loss": 0.6983, "step": 10180 }, { "epoch": 0.5589347361499261, "grad_norm": 1.6423192024230957, "learning_rate": 0.0001, "loss": 0.6622, "step": 10200 }, { "epoch": 0.5600306866129651, "grad_norm": 1.7268792390823364, "learning_rate": 0.0001, "loss": 0.8409, "step": 10220 }, { "epoch": 0.5611266370760042, "grad_norm": 1.6870604753494263, "learning_rate": 0.0001, "loss": 0.7801, "step": 10240 }, { "epoch": 0.5622225875390432, "grad_norm": 1.5945113897323608, "learning_rate": 0.0001, "loss": 0.6695, "step": 10260 }, { "epoch": 0.5633185380020823, "grad_norm": 1.7995914220809937, "learning_rate": 0.0001, "loss": 0.7088, "step": 10280 }, { "epoch": 0.5644144884651213, "grad_norm": 1.8924362659454346, "learning_rate": 0.0001, "loss": 0.7621, "step": 10300 }, { "epoch": 0.5655104389281604, "grad_norm": 1.5099490880966187, "learning_rate": 0.0001, "loss": 0.6923, "step": 10320 }, { "epoch": 0.5666063893911996, "grad_norm": 1.481195092201233, "learning_rate": 0.0001, "loss": 0.6801, "step": 10340 }, { "epoch": 0.5677023398542386, "grad_norm": 1.9247808456420898, "learning_rate": 0.0001, "loss": 0.7247, "step": 10360 }, { "epoch": 0.5687982903172777, "grad_norm": 1.721666693687439, "learning_rate": 0.0001, "loss": 0.85, "step": 10380 }, { "epoch": 0.5698942407803167, "grad_norm": 1.981312870979309, "learning_rate": 0.0001, "loss": 0.6894, "step": 10400 }, { "epoch": 0.5709901912433558, "grad_norm": 1.825363039970398, "learning_rate": 0.0001, "loss": 0.7017, "step": 10420 }, { "epoch": 0.5720861417063948, "grad_norm": 2.021385669708252, "learning_rate": 0.0001, "loss": 0.7996, "step": 10440 }, { "epoch": 0.5731820921694339, "grad_norm": 1.9287372827529907, "learning_rate": 0.0001, "loss": 0.7387, "step": 10460 }, { "epoch": 0.5742780426324731, "grad_norm": 2.0109355449676514, "learning_rate": 0.0001, "loss": 0.7359, "step": 10480 }, { "epoch": 0.5753739930955121, "grad_norm": 1.7715758085250854, "learning_rate": 0.0001, "loss": 0.7126, "step": 10500 }, { "epoch": 0.5764699435585512, "grad_norm": 1.5866303443908691, "learning_rate": 0.0001, "loss": 0.6808, "step": 10520 }, { "epoch": 0.5775658940215902, "grad_norm": 1.3831912279129028, "learning_rate": 0.0001, "loss": 0.7251, "step": 10540 }, { "epoch": 0.5786618444846293, "grad_norm": 1.603388786315918, "learning_rate": 0.0001, "loss": 0.6497, "step": 10560 }, { "epoch": 0.5797577949476683, "grad_norm": 1.8507051467895508, "learning_rate": 0.0001, "loss": 0.7247, "step": 10580 }, { "epoch": 0.5808537454107074, "grad_norm": 2.240337610244751, "learning_rate": 0.0001, "loss": 0.7879, "step": 10600 }, { "epoch": 0.5819496958737465, "grad_norm": 1.858344316482544, "learning_rate": 0.0001, "loss": 0.647, "step": 10620 }, { "epoch": 0.5830456463367856, "grad_norm": 1.840640664100647, "learning_rate": 0.0001, "loss": 0.6924, "step": 10640 }, { "epoch": 0.5841415967998247, "grad_norm": 2.0423295497894287, "learning_rate": 0.0001, "loss": 0.6762, "step": 10660 }, { "epoch": 0.5852375472628637, "grad_norm": 1.7426679134368896, "learning_rate": 0.0001, "loss": 0.7824, "step": 10680 }, { "epoch": 0.5863334977259028, "grad_norm": 1.5974029302597046, "learning_rate": 0.0001, "loss": 0.6874, "step": 10700 }, { "epoch": 0.5874294481889418, "grad_norm": 1.6082810163497925, "learning_rate": 0.0001, "loss": 0.6916, 
"step": 10720 }, { "epoch": 0.588525398651981, "grad_norm": 1.6124242544174194, "learning_rate": 0.0001, "loss": 0.676, "step": 10740 }, { "epoch": 0.58962134911502, "grad_norm": 1.9140983819961548, "learning_rate": 0.0001, "loss": 0.6281, "step": 10760 }, { "epoch": 0.5907172995780591, "grad_norm": 1.708742618560791, "learning_rate": 0.0001, "loss": 0.7245, "step": 10780 }, { "epoch": 0.5918132500410982, "grad_norm": 2.36368989944458, "learning_rate": 0.0001, "loss": 0.6934, "step": 10800 }, { "epoch": 0.5929092005041372, "grad_norm": 1.9806820154190063, "learning_rate": 0.0001, "loss": 0.677, "step": 10820 }, { "epoch": 0.5940051509671763, "grad_norm": 1.893801212310791, "learning_rate": 0.0001, "loss": 0.7629, "step": 10840 }, { "epoch": 0.5951011014302153, "grad_norm": 1.917204737663269, "learning_rate": 0.0001, "loss": 0.7836, "step": 10860 }, { "epoch": 0.5961970518932544, "grad_norm": 1.5599673986434937, "learning_rate": 0.0001, "loss": 0.7132, "step": 10880 }, { "epoch": 0.5972930023562935, "grad_norm": 1.9569772481918335, "learning_rate": 0.0001, "loss": 0.7466, "step": 10900 }, { "epoch": 0.5983889528193326, "grad_norm": 1.8709198236465454, "learning_rate": 0.0001, "loss": 0.6456, "step": 10920 }, { "epoch": 0.5994849032823716, "grad_norm": 1.8249480724334717, "learning_rate": 0.0001, "loss": 0.7159, "step": 10940 }, { "epoch": 0.6005808537454107, "grad_norm": 1.7063779830932617, "learning_rate": 0.0001, "loss": 0.7093, "step": 10960 }, { "epoch": 0.6016768042084498, "grad_norm": 1.681219220161438, "learning_rate": 0.0001, "loss": 0.6995, "step": 10980 }, { "epoch": 0.6027727546714888, "grad_norm": 1.640663504600525, "learning_rate": 0.0001, "loss": 0.7045, "step": 11000 }, { "epoch": 0.6027727546714888, "eval_loss": 0.7112395763397217, "eval_runtime": 30901.8839, "eval_samples_per_second": 2.1, "eval_steps_per_second": 0.066, "eval_wer": 47.33354332649714, "step": 11000 }, { "epoch": 0.603868705134528, "grad_norm": 1.7204805612564087, "learning_rate": 0.0001, "loss": 0.7217, "step": 11020 }, { "epoch": 0.604964655597567, "grad_norm": 1.507012128829956, "learning_rate": 0.0001, "loss": 0.7482, "step": 11040 }, { "epoch": 0.6060606060606061, "grad_norm": 1.7084465026855469, "learning_rate": 0.0001, "loss": 0.645, "step": 11060 }, { "epoch": 0.6071565565236451, "grad_norm": 1.73207426071167, "learning_rate": 0.0001, "loss": 0.7687, "step": 11080 }, { "epoch": 0.6082525069866842, "grad_norm": 2.2146365642547607, "learning_rate": 0.0001, "loss": 0.7771, "step": 11100 }, { "epoch": 0.6093484574497233, "grad_norm": 1.6794184446334839, "learning_rate": 0.0001, "loss": 0.6613, "step": 11120 }, { "epoch": 0.6104444079127623, "grad_norm": 1.8254398107528687, "learning_rate": 0.0001, "loss": 0.6787, "step": 11140 }, { "epoch": 0.6115403583758015, "grad_norm": 1.8397271633148193, "learning_rate": 0.0001, "loss": 0.7119, "step": 11160 }, { "epoch": 0.6126363088388405, "grad_norm": 1.8676248788833618, "learning_rate": 0.0001, "loss": 0.7294, "step": 11180 }, { "epoch": 0.6137322593018796, "grad_norm": 1.4971026182174683, "learning_rate": 0.0001, "loss": 0.6312, "step": 11200 }, { "epoch": 0.6148282097649186, "grad_norm": 1.8128615617752075, "learning_rate": 0.0001, "loss": 0.653, "step": 11220 }, { "epoch": 0.6159241602279577, "grad_norm": 1.426620364189148, "learning_rate": 0.0001, "loss": 0.7087, "step": 11240 }, { "epoch": 0.6170201106909967, "grad_norm": 1.4840887784957886, "learning_rate": 0.0001, "loss": 0.6665, "step": 11260 }, { "epoch": 0.6181160611540358, "grad_norm": 
1.7882121801376343, "learning_rate": 0.0001, "loss": 0.7236, "step": 11280 }, { "epoch": 0.619212011617075, "grad_norm": 1.8195546865463257, "learning_rate": 0.0001, "loss": 0.7998, "step": 11300 }, { "epoch": 0.620307962080114, "grad_norm": 1.9482252597808838, "learning_rate": 0.0001, "loss": 0.751, "step": 11320 }, { "epoch": 0.6214039125431531, "grad_norm": 2.1224782466888428, "learning_rate": 0.0001, "loss": 0.7518, "step": 11340 }, { "epoch": 0.6224998630061921, "grad_norm": 1.811909556388855, "learning_rate": 0.0001, "loss": 0.679, "step": 11360 }, { "epoch": 0.6235958134692312, "grad_norm": 2.0843353271484375, "learning_rate": 0.0001, "loss": 0.7381, "step": 11380 }, { "epoch": 0.6246917639322702, "grad_norm": 1.5517933368682861, "learning_rate": 0.0001, "loss": 0.7318, "step": 11400 }, { "epoch": 0.6257877143953093, "grad_norm": 1.3482716083526611, "learning_rate": 0.0001, "loss": 0.6999, "step": 11420 }, { "epoch": 0.6268836648583485, "grad_norm": 1.548904299736023, "learning_rate": 0.0001, "loss": 0.8772, "step": 11440 }, { "epoch": 0.6279796153213875, "grad_norm": 1.553775429725647, "learning_rate": 0.0001, "loss": 0.6479, "step": 11460 }, { "epoch": 0.6290755657844266, "grad_norm": 2.0762696266174316, "learning_rate": 0.0001, "loss": 0.6457, "step": 11480 }, { "epoch": 0.6301715162474656, "grad_norm": 1.9620105028152466, "learning_rate": 0.0001, "loss": 0.7098, "step": 11500 }, { "epoch": 0.6312674667105047, "grad_norm": 1.382176399230957, "learning_rate": 0.0001, "loss": 0.686, "step": 11520 }, { "epoch": 0.6323634171735437, "grad_norm": 1.9390108585357666, "learning_rate": 0.0001, "loss": 0.6882, "step": 11540 }, { "epoch": 0.6334593676365828, "grad_norm": 1.7750768661499023, "learning_rate": 0.0001, "loss": 0.6942, "step": 11560 }, { "epoch": 0.6345553180996218, "grad_norm": 1.8459293842315674, "learning_rate": 0.0001, "loss": 0.7118, "step": 11580 }, { "epoch": 0.635651268562661, "grad_norm": 1.8210084438323975, "learning_rate": 0.0001, "loss": 0.7166, "step": 11600 }, { "epoch": 0.6367472190257001, "grad_norm": 1.7728508710861206, "learning_rate": 0.0001, "loss": 0.692, "step": 11620 }, { "epoch": 0.6378431694887391, "grad_norm": 1.7886627912521362, "learning_rate": 0.0001, "loss": 0.7185, "step": 11640 }, { "epoch": 0.6389391199517782, "grad_norm": 1.895150065422058, "learning_rate": 0.0001, "loss": 0.7174, "step": 11660 }, { "epoch": 0.6400350704148172, "grad_norm": 1.8740530014038086, "learning_rate": 0.0001, "loss": 0.6893, "step": 11680 }, { "epoch": 0.6411310208778563, "grad_norm": 1.6588834524154663, "learning_rate": 0.0001, "loss": 0.7073, "step": 11700 }, { "epoch": 0.6422269713408953, "grad_norm": 1.9573453664779663, "learning_rate": 0.0001, "loss": 0.671, "step": 11720 }, { "epoch": 0.6433229218039345, "grad_norm": 1.7064661979675293, "learning_rate": 0.0001, "loss": 0.7401, "step": 11740 }, { "epoch": 0.6444188722669736, "grad_norm": 1.8850706815719604, "learning_rate": 0.0001, "loss": 0.7397, "step": 11760 }, { "epoch": 0.6455148227300126, "grad_norm": 1.7744836807250977, "learning_rate": 0.0001, "loss": 0.7204, "step": 11780 }, { "epoch": 0.6466107731930517, "grad_norm": 1.5768756866455078, "learning_rate": 0.0001, "loss": 0.7868, "step": 11800 }, { "epoch": 0.6477067236560907, "grad_norm": 2.0770552158355713, "learning_rate": 0.0001, "loss": 0.7434, "step": 11820 }, { "epoch": 0.6488026741191298, "grad_norm": 2.0797810554504395, "learning_rate": 0.0001, "loss": 0.7342, "step": 11840 }, { "epoch": 0.6498986245821688, "grad_norm": 1.8984261751174927, 
"learning_rate": 0.0001, "loss": 0.6642, "step": 11860 }, { "epoch": 0.650994575045208, "grad_norm": 2.00124192237854, "learning_rate": 0.0001, "loss": 0.749, "step": 11880 }, { "epoch": 0.652090525508247, "grad_norm": 1.5575506687164307, "learning_rate": 0.0001, "loss": 0.8315, "step": 11900 }, { "epoch": 0.6531864759712861, "grad_norm": 2.6183197498321533, "learning_rate": 0.0001, "loss": 0.7533, "step": 11920 }, { "epoch": 0.6542824264343252, "grad_norm": 1.7211464643478394, "learning_rate": 0.0001, "loss": 0.7073, "step": 11940 }, { "epoch": 0.6553783768973642, "grad_norm": 1.9105095863342285, "learning_rate": 0.0001, "loss": 0.6526, "step": 11960 }, { "epoch": 0.6564743273604033, "grad_norm": 1.9578741788864136, "learning_rate": 0.0001, "loss": 0.6746, "step": 11980 }, { "epoch": 0.6575702778234424, "grad_norm": 1.8473331928253174, "learning_rate": 0.0001, "loss": 0.6808, "step": 12000 }, { "epoch": 0.6575702778234424, "eval_loss": 0.6952778100967407, "eval_runtime": 30743.2328, "eval_samples_per_second": 2.111, "eval_steps_per_second": 0.066, "eval_wer": 51.05538683822195, "step": 12000 }, { "epoch": 0.6586662282864815, "grad_norm": 1.7486096620559692, "learning_rate": 0.0001, "loss": 0.7417, "step": 12020 }, { "epoch": 0.6597621787495205, "grad_norm": 1.6540303230285645, "learning_rate": 0.0001, "loss": 0.7118, "step": 12040 }, { "epoch": 0.6608581292125596, "grad_norm": 1.89935302734375, "learning_rate": 0.0001, "loss": 0.6315, "step": 12060 }, { "epoch": 0.6619540796755987, "grad_norm": 1.8266342878341675, "learning_rate": 0.0001, "loss": 0.7434, "step": 12080 }, { "epoch": 0.6630500301386377, "grad_norm": 1.8254984617233276, "learning_rate": 0.0001, "loss": 0.7832, "step": 12100 }, { "epoch": 0.6641459806016768, "grad_norm": 2.0791878700256348, "learning_rate": 0.0001, "loss": 0.6694, "step": 12120 }, { "epoch": 0.6652419310647159, "grad_norm": 1.8277227878570557, "learning_rate": 0.0001, "loss": 0.6686, "step": 12140 }, { "epoch": 0.666337881527755, "grad_norm": 1.697810411453247, "learning_rate": 0.0001, "loss": 0.6895, "step": 12160 }, { "epoch": 0.667433831990794, "grad_norm": 1.6084686517715454, "learning_rate": 0.0001, "loss": 0.7431, "step": 12180 }, { "epoch": 0.6685297824538331, "grad_norm": 1.7437437772750854, "learning_rate": 0.0001, "loss": 0.6851, "step": 12200 }, { "epoch": 0.6696257329168721, "grad_norm": 1.849237322807312, "learning_rate": 0.0001, "loss": 0.6926, "step": 12220 }, { "epoch": 0.6707216833799112, "grad_norm": 1.8398326635360718, "learning_rate": 0.0001, "loss": 0.7282, "step": 12240 }, { "epoch": 0.6718176338429503, "grad_norm": 2.056136131286621, "learning_rate": 0.0001, "loss": 0.76, "step": 12260 }, { "epoch": 0.6729135843059894, "grad_norm": 1.8255378007888794, "learning_rate": 0.0001, "loss": 0.7155, "step": 12280 }, { "epoch": 0.6740095347690285, "grad_norm": 1.6555898189544678, "learning_rate": 0.0001, "loss": 0.6333, "step": 12300 }, { "epoch": 0.6751054852320675, "grad_norm": 1.825000286102295, "learning_rate": 0.0001, "loss": 0.6603, "step": 12320 }, { "epoch": 0.6762014356951066, "grad_norm": 1.5000559091567993, "learning_rate": 0.0001, "loss": 0.6861, "step": 12340 }, { "epoch": 0.6772973861581456, "grad_norm": 1.826874017715454, "learning_rate": 0.0001, "loss": 0.7337, "step": 12360 }, { "epoch": 0.6783933366211847, "grad_norm": 2.042325735092163, "learning_rate": 0.0001, "loss": 0.7566, "step": 12380 }, { "epoch": 0.6794892870842238, "grad_norm": 1.6419124603271484, "learning_rate": 0.0001, "loss": 0.6825, "step": 12400 }, { "epoch": 
0.6805852375472629, "grad_norm": 2.1221911907196045, "learning_rate": 0.0001, "loss": 0.7013, "step": 12420 }, { "epoch": 0.681681188010302, "grad_norm": 1.598191738128662, "learning_rate": 0.0001, "loss": 0.6976, "step": 12440 }, { "epoch": 0.682777138473341, "grad_norm": 1.8890109062194824, "learning_rate": 0.0001, "loss": 0.6792, "step": 12460 }, { "epoch": 0.6838730889363801, "grad_norm": 1.7647831439971924, "learning_rate": 0.0001, "loss": 0.6878, "step": 12480 }, { "epoch": 0.6849690393994191, "grad_norm": 1.358193278312683, "learning_rate": 0.0001, "loss": 0.754, "step": 12500 }, { "epoch": 0.6860649898624582, "grad_norm": 1.9739768505096436, "learning_rate": 0.0001, "loss": 0.6799, "step": 12520 }, { "epoch": 0.6871609403254972, "grad_norm": 1.532867670059204, "learning_rate": 0.0001, "loss": 0.7063, "step": 12540 }, { "epoch": 0.6882568907885364, "grad_norm": 1.3203604221343994, "learning_rate": 0.0001, "loss": 0.6725, "step": 12560 }, { "epoch": 0.6893528412515755, "grad_norm": 1.835530161857605, "learning_rate": 0.0001, "loss": 0.6447, "step": 12580 }, { "epoch": 0.6904487917146145, "grad_norm": 1.4508098363876343, "learning_rate": 0.0001, "loss": 0.7137, "step": 12600 }, { "epoch": 0.6915447421776536, "grad_norm": 1.520942211151123, "learning_rate": 0.0001, "loss": 0.6454, "step": 12620 }, { "epoch": 0.6926406926406926, "grad_norm": 1.655716061592102, "learning_rate": 0.0001, "loss": 0.6659, "step": 12640 }, { "epoch": 0.6937366431037317, "grad_norm": 1.5934149026870728, "learning_rate": 0.0001, "loss": 0.7084, "step": 12660 }, { "epoch": 0.6948325935667707, "grad_norm": 1.961393117904663, "learning_rate": 0.0001, "loss": 0.7553, "step": 12680 }, { "epoch": 0.6959285440298099, "grad_norm": 1.4186025857925415, "learning_rate": 0.0001, "loss": 0.7205, "step": 12700 }, { "epoch": 0.697024494492849, "grad_norm": 1.6756350994110107, "learning_rate": 0.0001, "loss": 0.6166, "step": 12720 }, { "epoch": 0.698120444955888, "grad_norm": 1.8438879251480103, "learning_rate": 0.0001, "loss": 0.6837, "step": 12740 }, { "epoch": 0.6992163954189271, "grad_norm": 1.5732409954071045, "learning_rate": 0.0001, "loss": 0.7777, "step": 12760 }, { "epoch": 0.7003123458819661, "grad_norm": 1.8927737474441528, "learning_rate": 0.0001, "loss": 0.6978, "step": 12780 }, { "epoch": 0.7014082963450052, "grad_norm": 1.4720592498779297, "learning_rate": 0.0001, "loss": 0.7043, "step": 12800 }, { "epoch": 0.7025042468080442, "grad_norm": 1.8671678304672241, "learning_rate": 0.0001, "loss": 0.7245, "step": 12820 }, { "epoch": 0.7036001972710834, "grad_norm": 1.5541017055511475, "learning_rate": 0.0001, "loss": 0.728, "step": 12840 }, { "epoch": 0.7046961477341224, "grad_norm": 1.6623157262802124, "learning_rate": 0.0001, "loss": 0.7211, "step": 12860 }, { "epoch": 0.7057920981971615, "grad_norm": 2.1644530296325684, "learning_rate": 0.0001, "loss": 0.7958, "step": 12880 }, { "epoch": 0.7068880486602006, "grad_norm": 1.4526203870773315, "learning_rate": 0.0001, "loss": 0.6385, "step": 12900 }, { "epoch": 0.7079839991232396, "grad_norm": 1.586296796798706, "learning_rate": 0.0001, "loss": 0.7759, "step": 12920 }, { "epoch": 0.7090799495862787, "grad_norm": 1.8547158241271973, "learning_rate": 0.0001, "loss": 0.7478, "step": 12940 }, { "epoch": 0.7101759000493177, "grad_norm": 1.46295964717865, "learning_rate": 0.0001, "loss": 0.5804, "step": 12960 }, { "epoch": 0.7112718505123569, "grad_norm": 1.8653600215911865, "learning_rate": 0.0001, "loss": 0.6874, "step": 12980 }, { "epoch": 0.7123678009753959, "grad_norm": 
1.8301453590393066, "learning_rate": 0.0001, "loss": 0.6886, "step": 13000 }, { "epoch": 0.7123678009753959, "eval_loss": 0.6790329217910767, "eval_runtime": 30731.9197, "eval_samples_per_second": 2.111, "eval_steps_per_second": 0.066, "eval_wer": 52.619379401724906, "step": 13000 }, { "epoch": 0.713463751438435, "grad_norm": 2.6560330390930176, "learning_rate": 0.0001, "loss": 0.6422, "step": 13020 }, { "epoch": 0.714559701901474, "grad_norm": 1.9559868574142456, "learning_rate": 0.0001, "loss": 0.7321, "step": 13040 }, { "epoch": 0.7156556523645131, "grad_norm": 1.8091590404510498, "learning_rate": 0.0001, "loss": 0.6303, "step": 13060 }, { "epoch": 0.7167516028275522, "grad_norm": 1.428688406944275, "learning_rate": 0.0001, "loss": 0.6489, "step": 13080 }, { "epoch": 0.7178475532905912, "grad_norm": 1.6543529033660889, "learning_rate": 0.0001, "loss": 0.6793, "step": 13100 }, { "epoch": 0.7189435037536304, "grad_norm": 2.012596368789673, "learning_rate": 0.0001, "loss": 0.6502, "step": 13120 }, { "epoch": 0.7200394542166694, "grad_norm": 2.0701732635498047, "learning_rate": 0.0001, "loss": 0.6746, "step": 13140 }, { "epoch": 0.7211354046797085, "grad_norm": 1.5318336486816406, "learning_rate": 0.0001, "loss": 0.6142, "step": 13160 }, { "epoch": 0.7222313551427475, "grad_norm": 1.7924253940582275, "learning_rate": 0.0001, "loss": 0.6536, "step": 13180 }, { "epoch": 0.7233273056057866, "grad_norm": 1.8197805881500244, "learning_rate": 0.0001, "loss": 0.7804, "step": 13200 }, { "epoch": 0.7244232560688257, "grad_norm": 1.5444835424423218, "learning_rate": 0.0001, "loss": 0.6019, "step": 13220 }, { "epoch": 0.7255192065318647, "grad_norm": 1.735474705696106, "learning_rate": 0.0001, "loss": 0.5891, "step": 13240 }, { "epoch": 0.7266151569949039, "grad_norm": 1.9891881942749023, "learning_rate": 0.0001, "loss": 0.6498, "step": 13260 }, { "epoch": 0.7277111074579429, "grad_norm": 1.6917784214019775, "learning_rate": 0.0001, "loss": 0.7044, "step": 13280 }, { "epoch": 0.728807057920982, "grad_norm": 1.661033034324646, "learning_rate": 0.0001, "loss": 0.6185, "step": 13300 }, { "epoch": 0.729903008384021, "grad_norm": 2.326937198638916, "learning_rate": 0.0001, "loss": 0.7274, "step": 13320 }, { "epoch": 0.7309989588470601, "grad_norm": 1.6929740905761719, "learning_rate": 0.0001, "loss": 0.6452, "step": 13340 }, { "epoch": 0.7320949093100991, "grad_norm": 1.7399369478225708, "learning_rate": 0.0001, "loss": 0.6243, "step": 13360 }, { "epoch": 0.7331908597731382, "grad_norm": 1.7071975469589233, "learning_rate": 0.0001, "loss": 0.6578, "step": 13380 }, { "epoch": 0.7342868102361774, "grad_norm": 1.7368084192276, "learning_rate": 0.0001, "loss": 0.6337, "step": 13400 }, { "epoch": 0.7353827606992164, "grad_norm": 2.0973663330078125, "learning_rate": 0.0001, "loss": 0.7101, "step": 13420 }, { "epoch": 0.7364787111622555, "grad_norm": 1.636421799659729, "learning_rate": 0.0001, "loss": 0.6958, "step": 13440 }, { "epoch": 0.7375746616252945, "grad_norm": 1.6134982109069824, "learning_rate": 0.0001, "loss": 0.7336, "step": 13460 }, { "epoch": 0.7386706120883336, "grad_norm": 1.8911906480789185, "learning_rate": 0.0001, "loss": 0.6723, "step": 13480 }, { "epoch": 0.7397665625513726, "grad_norm": 1.8372421264648438, "learning_rate": 0.0001, "loss": 0.6748, "step": 13500 }, { "epoch": 0.7408625130144117, "grad_norm": 1.8735203742980957, "learning_rate": 0.0001, "loss": 0.7481, "step": 13520 }, { "epoch": 0.7419584634774509, "grad_norm": 1.7684818506240845, "learning_rate": 0.0001, "loss": 0.6395, 
"step": 13540 }, { "epoch": 0.7430544139404899, "grad_norm": 1.8018254041671753, "learning_rate": 0.0001, "loss": 0.8523, "step": 13560 }, { "epoch": 0.744150364403529, "grad_norm": 1.7703465223312378, "learning_rate": 0.0001, "loss": 0.6938, "step": 13580 }, { "epoch": 0.745246314866568, "grad_norm": 1.6299625635147095, "learning_rate": 0.0001, "loss": 0.6341, "step": 13600 }, { "epoch": 0.7463422653296071, "grad_norm": 2.0545101165771484, "learning_rate": 0.0001, "loss": 0.7248, "step": 13620 }, { "epoch": 0.7474382157926461, "grad_norm": 1.5883153676986694, "learning_rate": 0.0001, "loss": 0.6917, "step": 13640 }, { "epoch": 0.7485341662556853, "grad_norm": 1.5751030445098877, "learning_rate": 0.0001, "loss": 0.6789, "step": 13660 }, { "epoch": 0.7496301167187243, "grad_norm": 1.53587806224823, "learning_rate": 0.0001, "loss": 0.6462, "step": 13680 }, { "epoch": 0.7507260671817634, "grad_norm": 1.5108363628387451, "learning_rate": 0.0001, "loss": 0.7585, "step": 13700 }, { "epoch": 0.7518220176448025, "grad_norm": 1.5622588396072388, "learning_rate": 0.0001, "loss": 0.6466, "step": 13720 }, { "epoch": 0.7529179681078415, "grad_norm": 1.9326175451278687, "learning_rate": 0.0001, "loss": 0.7086, "step": 13740 }, { "epoch": 0.7540139185708806, "grad_norm": 1.7847191095352173, "learning_rate": 0.0001, "loss": 0.661, "step": 13760 }, { "epoch": 0.7551098690339196, "grad_norm": 2.1520116329193115, "learning_rate": 0.0001, "loss": 0.5861, "step": 13780 }, { "epoch": 0.7562058194969588, "grad_norm": 1.9346301555633545, "learning_rate": 0.0001, "loss": 0.7735, "step": 13800 }, { "epoch": 0.7573017699599978, "grad_norm": 1.5564959049224854, "learning_rate": 0.0001, "loss": 0.6997, "step": 13820 }, { "epoch": 0.7583977204230369, "grad_norm": 1.848569393157959, "learning_rate": 0.0001, "loss": 0.6836, "step": 13840 }, { "epoch": 0.759493670886076, "grad_norm": 1.5552887916564941, "learning_rate": 0.0001, "loss": 0.6014, "step": 13860 }, { "epoch": 0.760589621349115, "grad_norm": 1.5576545000076294, "learning_rate": 0.0001, "loss": 0.7034, "step": 13880 }, { "epoch": 0.7616855718121541, "grad_norm": 1.795949935913086, "learning_rate": 0.0001, "loss": 0.7322, "step": 13900 }, { "epoch": 0.7627815222751931, "grad_norm": 1.498818039894104, "learning_rate": 0.0001, "loss": 0.697, "step": 13920 }, { "epoch": 0.7638774727382323, "grad_norm": 1.7154011726379395, "learning_rate": 0.0001, "loss": 0.7057, "step": 13940 }, { "epoch": 0.7649734232012713, "grad_norm": 1.693199872970581, "learning_rate": 0.0001, "loss": 0.722, "step": 13960 }, { "epoch": 0.7660693736643104, "grad_norm": 1.7617517709732056, "learning_rate": 0.0001, "loss": 0.727, "step": 13980 }, { "epoch": 0.7671653241273494, "grad_norm": 1.7693978548049927, "learning_rate": 0.0001, "loss": 0.6552, "step": 14000 }, { "epoch": 0.7671653241273494, "eval_loss": 0.6688939929008484, "eval_runtime": 30450.1084, "eval_samples_per_second": 2.131, "eval_steps_per_second": 0.067, "eval_wer": 38.925884967114385, "step": 14000 }, { "epoch": 0.7682612745903885, "grad_norm": 1.531043529510498, "learning_rate": 0.0001, "loss": 0.6237, "step": 14020 }, { "epoch": 0.7693572250534276, "grad_norm": 1.7747310400009155, "learning_rate": 0.0001, "loss": 0.6769, "step": 14040 }, { "epoch": 0.7704531755164666, "grad_norm": 1.457766056060791, "learning_rate": 0.0001, "loss": 0.6406, "step": 14060 }, { "epoch": 0.7715491259795058, "grad_norm": 1.478061318397522, "learning_rate": 0.0001, "loss": 0.6797, "step": 14080 }, { "epoch": 0.7726450764425448, "grad_norm": 
1.462485909461975, "learning_rate": 0.0001, "loss": 0.654, "step": 14100 }, { "epoch": 0.7737410269055839, "grad_norm": 2.1201417446136475, "learning_rate": 0.0001, "loss": 0.676, "step": 14120 }, { "epoch": 0.7748369773686229, "grad_norm": 1.6672828197479248, "learning_rate": 0.0001, "loss": 0.6402, "step": 14140 }, { "epoch": 0.775932927831662, "grad_norm": 1.848254680633545, "learning_rate": 0.0001, "loss": 0.6623, "step": 14160 }, { "epoch": 0.7770288782947011, "grad_norm": 1.8868560791015625, "learning_rate": 0.0001, "loss": 0.6598, "step": 14180 }, { "epoch": 0.7781248287577401, "grad_norm": 2.0615594387054443, "learning_rate": 0.0001, "loss": 0.6786, "step": 14200 }, { "epoch": 0.7792207792207793, "grad_norm": 1.6596072912216187, "learning_rate": 0.0001, "loss": 0.6448, "step": 14220 }, { "epoch": 0.7803167296838183, "grad_norm": 2.0829083919525146, "learning_rate": 0.0001, "loss": 0.6558, "step": 14240 }, { "epoch": 0.7814126801468574, "grad_norm": 1.7660095691680908, "learning_rate": 0.0001, "loss": 0.7399, "step": 14260 }, { "epoch": 0.7825086306098964, "grad_norm": 1.7068332433700562, "learning_rate": 0.0001, "loss": 0.637, "step": 14280 }, { "epoch": 0.7836045810729355, "grad_norm": 1.5040172338485718, "learning_rate": 0.0001, "loss": 0.5708, "step": 14300 }, { "epoch": 0.7847005315359745, "grad_norm": 1.7479969263076782, "learning_rate": 0.0001, "loss": 0.7348, "step": 14320 }, { "epoch": 0.7857964819990136, "grad_norm": 1.7886347770690918, "learning_rate": 0.0001, "loss": 0.72, "step": 14340 }, { "epoch": 0.7868924324620528, "grad_norm": 1.6001741886138916, "learning_rate": 0.0001, "loss": 0.6512, "step": 14360 }, { "epoch": 0.7879883829250918, "grad_norm": 1.7489492893218994, "learning_rate": 0.0001, "loss": 0.6714, "step": 14380 }, { "epoch": 0.7890843333881309, "grad_norm": 1.9967806339263916, "learning_rate": 0.0001, "loss": 0.6651, "step": 14400 }, { "epoch": 0.7901802838511699, "grad_norm": 1.6555088758468628, "learning_rate": 0.0001, "loss": 0.6584, "step": 14420 }, { "epoch": 0.791276234314209, "grad_norm": 1.589168667793274, "learning_rate": 0.0001, "loss": 0.6448, "step": 14440 }, { "epoch": 0.792372184777248, "grad_norm": 1.2876309156417847, "learning_rate": 0.0001, "loss": 0.6612, "step": 14460 }, { "epoch": 0.7934681352402871, "grad_norm": 1.6673985719680786, "learning_rate": 0.0001, "loss": 0.5769, "step": 14480 }, { "epoch": 0.7945640857033263, "grad_norm": 1.6478184461593628, "learning_rate": 0.0001, "loss": 0.6457, "step": 14500 }, { "epoch": 0.7956600361663653, "grad_norm": 1.5702099800109863, "learning_rate": 0.0001, "loss": 0.6866, "step": 14520 }, { "epoch": 0.7967559866294044, "grad_norm": 1.850900411605835, "learning_rate": 0.0001, "loss": 0.6475, "step": 14540 }, { "epoch": 0.7978519370924434, "grad_norm": 1.2784024477005005, "learning_rate": 0.0001, "loss": 0.6366, "step": 14560 }, { "epoch": 0.7989478875554825, "grad_norm": 2.2533817291259766, "learning_rate": 0.0001, "loss": 0.6164, "step": 14580 }, { "epoch": 0.8000438380185215, "grad_norm": 1.442713737487793, "learning_rate": 0.0001, "loss": 0.6853, "step": 14600 }, { "epoch": 0.8011397884815606, "grad_norm": 1.594449520111084, "learning_rate": 0.0001, "loss": 0.6535, "step": 14620 }, { "epoch": 0.8022357389445997, "grad_norm": 1.4961411952972412, "learning_rate": 0.0001, "loss": 0.6696, "step": 14640 }, { "epoch": 0.8033316894076388, "grad_norm": 2.1010756492614746, "learning_rate": 0.0001, "loss": 0.6607, "step": 14660 }, { "epoch": 0.8044276398706779, "grad_norm": 2.134493589401245, 
"learning_rate": 0.0001, "loss": 0.6512, "step": 14680 }, { "epoch": 0.8055235903337169, "grad_norm": 1.6435072422027588, "learning_rate": 0.0001, "loss": 0.6094, "step": 14700 }, { "epoch": 0.806619540796756, "grad_norm": 1.8982771635055542, "learning_rate": 0.0001, "loss": 0.6761, "step": 14720 }, { "epoch": 0.807715491259795, "grad_norm": 1.968770146369934, "learning_rate": 0.0001, "loss": 0.7178, "step": 14740 }, { "epoch": 0.8088114417228341, "grad_norm": 2.19568133354187, "learning_rate": 0.0001, "loss": 0.6246, "step": 14760 }, { "epoch": 0.8099073921858732, "grad_norm": 1.6024566888809204, "learning_rate": 0.0001, "loss": 0.6174, "step": 14780 }, { "epoch": 0.8110033426489123, "grad_norm": 1.4896485805511475, "learning_rate": 0.0001, "loss": 0.5843, "step": 14800 }, { "epoch": 0.8120992931119514, "grad_norm": 1.502487301826477, "learning_rate": 0.0001, "loss": 0.7747, "step": 14820 }, { "epoch": 0.8131952435749904, "grad_norm": 1.5037872791290283, "learning_rate": 0.0001, "loss": 0.6745, "step": 14840 }, { "epoch": 0.8142911940380295, "grad_norm": 1.3984043598175049, "learning_rate": 0.0001, "loss": 0.6938, "step": 14860 }, { "epoch": 0.8153871445010685, "grad_norm": 1.7627023458480835, "learning_rate": 0.0001, "loss": 0.6407, "step": 14880 }, { "epoch": 0.8164830949641076, "grad_norm": 1.5276484489440918, "learning_rate": 0.0001, "loss": 0.6142, "step": 14900 }, { "epoch": 0.8175790454271467, "grad_norm": 1.598743200302124, "learning_rate": 0.0001, "loss": 0.6534, "step": 14920 }, { "epoch": 0.8186749958901858, "grad_norm": 1.5528680086135864, "learning_rate": 0.0001, "loss": 0.6272, "step": 14940 }, { "epoch": 0.8197709463532248, "grad_norm": 1.71839439868927, "learning_rate": 0.0001, "loss": 0.7259, "step": 14960 }, { "epoch": 0.8208668968162639, "grad_norm": 1.5527739524841309, "learning_rate": 0.0001, "loss": 0.6387, "step": 14980 }, { "epoch": 0.821962847279303, "grad_norm": 1.7775479555130005, "learning_rate": 0.0001, "loss": 0.6963, "step": 15000 }, { "epoch": 0.821962847279303, "eval_loss": 0.6593644618988037, "eval_runtime": 30360.7703, "eval_samples_per_second": 2.137, "eval_steps_per_second": 0.067, "eval_wer": 42.68128173436093, "step": 15000 }, { "epoch": 0.823058797742342, "grad_norm": 1.772290825843811, "learning_rate": 0.0001, "loss": 0.6439, "step": 15020 }, { "epoch": 0.8241547482053811, "grad_norm": 1.655604600906372, "learning_rate": 0.0001, "loss": 0.7521, "step": 15040 }, { "epoch": 0.8252506986684202, "grad_norm": 1.5305246114730835, "learning_rate": 0.0001, "loss": 0.5623, "step": 15060 }, { "epoch": 0.8263466491314593, "grad_norm": 1.399568796157837, "learning_rate": 0.0001, "loss": 0.634, "step": 15080 }, { "epoch": 0.8274425995944983, "grad_norm": 1.412463903427124, "learning_rate": 0.0001, "loss": 0.7272, "step": 15100 }, { "epoch": 0.8285385500575374, "grad_norm": 1.793396234512329, "learning_rate": 0.0001, "loss": 0.684, "step": 15120 }, { "epoch": 0.8296345005205765, "grad_norm": 1.9623442888259888, "learning_rate": 0.0001, "loss": 0.6247, "step": 15140 }, { "epoch": 0.8307304509836155, "grad_norm": 1.4576257467269897, "learning_rate": 0.0001, "loss": 0.675, "step": 15160 }, { "epoch": 0.8318264014466547, "grad_norm": 1.6135623455047607, "learning_rate": 0.0001, "loss": 0.7046, "step": 15180 }, { "epoch": 0.8329223519096937, "grad_norm": 1.5553112030029297, "learning_rate": 0.0001, "loss": 0.7246, "step": 15200 }, { "epoch": 0.8340183023727328, "grad_norm": 1.4521915912628174, "learning_rate": 0.0001, "loss": 0.7288, "step": 15220 }, { "epoch": 
0.8351142528357718, "grad_norm": 1.429190754890442, "learning_rate": 0.0001, "loss": 0.6357, "step": 15240 }, { "epoch": 0.8362102032988109, "grad_norm": 1.80194890499115, "learning_rate": 0.0001, "loss": 0.6462, "step": 15260 }, { "epoch": 0.8373061537618499, "grad_norm": 1.833225131034851, "learning_rate": 0.0001, "loss": 0.6942, "step": 15280 }, { "epoch": 0.838402104224889, "grad_norm": 1.8329098224639893, "learning_rate": 0.0001, "loss": 0.6525, "step": 15300 }, { "epoch": 0.8394980546879282, "grad_norm": 1.5729244947433472, "learning_rate": 0.0001, "loss": 0.6721, "step": 15320 }, { "epoch": 0.8405940051509672, "grad_norm": 1.8156899213790894, "learning_rate": 0.0001, "loss": 0.6777, "step": 15340 }, { "epoch": 0.8416899556140063, "grad_norm": 1.7255985736846924, "learning_rate": 0.0001, "loss": 0.6653, "step": 15360 }, { "epoch": 0.8427859060770453, "grad_norm": 1.8051388263702393, "learning_rate": 0.0001, "loss": 0.6429, "step": 15380 }, { "epoch": 0.8438818565400844, "grad_norm": 1.4799489974975586, "learning_rate": 0.0001, "loss": 0.6219, "step": 15400 }, { "epoch": 0.8449778070031234, "grad_norm": 1.5661497116088867, "learning_rate": 0.0001, "loss": 0.7042, "step": 15420 }, { "epoch": 0.8460737574661625, "grad_norm": 1.7842859029769897, "learning_rate": 0.0001, "loss": 0.5657, "step": 15440 }, { "epoch": 0.8471697079292017, "grad_norm": 2.036591053009033, "learning_rate": 0.0001, "loss": 0.7448, "step": 15460 }, { "epoch": 0.8482656583922407, "grad_norm": 1.5923106670379639, "learning_rate": 0.0001, "loss": 0.6937, "step": 15480 }, { "epoch": 0.8493616088552798, "grad_norm": 1.7609819173812866, "learning_rate": 0.0001, "loss": 0.6282, "step": 15500 }, { "epoch": 0.8504575593183188, "grad_norm": 1.627193570137024, "learning_rate": 0.0001, "loss": 0.6593, "step": 15520 }, { "epoch": 0.8515535097813579, "grad_norm": 1.5199600458145142, "learning_rate": 0.0001, "loss": 0.6493, "step": 15540 }, { "epoch": 0.8526494602443969, "grad_norm": 1.8375046253204346, "learning_rate": 0.0001, "loss": 0.7139, "step": 15560 }, { "epoch": 0.853745410707436, "grad_norm": 1.7061831951141357, "learning_rate": 0.0001, "loss": 0.645, "step": 15580 }, { "epoch": 0.854841361170475, "grad_norm": 1.5046154260635376, "learning_rate": 0.0001, "loss": 0.7154, "step": 15600 }, { "epoch": 0.8559373116335142, "grad_norm": 2.0937325954437256, "learning_rate": 0.0001, "loss": 0.716, "step": 15620 }, { "epoch": 0.8570332620965533, "grad_norm": 1.502930760383606, "learning_rate": 0.0001, "loss": 0.6476, "step": 15640 }, { "epoch": 0.8581292125595923, "grad_norm": 1.832287073135376, "learning_rate": 0.0001, "loss": 0.6279, "step": 15660 }, { "epoch": 0.8592251630226314, "grad_norm": 1.9679219722747803, "learning_rate": 0.0001, "loss": 0.7048, "step": 15680 }, { "epoch": 0.8603211134856704, "grad_norm": 1.4660624265670776, "learning_rate": 0.0001, "loss": 0.6217, "step": 15700 }, { "epoch": 0.8614170639487095, "grad_norm": 1.6641209125518799, "learning_rate": 0.0001, "loss": 0.5692, "step": 15720 }, { "epoch": 0.8625130144117485, "grad_norm": 1.6354645490646362, "learning_rate": 0.0001, "loss": 0.7019, "step": 15740 }, { "epoch": 0.8636089648747877, "grad_norm": 1.5404868125915527, "learning_rate": 0.0001, "loss": 0.667, "step": 15760 }, { "epoch": 0.8647049153378268, "grad_norm": 1.759466528892517, "learning_rate": 0.0001, "loss": 0.7413, "step": 15780 }, { "epoch": 0.8658008658008658, "grad_norm": 1.289501667022705, "learning_rate": 0.0001, "loss": 0.6696, "step": 15800 }, { "epoch": 0.8668968162639049, "grad_norm": 
1.516506552696228, "learning_rate": 0.0001, "loss": 0.6385, "step": 15820 }, { "epoch": 0.8679927667269439, "grad_norm": 1.602023959159851, "learning_rate": 0.0001, "loss": 0.6034, "step": 15840 }, { "epoch": 0.869088717189983, "grad_norm": 1.6681197881698608, "learning_rate": 0.0001, "loss": 0.6524, "step": 15860 }, { "epoch": 0.870184667653022, "grad_norm": 1.7448092699050903, "learning_rate": 0.0001, "loss": 0.5845, "step": 15880 }, { "epoch": 0.8712806181160612, "grad_norm": 1.763609766960144, "learning_rate": 0.0001, "loss": 0.6078, "step": 15900 }, { "epoch": 0.8723765685791002, "grad_norm": 1.8752708435058594, "learning_rate": 0.0001, "loss": 0.6415, "step": 15920 }, { "epoch": 0.8734725190421393, "grad_norm": 1.4633687734603882, "learning_rate": 0.0001, "loss": 0.6509, "step": 15940 }, { "epoch": 0.8745684695051784, "grad_norm": 1.630188226699829, "learning_rate": 0.0001, "loss": 0.678, "step": 15960 }, { "epoch": 0.8756644199682174, "grad_norm": 1.746390461921692, "learning_rate": 0.0001, "loss": 0.6883, "step": 15980 }, { "epoch": 0.8767603704312565, "grad_norm": 1.8357354402542114, "learning_rate": 0.0001, "loss": 0.5674, "step": 16000 }, { "epoch": 0.8767603704312565, "eval_loss": 0.6495629549026489, "eval_runtime": 30578.7623, "eval_samples_per_second": 2.122, "eval_steps_per_second": 0.066, "eval_wer": 46.744478263995646, "step": 16000 }, { "epoch": 0.8778563208942955, "grad_norm": 1.3959294557571411, "learning_rate": 0.0001, "loss": 0.7334, "step": 16020 }, { "epoch": 0.8789522713573347, "grad_norm": 1.7587610483169556, "learning_rate": 0.0001, "loss": 0.6916, "step": 16040 }, { "epoch": 0.8800482218203737, "grad_norm": 2.157567024230957, "learning_rate": 0.0001, "loss": 0.6288, "step": 16060 }, { "epoch": 0.8811441722834128, "grad_norm": 1.927071452140808, "learning_rate": 0.0001, "loss": 0.6596, "step": 16080 }, { "epoch": 0.8822401227464519, "grad_norm": 1.7229890823364258, "learning_rate": 0.0001, "loss": 0.6351, "step": 16100 }, { "epoch": 0.8833360732094909, "grad_norm": 1.4584635496139526, "learning_rate": 0.0001, "loss": 0.6347, "step": 16120 }, { "epoch": 0.88443202367253, "grad_norm": 1.4768098592758179, "learning_rate": 0.0001, "loss": 0.6326, "step": 16140 }, { "epoch": 0.885527974135569, "grad_norm": 1.6411234140396118, "learning_rate": 0.0001, "loss": 0.6935, "step": 16160 }, { "epoch": 0.8866239245986082, "grad_norm": 1.4742987155914307, "learning_rate": 0.0001, "loss": 0.6188, "step": 16180 }, { "epoch": 0.8877198750616472, "grad_norm": 2.1708977222442627, "learning_rate": 0.0001, "loss": 0.6837, "step": 16200 }, { "epoch": 0.8888158255246863, "grad_norm": 1.5142560005187988, "learning_rate": 0.0001, "loss": 0.6218, "step": 16220 }, { "epoch": 0.8899117759877253, "grad_norm": 1.5650640726089478, "learning_rate": 0.0001, "loss": 0.5991, "step": 16240 }, { "epoch": 0.8910077264507644, "grad_norm": 1.5553919076919556, "learning_rate": 0.0001, "loss": 0.6081, "step": 16260 }, { "epoch": 0.8921036769138035, "grad_norm": 1.813482642173767, "learning_rate": 0.0001, "loss": 0.6599, "step": 16280 }, { "epoch": 0.8931996273768426, "grad_norm": 1.6864385604858398, "learning_rate": 0.0001, "loss": 0.6337, "step": 16300 }, { "epoch": 0.8942955778398817, "grad_norm": 1.5707799196243286, "learning_rate": 0.0001, "loss": 0.7029, "step": 16320 }, { "epoch": 0.8953915283029207, "grad_norm": 1.3465133905410767, "learning_rate": 0.0001, "loss": 0.6519, "step": 16340 }, { "epoch": 0.8964874787659598, "grad_norm": 1.5546880960464478, "learning_rate": 0.0001, "loss": 0.6111, 
"step": 16360 }, { "epoch": 0.8975834292289988, "grad_norm": 1.6297564506530762, "learning_rate": 0.0001, "loss": 0.6825, "step": 16380 }, { "epoch": 0.8986793796920379, "grad_norm": 1.5396370887756348, "learning_rate": 0.0001, "loss": 0.6454, "step": 16400 }, { "epoch": 0.8997753301550769, "grad_norm": 1.3082808256149292, "learning_rate": 0.0001, "loss": 0.6489, "step": 16420 }, { "epoch": 0.900871280618116, "grad_norm": 1.68564772605896, "learning_rate": 0.0001, "loss": 0.6688, "step": 16440 }, { "epoch": 0.9019672310811552, "grad_norm": 1.6919423341751099, "learning_rate": 0.0001, "loss": 0.6353, "step": 16460 }, { "epoch": 0.9030631815441942, "grad_norm": 1.4040336608886719, "learning_rate": 0.0001, "loss": 0.6286, "step": 16480 }, { "epoch": 0.9041591320072333, "grad_norm": 1.5394583940505981, "learning_rate": 0.0001, "loss": 0.648, "step": 16500 }, { "epoch": 0.9052550824702723, "grad_norm": 1.8135911226272583, "learning_rate": 0.0001, "loss": 0.6315, "step": 16520 }, { "epoch": 0.9063510329333114, "grad_norm": 1.6827434301376343, "learning_rate": 0.0001, "loss": 0.637, "step": 16540 }, { "epoch": 0.9074469833963504, "grad_norm": 1.3692152500152588, "learning_rate": 0.0001, "loss": 0.7015, "step": 16560 }, { "epoch": 0.9085429338593896, "grad_norm": 1.6391196250915527, "learning_rate": 0.0001, "loss": 0.6479, "step": 16580 }, { "epoch": 0.9096388843224287, "grad_norm": 2.5071117877960205, "learning_rate": 0.0001, "loss": 0.6746, "step": 16600 }, { "epoch": 0.9107348347854677, "grad_norm": 1.7680779695510864, "learning_rate": 0.0001, "loss": 0.6543, "step": 16620 }, { "epoch": 0.9118307852485068, "grad_norm": 1.487269639968872, "learning_rate": 0.0001, "loss": 0.618, "step": 16640 }, { "epoch": 0.9129267357115458, "grad_norm": 1.322325348854065, "learning_rate": 0.0001, "loss": 0.635, "step": 16660 }, { "epoch": 0.9140226861745849, "grad_norm": 2.054997682571411, "learning_rate": 0.0001, "loss": 0.645, "step": 16680 }, { "epoch": 0.9151186366376239, "grad_norm": 1.7619165182113647, "learning_rate": 0.0001, "loss": 0.6405, "step": 16700 }, { "epoch": 0.9162145871006631, "grad_norm": 1.3276571035385132, "learning_rate": 0.0001, "loss": 0.5797, "step": 16720 }, { "epoch": 0.9173105375637021, "grad_norm": 2.2796542644500732, "learning_rate": 0.0001, "loss": 0.7342, "step": 16740 }, { "epoch": 0.9184064880267412, "grad_norm": 1.637654423713684, "learning_rate": 0.0001, "loss": 0.6149, "step": 16760 }, { "epoch": 0.9195024384897803, "grad_norm": 1.4013864994049072, "learning_rate": 0.0001, "loss": 0.627, "step": 16780 }, { "epoch": 0.9205983889528193, "grad_norm": 1.5173211097717285, "learning_rate": 0.0001, "loss": 0.5449, "step": 16800 }, { "epoch": 0.9216943394158584, "grad_norm": 1.5530805587768555, "learning_rate": 0.0001, "loss": 0.5416, "step": 16820 }, { "epoch": 0.9227902898788974, "grad_norm": 1.5294363498687744, "learning_rate": 0.0001, "loss": 0.6146, "step": 16840 }, { "epoch": 0.9238862403419366, "grad_norm": 1.7312266826629639, "learning_rate": 0.0001, "loss": 0.6952, "step": 16860 }, { "epoch": 0.9249821908049756, "grad_norm": 1.301459789276123, "learning_rate": 0.0001, "loss": 0.6258, "step": 16880 }, { "epoch": 0.9260781412680147, "grad_norm": 1.915128469467163, "learning_rate": 0.0001, "loss": 0.6955, "step": 16900 }, { "epoch": 0.9271740917310538, "grad_norm": 1.3437505960464478, "learning_rate": 0.0001, "loss": 0.7044, "step": 16920 }, { "epoch": 0.9282700421940928, "grad_norm": 1.5920603275299072, "learning_rate": 0.0001, "loss": 0.5871, "step": 16940 }, { "epoch": 
0.9293659926571319, "grad_norm": 1.2615900039672852, "learning_rate": 0.0001, "loss": 0.6311, "step": 16960 }, { "epoch": 0.9304619431201709, "grad_norm": 1.6863378286361694, "learning_rate": 0.0001, "loss": 0.5746, "step": 16980 }, { "epoch": 0.9315578935832101, "grad_norm": 1.3633450269699097, "learning_rate": 0.0001, "loss": 0.6354, "step": 17000 }, { "epoch": 0.9315578935832101, "eval_loss": 0.6385661959648132, "eval_runtime": 30462.2265, "eval_samples_per_second": 2.13, "eval_steps_per_second": 0.067, "eval_wer": 36.07830918982583, "step": 17000 }, { "epoch": 0.9326538440462491, "grad_norm": 1.7412103414535522, "learning_rate": 0.0001, "loss": 0.6635, "step": 17020 }, { "epoch": 0.9337497945092882, "grad_norm": 2.0697691440582275, "learning_rate": 0.0001, "loss": 0.678, "step": 17040 }, { "epoch": 0.9348457449723272, "grad_norm": 1.6238869428634644, "learning_rate": 0.0001, "loss": 0.6427, "step": 17060 }, { "epoch": 0.9359416954353663, "grad_norm": 1.498334288597107, "learning_rate": 0.0001, "loss": 0.614, "step": 17080 }, { "epoch": 0.9370376458984054, "grad_norm": 1.4905815124511719, "learning_rate": 0.0001, "loss": 0.5759, "step": 17100 }, { "epoch": 0.9381335963614444, "grad_norm": 1.433747410774231, "learning_rate": 0.0001, "loss": 0.6754, "step": 17120 }, { "epoch": 0.9392295468244836, "grad_norm": 1.8419586420059204, "learning_rate": 0.0001, "loss": 0.6392, "step": 17140 }, { "epoch": 0.9403254972875226, "grad_norm": 1.5990883111953735, "learning_rate": 0.0001, "loss": 0.6204, "step": 17160 }, { "epoch": 0.9414214477505617, "grad_norm": 1.482010841369629, "learning_rate": 0.0001, "loss": 0.6887, "step": 17180 }, { "epoch": 0.9425173982136007, "grad_norm": 1.6629010438919067, "learning_rate": 0.0001, "loss": 0.6457, "step": 17200 }, { "epoch": 0.9436133486766398, "grad_norm": 1.4538336992263794, "learning_rate": 0.0001, "loss": 0.6401, "step": 17220 }, { "epoch": 0.9447092991396789, "grad_norm": 1.5684305429458618, "learning_rate": 0.0001, "loss": 0.676, "step": 17240 }, { "epoch": 0.945805249602718, "grad_norm": 1.4637812376022339, "learning_rate": 0.0001, "loss": 0.5963, "step": 17260 }, { "epoch": 0.9469012000657571, "grad_norm": 2.155348300933838, "learning_rate": 0.0001, "loss": 0.6624, "step": 17280 }, { "epoch": 0.9479971505287961, "grad_norm": 1.6532953977584839, "learning_rate": 0.0001, "loss": 0.6784, "step": 17300 }, { "epoch": 0.9490931009918352, "grad_norm": 1.934787392616272, "learning_rate": 0.0001, "loss": 0.6876, "step": 17320 }, { "epoch": 0.9501890514548742, "grad_norm": 2.319920063018799, "learning_rate": 0.0001, "loss": 0.7676, "step": 17340 }, { "epoch": 0.9512850019179133, "grad_norm": 1.5026947259902954, "learning_rate": 0.0001, "loss": 0.5634, "step": 17360 }, { "epoch": 0.9523809523809523, "grad_norm": 1.9578672647476196, "learning_rate": 0.0001, "loss": 0.5959, "step": 17380 }, { "epoch": 0.9534769028439914, "grad_norm": 1.9930877685546875, "learning_rate": 0.0001, "loss": 0.5947, "step": 17400 }, { "epoch": 0.9545728533070306, "grad_norm": 1.6241062879562378, "learning_rate": 0.0001, "loss": 0.587, "step": 17420 }, { "epoch": 0.9556688037700696, "grad_norm": 1.7155011892318726, "learning_rate": 0.0001, "loss": 0.6352, "step": 17440 }, { "epoch": 0.9567647542331087, "grad_norm": 1.7239856719970703, "learning_rate": 0.0001, "loss": 0.647, "step": 17460 }, { "epoch": 0.9578607046961477, "grad_norm": 1.6342066526412964, "learning_rate": 0.0001, "loss": 0.6017, "step": 17480 }, { "epoch": 0.9589566551591868, "grad_norm": 1.4042915105819702, 
"learning_rate": 0.0001, "loss": 0.6479, "step": 17500 }, { "epoch": 0.9600526056222258, "grad_norm": 1.5023634433746338, "learning_rate": 0.0001, "loss": 0.6008, "step": 17520 }, { "epoch": 0.961148556085265, "grad_norm": 1.5713409185409546, "learning_rate": 0.0001, "loss": 0.6643, "step": 17540 }, { "epoch": 0.9622445065483041, "grad_norm": 1.8917444944381714, "learning_rate": 0.0001, "loss": 0.6992, "step": 17560 }, { "epoch": 0.9633404570113431, "grad_norm": 1.918900728225708, "learning_rate": 0.0001, "loss": 0.6004, "step": 17580 }, { "epoch": 0.9644364074743822, "grad_norm": 1.7599738836288452, "learning_rate": 0.0001, "loss": 0.6479, "step": 17600 }, { "epoch": 0.9655323579374212, "grad_norm": 1.4554500579833984, "learning_rate": 0.0001, "loss": 0.6367, "step": 17620 }, { "epoch": 0.9666283084004603, "grad_norm": 1.6860467195510864, "learning_rate": 0.0001, "loss": 0.5769, "step": 17640 }, { "epoch": 0.9677242588634993, "grad_norm": 1.6800360679626465, "learning_rate": 0.0001, "loss": 0.6402, "step": 17660 }, { "epoch": 0.9688202093265385, "grad_norm": 1.3988690376281738, "learning_rate": 0.0001, "loss": 0.589, "step": 17680 }, { "epoch": 0.9699161597895775, "grad_norm": 1.6789034605026245, "learning_rate": 0.0001, "loss": 0.6469, "step": 17700 }, { "epoch": 0.9710121102526166, "grad_norm": 1.4465025663375854, "learning_rate": 0.0001, "loss": 0.6362, "step": 17720 }, { "epoch": 0.9721080607156557, "grad_norm": 1.6466797590255737, "learning_rate": 0.0001, "loss": 0.5882, "step": 17740 }, { "epoch": 0.9732040111786947, "grad_norm": 1.4487119913101196, "learning_rate": 0.0001, "loss": 0.6481, "step": 17760 }, { "epoch": 0.9742999616417338, "grad_norm": 1.769286870956421, "learning_rate": 0.0001, "loss": 0.7198, "step": 17780 }, { "epoch": 0.9753959121047728, "grad_norm": 1.8515903949737549, "learning_rate": 0.0001, "loss": 0.6257, "step": 17800 }, { "epoch": 0.976491862567812, "grad_norm": 1.9811028242111206, "learning_rate": 0.0001, "loss": 0.6154, "step": 17820 }, { "epoch": 0.977587813030851, "grad_norm": 1.6197538375854492, "learning_rate": 0.0001, "loss": 0.6196, "step": 17840 }, { "epoch": 0.9786837634938901, "grad_norm": 1.605971336364746, "learning_rate": 0.0001, "loss": 0.5549, "step": 17860 }, { "epoch": 0.9797797139569292, "grad_norm": 1.5246946811676025, "learning_rate": 0.0001, "loss": 0.5979, "step": 17880 }, { "epoch": 0.9808756644199682, "grad_norm": 1.8534538745880127, "learning_rate": 0.0001, "loss": 0.663, "step": 17900 }, { "epoch": 0.9819716148830073, "grad_norm": 1.6625508069992065, "learning_rate": 0.0001, "loss": 0.5863, "step": 17920 }, { "epoch": 0.9830675653460463, "grad_norm": 1.704788088798523, "learning_rate": 0.0001, "loss": 0.6529, "step": 17940 }, { "epoch": 0.9841635158090855, "grad_norm": 1.3650102615356445, "learning_rate": 0.0001, "loss": 0.6317, "step": 17960 }, { "epoch": 0.9852594662721245, "grad_norm": 1.7923402786254883, "learning_rate": 0.0001, "loss": 0.6023, "step": 17980 }, { "epoch": 0.9863554167351636, "grad_norm": 2.0028254985809326, "learning_rate": 0.0001, "loss": 0.6569, "step": 18000 }, { "epoch": 0.9863554167351636, "eval_loss": 0.6309429407119751, "eval_runtime": 30654.7333, "eval_samples_per_second": 2.117, "eval_steps_per_second": 0.066, "eval_wer": 36.64375002139876, "step": 18000 }, { "epoch": 0.9874513671982026, "grad_norm": 1.66196870803833, "learning_rate": 0.0001, "loss": 0.6687, "step": 18020 }, { "epoch": 0.9885473176612417, "grad_norm": 2.031445264816284, "learning_rate": 0.0001, "loss": 0.6032, "step": 18040 }, { 
"epoch": 0.9896432681242808, "grad_norm": 1.3204675912857056, "learning_rate": 0.0001, "loss": 0.5913, "step": 18060 }, { "epoch": 0.9907392185873198, "grad_norm": 1.7389861345291138, "learning_rate": 0.0001, "loss": 0.6678, "step": 18080 }, { "epoch": 0.991835169050359, "grad_norm": 1.9815995693206787, "learning_rate": 0.0001, "loss": 0.6566, "step": 18100 }, { "epoch": 0.992931119513398, "grad_norm": 1.5902196168899536, "learning_rate": 0.0001, "loss": 0.617, "step": 18120 }, { "epoch": 0.9940270699764371, "grad_norm": 1.4741644859313965, "learning_rate": 0.0001, "loss": 0.5921, "step": 18140 }, { "epoch": 0.9951230204394761, "grad_norm": 1.419965147972107, "learning_rate": 0.0001, "loss": 0.6031, "step": 18160 }, { "epoch": 0.9962189709025152, "grad_norm": 1.5964018106460571, "learning_rate": 0.0001, "loss": 0.6015, "step": 18180 }, { "epoch": 0.9973149213655543, "grad_norm": 1.4470981359481812, "learning_rate": 0.0001, "loss": 0.6898, "step": 18200 }, { "epoch": 0.9984108718285933, "grad_norm": 1.54426908493042, "learning_rate": 0.0001, "loss": 0.6328, "step": 18220 }, { "epoch": 0.9995068222916325, "grad_norm": 1.778437614440918, "learning_rate": 0.0001, "loss": 0.6381, "step": 18240 }, { "epoch": 1.0006027727546716, "grad_norm": 1.5332226753234863, "learning_rate": 0.0001, "loss": 0.5782, "step": 18260 }, { "epoch": 1.0016987232177106, "grad_norm": 1.785762906074524, "learning_rate": 0.0001, "loss": 0.6779, "step": 18280 }, { "epoch": 1.0027946736807496, "grad_norm": 1.684708595275879, "learning_rate": 0.0001, "loss": 0.5786, "step": 18300 }, { "epoch": 1.0038906241437886, "grad_norm": 1.5112040042877197, "learning_rate": 0.0001, "loss": 0.5989, "step": 18320 }, { "epoch": 1.0049865746068278, "grad_norm": 1.5711543560028076, "learning_rate": 0.0001, "loss": 0.6525, "step": 18340 }, { "epoch": 1.0060825250698668, "grad_norm": 1.4053367376327515, "learning_rate": 0.0001, "loss": 0.5913, "step": 18360 }, { "epoch": 1.0071784755329058, "grad_norm": 1.7791050672531128, "learning_rate": 0.0001, "loss": 0.6212, "step": 18380 }, { "epoch": 1.008274425995945, "grad_norm": 1.4202812910079956, "learning_rate": 0.0001, "loss": 0.6168, "step": 18400 }, { "epoch": 1.009370376458984, "grad_norm": 1.7469732761383057, "learning_rate": 0.0001, "loss": 0.6425, "step": 18420 }, { "epoch": 1.010466326922023, "grad_norm": 2.1195449829101562, "learning_rate": 0.0001, "loss": 0.6564, "step": 18440 }, { "epoch": 1.011562277385062, "grad_norm": 1.4056214094161987, "learning_rate": 0.0001, "loss": 0.6809, "step": 18460 }, { "epoch": 1.0126582278481013, "grad_norm": 2.07029128074646, "learning_rate": 0.0001, "loss": 0.6119, "step": 18480 }, { "epoch": 1.0137541783111403, "grad_norm": 1.6518419981002808, "learning_rate": 0.0001, "loss": 0.5954, "step": 18500 }, { "epoch": 1.0148501287741793, "grad_norm": 1.7785189151763916, "learning_rate": 0.0001, "loss": 0.5959, "step": 18520 }, { "epoch": 1.0159460792372186, "grad_norm": 1.4902641773223877, "learning_rate": 0.0001, "loss": 0.6066, "step": 18540 }, { "epoch": 1.0170420297002576, "grad_norm": 1.6291300058364868, "learning_rate": 0.0001, "loss": 0.5781, "step": 18560 }, { "epoch": 1.0181379801632966, "grad_norm": 1.5571300983428955, "learning_rate": 0.0001, "loss": 0.5622, "step": 18580 }, { "epoch": 1.0192339306263356, "grad_norm": 1.5963464975357056, "learning_rate": 0.0001, "loss": 0.6797, "step": 18600 }, { "epoch": 1.0203298810893748, "grad_norm": 1.4604226350784302, "learning_rate": 0.0001, "loss": 0.6712, "step": 18620 }, { "epoch": 1.0214258315524138, 
"grad_norm": 1.6052221059799194, "learning_rate": 0.0001, "loss": 0.595, "step": 18640 }, { "epoch": 1.0225217820154529, "grad_norm": 1.4075971841812134, "learning_rate": 0.0001, "loss": 0.6123, "step": 18660 }, { "epoch": 1.0236177324784919, "grad_norm": 1.910475730895996, "learning_rate": 0.0001, "loss": 0.7016, "step": 18680 }, { "epoch": 1.024713682941531, "grad_norm": 1.946268081665039, "learning_rate": 0.0001, "loss": 0.6178, "step": 18700 }, { "epoch": 1.02580963340457, "grad_norm": 1.5547478199005127, "learning_rate": 0.0001, "loss": 0.6068, "step": 18720 }, { "epoch": 1.026905583867609, "grad_norm": 1.5006910562515259, "learning_rate": 0.0001, "loss": 0.5818, "step": 18740 }, { "epoch": 1.0280015343306483, "grad_norm": 1.5395736694335938, "learning_rate": 0.0001, "loss": 0.6614, "step": 18760 }, { "epoch": 1.0290974847936873, "grad_norm": 1.5935709476470947, "learning_rate": 0.0001, "loss": 0.5593, "step": 18780 }, { "epoch": 1.0301934352567264, "grad_norm": 1.6643317937850952, "learning_rate": 0.0001, "loss": 0.6441, "step": 18800 }, { "epoch": 1.0312893857197654, "grad_norm": 1.6811660528182983, "learning_rate": 0.0001, "loss": 0.5663, "step": 18820 }, { "epoch": 1.0323853361828046, "grad_norm": 1.4203201532363892, "learning_rate": 0.0001, "loss": 0.6622, "step": 18840 }, { "epoch": 1.0334812866458436, "grad_norm": 1.9712319374084473, "learning_rate": 0.0001, "loss": 0.5818, "step": 18860 }, { "epoch": 1.0345772371088826, "grad_norm": 2.0921614170074463, "learning_rate": 0.0001, "loss": 0.6489, "step": 18880 }, { "epoch": 1.0356731875719218, "grad_norm": 1.3215636014938354, "learning_rate": 0.0001, "loss": 0.5784, "step": 18900 }, { "epoch": 1.0367691380349608, "grad_norm": 1.6520031690597534, "learning_rate": 0.0001, "loss": 0.6069, "step": 18920 }, { "epoch": 1.0378650884979999, "grad_norm": 1.8051795959472656, "learning_rate": 0.0001, "loss": 0.6596, "step": 18940 }, { "epoch": 1.0389610389610389, "grad_norm": 1.7375438213348389, "learning_rate": 0.0001, "loss": 0.5161, "step": 18960 }, { "epoch": 1.040056989424078, "grad_norm": 1.4865177869796753, "learning_rate": 0.0001, "loss": 0.5436, "step": 18980 }, { "epoch": 1.041152939887117, "grad_norm": 1.4444303512573242, "learning_rate": 0.0001, "loss": 0.5906, "step": 19000 }, { "epoch": 1.041152939887117, "eval_loss": 0.622921347618103, "eval_runtime": 30997.4799, "eval_samples_per_second": 2.093, "eval_steps_per_second": 0.065, "eval_wer": 37.84105343527132, "step": 19000 }, { "epoch": 1.042248890350156, "grad_norm": 1.4964603185653687, "learning_rate": 0.0001, "loss": 0.6014, "step": 19020 }, { "epoch": 1.0433448408131953, "grad_norm": 2.2886295318603516, "learning_rate": 0.0001, "loss": 0.6472, "step": 19040 }, { "epoch": 1.0444407912762343, "grad_norm": 1.7065175771713257, "learning_rate": 0.0001, "loss": 0.5561, "step": 19060 }, { "epoch": 1.0455367417392734, "grad_norm": 1.6928189992904663, "learning_rate": 0.0001, "loss": 0.5224, "step": 19080 }, { "epoch": 1.0466326922023124, "grad_norm": 1.4461798667907715, "learning_rate": 0.0001, "loss": 0.5549, "step": 19100 }, { "epoch": 1.0477286426653516, "grad_norm": 3.3619306087493896, "learning_rate": 0.0001, "loss": 0.6065, "step": 19120 }, { "epoch": 1.0488245931283906, "grad_norm": 1.8468629121780396, "learning_rate": 0.0001, "loss": 0.716, "step": 19140 }, { "epoch": 1.0499205435914296, "grad_norm": 1.7207484245300293, "learning_rate": 0.0001, "loss": 0.6078, "step": 19160 }, { "epoch": 1.0510164940544688, "grad_norm": 1.6650727987289429, "learning_rate": 0.0001, "loss": 
0.6347, "step": 19180 }, { "epoch": 1.0521124445175078, "grad_norm": 1.3957616090774536, "learning_rate": 0.0001, "loss": 0.6265, "step": 19200 }, { "epoch": 1.0532083949805469, "grad_norm": 1.4066413640975952, "learning_rate": 0.0001, "loss": 0.6118, "step": 19220 }, { "epoch": 1.0543043454435859, "grad_norm": 1.8007709980010986, "learning_rate": 0.0001, "loss": 0.6989, "step": 19240 }, { "epoch": 1.055400295906625, "grad_norm": 1.4759665727615356, "learning_rate": 0.0001, "loss": 0.621, "step": 19260 }, { "epoch": 1.056496246369664, "grad_norm": 1.7062383890151978, "learning_rate": 0.0001, "loss": 0.6413, "step": 19280 }, { "epoch": 1.0575921968327031, "grad_norm": 1.5822961330413818, "learning_rate": 0.0001, "loss": 0.5877, "step": 19300 }, { "epoch": 1.0586881472957423, "grad_norm": 1.7339930534362793, "learning_rate": 0.0001, "loss": 0.6121, "step": 19320 }, { "epoch": 1.0597840977587814, "grad_norm": 1.3066824674606323, "learning_rate": 0.0001, "loss": 0.6278, "step": 19340 }, { "epoch": 1.0608800482218204, "grad_norm": 1.6953777074813843, "learning_rate": 0.0001, "loss": 0.624, "step": 19360 }, { "epoch": 1.0619759986848594, "grad_norm": 1.5192081928253174, "learning_rate": 0.0001, "loss": 0.6078, "step": 19380 }, { "epoch": 1.0630719491478986, "grad_norm": 1.5474629402160645, "learning_rate": 0.0001, "loss": 0.7147, "step": 19400 }, { "epoch": 1.0641678996109376, "grad_norm": 1.5060781240463257, "learning_rate": 0.0001, "loss": 0.6342, "step": 19420 }, { "epoch": 1.0652638500739766, "grad_norm": 1.4227863550186157, "learning_rate": 0.0001, "loss": 0.5967, "step": 19440 }, { "epoch": 1.0663598005370156, "grad_norm": 1.4965442419052124, "learning_rate": 0.0001, "loss": 0.6344, "step": 19460 }, { "epoch": 1.0674557510000549, "grad_norm": 1.4466912746429443, "learning_rate": 0.0001, "loss": 0.6276, "step": 19480 }, { "epoch": 1.0685517014630939, "grad_norm": 1.7357358932495117, "learning_rate": 0.0001, "loss": 0.5664, "step": 19500 }, { "epoch": 1.0696476519261329, "grad_norm": 1.6092090606689453, "learning_rate": 0.0001, "loss": 0.6207, "step": 19520 }, { "epoch": 1.070743602389172, "grad_norm": 1.5991522073745728, "learning_rate": 0.0001, "loss": 0.6113, "step": 19540 }, { "epoch": 1.071839552852211, "grad_norm": 1.320917010307312, "learning_rate": 0.0001, "loss": 0.577, "step": 19560 }, { "epoch": 1.0729355033152501, "grad_norm": 1.4419164657592773, "learning_rate": 0.0001, "loss": 0.6622, "step": 19580 }, { "epoch": 1.0740314537782891, "grad_norm": 1.7298556566238403, "learning_rate": 0.0001, "loss": 0.5721, "step": 19600 }, { "epoch": 1.0751274042413284, "grad_norm": 1.8013224601745605, "learning_rate": 0.0001, "loss": 0.6079, "step": 19620 }, { "epoch": 1.0762233547043674, "grad_norm": 1.5226448774337769, "learning_rate": 0.0001, "loss": 0.6681, "step": 19640 }, { "epoch": 1.0773193051674064, "grad_norm": 1.894225835800171, "learning_rate": 0.0001, "loss": 0.6599, "step": 19660 }, { "epoch": 1.0784152556304456, "grad_norm": 1.58738112449646, "learning_rate": 0.0001, "loss": 0.5796, "step": 19680 }, { "epoch": 1.0795112060934846, "grad_norm": 1.880391240119934, "learning_rate": 0.0001, "loss": 0.6305, "step": 19700 }, { "epoch": 1.0806071565565236, "grad_norm": 1.4821720123291016, "learning_rate": 0.0001, "loss": 0.5675, "step": 19720 }, { "epoch": 1.0817031070195626, "grad_norm": 1.504714846611023, "learning_rate": 0.0001, "loss": 0.5888, "step": 19740 }, { "epoch": 1.0827990574826019, "grad_norm": 1.6745513677597046, "learning_rate": 0.0001, "loss": 0.6199, "step": 19760 }, { 
"epoch": 1.0838950079456409, "grad_norm": 1.827014684677124, "learning_rate": 0.0001, "loss": 0.5889, "step": 19780 }, { "epoch": 1.0849909584086799, "grad_norm": 1.6627857685089111, "learning_rate": 0.0001, "loss": 0.5649, "step": 19800 }, { "epoch": 1.086086908871719, "grad_norm": 1.5660628080368042, "learning_rate": 0.0001, "loss": 0.6814, "step": 19820 }, { "epoch": 1.0871828593347581, "grad_norm": 1.4713698625564575, "learning_rate": 0.0001, "loss": 0.5789, "step": 19840 }, { "epoch": 1.0882788097977971, "grad_norm": 1.5290462970733643, "learning_rate": 0.0001, "loss": 0.5894, "step": 19860 }, { "epoch": 1.0893747602608361, "grad_norm": 1.7101823091506958, "learning_rate": 0.0001, "loss": 0.5732, "step": 19880 }, { "epoch": 1.0904707107238754, "grad_norm": 1.8752964735031128, "learning_rate": 0.0001, "loss": 0.6358, "step": 19900 }, { "epoch": 1.0915666611869144, "grad_norm": 1.7808418273925781, "learning_rate": 0.0001, "loss": 0.5942, "step": 19920 }, { "epoch": 1.0926626116499534, "grad_norm": 1.4797093868255615, "learning_rate": 0.0001, "loss": 0.605, "step": 19940 }, { "epoch": 1.0937585621129924, "grad_norm": 2.443544387817383, "learning_rate": 0.0001, "loss": 0.7019, "step": 19960 }, { "epoch": 1.0948545125760316, "grad_norm": 1.4167999029159546, "learning_rate": 0.0001, "loss": 0.5969, "step": 19980 }, { "epoch": 1.0959504630390706, "grad_norm": 1.3823322057724, "learning_rate": 0.0001, "loss": 0.634, "step": 20000 }, { "epoch": 1.0959504630390706, "eval_loss": 0.6163960099220276, "eval_runtime": 30555.9795, "eval_samples_per_second": 2.124, "eval_steps_per_second": 0.066, "eval_wer": 36.777449473248126, "step": 20000 }, { "epoch": 1.0970464135021096, "grad_norm": 1.5388779640197754, "learning_rate": 0.0001, "loss": 0.5836, "step": 20020 }, { "epoch": 1.0981423639651489, "grad_norm": 1.9624749422073364, "learning_rate": 0.0001, "loss": 0.5574, "step": 20040 }, { "epoch": 1.0992383144281879, "grad_norm": 1.4712483882904053, "learning_rate": 0.0001, "loss": 0.6284, "step": 20060 }, { "epoch": 1.1003342648912269, "grad_norm": 1.4846380949020386, "learning_rate": 0.0001, "loss": 0.5815, "step": 20080 }, { "epoch": 1.1014302153542659, "grad_norm": 1.4635918140411377, "learning_rate": 0.0001, "loss": 0.6003, "step": 20100 }, { "epoch": 1.1025261658173051, "grad_norm": 1.775586485862732, "learning_rate": 0.0001, "loss": 0.5997, "step": 20120 }, { "epoch": 1.1036221162803441, "grad_norm": 1.5434575080871582, "learning_rate": 0.0001, "loss": 0.5924, "step": 20140 }, { "epoch": 1.1047180667433831, "grad_norm": 1.661482572555542, "learning_rate": 0.0001, "loss": 0.6288, "step": 20160 }, { "epoch": 1.1058140172064224, "grad_norm": 1.706123948097229, "learning_rate": 0.0001, "loss": 0.5624, "step": 20180 }, { "epoch": 1.1069099676694614, "grad_norm": 1.9980905055999756, "learning_rate": 0.0001, "loss": 0.5577, "step": 20200 }, { "epoch": 1.1080059181325004, "grad_norm": 1.60072922706604, "learning_rate": 0.0001, "loss": 0.5947, "step": 20220 }, { "epoch": 1.1091018685955394, "grad_norm": 1.6381016969680786, "learning_rate": 0.0001, "loss": 0.5183, "step": 20240 }, { "epoch": 1.1101978190585786, "grad_norm": 1.7384517192840576, "learning_rate": 0.0001, "loss": 0.5752, "step": 20260 }, { "epoch": 1.1112937695216176, "grad_norm": 1.9785683155059814, "learning_rate": 0.0001, "loss": 0.576, "step": 20280 }, { "epoch": 1.1123897199846566, "grad_norm": 1.2318958044052124, "learning_rate": 0.0001, "loss": 0.5853, "step": 20300 }, { "epoch": 1.1134856704476959, "grad_norm": 1.5406831502914429, 
"learning_rate": 0.0001, "loss": 0.6059, "step": 20320 }, { "epoch": 1.1145816209107349, "grad_norm": 1.6655981540679932, "learning_rate": 0.0001, "loss": 0.6834, "step": 20340 }, { "epoch": 1.1156775713737739, "grad_norm": 1.513757586479187, "learning_rate": 0.0001, "loss": 0.6158, "step": 20360 }, { "epoch": 1.1167735218368129, "grad_norm": 2.1243133544921875, "learning_rate": 0.0001, "loss": 0.5769, "step": 20380 }, { "epoch": 1.1178694722998521, "grad_norm": 1.6118996143341064, "learning_rate": 0.0001, "loss": 0.5197, "step": 20400 }, { "epoch": 1.1189654227628911, "grad_norm": 1.8882734775543213, "learning_rate": 0.0001, "loss": 0.5459, "step": 20420 }, { "epoch": 1.1200613732259301, "grad_norm": 1.8385454416275024, "learning_rate": 0.0001, "loss": 0.5589, "step": 20440 }, { "epoch": 1.1211573236889691, "grad_norm": 1.7789567708969116, "learning_rate": 0.0001, "loss": 0.589, "step": 20460 }, { "epoch": 1.1222532741520084, "grad_norm": 1.839416265487671, "learning_rate": 0.0001, "loss": 0.5979, "step": 20480 }, { "epoch": 1.1233492246150474, "grad_norm": 1.8148174285888672, "learning_rate": 0.0001, "loss": 0.6179, "step": 20500 }, { "epoch": 1.1244451750780864, "grad_norm": 1.594193696975708, "learning_rate": 0.0001, "loss": 0.6427, "step": 20520 }, { "epoch": 1.1255411255411256, "grad_norm": 1.5754518508911133, "learning_rate": 0.0001, "loss": 0.5933, "step": 20540 }, { "epoch": 1.1266370760041646, "grad_norm": 1.7265543937683105, "learning_rate": 0.0001, "loss": 0.6021, "step": 20560 }, { "epoch": 1.1277330264672036, "grad_norm": 1.4708410501480103, "learning_rate": 0.0001, "loss": 0.5824, "step": 20580 }, { "epoch": 1.1288289769302429, "grad_norm": 1.7831743955612183, "learning_rate": 0.0001, "loss": 0.5806, "step": 20600 }, { "epoch": 1.1299249273932819, "grad_norm": 1.530446171760559, "learning_rate": 0.0001, "loss": 0.6523, "step": 20620 }, { "epoch": 1.1310208778563209, "grad_norm": 2.154409408569336, "learning_rate": 0.0001, "loss": 0.5898, "step": 20640 }, { "epoch": 1.13211682831936, "grad_norm": 1.3791108131408691, "learning_rate": 0.0001, "loss": 0.6321, "step": 20660 }, { "epoch": 1.1332127787823991, "grad_norm": 2.259727954864502, "learning_rate": 0.0001, "loss": 0.67, "step": 20680 }, { "epoch": 1.1343087292454381, "grad_norm": 1.7098194360733032, "learning_rate": 0.0001, "loss": 0.5554, "step": 20700 }, { "epoch": 1.1354046797084771, "grad_norm": 1.5874308347702026, "learning_rate": 0.0001, "loss": 0.5596, "step": 20720 }, { "epoch": 1.1365006301715161, "grad_norm": 1.9818806648254395, "learning_rate": 0.0001, "loss": 0.6618, "step": 20740 }, { "epoch": 1.1375965806345554, "grad_norm": 1.4672831296920776, "learning_rate": 0.0001, "loss": 0.5771, "step": 20760 }, { "epoch": 1.1386925310975944, "grad_norm": 1.746772050857544, "learning_rate": 0.0001, "loss": 0.6085, "step": 20780 }, { "epoch": 1.1397884815606334, "grad_norm": 1.7367818355560303, "learning_rate": 0.0001, "loss": 0.6401, "step": 20800 }, { "epoch": 1.1408844320236726, "grad_norm": 1.5269123315811157, "learning_rate": 0.0001, "loss": 0.678, "step": 20820 }, { "epoch": 1.1419803824867116, "grad_norm": 1.6921030282974243, "learning_rate": 0.0001, "loss": 0.6292, "step": 20840 }, { "epoch": 1.1430763329497506, "grad_norm": 1.4888218641281128, "learning_rate": 0.0001, "loss": 0.6034, "step": 20860 }, { "epoch": 1.1441722834127896, "grad_norm": 1.9028393030166626, "learning_rate": 0.0001, "loss": 0.604, "step": 20880 }, { "epoch": 1.1452682338758289, "grad_norm": 1.4702014923095703, "learning_rate": 0.0001, 
"loss": 0.6375, "step": 20900 }, { "epoch": 1.1463641843388679, "grad_norm": 1.9169687032699585, "learning_rate": 0.0001, "loss": 0.6173, "step": 20920 }, { "epoch": 1.147460134801907, "grad_norm": 1.6540066003799438, "learning_rate": 0.0001, "loss": 0.5366, "step": 20940 }, { "epoch": 1.148556085264946, "grad_norm": 1.564468502998352, "learning_rate": 0.0001, "loss": 0.5614, "step": 20960 }, { "epoch": 1.1496520357279851, "grad_norm": 1.75001060962677, "learning_rate": 0.0001, "loss": 0.6408, "step": 20980 }, { "epoch": 1.1507479861910241, "grad_norm": 1.759342074394226, "learning_rate": 0.0001, "loss": 0.6267, "step": 21000 }, { "epoch": 1.1507479861910241, "eval_loss": 0.6101906895637512, "eval_runtime": 30794.7182, "eval_samples_per_second": 2.107, "eval_steps_per_second": 0.066, "eval_wer": 46.360156536208414, "step": 21000 }, { "epoch": 1.1518439366540631, "grad_norm": 1.5255391597747803, "learning_rate": 0.0001, "loss": 0.6105, "step": 21020 }, { "epoch": 1.1529398871171024, "grad_norm": 1.8833086490631104, "learning_rate": 0.0001, "loss": 0.628, "step": 21040 }, { "epoch": 1.1540358375801414, "grad_norm": 1.6667803525924683, "learning_rate": 0.0001, "loss": 0.5758, "step": 21060 }, { "epoch": 1.1551317880431804, "grad_norm": 1.6798675060272217, "learning_rate": 0.0001, "loss": 0.5869, "step": 21080 }, { "epoch": 1.1562277385062196, "grad_norm": 1.3532921075820923, "learning_rate": 0.0001, "loss": 0.5978, "step": 21100 }, { "epoch": 1.1573236889692586, "grad_norm": 1.9075069427490234, "learning_rate": 0.0001, "loss": 0.636, "step": 21120 }, { "epoch": 1.1584196394322976, "grad_norm": 1.6051623821258545, "learning_rate": 0.0001, "loss": 0.6624, "step": 21140 }, { "epoch": 1.1595155898953367, "grad_norm": 1.6509668827056885, "learning_rate": 0.0001, "loss": 0.6098, "step": 21160 }, { "epoch": 1.1606115403583759, "grad_norm": 1.8513017892837524, "learning_rate": 0.0001, "loss": 0.5753, "step": 21180 }, { "epoch": 1.161707490821415, "grad_norm": 1.7680573463439941, "learning_rate": 0.0001, "loss": 0.5971, "step": 21200 }, { "epoch": 1.162803441284454, "grad_norm": 1.7778613567352295, "learning_rate": 0.0001, "loss": 0.6127, "step": 21220 }, { "epoch": 1.163899391747493, "grad_norm": 1.524961233139038, "learning_rate": 0.0001, "loss": 0.6181, "step": 21240 }, { "epoch": 1.1649953422105321, "grad_norm": 1.7401758432388306, "learning_rate": 0.0001, "loss": 0.6575, "step": 21260 }, { "epoch": 1.1660912926735711, "grad_norm": 1.7773276567459106, "learning_rate": 0.0001, "loss": 0.6264, "step": 21280 }, { "epoch": 1.1671872431366102, "grad_norm": 1.987033486366272, "learning_rate": 0.0001, "loss": 0.5309, "step": 21300 }, { "epoch": 1.1682831935996494, "grad_norm": 1.7425816059112549, "learning_rate": 0.0001, "loss": 0.6644, "step": 21320 }, { "epoch": 1.1693791440626884, "grad_norm": 1.6333574056625366, "learning_rate": 0.0001, "loss": 0.586, "step": 21340 }, { "epoch": 1.1704750945257274, "grad_norm": 1.6959367990493774, "learning_rate": 0.0001, "loss": 0.566, "step": 21360 }, { "epoch": 1.1715710449887666, "grad_norm": 1.6892461776733398, "learning_rate": 0.0001, "loss": 0.5788, "step": 21380 }, { "epoch": 1.1726669954518056, "grad_norm": 1.805227279663086, "learning_rate": 0.0001, "loss": 0.5984, "step": 21400 }, { "epoch": 1.1737629459148446, "grad_norm": 1.5054594278335571, "learning_rate": 0.0001, "loss": 0.5851, "step": 21420 }, { "epoch": 1.1748588963778837, "grad_norm": 1.7826286554336548, "learning_rate": 0.0001, "loss": 0.5872, "step": 21440 }, { "epoch": 1.1759548468409229, 
"grad_norm": 1.1451081037521362, "learning_rate": 0.0001, "loss": 0.5533, "step": 21460 }, { "epoch": 1.177050797303962, "grad_norm": 1.425122618675232, "learning_rate": 0.0001, "loss": 0.5934, "step": 21480 }, { "epoch": 1.178146747767001, "grad_norm": 1.6452502012252808, "learning_rate": 0.0001, "loss": 0.6008, "step": 21500 }, { "epoch": 1.17924269823004, "grad_norm": 1.6565967798233032, "learning_rate": 0.0001, "loss": 0.5945, "step": 21520 }, { "epoch": 1.1803386486930791, "grad_norm": 1.7541433572769165, "learning_rate": 0.0001, "loss": 0.6243, "step": 21540 }, { "epoch": 1.1814345991561181, "grad_norm": 1.5369079113006592, "learning_rate": 0.0001, "loss": 0.5968, "step": 21560 }, { "epoch": 1.1825305496191572, "grad_norm": 2.0941789150238037, "learning_rate": 0.0001, "loss": 0.599, "step": 21580 }, { "epoch": 1.1836265000821964, "grad_norm": 1.6641647815704346, "learning_rate": 0.0001, "loss": 0.634, "step": 21600 }, { "epoch": 1.1847224505452354, "grad_norm": 1.5256268978118896, "learning_rate": 0.0001, "loss": 0.5939, "step": 21620 }, { "epoch": 1.1858184010082744, "grad_norm": 1.5720540285110474, "learning_rate": 0.0001, "loss": 0.6005, "step": 21640 }, { "epoch": 1.1869143514713134, "grad_norm": 1.773973822593689, "learning_rate": 0.0001, "loss": 0.5607, "step": 21660 }, { "epoch": 1.1880103019343526, "grad_norm": 1.6868877410888672, "learning_rate": 0.0001, "loss": 0.6013, "step": 21680 }, { "epoch": 1.1891062523973916, "grad_norm": 1.518250584602356, "learning_rate": 0.0001, "loss": 0.612, "step": 21700 }, { "epoch": 1.1902022028604307, "grad_norm": 1.4469574689865112, "learning_rate": 0.0001, "loss": 0.5684, "step": 21720 }, { "epoch": 1.1912981533234697, "grad_norm": 1.3651134967803955, "learning_rate": 0.0001, "loss": 0.6275, "step": 21740 }, { "epoch": 1.192394103786509, "grad_norm": 1.1910673379898071, "learning_rate": 0.0001, "loss": 0.6281, "step": 21760 }, { "epoch": 1.193490054249548, "grad_norm": 1.5071038007736206, "learning_rate": 0.0001, "loss": 0.5909, "step": 21780 }, { "epoch": 1.194586004712587, "grad_norm": 1.3401362895965576, "learning_rate": 0.0001, "loss": 0.6273, "step": 21800 }, { "epoch": 1.1956819551756261, "grad_norm": 1.3563752174377441, "learning_rate": 0.0001, "loss": 0.5634, "step": 21820 }, { "epoch": 1.1967779056386652, "grad_norm": 1.5860759019851685, "learning_rate": 0.0001, "loss": 0.5996, "step": 21840 }, { "epoch": 1.1978738561017042, "grad_norm": 1.6106479167938232, "learning_rate": 0.0001, "loss": 0.6038, "step": 21860 }, { "epoch": 1.1989698065647434, "grad_norm": 1.2792236804962158, "learning_rate": 0.0001, "loss": 0.569, "step": 21880 }, { "epoch": 1.2000657570277824, "grad_norm": 1.7770174741744995, "learning_rate": 0.0001, "loss": 0.6159, "step": 21900 }, { "epoch": 1.2011617074908214, "grad_norm": 1.522647738456726, "learning_rate": 0.0001, "loss": 0.5756, "step": 21920 }, { "epoch": 1.2022576579538604, "grad_norm": 1.4393162727355957, "learning_rate": 0.0001, "loss": 0.598, "step": 21940 }, { "epoch": 1.2033536084168996, "grad_norm": 1.9221006631851196, "learning_rate": 0.0001, "loss": 0.5736, "step": 21960 }, { "epoch": 1.2044495588799387, "grad_norm": 1.4930051565170288, "learning_rate": 0.0001, "loss": 0.591, "step": 21980 }, { "epoch": 1.2055455093429777, "grad_norm": 1.1012686491012573, "learning_rate": 0.0001, "loss": 0.6139, "step": 22000 }, { "epoch": 1.2055455093429777, "eval_loss": 0.6032226085662842, "eval_runtime": 30533.3597, "eval_samples_per_second": 2.125, "eval_steps_per_second": 0.066, "eval_wer": 
36.777449473248126, "step": 22000 }, { "epoch": 1.2066414598060167, "grad_norm": 1.501633644104004, "learning_rate": 0.0001, "loss": 0.55, "step": 22020 }, { "epoch": 1.207737410269056, "grad_norm": 1.7444618940353394, "learning_rate": 0.0001, "loss": 0.62, "step": 22040 }, { "epoch": 1.208833360732095, "grad_norm": 1.3666551113128662, "learning_rate": 0.0001, "loss": 0.6087, "step": 22060 }, { "epoch": 1.209929311195134, "grad_norm": 1.5576726198196411, "learning_rate": 0.0001, "loss": 0.5958, "step": 22080 }, { "epoch": 1.2110252616581731, "grad_norm": 1.457824945449829, "learning_rate": 0.0001, "loss": 0.5588, "step": 22100 }, { "epoch": 1.2121212121212122, "grad_norm": 1.621120810508728, "learning_rate": 0.0001, "loss": 0.5574, "step": 22120 }, { "epoch": 1.2132171625842512, "grad_norm": 1.1742050647735596, "learning_rate": 0.0001, "loss": 0.5498, "step": 22140 }, { "epoch": 1.2143131130472902, "grad_norm": 1.3734312057495117, "learning_rate": 0.0001, "loss": 0.5899, "step": 22160 }, { "epoch": 1.2154090635103294, "grad_norm": 2.046262741088867, "learning_rate": 0.0001, "loss": 0.6574, "step": 22180 }, { "epoch": 1.2165050139733684, "grad_norm": 1.3114126920700073, "learning_rate": 0.0001, "loss": 0.607, "step": 22200 }, { "epoch": 1.2176009644364074, "grad_norm": 1.5335580110549927, "learning_rate": 0.0001, "loss": 0.5828, "step": 22220 }, { "epoch": 1.2186969148994464, "grad_norm": 1.7492777109146118, "learning_rate": 0.0001, "loss": 0.557, "step": 22240 }, { "epoch": 1.2197928653624857, "grad_norm": 1.3159027099609375, "learning_rate": 0.0001, "loss": 0.6923, "step": 22260 }, { "epoch": 1.2208888158255247, "grad_norm": 1.5170766115188599, "learning_rate": 0.0001, "loss": 0.7194, "step": 22280 }, { "epoch": 1.2219847662885637, "grad_norm": 1.336846113204956, "learning_rate": 0.0001, "loss": 0.583, "step": 22300 }, { "epoch": 1.223080716751603, "grad_norm": 1.768999457359314, "learning_rate": 0.0001, "loss": 0.7009, "step": 22320 }, { "epoch": 1.224176667214642, "grad_norm": 1.6113883256912231, "learning_rate": 0.0001, "loss": 0.5747, "step": 22340 }, { "epoch": 1.225272617677681, "grad_norm": 1.4983850717544556, "learning_rate": 0.0001, "loss": 0.5477, "step": 22360 }, { "epoch": 1.2263685681407201, "grad_norm": 1.380181908607483, "learning_rate": 0.0001, "loss": 0.6003, "step": 22380 }, { "epoch": 1.2274645186037592, "grad_norm": 1.4921499490737915, "learning_rate": 0.0001, "loss": 0.586, "step": 22400 }, { "epoch": 1.2285604690667982, "grad_norm": 1.3056907653808594, "learning_rate": 0.0001, "loss": 0.5393, "step": 22420 }, { "epoch": 1.2296564195298372, "grad_norm": 1.702541470527649, "learning_rate": 0.0001, "loss": 0.6474, "step": 22440 }, { "epoch": 1.2307523699928764, "grad_norm": 1.703065276145935, "learning_rate": 0.0001, "loss": 0.6102, "step": 22460 }, { "epoch": 1.2318483204559154, "grad_norm": 1.7823582887649536, "learning_rate": 0.0001, "loss": 0.529, "step": 22480 }, { "epoch": 1.2329442709189544, "grad_norm": 1.5001643896102905, "learning_rate": 0.0001, "loss": 0.5936, "step": 22500 }, { "epoch": 1.2340402213819934, "grad_norm": 1.4515180587768555, "learning_rate": 0.0001, "loss": 0.5817, "step": 22520 }, { "epoch": 1.2351361718450327, "grad_norm": 1.6166529655456543, "learning_rate": 0.0001, "loss": 0.5901, "step": 22540 }, { "epoch": 1.2362321223080717, "grad_norm": 1.6117253303527832, "learning_rate": 0.0001, "loss": 0.668, "step": 22560 }, { "epoch": 1.2373280727711107, "grad_norm": 1.4674168825149536, "learning_rate": 0.0001, "loss": 0.5783, "step": 22580 }, { 
"epoch": 1.23842402323415, "grad_norm": 1.5282671451568604, "learning_rate": 0.0001, "loss": 0.6069, "step": 22600 }, { "epoch": 1.239519973697189, "grad_norm": 1.446772575378418, "learning_rate": 0.0001, "loss": 0.5645, "step": 22620 }, { "epoch": 1.240615924160228, "grad_norm": 1.7833497524261475, "learning_rate": 0.0001, "loss": 0.5555, "step": 22640 }, { "epoch": 1.241711874623267, "grad_norm": 1.8573659658432007, "learning_rate": 0.0001, "loss": 0.6258, "step": 22660 }, { "epoch": 1.2428078250863062, "grad_norm": 1.375735878944397, "learning_rate": 0.0001, "loss": 0.5316, "step": 22680 }, { "epoch": 1.2439037755493452, "grad_norm": 1.4545280933380127, "learning_rate": 0.0001, "loss": 0.609, "step": 22700 }, { "epoch": 1.2449997260123842, "grad_norm": 1.499182105064392, "learning_rate": 0.0001, "loss": 0.6205, "step": 22720 }, { "epoch": 1.2460956764754232, "grad_norm": 1.418739914894104, "learning_rate": 0.0001, "loss": 0.5038, "step": 22740 }, { "epoch": 1.2471916269384624, "grad_norm": 1.4958001375198364, "learning_rate": 0.0001, "loss": 0.5607, "step": 22760 }, { "epoch": 1.2482875774015014, "grad_norm": 1.7422837018966675, "learning_rate": 0.0001, "loss": 0.5593, "step": 22780 }, { "epoch": 1.2493835278645404, "grad_norm": 1.536526083946228, "learning_rate": 0.0001, "loss": 0.5518, "step": 22800 }, { "epoch": 1.2504794783275797, "grad_norm": 1.2415670156478882, "learning_rate": 0.0001, "loss": 0.625, "step": 22820 }, { "epoch": 1.2515754287906187, "grad_norm": 1.2609211206436157, "learning_rate": 0.0001, "loss": 0.5213, "step": 22840 }, { "epoch": 1.2526713792536577, "grad_norm": 1.4843876361846924, "learning_rate": 0.0001, "loss": 0.5618, "step": 22860 }, { "epoch": 1.253767329716697, "grad_norm": 1.7089099884033203, "learning_rate": 0.0001, "loss": 0.587, "step": 22880 }, { "epoch": 1.254863280179736, "grad_norm": 1.8894917964935303, "learning_rate": 0.0001, "loss": 0.5952, "step": 22900 }, { "epoch": 1.255959230642775, "grad_norm": 1.3892401456832886, "learning_rate": 0.0001, "loss": 0.5949, "step": 22920 }, { "epoch": 1.257055181105814, "grad_norm": 1.8688722848892212, "learning_rate": 0.0001, "loss": 0.6524, "step": 22940 }, { "epoch": 1.2581511315688532, "grad_norm": 1.8726931810379028, "learning_rate": 0.0001, "loss": 0.5547, "step": 22960 }, { "epoch": 1.2592470820318922, "grad_norm": 1.9214690923690796, "learning_rate": 0.0001, "loss": 0.618, "step": 22980 }, { "epoch": 1.2603430324949312, "grad_norm": 1.6148467063903809, "learning_rate": 0.0001, "loss": 0.4822, "step": 23000 }, { "epoch": 1.2603430324949312, "eval_loss": 0.599087655544281, "eval_runtime": 30610.1389, "eval_samples_per_second": 2.12, "eval_steps_per_second": 0.066, "eval_wer": 40.07337206794192, "step": 23000 }, { "epoch": 1.2614389829579702, "grad_norm": 1.536855936050415, "learning_rate": 0.0001, "loss": 0.5852, "step": 23020 }, { "epoch": 1.2625349334210094, "grad_norm": 1.884334683418274, "learning_rate": 0.0001, "loss": 0.6474, "step": 23040 }, { "epoch": 1.2636308838840484, "grad_norm": 1.359174132347107, "learning_rate": 0.0001, "loss": 0.6204, "step": 23060 }, { "epoch": 1.2647268343470874, "grad_norm": 1.7376055717468262, "learning_rate": 0.0001, "loss": 0.5524, "step": 23080 }, { "epoch": 1.2658227848101267, "grad_norm": 1.6594703197479248, "learning_rate": 0.0001, "loss": 0.5396, "step": 23100 }, { "epoch": 1.2669187352731657, "grad_norm": 1.4215294122695923, "learning_rate": 0.0001, "loss": 0.5842, "step": 23120 }, { "epoch": 1.2680146857362047, "grad_norm": 1.7680842876434326, 
"learning_rate": 0.0001, "loss": 0.5625, "step": 23140 }, { "epoch": 1.269110636199244, "grad_norm": 1.3353180885314941, "learning_rate": 0.0001, "loss": 0.5775, "step": 23160 }, { "epoch": 1.270206586662283, "grad_norm": 1.450649619102478, "learning_rate": 0.0001, "loss": 0.5319, "step": 23180 }, { "epoch": 1.271302537125322, "grad_norm": 1.7398178577423096, "learning_rate": 0.0001, "loss": 0.5615, "step": 23200 }, { "epoch": 1.272398487588361, "grad_norm": 1.6940994262695312, "learning_rate": 0.0001, "loss": 0.6069, "step": 23220 }, { "epoch": 1.2734944380514, "grad_norm": 1.899994969367981, "learning_rate": 0.0001, "loss": 0.6491, "step": 23240 }, { "epoch": 1.2745903885144392, "grad_norm": 1.457036018371582, "learning_rate": 0.0001, "loss": 0.5762, "step": 23260 }, { "epoch": 1.2756863389774782, "grad_norm": 1.4215611219406128, "learning_rate": 0.0001, "loss": 0.5966, "step": 23280 }, { "epoch": 1.2767822894405172, "grad_norm": 1.7165329456329346, "learning_rate": 0.0001, "loss": 0.5897, "step": 23300 }, { "epoch": 1.2778782399035564, "grad_norm": 1.524688720703125, "learning_rate": 0.0001, "loss": 0.6541, "step": 23320 }, { "epoch": 1.2789741903665954, "grad_norm": 1.3125251531600952, "learning_rate": 0.0001, "loss": 0.521, "step": 23340 }, { "epoch": 1.2800701408296344, "grad_norm": 1.5787118673324585, "learning_rate": 0.0001, "loss": 0.5453, "step": 23360 }, { "epoch": 1.2811660912926737, "grad_norm": 1.636098027229309, "learning_rate": 0.0001, "loss": 0.5606, "step": 23380 }, { "epoch": 1.2822620417557127, "grad_norm": 1.4963462352752686, "learning_rate": 0.0001, "loss": 0.6251, "step": 23400 }, { "epoch": 1.2833579922187517, "grad_norm": 1.3213664293289185, "learning_rate": 0.0001, "loss": 0.6293, "step": 23420 }, { "epoch": 1.284453942681791, "grad_norm": 1.6737782955169678, "learning_rate": 0.0001, "loss": 0.6499, "step": 23440 }, { "epoch": 1.28554989314483, "grad_norm": 1.525976300239563, "learning_rate": 0.0001, "loss": 0.6073, "step": 23460 }, { "epoch": 1.286645843607869, "grad_norm": 1.3534733057022095, "learning_rate": 0.0001, "loss": 0.5434, "step": 23480 }, { "epoch": 1.287741794070908, "grad_norm": 1.8090375661849976, "learning_rate": 0.0001, "loss": 0.6806, "step": 23500 }, { "epoch": 1.288837744533947, "grad_norm": 1.7110000848770142, "learning_rate": 0.0001, "loss": 0.5678, "step": 23520 }, { "epoch": 1.2899336949969862, "grad_norm": 1.6300121545791626, "learning_rate": 0.0001, "loss": 0.6674, "step": 23540 }, { "epoch": 1.2910296454600252, "grad_norm": 1.4068278074264526, "learning_rate": 0.0001, "loss": 0.5294, "step": 23560 }, { "epoch": 1.2921255959230642, "grad_norm": 1.6015020608901978, "learning_rate": 0.0001, "loss": 0.5791, "step": 23580 }, { "epoch": 1.2932215463861034, "grad_norm": 1.7282171249389648, "learning_rate": 0.0001, "loss": 0.6358, "step": 23600 }, { "epoch": 1.2943174968491424, "grad_norm": 1.3395479917526245, "learning_rate": 0.0001, "loss": 0.5972, "step": 23620 }, { "epoch": 1.2954134473121814, "grad_norm": 1.5393882989883423, "learning_rate": 0.0001, "loss": 0.6243, "step": 23640 }, { "epoch": 1.2965093977752207, "grad_norm": 2.0010182857513428, "learning_rate": 0.0001, "loss": 0.6047, "step": 23660 }, { "epoch": 1.2976053482382597, "grad_norm": 1.742031455039978, "learning_rate": 0.0001, "loss": 0.586, "step": 23680 }, { "epoch": 1.2987012987012987, "grad_norm": 1.5596591234207153, "learning_rate": 0.0001, "loss": 0.5545, "step": 23700 }, { "epoch": 1.2997972491643377, "grad_norm": 1.2098394632339478, "learning_rate": 0.0001, "loss": 
0.6264, "step": 23720 }, { "epoch": 1.3008931996273767, "grad_norm": 1.5962443351745605, "learning_rate": 0.0001, "loss": 0.5827, "step": 23740 }, { "epoch": 1.301989150090416, "grad_norm": 1.7482990026474, "learning_rate": 0.0001, "loss": 0.6113, "step": 23760 }, { "epoch": 1.303085100553455, "grad_norm": 1.6832690238952637, "learning_rate": 0.0001, "loss": 0.5231, "step": 23780 }, { "epoch": 1.304181051016494, "grad_norm": 1.2904006242752075, "learning_rate": 0.0001, "loss": 0.5472, "step": 23800 }, { "epoch": 1.3052770014795332, "grad_norm": 1.260377287864685, "learning_rate": 0.0001, "loss": 0.5555, "step": 23820 }, { "epoch": 1.3063729519425722, "grad_norm": 1.6346126794815063, "learning_rate": 0.0001, "loss": 0.5247, "step": 23840 }, { "epoch": 1.3074689024056112, "grad_norm": 1.5854252576828003, "learning_rate": 0.0001, "loss": 0.5784, "step": 23860 }, { "epoch": 1.3085648528686504, "grad_norm": 1.754293441772461, "learning_rate": 0.0001, "loss": 0.6555, "step": 23880 }, { "epoch": 1.3096608033316894, "grad_norm": 1.642504096031189, "learning_rate": 0.0001, "loss": 0.5822, "step": 23900 }, { "epoch": 1.3107567537947284, "grad_norm": 1.4986265897750854, "learning_rate": 0.0001, "loss": 0.6246, "step": 23920 }, { "epoch": 1.3118527042577677, "grad_norm": 1.6175062656402588, "learning_rate": 0.0001, "loss": 0.5616, "step": 23940 }, { "epoch": 1.3129486547208067, "grad_norm": 1.6189128160476685, "learning_rate": 0.0001, "loss": 0.6108, "step": 23960 }, { "epoch": 1.3140446051838457, "grad_norm": 1.6187801361083984, "learning_rate": 0.0001, "loss": 0.5992, "step": 23980 }, { "epoch": 1.3151405556468847, "grad_norm": 1.396136999130249, "learning_rate": 0.0001, "loss": 0.5409, "step": 24000 }, { "epoch": 1.3151405556468847, "eval_loss": 0.5925264358520508, "eval_runtime": 30722.0761, "eval_samples_per_second": 2.112, "eval_steps_per_second": 0.066, "eval_wer": 50.53154519589281, "step": 24000 }, { "epoch": 1.3162365061099237, "grad_norm": 1.4604203701019287, "learning_rate": 0.0001, "loss": 0.5795, "step": 24020 }, { "epoch": 1.317332456572963, "grad_norm": 1.667830228805542, "learning_rate": 0.0001, "loss": 0.5799, "step": 24040 }, { "epoch": 1.318428407036002, "grad_norm": 1.4066877365112305, "learning_rate": 0.0001, "loss": 0.603, "step": 24060 }, { "epoch": 1.319524357499041, "grad_norm": 1.6986689567565918, "learning_rate": 0.0001, "loss": 0.5943, "step": 24080 }, { "epoch": 1.3206203079620802, "grad_norm": 2.00864839553833, "learning_rate": 0.0001, "loss": 0.646, "step": 24100 }, { "epoch": 1.3217162584251192, "grad_norm": 1.3557894229888916, "learning_rate": 0.0001, "loss": 0.6324, "step": 24120 }, { "epoch": 1.3228122088881582, "grad_norm": 1.442983865737915, "learning_rate": 0.0001, "loss": 0.5326, "step": 24140 }, { "epoch": 1.3239081593511974, "grad_norm": 1.4924156665802002, "learning_rate": 0.0001, "loss": 0.5464, "step": 24160 }, { "epoch": 1.3250041098142364, "grad_norm": 1.408599615097046, "learning_rate": 0.0001, "loss": 0.5989, "step": 24180 }, { "epoch": 1.3261000602772754, "grad_norm": 1.6432658433914185, "learning_rate": 0.0001, "loss": 0.6034, "step": 24200 }, { "epoch": 1.3271960107403145, "grad_norm": 1.5687427520751953, "learning_rate": 0.0001, "loss": 0.5222, "step": 24220 }, { "epoch": 1.3282919612033537, "grad_norm": 1.3878777027130127, "learning_rate": 0.0001, "loss": 0.5913, "step": 24240 }, { "epoch": 1.3293879116663927, "grad_norm": 1.276931643486023, "learning_rate": 0.0001, "loss": 0.5509, "step": 24260 }, { "epoch": 1.3304838621294317, "grad_norm": 
1.5601953268051147, "learning_rate": 0.0001, "loss": 0.6077, "step": 24280 }, { "epoch": 1.3315798125924707, "grad_norm": 1.9250099658966064, "learning_rate": 0.0001, "loss": 0.5629, "step": 24300 }, { "epoch": 1.33267576305551, "grad_norm": 1.314794659614563, "learning_rate": 0.0001, "loss": 0.6085, "step": 24320 }, { "epoch": 1.333771713518549, "grad_norm": 1.4445682764053345, "learning_rate": 0.0001, "loss": 0.5725, "step": 24340 }, { "epoch": 1.334867663981588, "grad_norm": 1.6029905080795288, "learning_rate": 0.0001, "loss": 0.6448, "step": 24360 }, { "epoch": 1.3359636144446272, "grad_norm": 1.969078540802002, "learning_rate": 0.0001, "loss": 0.5397, "step": 24380 }, { "epoch": 1.3370595649076662, "grad_norm": 1.900762677192688, "learning_rate": 0.0001, "loss": 0.6243, "step": 24400 }, { "epoch": 1.3381555153707052, "grad_norm": 1.8829255104064941, "learning_rate": 0.0001, "loss": 0.5744, "step": 24420 }, { "epoch": 1.3392514658337444, "grad_norm": 1.4927318096160889, "learning_rate": 0.0001, "loss": 0.6023, "step": 24440 }, { "epoch": 1.3403474162967834, "grad_norm": 1.7608602046966553, "learning_rate": 0.0001, "loss": 0.5654, "step": 24460 }, { "epoch": 1.3414433667598225, "grad_norm": 1.2257969379425049, "learning_rate": 0.0001, "loss": 0.5953, "step": 24480 }, { "epoch": 1.3425393172228615, "grad_norm": 1.5768262147903442, "learning_rate": 0.0001, "loss": 0.5972, "step": 24500 }, { "epoch": 1.3436352676859005, "grad_norm": 1.5148476362228394, "learning_rate": 0.0001, "loss": 0.6313, "step": 24520 }, { "epoch": 1.3447312181489397, "grad_norm": 1.347442865371704, "learning_rate": 0.0001, "loss": 0.5867, "step": 24540 }, { "epoch": 1.3458271686119787, "grad_norm": 1.3003042936325073, "learning_rate": 0.0001, "loss": 0.5709, "step": 24560 }, { "epoch": 1.3469231190750177, "grad_norm": 1.5577054023742676, "learning_rate": 0.0001, "loss": 0.5719, "step": 24580 }, { "epoch": 1.348019069538057, "grad_norm": 1.6141449213027954, "learning_rate": 0.0001, "loss": 0.5684, "step": 24600 }, { "epoch": 1.349115020001096, "grad_norm": 1.515576958656311, "learning_rate": 0.0001, "loss": 0.5421, "step": 24620 }, { "epoch": 1.350210970464135, "grad_norm": 1.62236487865448, "learning_rate": 0.0001, "loss": 0.6338, "step": 24640 }, { "epoch": 1.3513069209271742, "grad_norm": 1.5557656288146973, "learning_rate": 0.0001, "loss": 0.55, "step": 24660 }, { "epoch": 1.3524028713902132, "grad_norm": 1.9079481363296509, "learning_rate": 0.0001, "loss": 0.6352, "step": 24680 }, { "epoch": 1.3534988218532522, "grad_norm": 1.3640868663787842, "learning_rate": 0.0001, "loss": 0.6484, "step": 24700 }, { "epoch": 1.3545947723162914, "grad_norm": 1.831858515739441, "learning_rate": 0.0001, "loss": 0.5577, "step": 24720 }, { "epoch": 1.3556907227793304, "grad_norm": 1.5431773662567139, "learning_rate": 0.0001, "loss": 0.5918, "step": 24740 }, { "epoch": 1.3567866732423695, "grad_norm": 1.1655539274215698, "learning_rate": 0.0001, "loss": 0.5892, "step": 24760 }, { "epoch": 1.3578826237054085, "grad_norm": 1.7395954132080078, "learning_rate": 0.0001, "loss": 0.5416, "step": 24780 }, { "epoch": 1.3589785741684475, "grad_norm": 1.89164400100708, "learning_rate": 0.0001, "loss": 0.5708, "step": 24800 }, { "epoch": 1.3600745246314867, "grad_norm": 1.5013233423233032, "learning_rate": 0.0001, "loss": 0.5234, "step": 24820 }, { "epoch": 1.3611704750945257, "grad_norm": 1.4959980249404907, "learning_rate": 0.0001, "loss": 0.7578, "step": 24840 }, { "epoch": 1.3622664255575647, "grad_norm": 1.3962562084197998, 
"learning_rate": 0.0001, "loss": 0.564, "step": 24860 }, { "epoch": 1.363362376020604, "grad_norm": 1.370695948600769, "learning_rate": 0.0001, "loss": 0.5533, "step": 24880 }, { "epoch": 1.364458326483643, "grad_norm": 1.629671573638916, "learning_rate": 0.0001, "loss": 0.5463, "step": 24900 }, { "epoch": 1.365554276946682, "grad_norm": 1.6115111112594604, "learning_rate": 0.0001, "loss": 0.5837, "step": 24920 }, { "epoch": 1.3666502274097212, "grad_norm": 1.756898283958435, "learning_rate": 0.0001, "loss": 0.5784, "step": 24940 }, { "epoch": 1.3677461778727602, "grad_norm": 1.3999930620193481, "learning_rate": 0.0001, "loss": 0.6007, "step": 24960 }, { "epoch": 1.3688421283357992, "grad_norm": 1.3553202152252197, "learning_rate": 0.0001, "loss": 0.5996, "step": 24980 }, { "epoch": 1.3699380787988382, "grad_norm": 1.3020998239517212, "learning_rate": 0.0001, "loss": 0.5006, "step": 25000 }, { "epoch": 1.3699380787988382, "eval_loss": 0.5878660678863525, "eval_runtime": 30562.5187, "eval_samples_per_second": 2.123, "eval_steps_per_second": 0.066, "eval_wer": 44.19117823283905, "step": 25000 }, { "epoch": 1.3710340292618772, "grad_norm": 1.622175931930542, "learning_rate": 0.0001, "loss": 0.6049, "step": 25020 }, { "epoch": 1.3721299797249165, "grad_norm": 1.6178827285766602, "learning_rate": 0.0001, "loss": 0.6272, "step": 25040 }, { "epoch": 1.3732259301879555, "grad_norm": 1.9687530994415283, "learning_rate": 0.0001, "loss": 0.6194, "step": 25060 }, { "epoch": 1.3743218806509945, "grad_norm": 1.5638937950134277, "learning_rate": 0.0001, "loss": 0.6033, "step": 25080 }, { "epoch": 1.3754178311140337, "grad_norm": 1.5316487550735474, "learning_rate": 0.0001, "loss": 0.6847, "step": 25100 }, { "epoch": 1.3765137815770727, "grad_norm": 2.431041717529297, "learning_rate": 0.0001, "loss": 0.5975, "step": 25120 }, { "epoch": 1.3776097320401117, "grad_norm": 1.2522825002670288, "learning_rate": 0.0001, "loss": 0.6188, "step": 25140 }, { "epoch": 1.378705682503151, "grad_norm": 1.428832769393921, "learning_rate": 0.0001, "loss": 0.4743, "step": 25160 }, { "epoch": 1.37980163296619, "grad_norm": 1.6147892475128174, "learning_rate": 0.0001, "loss": 0.6463, "step": 25180 }, { "epoch": 1.380897583429229, "grad_norm": 1.6648330688476562, "learning_rate": 0.0001, "loss": 0.556, "step": 25200 }, { "epoch": 1.3819935338922682, "grad_norm": 1.4745285511016846, "learning_rate": 0.0001, "loss": 0.5997, "step": 25220 }, { "epoch": 1.3830894843553072, "grad_norm": 1.433664321899414, "learning_rate": 0.0001, "loss": 0.5288, "step": 25240 }, { "epoch": 1.3841854348183462, "grad_norm": 1.264054775238037, "learning_rate": 0.0001, "loss": 0.6075, "step": 25260 }, { "epoch": 1.3852813852813852, "grad_norm": 1.7683794498443604, "learning_rate": 0.0001, "loss": 0.6011, "step": 25280 }, { "epoch": 1.3863773357444242, "grad_norm": 1.5316460132598877, "learning_rate": 0.0001, "loss": 0.5799, "step": 25300 }, { "epoch": 1.3874732862074635, "grad_norm": 3.361269950866699, "learning_rate": 0.0001, "loss": 0.6577, "step": 25320 }, { "epoch": 1.3885692366705025, "grad_norm": 1.7918739318847656, "learning_rate": 0.0001, "loss": 0.6106, "step": 25340 }, { "epoch": 1.3896651871335415, "grad_norm": 1.5828975439071655, "learning_rate": 0.0001, "loss": 0.5683, "step": 25360 }, { "epoch": 1.3907611375965807, "grad_norm": 1.9565653800964355, "learning_rate": 0.0001, "loss": 0.5368, "step": 25380 }, { "epoch": 1.3918570880596197, "grad_norm": 1.446603775024414, "learning_rate": 0.0001, "loss": 0.551, "step": 25400 }, { "epoch": 
1.3929530385226587, "grad_norm": 1.3377403020858765, "learning_rate": 0.0001, "loss": 0.5889, "step": 25420 }, { "epoch": 1.394048988985698, "grad_norm": 1.5956981182098389, "learning_rate": 0.0001, "loss": 0.5962, "step": 25440 }, { "epoch": 1.395144939448737, "grad_norm": 2.122570276260376, "learning_rate": 0.0001, "loss": 0.6044, "step": 25460 }, { "epoch": 1.396240889911776, "grad_norm": 1.8314157724380493, "learning_rate": 0.0001, "loss": 0.5612, "step": 25480 }, { "epoch": 1.397336840374815, "grad_norm": 1.327453851699829, "learning_rate": 0.0001, "loss": 0.5214, "step": 25500 }, { "epoch": 1.3984327908378542, "grad_norm": 2.1016182899475098, "learning_rate": 0.0001, "loss": 0.594, "step": 25520 }, { "epoch": 1.3995287413008932, "grad_norm": 1.5892709493637085, "learning_rate": 0.0001, "loss": 0.5927, "step": 25540 }, { "epoch": 1.4006246917639322, "grad_norm": 1.8178175687789917, "learning_rate": 0.0001, "loss": 0.5204, "step": 25560 }, { "epoch": 1.4017206422269712, "grad_norm": 1.3808900117874146, "learning_rate": 0.0001, "loss": 0.5434, "step": 25580 }, { "epoch": 1.4028165926900105, "grad_norm": 1.4849821329116821, "learning_rate": 0.0001, "loss": 0.6208, "step": 25600 }, { "epoch": 1.4039125431530495, "grad_norm": 1.5404740571975708, "learning_rate": 0.0001, "loss": 0.5492, "step": 25620 }, { "epoch": 1.4050084936160885, "grad_norm": 1.4663268327713013, "learning_rate": 0.0001, "loss": 0.5275, "step": 25640 }, { "epoch": 1.4061044440791277, "grad_norm": 1.5518896579742432, "learning_rate": 0.0001, "loss": 0.5966, "step": 25660 }, { "epoch": 1.4072003945421667, "grad_norm": 1.5495002269744873, "learning_rate": 0.0001, "loss": 0.5451, "step": 25680 }, { "epoch": 1.4082963450052057, "grad_norm": 1.5400805473327637, "learning_rate": 0.0001, "loss": 0.6543, "step": 25700 }, { "epoch": 1.409392295468245, "grad_norm": 1.5806297063827515, "learning_rate": 0.0001, "loss": 0.5849, "step": 25720 }, { "epoch": 1.410488245931284, "grad_norm": 1.574846863746643, "learning_rate": 0.0001, "loss": 0.5653, "step": 25740 }, { "epoch": 1.411584196394323, "grad_norm": 1.4742170572280884, "learning_rate": 0.0001, "loss": 0.5173, "step": 25760 }, { "epoch": 1.412680146857362, "grad_norm": 2.009706735610962, "learning_rate": 0.0001, "loss": 0.5467, "step": 25780 }, { "epoch": 1.413776097320401, "grad_norm": 1.9192876815795898, "learning_rate": 0.0001, "loss": 0.6211, "step": 25800 }, { "epoch": 1.4148720477834402, "grad_norm": 1.2425312995910645, "learning_rate": 0.0001, "loss": 0.5398, "step": 25820 }, { "epoch": 1.4159679982464792, "grad_norm": 1.8032817840576172, "learning_rate": 0.0001, "loss": 0.6143, "step": 25840 }, { "epoch": 1.4170639487095182, "grad_norm": 1.5889620780944824, "learning_rate": 0.0001, "loss": 0.6361, "step": 25860 }, { "epoch": 1.4181598991725575, "grad_norm": 1.3312866687774658, "learning_rate": 0.0001, "loss": 0.637, "step": 25880 }, { "epoch": 1.4192558496355965, "grad_norm": 1.4662201404571533, "learning_rate": 0.0001, "loss": 0.6206, "step": 25900 }, { "epoch": 1.4203518000986355, "grad_norm": 1.7590441703796387, "learning_rate": 0.0001, "loss": 0.5637, "step": 25920 }, { "epoch": 1.4214477505616747, "grad_norm": 1.5360428094863892, "learning_rate": 0.0001, "loss": 0.6033, "step": 25940 }, { "epoch": 1.4225437010247137, "grad_norm": 1.6273736953735352, "learning_rate": 0.0001, "loss": 0.559, "step": 25960 }, { "epoch": 1.4236396514877527, "grad_norm": 1.3700859546661377, "learning_rate": 0.0001, "loss": 0.637, "step": 25980 }, { "epoch": 1.4247356019507917, "grad_norm": 
1.419872522354126, "learning_rate": 0.0001, "loss": 0.5674, "step": 26000 }, { "epoch": 1.4247356019507917, "eval_loss": 0.5845187306404114, "eval_runtime": 30268.7098, "eval_samples_per_second": 2.144, "eval_steps_per_second": 0.067, "eval_wer": 33.472796184515516, "step": 26000 } ], "logging_steps": 20, "max_steps": 54747, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.278115609397166e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }
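
Note: the object above ends with the trainer bookkeeping fields (max_steps 54747, eval/save every 1000 steps, train_batch_size 8); everything before that is the per-step log_history. Below is a minimal, hypothetical Python sketch for summarizing the evaluation entries recorded in this file. It is not part of the checkpoint itself; the file path is an assumption taken from "best_model_checkpoint" and should be adjusted to wherever this JSON is actually stored.

import json

# Minimal sketch: summarize the eval history recorded in trainer_state.json.
# STATE_PATH is assumed from "best_model_checkpoint" above; adjust as needed.
STATE_PATH = "./whisper-distil-v3/checkpoint-26000/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Evaluation entries are the log_history items that carry "eval_wer".
evals = [e for e in state["log_history"] if "eval_wer" in e]

for e in evals:
    print(f'step {e["step"]:>6}  eval_loss {e["eval_loss"]:.4f}  eval_wer {e["eval_wer"]:.2f}')

# The lowest WER should match "best_metric" / "best_model_checkpoint" in the state file.
best = min(evals, key=lambda e: e["eval_wer"])
print(f'best eval_wer {best["eval_wer"]:.2f} at step {best["step"]}')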