checkpoint-26000/trainer_state.json
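A minimal Python sketch for inspecting a trainer state file like this one. It assumes the standard Hugging Face Trainer layout shown below (top-level "best_metric" and "best_model_checkpoint", plus a "log_history" list that mixes training-loss entries with eval entries); the file path is a placeholder, and "best_metric" is read on the assumption that it tracks eval_wer. The raw JSON follows the sketch.

    # Sketch only: summarize a Hugging Face Trainer trainer_state.json.
    # The path is a placeholder -- point it at a local copy of this file.
    import json

    with open("checkpoint-26000/trainer_state.json") as f:
        state = json.load(f)

    # Top-level summary; best_metric is assumed here to be the WER of the
    # best checkpoint, matching the eval_wer entries logged below.
    print("best_metric:", state["best_metric"])
    print("best_model_checkpoint:", state["best_model_checkpoint"])

    # log_history mixes per-step training logs (key "loss") with eval logs
    # (keys "eval_loss", "eval_wer", ...); split them apart.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    eval_logs = [e for e in state["log_history"] if "eval_wer" in e]

    # Evaluation runs every eval_steps (1000) steps; print the WER curve.
    for e in eval_logs:
        print(f"step {e['step']:>6}  eval_loss={e['eval_loss']:.3f}  eval_wer={e['eval_wer']:.2f}")
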
{
"best_metric": 33.472796184515516,
"best_model_checkpoint": "./whisper-distil-v3/checkpoint-26000",
"epoch": 1.4247356019507917,
"eval_steps": 1000,
"global_step": 26000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010959504630390707,
"grad_norm": 5.807405471801758,
"learning_rate": 3.6e-06,
"loss": 8.6037,
"step": 20
},
{
"epoch": 0.0021919009260781414,
"grad_norm": 5.726860523223877,
"learning_rate": 7.6e-06,
"loss": 8.4874,
"step": 40
},
{
"epoch": 0.003287851389117212,
"grad_norm": 7.615314960479736,
"learning_rate": 1.16e-05,
"loss": 8.0934,
"step": 60
},
{
"epoch": 0.004383801852156283,
"grad_norm": 7.089465618133545,
"learning_rate": 1.56e-05,
"loss": 7.4227,
"step": 80
},
{
"epoch": 0.005479752315195353,
"grad_norm": 5.158086776733398,
"learning_rate": 1.9600000000000002e-05,
"loss": 6.3418,
"step": 100
},
{
"epoch": 0.006575702778234424,
"grad_norm": 3.288583278656006,
"learning_rate": 2.3400000000000003e-05,
"loss": 5.2747,
"step": 120
},
{
"epoch": 0.007671653241273495,
"grad_norm": 3.1715681552886963,
"learning_rate": 2.7400000000000002e-05,
"loss": 4.3075,
"step": 140
},
{
"epoch": 0.008767603704312565,
"grad_norm": 3.033198833465576,
"learning_rate": 3.1400000000000004e-05,
"loss": 3.631,
"step": 160
},
{
"epoch": 0.009863554167351636,
"grad_norm": 3.027251720428467,
"learning_rate": 3.54e-05,
"loss": 3.2186,
"step": 180
},
{
"epoch": 0.010959504630390707,
"grad_norm": 2.9063901901245117,
"learning_rate": 3.94e-05,
"loss": 2.9226,
"step": 200
},
{
"epoch": 0.012055455093429777,
"grad_norm": 3.1165690422058105,
"learning_rate": 4.3400000000000005e-05,
"loss": 2.8402,
"step": 220
},
{
"epoch": 0.013151405556468848,
"grad_norm": 2.7977383136749268,
"learning_rate": 4.74e-05,
"loss": 2.613,
"step": 240
},
{
"epoch": 0.014247356019507919,
"grad_norm": 3.7818286418914795,
"learning_rate": 5.14e-05,
"loss": 2.377,
"step": 260
},
{
"epoch": 0.01534330648254699,
"grad_norm": 3.2088804244995117,
"learning_rate": 5.5400000000000005e-05,
"loss": 2.3204,
"step": 280
},
{
"epoch": 0.01643925694558606,
"grad_norm": 3.2518157958984375,
"learning_rate": 5.94e-05,
"loss": 2.1812,
"step": 300
},
{
"epoch": 0.01753520740862513,
"grad_norm": 3.725226640701294,
"learning_rate": 6.340000000000001e-05,
"loss": 2.158,
"step": 320
},
{
"epoch": 0.0186311578716642,
"grad_norm": 3.5929486751556396,
"learning_rate": 6.740000000000001e-05,
"loss": 2.1241,
"step": 340
},
{
"epoch": 0.019727108334703272,
"grad_norm": 4.1317572593688965,
"learning_rate": 7.14e-05,
"loss": 2.2284,
"step": 360
},
{
"epoch": 0.020823058797742343,
"grad_norm": 3.4276161193847656,
"learning_rate": 7.54e-05,
"loss": 1.9655,
"step": 380
},
{
"epoch": 0.021919009260781414,
"grad_norm": 3.9775540828704834,
"learning_rate": 7.94e-05,
"loss": 1.9407,
"step": 400
},
{
"epoch": 0.023014959723820484,
"grad_norm": 3.67511248588562,
"learning_rate": 8.34e-05,
"loss": 1.9312,
"step": 420
},
{
"epoch": 0.024110910186859555,
"grad_norm": 4.781565189361572,
"learning_rate": 8.740000000000001e-05,
"loss": 1.9218,
"step": 440
},
{
"epoch": 0.025206860649898626,
"grad_norm": 5.2797698974609375,
"learning_rate": 9.140000000000001e-05,
"loss": 1.8729,
"step": 460
},
{
"epoch": 0.026302811112937696,
"grad_norm": 6.1737284660339355,
"learning_rate": 9.54e-05,
"loss": 1.6848,
"step": 480
},
{
"epoch": 0.027398761575976767,
"grad_norm": 4.926702976226807,
"learning_rate": 9.94e-05,
"loss": 1.8866,
"step": 500
},
{
"epoch": 0.028494712039015838,
"grad_norm": 4.043098449707031,
"learning_rate": 0.0001,
"loss": 1.749,
"step": 520
},
{
"epoch": 0.02959066250205491,
"grad_norm": 4.022521495819092,
"learning_rate": 0.0001,
"loss": 1.7654,
"step": 540
},
{
"epoch": 0.03068661296509398,
"grad_norm": 3.1964547634124756,
"learning_rate": 0.0001,
"loss": 1.7496,
"step": 560
},
{
"epoch": 0.03178256342813305,
"grad_norm": 3.5182583332061768,
"learning_rate": 0.0001,
"loss": 1.7312,
"step": 580
},
{
"epoch": 0.03287851389117212,
"grad_norm": 3.529665231704712,
"learning_rate": 0.0001,
"loss": 1.6307,
"step": 600
},
{
"epoch": 0.03397446435421119,
"grad_norm": 3.329401969909668,
"learning_rate": 0.0001,
"loss": 1.7613,
"step": 620
},
{
"epoch": 0.03507041481725026,
"grad_norm": 3.4481399059295654,
"learning_rate": 0.0001,
"loss": 1.6204,
"step": 640
},
{
"epoch": 0.03616636528028933,
"grad_norm": 3.3551902770996094,
"learning_rate": 0.0001,
"loss": 1.5846,
"step": 660
},
{
"epoch": 0.0372623157433284,
"grad_norm": 3.591031074523926,
"learning_rate": 0.0001,
"loss": 1.6077,
"step": 680
},
{
"epoch": 0.038358266206367474,
"grad_norm": 3.8630764484405518,
"learning_rate": 0.0001,
"loss": 1.5275,
"step": 700
},
{
"epoch": 0.039454216669406544,
"grad_norm": 3.77461838722229,
"learning_rate": 0.0001,
"loss": 1.5386,
"step": 720
},
{
"epoch": 0.040550167132445615,
"grad_norm": 2.9158153533935547,
"learning_rate": 0.0001,
"loss": 1.5536,
"step": 740
},
{
"epoch": 0.041646117595484686,
"grad_norm": 3.761077642440796,
"learning_rate": 0.0001,
"loss": 1.5607,
"step": 760
},
{
"epoch": 0.042742068058523756,
"grad_norm": 3.5758230686187744,
"learning_rate": 0.0001,
"loss": 1.5145,
"step": 780
},
{
"epoch": 0.04383801852156283,
"grad_norm": 3.01175856590271,
"learning_rate": 0.0001,
"loss": 1.5639,
"step": 800
},
{
"epoch": 0.0449339689846019,
"grad_norm": 3.8395230770111084,
"learning_rate": 0.0001,
"loss": 1.6478,
"step": 820
},
{
"epoch": 0.04602991944764097,
"grad_norm": 2.9240541458129883,
"learning_rate": 0.0001,
"loss": 1.5303,
"step": 840
},
{
"epoch": 0.04712586991068004,
"grad_norm": 3.603835344314575,
"learning_rate": 0.0001,
"loss": 1.4436,
"step": 860
},
{
"epoch": 0.04822182037371911,
"grad_norm": 3.1701183319091797,
"learning_rate": 0.0001,
"loss": 1.5622,
"step": 880
},
{
"epoch": 0.04931777083675818,
"grad_norm": 4.054835796356201,
"learning_rate": 0.0001,
"loss": 1.4354,
"step": 900
},
{
"epoch": 0.05041372129979725,
"grad_norm": 2.9651615619659424,
"learning_rate": 0.0001,
"loss": 1.4676,
"step": 920
},
{
"epoch": 0.05150967176283632,
"grad_norm": 3.2480218410491943,
"learning_rate": 0.0001,
"loss": 1.3769,
"step": 940
},
{
"epoch": 0.05260562222587539,
"grad_norm": 3.494356155395508,
"learning_rate": 0.0001,
"loss": 1.4928,
"step": 960
},
{
"epoch": 0.05370157268891446,
"grad_norm": 3.394205331802368,
"learning_rate": 0.0001,
"loss": 1.4045,
"step": 980
},
{
"epoch": 0.054797523151953534,
"grad_norm": 3.333587646484375,
"learning_rate": 0.0001,
"loss": 1.4953,
"step": 1000
},
{
"epoch": 0.054797523151953534,
"eval_loss": 1.4313914775848389,
"eval_runtime": 30911.9498,
"eval_samples_per_second": 2.099,
"eval_steps_per_second": 0.066,
"eval_wer": 70.88587442180551,
"step": 1000
},
{
"epoch": 0.055893473614992605,
"grad_norm": 3.2317609786987305,
"learning_rate": 0.0001,
"loss": 1.4376,
"step": 1020
},
{
"epoch": 0.056989424078031675,
"grad_norm": 2.9077706336975098,
"learning_rate": 0.0001,
"loss": 1.4398,
"step": 1040
},
{
"epoch": 0.058085374541070746,
"grad_norm": 3.0054707527160645,
"learning_rate": 0.0001,
"loss": 1.4326,
"step": 1060
},
{
"epoch": 0.05918132500410982,
"grad_norm": 3.7243480682373047,
"learning_rate": 0.0001,
"loss": 1.4915,
"step": 1080
},
{
"epoch": 0.06027727546714889,
"grad_norm": 2.9608402252197266,
"learning_rate": 0.0001,
"loss": 1.4595,
"step": 1100
},
{
"epoch": 0.06137322593018796,
"grad_norm": 2.652829885482788,
"learning_rate": 0.0001,
"loss": 1.4378,
"step": 1120
},
{
"epoch": 0.06246917639322703,
"grad_norm": 2.9017295837402344,
"learning_rate": 0.0001,
"loss": 1.4257,
"step": 1140
},
{
"epoch": 0.0635651268562661,
"grad_norm": 3.0610859394073486,
"learning_rate": 0.0001,
"loss": 1.348,
"step": 1160
},
{
"epoch": 0.06466107731930516,
"grad_norm": 2.701765775680542,
"learning_rate": 0.0001,
"loss": 1.3853,
"step": 1180
},
{
"epoch": 0.06575702778234424,
"grad_norm": 3.83376145362854,
"learning_rate": 0.0001,
"loss": 1.4708,
"step": 1200
},
{
"epoch": 0.0668529782453833,
"grad_norm": 3.178449869155884,
"learning_rate": 0.0001,
"loss": 1.362,
"step": 1220
},
{
"epoch": 0.06794892870842238,
"grad_norm": 3.796205997467041,
"learning_rate": 0.0001,
"loss": 1.4331,
"step": 1240
},
{
"epoch": 0.06904487917146145,
"grad_norm": 2.8163928985595703,
"learning_rate": 0.0001,
"loss": 1.2835,
"step": 1260
},
{
"epoch": 0.07014082963450052,
"grad_norm": 2.698793888092041,
"learning_rate": 0.0001,
"loss": 1.3444,
"step": 1280
},
{
"epoch": 0.07123678009753959,
"grad_norm": 2.584484815597534,
"learning_rate": 0.0001,
"loss": 1.2145,
"step": 1300
},
{
"epoch": 0.07233273056057866,
"grad_norm": 2.696967363357544,
"learning_rate": 0.0001,
"loss": 1.2855,
"step": 1320
},
{
"epoch": 0.07342868102361773,
"grad_norm": 3.382924795150757,
"learning_rate": 0.0001,
"loss": 1.2164,
"step": 1340
},
{
"epoch": 0.0745246314866568,
"grad_norm": 2.8127260208129883,
"learning_rate": 0.0001,
"loss": 1.2873,
"step": 1360
},
{
"epoch": 0.07562058194969587,
"grad_norm": 2.631011724472046,
"learning_rate": 0.0001,
"loss": 1.3759,
"step": 1380
},
{
"epoch": 0.07671653241273495,
"grad_norm": 2.913276433944702,
"learning_rate": 0.0001,
"loss": 1.2688,
"step": 1400
},
{
"epoch": 0.07781248287577401,
"grad_norm": 2.811455488204956,
"learning_rate": 0.0001,
"loss": 1.2179,
"step": 1420
},
{
"epoch": 0.07890843333881309,
"grad_norm": 2.8242247104644775,
"learning_rate": 0.0001,
"loss": 1.142,
"step": 1440
},
{
"epoch": 0.08000438380185215,
"grad_norm": 3.1733341217041016,
"learning_rate": 0.0001,
"loss": 1.2934,
"step": 1460
},
{
"epoch": 0.08110033426489123,
"grad_norm": 2.491945743560791,
"learning_rate": 0.0001,
"loss": 1.3274,
"step": 1480
},
{
"epoch": 0.0821962847279303,
"grad_norm": 2.717165470123291,
"learning_rate": 0.0001,
"loss": 1.2484,
"step": 1500
},
{
"epoch": 0.08329223519096937,
"grad_norm": 2.3187918663024902,
"learning_rate": 0.0001,
"loss": 1.2038,
"step": 1520
},
{
"epoch": 0.08438818565400844,
"grad_norm": 2.9296529293060303,
"learning_rate": 0.0001,
"loss": 1.1962,
"step": 1540
},
{
"epoch": 0.08548413611704751,
"grad_norm": 2.5763330459594727,
"learning_rate": 0.0001,
"loss": 1.2122,
"step": 1560
},
{
"epoch": 0.08658008658008658,
"grad_norm": 3.4159390926361084,
"learning_rate": 0.0001,
"loss": 1.2302,
"step": 1580
},
{
"epoch": 0.08767603704312565,
"grad_norm": 2.893261432647705,
"learning_rate": 0.0001,
"loss": 1.2106,
"step": 1600
},
{
"epoch": 0.08877198750616472,
"grad_norm": 2.1891727447509766,
"learning_rate": 0.0001,
"loss": 1.2282,
"step": 1620
},
{
"epoch": 0.0898679379692038,
"grad_norm": 2.4100029468536377,
"learning_rate": 0.0001,
"loss": 1.2039,
"step": 1640
},
{
"epoch": 0.09096388843224286,
"grad_norm": 2.5420494079589844,
"learning_rate": 0.0001,
"loss": 1.2201,
"step": 1660
},
{
"epoch": 0.09205983889528194,
"grad_norm": 3.1885313987731934,
"learning_rate": 0.0001,
"loss": 1.2446,
"step": 1680
},
{
"epoch": 0.093155789358321,
"grad_norm": 3.120586633682251,
"learning_rate": 0.0001,
"loss": 1.2308,
"step": 1700
},
{
"epoch": 0.09425173982136008,
"grad_norm": 2.4548628330230713,
"learning_rate": 0.0001,
"loss": 1.1777,
"step": 1720
},
{
"epoch": 0.09534769028439914,
"grad_norm": 3.101803779602051,
"learning_rate": 0.0001,
"loss": 1.2123,
"step": 1740
},
{
"epoch": 0.09644364074743822,
"grad_norm": 2.536121368408203,
"learning_rate": 0.0001,
"loss": 1.1914,
"step": 1760
},
{
"epoch": 0.09753959121047728,
"grad_norm": 2.3796801567077637,
"learning_rate": 0.0001,
"loss": 1.1848,
"step": 1780
},
{
"epoch": 0.09863554167351636,
"grad_norm": 2.67964243888855,
"learning_rate": 0.0001,
"loss": 1.1973,
"step": 1800
},
{
"epoch": 0.09973149213655542,
"grad_norm": 3.160212755203247,
"learning_rate": 0.0001,
"loss": 1.2472,
"step": 1820
},
{
"epoch": 0.1008274425995945,
"grad_norm": 2.7035927772521973,
"learning_rate": 0.0001,
"loss": 1.1844,
"step": 1840
},
{
"epoch": 0.10192339306263357,
"grad_norm": 2.7725090980529785,
"learning_rate": 0.0001,
"loss": 1.1262,
"step": 1860
},
{
"epoch": 0.10301934352567264,
"grad_norm": 2.2705016136169434,
"learning_rate": 0.0001,
"loss": 1.182,
"step": 1880
},
{
"epoch": 0.10411529398871171,
"grad_norm": 3.0717403888702393,
"learning_rate": 0.0001,
"loss": 1.137,
"step": 1900
},
{
"epoch": 0.10521124445175078,
"grad_norm": 2.9270904064178467,
"learning_rate": 0.0001,
"loss": 1.2556,
"step": 1920
},
{
"epoch": 0.10630719491478985,
"grad_norm": 2.4564895629882812,
"learning_rate": 0.0001,
"loss": 1.1812,
"step": 1940
},
{
"epoch": 0.10740314537782893,
"grad_norm": 2.983851909637451,
"learning_rate": 0.0001,
"loss": 1.1445,
"step": 1960
},
{
"epoch": 0.10849909584086799,
"grad_norm": 2.772733688354492,
"learning_rate": 0.0001,
"loss": 1.1968,
"step": 1980
},
{
"epoch": 0.10959504630390707,
"grad_norm": 2.9768126010894775,
"learning_rate": 0.0001,
"loss": 1.0942,
"step": 2000
},
{
"epoch": 0.10959504630390707,
"eval_loss": 1.1446514129638672,
"eval_runtime": 30634.8587,
"eval_samples_per_second": 2.118,
"eval_steps_per_second": 0.066,
"eval_wer": 61.28519240053001,
"step": 2000
},
{
"epoch": 0.11069099676694613,
"grad_norm": 2.806312322616577,
"learning_rate": 0.0001,
"loss": 1.1924,
"step": 2020
},
{
"epoch": 0.11178694722998521,
"grad_norm": 2.639443874359131,
"learning_rate": 0.0001,
"loss": 1.0572,
"step": 2040
},
{
"epoch": 0.11288289769302427,
"grad_norm": 2.2005367279052734,
"learning_rate": 0.0001,
"loss": 1.1337,
"step": 2060
},
{
"epoch": 0.11397884815606335,
"grad_norm": 2.4102020263671875,
"learning_rate": 0.0001,
"loss": 1.1297,
"step": 2080
},
{
"epoch": 0.11507479861910241,
"grad_norm": 3.410691976547241,
"learning_rate": 0.0001,
"loss": 1.1354,
"step": 2100
},
{
"epoch": 0.11617074908214149,
"grad_norm": 2.1337172985076904,
"learning_rate": 0.0001,
"loss": 1.1725,
"step": 2120
},
{
"epoch": 0.11726669954518056,
"grad_norm": 2.627319097518921,
"learning_rate": 0.0001,
"loss": 1.1006,
"step": 2140
},
{
"epoch": 0.11836265000821963,
"grad_norm": 2.6450726985931396,
"learning_rate": 0.0001,
"loss": 1.0985,
"step": 2160
},
{
"epoch": 0.1194586004712587,
"grad_norm": 2.3205084800720215,
"learning_rate": 0.0001,
"loss": 1.1634,
"step": 2180
},
{
"epoch": 0.12055455093429777,
"grad_norm": 2.51177978515625,
"learning_rate": 0.0001,
"loss": 1.1697,
"step": 2200
},
{
"epoch": 0.12165050139733684,
"grad_norm": 2.6632323265075684,
"learning_rate": 0.0001,
"loss": 1.071,
"step": 2220
},
{
"epoch": 0.12274645186037592,
"grad_norm": 2.8322274684906006,
"learning_rate": 0.0001,
"loss": 1.0983,
"step": 2240
},
{
"epoch": 0.12384240232341498,
"grad_norm": 2.547708749771118,
"learning_rate": 0.0001,
"loss": 1.0629,
"step": 2260
},
{
"epoch": 0.12493835278645406,
"grad_norm": 2.6638150215148926,
"learning_rate": 0.0001,
"loss": 1.1985,
"step": 2280
},
{
"epoch": 0.12603430324949313,
"grad_norm": 2.980463743209839,
"learning_rate": 0.0001,
"loss": 1.1885,
"step": 2300
},
{
"epoch": 0.1271302537125322,
"grad_norm": 1.9924368858337402,
"learning_rate": 0.0001,
"loss": 1.0971,
"step": 2320
},
{
"epoch": 0.12822620417557126,
"grad_norm": 2.2847180366516113,
"learning_rate": 0.0001,
"loss": 1.1149,
"step": 2340
},
{
"epoch": 0.12932215463861033,
"grad_norm": 2.4860479831695557,
"learning_rate": 0.0001,
"loss": 1.0927,
"step": 2360
},
{
"epoch": 0.13041810510164942,
"grad_norm": 2.3988494873046875,
"learning_rate": 0.0001,
"loss": 1.1918,
"step": 2380
},
{
"epoch": 0.13151405556468848,
"grad_norm": 2.5361902713775635,
"learning_rate": 0.0001,
"loss": 1.0603,
"step": 2400
},
{
"epoch": 0.13261000602772754,
"grad_norm": 2.4060215950012207,
"learning_rate": 0.0001,
"loss": 1.056,
"step": 2420
},
{
"epoch": 0.1337059564907666,
"grad_norm": 2.4094231128692627,
"learning_rate": 0.0001,
"loss": 1.0787,
"step": 2440
},
{
"epoch": 0.1348019069538057,
"grad_norm": 2.5207912921905518,
"learning_rate": 0.0001,
"loss": 1.0901,
"step": 2460
},
{
"epoch": 0.13589785741684476,
"grad_norm": 2.1340293884277344,
"learning_rate": 0.0001,
"loss": 1.1691,
"step": 2480
},
{
"epoch": 0.13699380787988383,
"grad_norm": 2.312554359436035,
"learning_rate": 0.0001,
"loss": 0.9791,
"step": 2500
},
{
"epoch": 0.1380897583429229,
"grad_norm": 2.2881298065185547,
"learning_rate": 0.0001,
"loss": 0.9998,
"step": 2520
},
{
"epoch": 0.13918570880596198,
"grad_norm": 2.2146573066711426,
"learning_rate": 0.0001,
"loss": 1.094,
"step": 2540
},
{
"epoch": 0.14028165926900105,
"grad_norm": 2.3992650508880615,
"learning_rate": 0.0001,
"loss": 1.0667,
"step": 2560
},
{
"epoch": 0.1413776097320401,
"grad_norm": 2.7630209922790527,
"learning_rate": 0.0001,
"loss": 1.1541,
"step": 2580
},
{
"epoch": 0.14247356019507917,
"grad_norm": 2.9216675758361816,
"learning_rate": 0.0001,
"loss": 1.0463,
"step": 2600
},
{
"epoch": 0.14356951065811827,
"grad_norm": 2.366373062133789,
"learning_rate": 0.0001,
"loss": 1.0557,
"step": 2620
},
{
"epoch": 0.14466546112115733,
"grad_norm": 2.7161865234375,
"learning_rate": 0.0001,
"loss": 1.1066,
"step": 2640
},
{
"epoch": 0.1457614115841964,
"grad_norm": 2.046992778778076,
"learning_rate": 0.0001,
"loss": 0.9786,
"step": 2660
},
{
"epoch": 0.14685736204723546,
"grad_norm": 2.6320793628692627,
"learning_rate": 0.0001,
"loss": 0.9564,
"step": 2680
},
{
"epoch": 0.14795331251027455,
"grad_norm": 2.485445737838745,
"learning_rate": 0.0001,
"loss": 1.0283,
"step": 2700
},
{
"epoch": 0.1490492629733136,
"grad_norm": 2.267420768737793,
"learning_rate": 0.0001,
"loss": 1.0092,
"step": 2720
},
{
"epoch": 0.15014521343635268,
"grad_norm": 2.618067502975464,
"learning_rate": 0.0001,
"loss": 1.0369,
"step": 2740
},
{
"epoch": 0.15124116389939174,
"grad_norm": 2.502471685409546,
"learning_rate": 0.0001,
"loss": 0.9982,
"step": 2760
},
{
"epoch": 0.15233711436243083,
"grad_norm": 2.936964273452759,
"learning_rate": 0.0001,
"loss": 1.1122,
"step": 2780
},
{
"epoch": 0.1534330648254699,
"grad_norm": 2.5342159271240234,
"learning_rate": 0.0001,
"loss": 1.0409,
"step": 2800
},
{
"epoch": 0.15452901528850896,
"grad_norm": 2.88598895072937,
"learning_rate": 0.0001,
"loss": 1.0259,
"step": 2820
},
{
"epoch": 0.15562496575154802,
"grad_norm": 2.6327946186065674,
"learning_rate": 0.0001,
"loss": 0.9829,
"step": 2840
},
{
"epoch": 0.1567209162145871,
"grad_norm": 2.4873671531677246,
"learning_rate": 0.0001,
"loss": 1.0472,
"step": 2860
},
{
"epoch": 0.15781686667762618,
"grad_norm": 2.1543166637420654,
"learning_rate": 0.0001,
"loss": 1.0157,
"step": 2880
},
{
"epoch": 0.15891281714066524,
"grad_norm": 1.9687381982803345,
"learning_rate": 0.0001,
"loss": 1.0465,
"step": 2900
},
{
"epoch": 0.1600087676037043,
"grad_norm": 2.868544816970825,
"learning_rate": 0.0001,
"loss": 0.9835,
"step": 2920
},
{
"epoch": 0.1611047180667434,
"grad_norm": 2.3211984634399414,
"learning_rate": 0.0001,
"loss": 1.1204,
"step": 2940
},
{
"epoch": 0.16220066852978246,
"grad_norm": 2.631458282470703,
"learning_rate": 0.0001,
"loss": 1.0175,
"step": 2960
},
{
"epoch": 0.16329661899282152,
"grad_norm": 2.7994022369384766,
"learning_rate": 0.0001,
"loss": 1.0828,
"step": 2980
},
{
"epoch": 0.1643925694558606,
"grad_norm": 2.051626443862915,
"learning_rate": 0.0001,
"loss": 0.97,
"step": 3000
},
{
"epoch": 0.1643925694558606,
"eval_loss": 1.0072325468063354,
"eval_runtime": 30710.9249,
"eval_samples_per_second": 2.113,
"eval_steps_per_second": 0.066,
"eval_wer": 55.08434535201816,
"step": 3000
},
{
"epoch": 0.16548851991889968,
"grad_norm": 2.6088364124298096,
"learning_rate": 0.0001,
"loss": 0.9803,
"step": 3020
},
{
"epoch": 0.16658447038193874,
"grad_norm": 2.234034299850464,
"learning_rate": 0.0001,
"loss": 1.0757,
"step": 3040
},
{
"epoch": 0.1676804208449778,
"grad_norm": 2.3472328186035156,
"learning_rate": 0.0001,
"loss": 0.9408,
"step": 3060
},
{
"epoch": 0.16877637130801687,
"grad_norm": 2.5871200561523438,
"learning_rate": 0.0001,
"loss": 0.9269,
"step": 3080
},
{
"epoch": 0.16987232177105596,
"grad_norm": 2.0150465965270996,
"learning_rate": 0.0001,
"loss": 1.0547,
"step": 3100
},
{
"epoch": 0.17096827223409503,
"grad_norm": 2.5823395252227783,
"learning_rate": 0.0001,
"loss": 1.0559,
"step": 3120
},
{
"epoch": 0.1720642226971341,
"grad_norm": 2.8252885341644287,
"learning_rate": 0.0001,
"loss": 1.1219,
"step": 3140
},
{
"epoch": 0.17316017316017315,
"grad_norm": 2.1086535453796387,
"learning_rate": 0.0001,
"loss": 1.0089,
"step": 3160
},
{
"epoch": 0.17425612362321224,
"grad_norm": 2.2288014888763428,
"learning_rate": 0.0001,
"loss": 1.136,
"step": 3180
},
{
"epoch": 0.1753520740862513,
"grad_norm": 2.6622703075408936,
"learning_rate": 0.0001,
"loss": 1.0395,
"step": 3200
},
{
"epoch": 0.17644802454929037,
"grad_norm": 1.9478541612625122,
"learning_rate": 0.0001,
"loss": 1.0658,
"step": 3220
},
{
"epoch": 0.17754397501232944,
"grad_norm": 2.55828857421875,
"learning_rate": 0.0001,
"loss": 0.9904,
"step": 3240
},
{
"epoch": 0.1786399254753685,
"grad_norm": 2.533651828765869,
"learning_rate": 0.0001,
"loss": 0.9733,
"step": 3260
},
{
"epoch": 0.1797358759384076,
"grad_norm": 1.8745101690292358,
"learning_rate": 0.0001,
"loss": 0.9903,
"step": 3280
},
{
"epoch": 0.18083182640144665,
"grad_norm": 1.8459206819534302,
"learning_rate": 0.0001,
"loss": 0.9095,
"step": 3300
},
{
"epoch": 0.18192777686448572,
"grad_norm": 2.6654012203216553,
"learning_rate": 0.0001,
"loss": 0.9854,
"step": 3320
},
{
"epoch": 0.18302372732752478,
"grad_norm": 2.6444480419158936,
"learning_rate": 0.0001,
"loss": 0.8857,
"step": 3340
},
{
"epoch": 0.18411967779056387,
"grad_norm": 2.190462827682495,
"learning_rate": 0.0001,
"loss": 0.9375,
"step": 3360
},
{
"epoch": 0.18521562825360294,
"grad_norm": 2.8208882808685303,
"learning_rate": 0.0001,
"loss": 0.9646,
"step": 3380
},
{
"epoch": 0.186311578716642,
"grad_norm": 2.4978795051574707,
"learning_rate": 0.0001,
"loss": 0.9724,
"step": 3400
},
{
"epoch": 0.18740752917968106,
"grad_norm": 2.4202938079833984,
"learning_rate": 0.0001,
"loss": 0.9659,
"step": 3420
},
{
"epoch": 0.18850347964272016,
"grad_norm": 1.9026118516921997,
"learning_rate": 0.0001,
"loss": 1.0321,
"step": 3440
},
{
"epoch": 0.18959943010575922,
"grad_norm": 2.6031651496887207,
"learning_rate": 0.0001,
"loss": 0.9622,
"step": 3460
},
{
"epoch": 0.19069538056879828,
"grad_norm": 1.962509274482727,
"learning_rate": 0.0001,
"loss": 1.0262,
"step": 3480
},
{
"epoch": 0.19179133103183735,
"grad_norm": 2.794633626937866,
"learning_rate": 0.0001,
"loss": 1.0626,
"step": 3500
},
{
"epoch": 0.19288728149487644,
"grad_norm": 2.4276185035705566,
"learning_rate": 0.0001,
"loss": 0.9961,
"step": 3520
},
{
"epoch": 0.1939832319579155,
"grad_norm": 2.0747737884521484,
"learning_rate": 0.0001,
"loss": 0.8945,
"step": 3540
},
{
"epoch": 0.19507918242095457,
"grad_norm": 1.9151681661605835,
"learning_rate": 0.0001,
"loss": 1.0664,
"step": 3560
},
{
"epoch": 0.19617513288399363,
"grad_norm": 2.11547589302063,
"learning_rate": 0.0001,
"loss": 0.9865,
"step": 3580
},
{
"epoch": 0.19727108334703272,
"grad_norm": 2.359848737716675,
"learning_rate": 0.0001,
"loss": 0.95,
"step": 3600
},
{
"epoch": 0.19836703381007179,
"grad_norm": 1.9854378700256348,
"learning_rate": 0.0001,
"loss": 0.9992,
"step": 3620
},
{
"epoch": 0.19946298427311085,
"grad_norm": 2.476423978805542,
"learning_rate": 0.0001,
"loss": 0.9097,
"step": 3640
},
{
"epoch": 0.2005589347361499,
"grad_norm": 2.420011281967163,
"learning_rate": 0.0001,
"loss": 1.0167,
"step": 3660
},
{
"epoch": 0.201654885199189,
"grad_norm": 2.12312388420105,
"learning_rate": 0.0001,
"loss": 0.9298,
"step": 3680
},
{
"epoch": 0.20275083566222807,
"grad_norm": 1.9679986238479614,
"learning_rate": 0.0001,
"loss": 1.0064,
"step": 3700
},
{
"epoch": 0.20384678612526713,
"grad_norm": 2.608135461807251,
"learning_rate": 0.0001,
"loss": 0.9396,
"step": 3720
},
{
"epoch": 0.2049427365883062,
"grad_norm": 2.542102098464966,
"learning_rate": 0.0001,
"loss": 1.0868,
"step": 3740
},
{
"epoch": 0.2060386870513453,
"grad_norm": 2.5252091884613037,
"learning_rate": 0.0001,
"loss": 1.0417,
"step": 3760
},
{
"epoch": 0.20713463751438435,
"grad_norm": 1.98774254322052,
"learning_rate": 0.0001,
"loss": 0.9949,
"step": 3780
},
{
"epoch": 0.20823058797742341,
"grad_norm": 1.9502965211868286,
"learning_rate": 0.0001,
"loss": 0.9862,
"step": 3800
},
{
"epoch": 0.20932653844046248,
"grad_norm": 2.2537944316864014,
"learning_rate": 0.0001,
"loss": 0.9087,
"step": 3820
},
{
"epoch": 0.21042248890350157,
"grad_norm": 2.2866523265838623,
"learning_rate": 0.0001,
"loss": 1.0128,
"step": 3840
},
{
"epoch": 0.21151843936654063,
"grad_norm": 2.2907001972198486,
"learning_rate": 0.0001,
"loss": 0.9654,
"step": 3860
},
{
"epoch": 0.2126143898295797,
"grad_norm": 2.5648560523986816,
"learning_rate": 0.0001,
"loss": 1.0269,
"step": 3880
},
{
"epoch": 0.21371034029261876,
"grad_norm": 2.198974847793579,
"learning_rate": 0.0001,
"loss": 0.9823,
"step": 3900
},
{
"epoch": 0.21480629075565785,
"grad_norm": 2.1045591831207275,
"learning_rate": 0.0001,
"loss": 0.9139,
"step": 3920
},
{
"epoch": 0.21590224121869692,
"grad_norm": 2.1462857723236084,
"learning_rate": 0.0001,
"loss": 0.9406,
"step": 3940
},
{
"epoch": 0.21699819168173598,
"grad_norm": 2.3216285705566406,
"learning_rate": 0.0001,
"loss": 0.8597,
"step": 3960
},
{
"epoch": 0.21809414214477504,
"grad_norm": 1.867150068283081,
"learning_rate": 0.0001,
"loss": 0.9776,
"step": 3980
},
{
"epoch": 0.21919009260781414,
"grad_norm": 2.3432791233062744,
"learning_rate": 0.0001,
"loss": 0.9546,
"step": 4000
},
{
"epoch": 0.21919009260781414,
"eval_loss": 0.9323587417602539,
"eval_runtime": 30935.2713,
"eval_samples_per_second": 2.098,
"eval_steps_per_second": 0.066,
"eval_wer": 63.836951720973865,
"step": 4000
},
{
"epoch": 0.2202860430708532,
"grad_norm": 1.9426536560058594,
"learning_rate": 0.0001,
"loss": 0.9291,
"step": 4020
},
{
"epoch": 0.22138199353389226,
"grad_norm": 2.693723201751709,
"learning_rate": 0.0001,
"loss": 0.9072,
"step": 4040
},
{
"epoch": 0.22247794399693133,
"grad_norm": 2.237900972366333,
"learning_rate": 0.0001,
"loss": 0.8571,
"step": 4060
},
{
"epoch": 0.22357389445997042,
"grad_norm": 2.739129066467285,
"learning_rate": 0.0001,
"loss": 0.9132,
"step": 4080
},
{
"epoch": 0.22466984492300948,
"grad_norm": 1.886438012123108,
"learning_rate": 0.0001,
"loss": 0.9646,
"step": 4100
},
{
"epoch": 0.22576579538604855,
"grad_norm": 2.3505897521972656,
"learning_rate": 0.0001,
"loss": 1.0479,
"step": 4120
},
{
"epoch": 0.2268617458490876,
"grad_norm": 2.4302868843078613,
"learning_rate": 0.0001,
"loss": 0.9956,
"step": 4140
},
{
"epoch": 0.2279576963121267,
"grad_norm": 2.2747528553009033,
"learning_rate": 0.0001,
"loss": 0.9621,
"step": 4160
},
{
"epoch": 0.22905364677516576,
"grad_norm": 2.312248945236206,
"learning_rate": 0.0001,
"loss": 0.9292,
"step": 4180
},
{
"epoch": 0.23014959723820483,
"grad_norm": 2.0439066886901855,
"learning_rate": 0.0001,
"loss": 0.8804,
"step": 4200
},
{
"epoch": 0.2312455477012439,
"grad_norm": 2.615898609161377,
"learning_rate": 0.0001,
"loss": 0.9302,
"step": 4220
},
{
"epoch": 0.23234149816428298,
"grad_norm": 2.306796073913574,
"learning_rate": 0.0001,
"loss": 1.0401,
"step": 4240
},
{
"epoch": 0.23343744862732205,
"grad_norm": 2.4527432918548584,
"learning_rate": 0.0001,
"loss": 0.9195,
"step": 4260
},
{
"epoch": 0.2345333990903611,
"grad_norm": 1.8589290380477905,
"learning_rate": 0.0001,
"loss": 0.9284,
"step": 4280
},
{
"epoch": 0.23562934955340017,
"grad_norm": 1.8492025136947632,
"learning_rate": 0.0001,
"loss": 0.8898,
"step": 4300
},
{
"epoch": 0.23672530001643927,
"grad_norm": 2.574871063232422,
"learning_rate": 0.0001,
"loss": 1.0026,
"step": 4320
},
{
"epoch": 0.23782125047947833,
"grad_norm": 2.2600936889648438,
"learning_rate": 0.0001,
"loss": 1.0738,
"step": 4340
},
{
"epoch": 0.2389172009425174,
"grad_norm": 2.35066556930542,
"learning_rate": 0.0001,
"loss": 0.8573,
"step": 4360
},
{
"epoch": 0.24001315140555646,
"grad_norm": 2.165745496749878,
"learning_rate": 0.0001,
"loss": 0.8989,
"step": 4380
},
{
"epoch": 0.24110910186859555,
"grad_norm": 2.1494085788726807,
"learning_rate": 0.0001,
"loss": 0.8292,
"step": 4400
},
{
"epoch": 0.2422050523316346,
"grad_norm": 2.185359239578247,
"learning_rate": 0.0001,
"loss": 0.8954,
"step": 4420
},
{
"epoch": 0.24330100279467368,
"grad_norm": 2.193904161453247,
"learning_rate": 0.0001,
"loss": 0.8944,
"step": 4440
},
{
"epoch": 0.24439695325771274,
"grad_norm": 2.1101438999176025,
"learning_rate": 0.0001,
"loss": 0.9059,
"step": 4460
},
{
"epoch": 0.24549290372075183,
"grad_norm": 2.026642084121704,
"learning_rate": 0.0001,
"loss": 0.8978,
"step": 4480
},
{
"epoch": 0.2465888541837909,
"grad_norm": 2.0481228828430176,
"learning_rate": 0.0001,
"loss": 0.8835,
"step": 4500
},
{
"epoch": 0.24768480464682996,
"grad_norm": 2.201350688934326,
"learning_rate": 0.0001,
"loss": 0.9519,
"step": 4520
},
{
"epoch": 0.24878075510986902,
"grad_norm": 1.852100133895874,
"learning_rate": 0.0001,
"loss": 0.8458,
"step": 4540
},
{
"epoch": 0.24987670557290811,
"grad_norm": 2.1303794384002686,
"learning_rate": 0.0001,
"loss": 0.9092,
"step": 4560
},
{
"epoch": 0.25097265603594715,
"grad_norm": 2.2715415954589844,
"learning_rate": 0.0001,
"loss": 0.8931,
"step": 4580
},
{
"epoch": 0.25206860649898627,
"grad_norm": 2.091785192489624,
"learning_rate": 0.0001,
"loss": 0.8645,
"step": 4600
},
{
"epoch": 0.25316455696202533,
"grad_norm": 2.108103036880493,
"learning_rate": 0.0001,
"loss": 0.8387,
"step": 4620
},
{
"epoch": 0.2542605074250644,
"grad_norm": 2.083848237991333,
"learning_rate": 0.0001,
"loss": 0.8315,
"step": 4640
},
{
"epoch": 0.25535645788810346,
"grad_norm": 1.570475459098816,
"learning_rate": 0.0001,
"loss": 0.9355,
"step": 4660
},
{
"epoch": 0.2564524083511425,
"grad_norm": 1.90199875831604,
"learning_rate": 0.0001,
"loss": 0.8308,
"step": 4680
},
{
"epoch": 0.2575483588141816,
"grad_norm": 2.1952812671661377,
"learning_rate": 0.0001,
"loss": 0.8618,
"step": 4700
},
{
"epoch": 0.25864430927722065,
"grad_norm": 2.0530431270599365,
"learning_rate": 0.0001,
"loss": 0.7951,
"step": 4720
},
{
"epoch": 0.2597402597402597,
"grad_norm": 2.202252149581909,
"learning_rate": 0.0001,
"loss": 0.8858,
"step": 4740
},
{
"epoch": 0.26083621020329883,
"grad_norm": 1.9541796445846558,
"learning_rate": 0.0001,
"loss": 0.8466,
"step": 4760
},
{
"epoch": 0.2619321606663379,
"grad_norm": 1.9440534114837646,
"learning_rate": 0.0001,
"loss": 0.8488,
"step": 4780
},
{
"epoch": 0.26302811112937696,
"grad_norm": 2.569821834564209,
"learning_rate": 0.0001,
"loss": 0.963,
"step": 4800
},
{
"epoch": 0.264124061592416,
"grad_norm": 1.8896031379699707,
"learning_rate": 0.0001,
"loss": 0.837,
"step": 4820
},
{
"epoch": 0.2652200120554551,
"grad_norm": 1.9390859603881836,
"learning_rate": 0.0001,
"loss": 0.8855,
"step": 4840
},
{
"epoch": 0.26631596251849415,
"grad_norm": 2.2261974811553955,
"learning_rate": 0.0001,
"loss": 0.8901,
"step": 4860
},
{
"epoch": 0.2674119129815332,
"grad_norm": 2.0486056804656982,
"learning_rate": 0.0001,
"loss": 0.8073,
"step": 4880
},
{
"epoch": 0.2685078634445723,
"grad_norm": 2.292015314102173,
"learning_rate": 0.0001,
"loss": 0.9492,
"step": 4900
},
{
"epoch": 0.2696038139076114,
"grad_norm": 2.0762240886688232,
"learning_rate": 0.0001,
"loss": 0.8528,
"step": 4920
},
{
"epoch": 0.27069976437065046,
"grad_norm": 1.870642066001892,
"learning_rate": 0.0001,
"loss": 0.9482,
"step": 4940
},
{
"epoch": 0.27179571483368953,
"grad_norm": 2.436768054962158,
"learning_rate": 0.0001,
"loss": 0.9299,
"step": 4960
},
{
"epoch": 0.2728916652967286,
"grad_norm": 2.505880832672119,
"learning_rate": 0.0001,
"loss": 0.9259,
"step": 4980
},
{
"epoch": 0.27398761575976766,
"grad_norm": 1.717252492904663,
"learning_rate": 0.0001,
"loss": 0.8134,
"step": 5000
},
{
"epoch": 0.27398761575976766,
"eval_loss": 0.8726964592933655,
"eval_runtime": 30710.3822,
"eval_samples_per_second": 2.113,
"eval_steps_per_second": 0.066,
"eval_wer": 52.213316533880224,
"step": 5000
},
{
"epoch": 0.2750835662228067,
"grad_norm": 2.28765869140625,
"learning_rate": 0.0001,
"loss": 1.0229,
"step": 5020
},
{
"epoch": 0.2761795166858458,
"grad_norm": 2.2264580726623535,
"learning_rate": 0.0001,
"loss": 0.8291,
"step": 5040
},
{
"epoch": 0.27727546714888485,
"grad_norm": 1.9387757778167725,
"learning_rate": 0.0001,
"loss": 0.821,
"step": 5060
},
{
"epoch": 0.27837141761192397,
"grad_norm": 2.8628933429718018,
"learning_rate": 0.0001,
"loss": 0.9521,
"step": 5080
},
{
"epoch": 0.27946736807496303,
"grad_norm": 2.2691447734832764,
"learning_rate": 0.0001,
"loss": 0.8182,
"step": 5100
},
{
"epoch": 0.2805633185380021,
"grad_norm": 1.9515260457992554,
"learning_rate": 0.0001,
"loss": 0.9342,
"step": 5120
},
{
"epoch": 0.28165926900104116,
"grad_norm": 2.1714837551116943,
"learning_rate": 0.0001,
"loss": 0.9663,
"step": 5140
},
{
"epoch": 0.2827552194640802,
"grad_norm": 2.0159664154052734,
"learning_rate": 0.0001,
"loss": 0.8294,
"step": 5160
},
{
"epoch": 0.2838511699271193,
"grad_norm": 2.024634599685669,
"learning_rate": 0.0001,
"loss": 0.896,
"step": 5180
},
{
"epoch": 0.28494712039015835,
"grad_norm": 2.0035595893859863,
"learning_rate": 0.0001,
"loss": 0.8446,
"step": 5200
},
{
"epoch": 0.2860430708531974,
"grad_norm": 2.4142866134643555,
"learning_rate": 0.0001,
"loss": 0.8835,
"step": 5220
},
{
"epoch": 0.28713902131623653,
"grad_norm": 2.070338010787964,
"learning_rate": 0.0001,
"loss": 0.8687,
"step": 5240
},
{
"epoch": 0.2882349717792756,
"grad_norm": 1.9818578958511353,
"learning_rate": 0.0001,
"loss": 0.8296,
"step": 5260
},
{
"epoch": 0.28933092224231466,
"grad_norm": 1.8923412561416626,
"learning_rate": 0.0001,
"loss": 0.8999,
"step": 5280
},
{
"epoch": 0.2904268727053537,
"grad_norm": 2.200206995010376,
"learning_rate": 0.0001,
"loss": 0.8662,
"step": 5300
},
{
"epoch": 0.2915228231683928,
"grad_norm": 1.982446551322937,
"learning_rate": 0.0001,
"loss": 0.8301,
"step": 5320
},
{
"epoch": 0.29261877363143185,
"grad_norm": 1.934844732284546,
"learning_rate": 0.0001,
"loss": 0.8219,
"step": 5340
},
{
"epoch": 0.2937147240944709,
"grad_norm": 2.2790510654449463,
"learning_rate": 0.0001,
"loss": 0.8666,
"step": 5360
},
{
"epoch": 0.29481067455751,
"grad_norm": 1.771672248840332,
"learning_rate": 0.0001,
"loss": 0.843,
"step": 5380
},
{
"epoch": 0.2959066250205491,
"grad_norm": 2.3459877967834473,
"learning_rate": 0.0001,
"loss": 0.8516,
"step": 5400
},
{
"epoch": 0.29700257548358816,
"grad_norm": 2.156458854675293,
"learning_rate": 0.0001,
"loss": 0.8425,
"step": 5420
},
{
"epoch": 0.2980985259466272,
"grad_norm": 1.9492950439453125,
"learning_rate": 0.0001,
"loss": 0.8445,
"step": 5440
},
{
"epoch": 0.2991944764096663,
"grad_norm": 2.1061997413635254,
"learning_rate": 0.0001,
"loss": 0.8858,
"step": 5460
},
{
"epoch": 0.30029042687270535,
"grad_norm": 2.3567299842834473,
"learning_rate": 0.0001,
"loss": 0.8376,
"step": 5480
},
{
"epoch": 0.3013863773357444,
"grad_norm": 2.1302335262298584,
"learning_rate": 0.0001,
"loss": 0.8272,
"step": 5500
},
{
"epoch": 0.3024823277987835,
"grad_norm": 2.2098424434661865,
"learning_rate": 0.0001,
"loss": 0.8742,
"step": 5520
},
{
"epoch": 0.30357827826182254,
"grad_norm": 1.7558562755584717,
"learning_rate": 0.0001,
"loss": 0.8863,
"step": 5540
},
{
"epoch": 0.30467422872486166,
"grad_norm": 1.8461397886276245,
"learning_rate": 0.0001,
"loss": 0.8792,
"step": 5560
},
{
"epoch": 0.3057701791879007,
"grad_norm": 2.0006344318389893,
"learning_rate": 0.0001,
"loss": 0.8263,
"step": 5580
},
{
"epoch": 0.3068661296509398,
"grad_norm": 1.6772565841674805,
"learning_rate": 0.0001,
"loss": 0.789,
"step": 5600
},
{
"epoch": 0.30796208011397885,
"grad_norm": 1.9263228178024292,
"learning_rate": 0.0001,
"loss": 0.842,
"step": 5620
},
{
"epoch": 0.3090580305770179,
"grad_norm": 1.8888592720031738,
"learning_rate": 0.0001,
"loss": 0.8475,
"step": 5640
},
{
"epoch": 0.310153981040057,
"grad_norm": 2.2354602813720703,
"learning_rate": 0.0001,
"loss": 1.0036,
"step": 5660
},
{
"epoch": 0.31124993150309604,
"grad_norm": 1.9634332656860352,
"learning_rate": 0.0001,
"loss": 0.8517,
"step": 5680
},
{
"epoch": 0.3123458819661351,
"grad_norm": 2.348825216293335,
"learning_rate": 0.0001,
"loss": 0.8731,
"step": 5700
},
{
"epoch": 0.3134418324291742,
"grad_norm": 2.487741708755493,
"learning_rate": 0.0001,
"loss": 0.8556,
"step": 5720
},
{
"epoch": 0.3145377828922133,
"grad_norm": 1.999516248703003,
"learning_rate": 0.0001,
"loss": 0.7969,
"step": 5740
},
{
"epoch": 0.31563373335525235,
"grad_norm": 1.9654616117477417,
"learning_rate": 0.0001,
"loss": 0.7843,
"step": 5760
},
{
"epoch": 0.3167296838182914,
"grad_norm": 2.1070950031280518,
"learning_rate": 0.0001,
"loss": 0.8399,
"step": 5780
},
{
"epoch": 0.3178256342813305,
"grad_norm": 2.257129192352295,
"learning_rate": 0.0001,
"loss": 0.8224,
"step": 5800
},
{
"epoch": 0.31892158474436955,
"grad_norm": 1.8256118297576904,
"learning_rate": 0.0001,
"loss": 0.794,
"step": 5820
},
{
"epoch": 0.3200175352074086,
"grad_norm": 1.8899625539779663,
"learning_rate": 0.0001,
"loss": 0.8614,
"step": 5840
},
{
"epoch": 0.3211134856704477,
"grad_norm": 2.221484661102295,
"learning_rate": 0.0001,
"loss": 0.765,
"step": 5860
},
{
"epoch": 0.3222094361334868,
"grad_norm": 1.796877384185791,
"learning_rate": 0.0001,
"loss": 0.8359,
"step": 5880
},
{
"epoch": 0.32330538659652586,
"grad_norm": 1.7495447397232056,
"learning_rate": 0.0001,
"loss": 0.8688,
"step": 5900
},
{
"epoch": 0.3244013370595649,
"grad_norm": 2.136664628982544,
"learning_rate": 0.0001,
"loss": 0.9163,
"step": 5920
},
{
"epoch": 0.325497287522604,
"grad_norm": 1.8508238792419434,
"learning_rate": 0.0001,
"loss": 0.7975,
"step": 5940
},
{
"epoch": 0.32659323798564305,
"grad_norm": 2.144523859024048,
"learning_rate": 0.0001,
"loss": 0.7749,
"step": 5960
},
{
"epoch": 0.3276891884486821,
"grad_norm": 2.208815336227417,
"learning_rate": 0.0001,
"loss": 0.8148,
"step": 5980
},
{
"epoch": 0.3287851389117212,
"grad_norm": 2.0617401599884033,
"learning_rate": 0.0001,
"loss": 0.8884,
"step": 6000
},
{
"epoch": 0.3287851389117212,
"eval_loss": 0.8316722512245178,
"eval_runtime": 30850.8589,
"eval_samples_per_second": 2.103,
"eval_steps_per_second": 0.066,
"eval_wer": 45.9960352377659,
"step": 6000
},
{
"epoch": 0.32988108937476024,
"grad_norm": 2.0406434535980225,
"learning_rate": 0.0001,
"loss": 0.8504,
"step": 6020
},
{
"epoch": 0.33097703983779936,
"grad_norm": 2.1899139881134033,
"learning_rate": 0.0001,
"loss": 0.7782,
"step": 6040
},
{
"epoch": 0.3320729903008384,
"grad_norm": 2.650421380996704,
"learning_rate": 0.0001,
"loss": 0.7823,
"step": 6060
},
{
"epoch": 0.3331689407638775,
"grad_norm": 2.085683584213257,
"learning_rate": 0.0001,
"loss": 0.754,
"step": 6080
},
{
"epoch": 0.33426489122691655,
"grad_norm": 2.1783502101898193,
"learning_rate": 0.0001,
"loss": 0.8819,
"step": 6100
},
{
"epoch": 0.3353608416899556,
"grad_norm": 2.096208333969116,
"learning_rate": 0.0001,
"loss": 0.8702,
"step": 6120
},
{
"epoch": 0.3364567921529947,
"grad_norm": 2.005629062652588,
"learning_rate": 0.0001,
"loss": 0.8827,
"step": 6140
},
{
"epoch": 0.33755274261603374,
"grad_norm": 2.1545634269714355,
"learning_rate": 0.0001,
"loss": 0.8496,
"step": 6160
},
{
"epoch": 0.3386486930790728,
"grad_norm": 1.8190851211547852,
"learning_rate": 0.0001,
"loss": 0.7622,
"step": 6180
},
{
"epoch": 0.3397446435421119,
"grad_norm": 1.9555623531341553,
"learning_rate": 0.0001,
"loss": 0.8338,
"step": 6200
},
{
"epoch": 0.340840594005151,
"grad_norm": 1.8530341386795044,
"learning_rate": 0.0001,
"loss": 0.8017,
"step": 6220
},
{
"epoch": 0.34193654446819005,
"grad_norm": 1.8724114894866943,
"learning_rate": 0.0001,
"loss": 0.848,
"step": 6240
},
{
"epoch": 0.3430324949312291,
"grad_norm": 1.8598796129226685,
"learning_rate": 0.0001,
"loss": 0.8074,
"step": 6260
},
{
"epoch": 0.3441284453942682,
"grad_norm": 2.1442923545837402,
"learning_rate": 0.0001,
"loss": 0.8473,
"step": 6280
},
{
"epoch": 0.34522439585730724,
"grad_norm": 2.3083174228668213,
"learning_rate": 0.0001,
"loss": 0.9016,
"step": 6300
},
{
"epoch": 0.3463203463203463,
"grad_norm": 1.8194735050201416,
"learning_rate": 0.0001,
"loss": 0.8267,
"step": 6320
},
{
"epoch": 0.34741629678338537,
"grad_norm": 2.063523054122925,
"learning_rate": 0.0001,
"loss": 0.7841,
"step": 6340
},
{
"epoch": 0.3485122472464245,
"grad_norm": 2.17594051361084,
"learning_rate": 0.0001,
"loss": 0.8318,
"step": 6360
},
{
"epoch": 0.34960819770946355,
"grad_norm": 1.665189504623413,
"learning_rate": 0.0001,
"loss": 0.7983,
"step": 6380
},
{
"epoch": 0.3507041481725026,
"grad_norm": 2.2596445083618164,
"learning_rate": 0.0001,
"loss": 0.8421,
"step": 6400
},
{
"epoch": 0.3518000986355417,
"grad_norm": 1.7096545696258545,
"learning_rate": 0.0001,
"loss": 0.889,
"step": 6420
},
{
"epoch": 0.35289604909858074,
"grad_norm": 1.7475535869598389,
"learning_rate": 0.0001,
"loss": 0.8006,
"step": 6440
},
{
"epoch": 0.3539919995616198,
"grad_norm": 1.8176007270812988,
"learning_rate": 0.0001,
"loss": 0.8632,
"step": 6460
},
{
"epoch": 0.35508795002465887,
"grad_norm": 2.6806535720825195,
"learning_rate": 0.0001,
"loss": 0.8427,
"step": 6480
},
{
"epoch": 0.35618390048769794,
"grad_norm": 2.094172477722168,
"learning_rate": 0.0001,
"loss": 0.7812,
"step": 6500
},
{
"epoch": 0.357279850950737,
"grad_norm": 1.8341765403747559,
"learning_rate": 0.0001,
"loss": 0.8051,
"step": 6520
},
{
"epoch": 0.3583758014137761,
"grad_norm": 2.2341349124908447,
"learning_rate": 0.0001,
"loss": 0.8001,
"step": 6540
},
{
"epoch": 0.3594717518768152,
"grad_norm": 2.1017801761627197,
"learning_rate": 0.0001,
"loss": 0.8142,
"step": 6560
},
{
"epoch": 0.36056770233985425,
"grad_norm": 1.9903994798660278,
"learning_rate": 0.0001,
"loss": 0.8117,
"step": 6580
},
{
"epoch": 0.3616636528028933,
"grad_norm": 2.273465394973755,
"learning_rate": 0.0001,
"loss": 0.8864,
"step": 6600
},
{
"epoch": 0.3627596032659324,
"grad_norm": 2.0767428874969482,
"learning_rate": 0.0001,
"loss": 0.7687,
"step": 6620
},
{
"epoch": 0.36385555372897144,
"grad_norm": 2.559774398803711,
"learning_rate": 0.0001,
"loss": 0.8181,
"step": 6640
},
{
"epoch": 0.3649515041920105,
"grad_norm": 2.1393582820892334,
"learning_rate": 0.0001,
"loss": 0.7936,
"step": 6660
},
{
"epoch": 0.36604745465504956,
"grad_norm": 2.06675386428833,
"learning_rate": 0.0001,
"loss": 0.8263,
"step": 6680
},
{
"epoch": 0.3671434051180887,
"grad_norm": 1.7674784660339355,
"learning_rate": 0.0001,
"loss": 0.7818,
"step": 6700
},
{
"epoch": 0.36823935558112775,
"grad_norm": 1.765442132949829,
"learning_rate": 0.0001,
"loss": 0.8335,
"step": 6720
},
{
"epoch": 0.3693353060441668,
"grad_norm": 2.044288158416748,
"learning_rate": 0.0001,
"loss": 0.8742,
"step": 6740
},
{
"epoch": 0.3704312565072059,
"grad_norm": 1.9821726083755493,
"learning_rate": 0.0001,
"loss": 0.928,
"step": 6760
},
{
"epoch": 0.37152720697024494,
"grad_norm": 2.0798370838165283,
"learning_rate": 0.0001,
"loss": 0.7627,
"step": 6780
},
{
"epoch": 0.372623157433284,
"grad_norm": 1.6817582845687866,
"learning_rate": 0.0001,
"loss": 0.7985,
"step": 6800
},
{
"epoch": 0.37371910789632307,
"grad_norm": 1.872247576713562,
"learning_rate": 0.0001,
"loss": 0.8102,
"step": 6820
},
{
"epoch": 0.37481505835936213,
"grad_norm": 1.7761516571044922,
"learning_rate": 0.0001,
"loss": 0.8435,
"step": 6840
},
{
"epoch": 0.37591100882240125,
"grad_norm": 1.739585518836975,
"learning_rate": 0.0001,
"loss": 0.8706,
"step": 6860
},
{
"epoch": 0.3770069592854403,
"grad_norm": 2.0503687858581543,
"learning_rate": 0.0001,
"loss": 0.8354,
"step": 6880
},
{
"epoch": 0.3781029097484794,
"grad_norm": 2.283393621444702,
"learning_rate": 0.0001,
"loss": 0.7476,
"step": 6900
},
{
"epoch": 0.37919886021151844,
"grad_norm": 1.801018238067627,
"learning_rate": 0.0001,
"loss": 0.7817,
"step": 6920
},
{
"epoch": 0.3802948106745575,
"grad_norm": 2.5343267917633057,
"learning_rate": 0.0001,
"loss": 0.7628,
"step": 6940
},
{
"epoch": 0.38139076113759657,
"grad_norm": 2.010507822036743,
"learning_rate": 0.0001,
"loss": 0.7931,
"step": 6960
},
{
"epoch": 0.38248671160063563,
"grad_norm": 1.7228796482086182,
"learning_rate": 0.0001,
"loss": 0.7517,
"step": 6980
},
{
"epoch": 0.3835826620636747,
"grad_norm": 1.967822551727295,
"learning_rate": 0.0001,
"loss": 0.804,
"step": 7000
},
{
"epoch": 0.3835826620636747,
"eval_loss": 0.7978512644767761,
"eval_runtime": 30977.7517,
"eval_samples_per_second": 2.095,
"eval_steps_per_second": 0.065,
"eval_wer": 61.261910549759826,
"step": 7000
},
{
"epoch": 0.3846786125267138,
"grad_norm": 1.9999229907989502,
"learning_rate": 0.0001,
"loss": 0.7634,
"step": 7020
},
{
"epoch": 0.3857745629897529,
"grad_norm": 1.956128716468811,
"learning_rate": 0.0001,
"loss": 0.8102,
"step": 7040
},
{
"epoch": 0.38687051345279194,
"grad_norm": 2.0134966373443604,
"learning_rate": 0.0001,
"loss": 0.7957,
"step": 7060
},
{
"epoch": 0.387966463915831,
"grad_norm": 2.0373167991638184,
"learning_rate": 0.0001,
"loss": 0.8251,
"step": 7080
},
{
"epoch": 0.38906241437887007,
"grad_norm": 1.7772964239120483,
"learning_rate": 0.0001,
"loss": 0.8128,
"step": 7100
},
{
"epoch": 0.39015836484190913,
"grad_norm": 1.7618379592895508,
"learning_rate": 0.0001,
"loss": 0.8345,
"step": 7120
},
{
"epoch": 0.3912543153049482,
"grad_norm": 2.181671380996704,
"learning_rate": 0.0001,
"loss": 0.8345,
"step": 7140
},
{
"epoch": 0.39235026576798726,
"grad_norm": 1.8794726133346558,
"learning_rate": 0.0001,
"loss": 0.7615,
"step": 7160
},
{
"epoch": 0.3934462162310264,
"grad_norm": 1.9297798871994019,
"learning_rate": 0.0001,
"loss": 0.7618,
"step": 7180
},
{
"epoch": 0.39454216669406544,
"grad_norm": 1.9441471099853516,
"learning_rate": 0.0001,
"loss": 0.859,
"step": 7200
},
{
"epoch": 0.3956381171571045,
"grad_norm": 2.2561404705047607,
"learning_rate": 0.0001,
"loss": 0.7877,
"step": 7220
},
{
"epoch": 0.39673406762014357,
"grad_norm": 1.8441416025161743,
"learning_rate": 0.0001,
"loss": 0.7734,
"step": 7240
},
{
"epoch": 0.39783001808318263,
"grad_norm": 1.686120867729187,
"learning_rate": 0.0001,
"loss": 0.7066,
"step": 7260
},
{
"epoch": 0.3989259685462217,
"grad_norm": 1.9456263780593872,
"learning_rate": 0.0001,
"loss": 0.7469,
"step": 7280
},
{
"epoch": 0.40002191900926076,
"grad_norm": 1.9112725257873535,
"learning_rate": 0.0001,
"loss": 0.7607,
"step": 7300
},
{
"epoch": 0.4011178694722998,
"grad_norm": 2.5668513774871826,
"learning_rate": 0.0001,
"loss": 0.7859,
"step": 7320
},
{
"epoch": 0.40221381993533895,
"grad_norm": 1.9502942562103271,
"learning_rate": 0.0001,
"loss": 0.7607,
"step": 7340
},
{
"epoch": 0.403309770398378,
"grad_norm": 1.6973525285720825,
"learning_rate": 0.0001,
"loss": 0.8313,
"step": 7360
},
{
"epoch": 0.4044057208614171,
"grad_norm": 2.3962297439575195,
"learning_rate": 0.0001,
"loss": 0.7806,
"step": 7380
},
{
"epoch": 0.40550167132445614,
"grad_norm": 1.887536883354187,
"learning_rate": 0.0001,
"loss": 0.7524,
"step": 7400
},
{
"epoch": 0.4065976217874952,
"grad_norm": 1.999687910079956,
"learning_rate": 0.0001,
"loss": 0.7349,
"step": 7420
},
{
"epoch": 0.40769357225053426,
"grad_norm": 1.7444576025009155,
"learning_rate": 0.0001,
"loss": 0.8156,
"step": 7440
},
{
"epoch": 0.40878952271357333,
"grad_norm": 1.7175132036209106,
"learning_rate": 0.0001,
"loss": 0.7419,
"step": 7460
},
{
"epoch": 0.4098854731766124,
"grad_norm": 2.23638653755188,
"learning_rate": 0.0001,
"loss": 0.666,
"step": 7480
},
{
"epoch": 0.4109814236396515,
"grad_norm": 2.024102210998535,
"learning_rate": 0.0001,
"loss": 0.7541,
"step": 7500
},
{
"epoch": 0.4120773741026906,
"grad_norm": 2.042541265487671,
"learning_rate": 0.0001,
"loss": 0.7915,
"step": 7520
},
{
"epoch": 0.41317332456572964,
"grad_norm": 1.9140897989273071,
"learning_rate": 0.0001,
"loss": 0.8712,
"step": 7540
},
{
"epoch": 0.4142692750287687,
"grad_norm": 1.8435416221618652,
"learning_rate": 0.0001,
"loss": 0.8241,
"step": 7560
},
{
"epoch": 0.41536522549180777,
"grad_norm": 2.027944803237915,
"learning_rate": 0.0001,
"loss": 0.9422,
"step": 7580
},
{
"epoch": 0.41646117595484683,
"grad_norm": 2.07381534576416,
"learning_rate": 0.0001,
"loss": 0.812,
"step": 7600
},
{
"epoch": 0.4175571264178859,
"grad_norm": 1.9762136936187744,
"learning_rate": 0.0001,
"loss": 0.7852,
"step": 7620
},
{
"epoch": 0.41865307688092496,
"grad_norm": 1.8222426176071167,
"learning_rate": 0.0001,
"loss": 0.752,
"step": 7640
},
{
"epoch": 0.4197490273439641,
"grad_norm": 2.0519089698791504,
"learning_rate": 0.0001,
"loss": 0.8031,
"step": 7660
},
{
"epoch": 0.42084497780700314,
"grad_norm": 1.8777110576629639,
"learning_rate": 0.0001,
"loss": 0.8173,
"step": 7680
},
{
"epoch": 0.4219409282700422,
"grad_norm": 2.323411703109741,
"learning_rate": 0.0001,
"loss": 0.8479,
"step": 7700
},
{
"epoch": 0.42303687873308127,
"grad_norm": 1.6403400897979736,
"learning_rate": 0.0001,
"loss": 0.7567,
"step": 7720
},
{
"epoch": 0.42413282919612033,
"grad_norm": 1.6627925634384155,
"learning_rate": 0.0001,
"loss": 0.7734,
"step": 7740
},
{
"epoch": 0.4252287796591594,
"grad_norm": 1.8771709203720093,
"learning_rate": 0.0001,
"loss": 0.7652,
"step": 7760
},
{
"epoch": 0.42632473012219846,
"grad_norm": 1.9806597232818604,
"learning_rate": 0.0001,
"loss": 0.7699,
"step": 7780
},
{
"epoch": 0.4274206805852375,
"grad_norm": 2.1376988887786865,
"learning_rate": 0.0001,
"loss": 0.7825,
"step": 7800
},
{
"epoch": 0.42851663104827664,
"grad_norm": 1.5566449165344238,
"learning_rate": 0.0001,
"loss": 0.704,
"step": 7820
},
{
"epoch": 0.4296125815113157,
"grad_norm": 2.1835947036743164,
"learning_rate": 0.0001,
"loss": 0.8101,
"step": 7840
},
{
"epoch": 0.43070853197435477,
"grad_norm": 2.055119037628174,
"learning_rate": 0.0001,
"loss": 0.703,
"step": 7860
},
{
"epoch": 0.43180448243739383,
"grad_norm": 1.9324967861175537,
"learning_rate": 0.0001,
"loss": 0.81,
"step": 7880
},
{
"epoch": 0.4329004329004329,
"grad_norm": 2.1087846755981445,
"learning_rate": 0.0001,
"loss": 0.7676,
"step": 7900
},
{
"epoch": 0.43399638336347196,
"grad_norm": 1.8521897792816162,
"learning_rate": 0.0001,
"loss": 0.7546,
"step": 7920
},
{
"epoch": 0.435092333826511,
"grad_norm": 2.145947217941284,
"learning_rate": 0.0001,
"loss": 0.7992,
"step": 7940
},
{
"epoch": 0.4361882842895501,
"grad_norm": 1.7739931344985962,
"learning_rate": 0.0001,
"loss": 0.7133,
"step": 7960
},
{
"epoch": 0.4372842347525892,
"grad_norm": 1.6032921075820923,
"learning_rate": 0.0001,
"loss": 0.8207,
"step": 7980
},
{
"epoch": 0.43838018521562827,
"grad_norm": 2.1895668506622314,
"learning_rate": 0.0001,
"loss": 0.7638,
"step": 8000
},
{
"epoch": 0.43838018521562827,
"eval_loss": 0.770411491394043,
"eval_runtime": 30675.7059,
"eval_samples_per_second": 2.115,
"eval_steps_per_second": 0.066,
"eval_wer": 43.10069742838263,
"step": 8000
},
{
"epoch": 0.43947613567866733,
"grad_norm": 1.9759962558746338,
"learning_rate": 0.0001,
"loss": 0.7792,
"step": 8020
},
{
"epoch": 0.4405720861417064,
"grad_norm": 1.845012903213501,
"learning_rate": 0.0001,
"loss": 0.847,
"step": 8040
},
{
"epoch": 0.44166803660474546,
"grad_norm": 1.9666188955307007,
"learning_rate": 0.0001,
"loss": 0.767,
"step": 8060
},
{
"epoch": 0.4427639870677845,
"grad_norm": 2.1448235511779785,
"learning_rate": 0.0001,
"loss": 0.7924,
"step": 8080
},
{
"epoch": 0.4438599375308236,
"grad_norm": 1.9017919301986694,
"learning_rate": 0.0001,
"loss": 0.7239,
"step": 8100
},
{
"epoch": 0.44495588799386265,
"grad_norm": 1.8005828857421875,
"learning_rate": 0.0001,
"loss": 0.7202,
"step": 8120
},
{
"epoch": 0.4460518384569018,
"grad_norm": 1.7341022491455078,
"learning_rate": 0.0001,
"loss": 0.7045,
"step": 8140
},
{
"epoch": 0.44714778891994084,
"grad_norm": 2.094618320465088,
"learning_rate": 0.0001,
"loss": 0.8067,
"step": 8160
},
{
"epoch": 0.4482437393829799,
"grad_norm": 2.0414187908172607,
"learning_rate": 0.0001,
"loss": 0.6888,
"step": 8180
},
{
"epoch": 0.44933968984601896,
"grad_norm": 1.8842118978500366,
"learning_rate": 0.0001,
"loss": 0.7125,
"step": 8200
},
{
"epoch": 0.450435640309058,
"grad_norm": 1.9878696203231812,
"learning_rate": 0.0001,
"loss": 0.723,
"step": 8220
},
{
"epoch": 0.4515315907720971,
"grad_norm": 1.94351065158844,
"learning_rate": 0.0001,
"loss": 0.727,
"step": 8240
},
{
"epoch": 0.45262754123513615,
"grad_norm": 1.900718331336975,
"learning_rate": 0.0001,
"loss": 0.7306,
"step": 8260
},
{
"epoch": 0.4537234916981752,
"grad_norm": 2.5974204540252686,
"learning_rate": 0.0001,
"loss": 0.7968,
"step": 8280
},
{
"epoch": 0.45481944216121434,
"grad_norm": 1.9214075803756714,
"learning_rate": 0.0001,
"loss": 0.7767,
"step": 8300
},
{
"epoch": 0.4559153926242534,
"grad_norm": 2.6079931259155273,
"learning_rate": 0.0001,
"loss": 0.7787,
"step": 8320
},
{
"epoch": 0.45701134308729247,
"grad_norm": 1.8398691415786743,
"learning_rate": 0.0001,
"loss": 0.7941,
"step": 8340
},
{
"epoch": 0.45810729355033153,
"grad_norm": 1.740376591682434,
"learning_rate": 0.0001,
"loss": 0.7714,
"step": 8360
},
{
"epoch": 0.4592032440133706,
"grad_norm": 2.109416961669922,
"learning_rate": 0.0001,
"loss": 0.8015,
"step": 8380
},
{
"epoch": 0.46029919447640966,
"grad_norm": 1.9565001726150513,
"learning_rate": 0.0001,
"loss": 0.7473,
"step": 8400
},
{
"epoch": 0.4613951449394487,
"grad_norm": 1.88534414768219,
"learning_rate": 0.0001,
"loss": 0.7828,
"step": 8420
},
{
"epoch": 0.4624910954024878,
"grad_norm": 1.7713934183120728,
"learning_rate": 0.0001,
"loss": 0.7289,
"step": 8440
},
{
"epoch": 0.4635870458655269,
"grad_norm": 1.9173312187194824,
"learning_rate": 0.0001,
"loss": 0.7478,
"step": 8460
},
{
"epoch": 0.46468299632856597,
"grad_norm": 1.6866717338562012,
"learning_rate": 0.0001,
"loss": 0.8235,
"step": 8480
},
{
"epoch": 0.46577894679160503,
"grad_norm": 1.6713476181030273,
"learning_rate": 0.0001,
"loss": 0.7216,
"step": 8500
},
{
"epoch": 0.4668748972546441,
"grad_norm": 1.9601606130599976,
"learning_rate": 0.0001,
"loss": 0.6994,
"step": 8520
},
{
"epoch": 0.46797084771768316,
"grad_norm": 1.7472949028015137,
"learning_rate": 0.0001,
"loss": 0.7694,
"step": 8540
},
{
"epoch": 0.4690667981807222,
"grad_norm": 1.8540037870407104,
"learning_rate": 0.0001,
"loss": 0.7253,
"step": 8560
},
{
"epoch": 0.4701627486437613,
"grad_norm": 2.0671746730804443,
"learning_rate": 0.0001,
"loss": 0.7514,
"step": 8580
},
{
"epoch": 0.47125869910680035,
"grad_norm": 1.900918960571289,
"learning_rate": 0.0001,
"loss": 0.7871,
"step": 8600
},
{
"epoch": 0.47235464956983947,
"grad_norm": 1.7465757131576538,
"learning_rate": 0.0001,
"loss": 0.8009,
"step": 8620
},
{
"epoch": 0.47345060003287853,
"grad_norm": 2.3400652408599854,
"learning_rate": 0.0001,
"loss": 0.7741,
"step": 8640
},
{
"epoch": 0.4745465504959176,
"grad_norm": 2.1384716033935547,
"learning_rate": 0.0001,
"loss": 0.7577,
"step": 8660
},
{
"epoch": 0.47564250095895666,
"grad_norm": 2.7113006114959717,
"learning_rate": 0.0001,
"loss": 0.6968,
"step": 8680
},
{
"epoch": 0.4767384514219957,
"grad_norm": 1.6666728258132935,
"learning_rate": 0.0001,
"loss": 0.7307,
"step": 8700
},
{
"epoch": 0.4778344018850348,
"grad_norm": 1.8394851684570312,
"learning_rate": 0.0001,
"loss": 0.7353,
"step": 8720
},
{
"epoch": 0.47893035234807385,
"grad_norm": 2.0569512844085693,
"learning_rate": 0.0001,
"loss": 0.814,
"step": 8740
},
{
"epoch": 0.4800263028111129,
"grad_norm": 1.6457910537719727,
"learning_rate": 0.0001,
"loss": 0.7521,
"step": 8760
},
{
"epoch": 0.48112225327415203,
"grad_norm": 2.010711908340454,
"learning_rate": 0.0001,
"loss": 0.7101,
"step": 8780
},
{
"epoch": 0.4822182037371911,
"grad_norm": 2.422718048095703,
"learning_rate": 0.0001,
"loss": 0.7867,
"step": 8800
},
{
"epoch": 0.48331415420023016,
"grad_norm": 1.5170652866363525,
"learning_rate": 0.0001,
"loss": 0.8042,
"step": 8820
},
{
"epoch": 0.4844101046632692,
"grad_norm": 1.9751352071762085,
"learning_rate": 0.0001,
"loss": 0.7408,
"step": 8840
},
{
"epoch": 0.4855060551263083,
"grad_norm": 1.8477592468261719,
"learning_rate": 0.0001,
"loss": 0.7675,
"step": 8860
},
{
"epoch": 0.48660200558934735,
"grad_norm": 1.9999114274978638,
"learning_rate": 0.0001,
"loss": 0.745,
"step": 8880
},
{
"epoch": 0.4876979560523864,
"grad_norm": 1.7456104755401611,
"learning_rate": 0.0001,
"loss": 0.7713,
"step": 8900
},
{
"epoch": 0.4887939065154255,
"grad_norm": 1.9687026739120483,
"learning_rate": 0.0001,
"loss": 0.7349,
"step": 8920
},
{
"epoch": 0.4898898569784646,
"grad_norm": 1.8585296869277954,
"learning_rate": 0.0001,
"loss": 0.7369,
"step": 8940
},
{
"epoch": 0.49098580744150366,
"grad_norm": 2.7875003814697266,
"learning_rate": 0.0001,
"loss": 0.7002,
"step": 8960
},
{
"epoch": 0.4920817579045427,
"grad_norm": 2.01347017288208,
"learning_rate": 0.0001,
"loss": 0.7598,
"step": 8980
},
{
"epoch": 0.4931777083675818,
"grad_norm": 1.8863261938095093,
"learning_rate": 0.0001,
"loss": 0.7617,
"step": 9000
},
{
"epoch": 0.4931777083675818,
"eval_loss": 0.7443549036979675,
"eval_runtime": 31204.044,
"eval_samples_per_second": 2.079,
"eval_steps_per_second": 0.065,
"eval_wer": 44.168238762227254,
"step": 9000
},
{
"epoch": 0.49427365883062085,
"grad_norm": 2.0402464866638184,
"learning_rate": 0.0001,
"loss": 0.8118,
"step": 9020
},
{
"epoch": 0.4953696092936599,
"grad_norm": 2.072380304336548,
"learning_rate": 0.0001,
"loss": 0.6703,
"step": 9040
},
{
"epoch": 0.496465559756699,
"grad_norm": 1.9627012014389038,
"learning_rate": 0.0001,
"loss": 0.7547,
"step": 9060
},
{
"epoch": 0.49756151021973805,
"grad_norm": 1.904860496520996,
"learning_rate": 0.0001,
"loss": 0.8141,
"step": 9080
},
{
"epoch": 0.49865746068277716,
"grad_norm": 2.153672933578491,
"learning_rate": 0.0001,
"loss": 0.8167,
"step": 9100
},
{
"epoch": 0.49975341114581623,
"grad_norm": 2.0599303245544434,
"learning_rate": 0.0001,
"loss": 0.8632,
"step": 9120
},
{
"epoch": 0.5008493616088553,
"grad_norm": 1.9562146663665771,
"learning_rate": 0.0001,
"loss": 0.7477,
"step": 9140
},
{
"epoch": 0.5019453120718943,
"grad_norm": 2.086508274078369,
"learning_rate": 0.0001,
"loss": 0.7973,
"step": 9160
},
{
"epoch": 0.5030412625349334,
"grad_norm": 1.9192993640899658,
"learning_rate": 0.0001,
"loss": 0.8359,
"step": 9180
},
{
"epoch": 0.5041372129979725,
"grad_norm": 1.9085866212844849,
"learning_rate": 0.0001,
"loss": 0.7942,
"step": 9200
},
{
"epoch": 0.5052331634610115,
"grad_norm": 1.901637315750122,
"learning_rate": 0.0001,
"loss": 0.7608,
"step": 9220
},
{
"epoch": 0.5063291139240507,
"grad_norm": 2.145914316177368,
"learning_rate": 0.0001,
"loss": 0.6919,
"step": 9240
},
{
"epoch": 0.5074250643870897,
"grad_norm": 1.9005271196365356,
"learning_rate": 0.0001,
"loss": 0.7506,
"step": 9260
},
{
"epoch": 0.5085210148501288,
"grad_norm": 1.6468952894210815,
"learning_rate": 0.0001,
"loss": 0.7843,
"step": 9280
},
{
"epoch": 0.5096169653131678,
"grad_norm": 1.7703279256820679,
"learning_rate": 0.0001,
"loss": 0.7192,
"step": 9300
},
{
"epoch": 0.5107129157762069,
"grad_norm": 2.0094175338745117,
"learning_rate": 0.0001,
"loss": 0.847,
"step": 9320
},
{
"epoch": 0.511808866239246,
"grad_norm": 2.0970561504364014,
"learning_rate": 0.0001,
"loss": 0.7679,
"step": 9340
},
{
"epoch": 0.512904816702285,
"grad_norm": 1.757664680480957,
"learning_rate": 0.0001,
"loss": 0.7391,
"step": 9360
},
{
"epoch": 0.5140007671653242,
"grad_norm": 1.8297368288040161,
"learning_rate": 0.0001,
"loss": 0.8382,
"step": 9380
},
{
"epoch": 0.5150967176283632,
"grad_norm": 1.9832725524902344,
"learning_rate": 0.0001,
"loss": 0.7226,
"step": 9400
},
{
"epoch": 0.5161926680914023,
"grad_norm": 1.7083086967468262,
"learning_rate": 0.0001,
"loss": 0.7798,
"step": 9420
},
{
"epoch": 0.5172886185544413,
"grad_norm": 1.7105575799942017,
"learning_rate": 0.0001,
"loss": 0.7118,
"step": 9440
},
{
"epoch": 0.5183845690174804,
"grad_norm": 1.547608494758606,
"learning_rate": 0.0001,
"loss": 0.7259,
"step": 9460
},
{
"epoch": 0.5194805194805194,
"grad_norm": 2.0215799808502197,
"learning_rate": 0.0001,
"loss": 0.7648,
"step": 9480
},
{
"epoch": 0.5205764699435586,
"grad_norm": 1.5999863147735596,
"learning_rate": 0.0001,
"loss": 0.7863,
"step": 9500
},
{
"epoch": 0.5216724204065977,
"grad_norm": 2.0813591480255127,
"learning_rate": 0.0001,
"loss": 0.758,
"step": 9520
},
{
"epoch": 0.5227683708696367,
"grad_norm": 1.6513686180114746,
"learning_rate": 0.0001,
"loss": 0.7735,
"step": 9540
},
{
"epoch": 0.5238643213326758,
"grad_norm": 1.49434232711792,
"learning_rate": 0.0001,
"loss": 0.6547,
"step": 9560
},
{
"epoch": 0.5249602717957148,
"grad_norm": 1.8316184282302856,
"learning_rate": 0.0001,
"loss": 0.7428,
"step": 9580
},
{
"epoch": 0.5260562222587539,
"grad_norm": 2.0041682720184326,
"learning_rate": 0.0001,
"loss": 0.7058,
"step": 9600
},
{
"epoch": 0.5271521727217929,
"grad_norm": 1.9916651248931885,
"learning_rate": 0.0001,
"loss": 0.7049,
"step": 9620
},
{
"epoch": 0.528248123184832,
"grad_norm": 1.8289718627929688,
"learning_rate": 0.0001,
"loss": 0.7179,
"step": 9640
},
{
"epoch": 0.5293440736478712,
"grad_norm": 1.7447452545166016,
"learning_rate": 0.0001,
"loss": 0.7432,
"step": 9660
},
{
"epoch": 0.5304400241109102,
"grad_norm": 2.375234365463257,
"learning_rate": 0.0001,
"loss": 0.6676,
"step": 9680
},
{
"epoch": 0.5315359745739493,
"grad_norm": 1.683435320854187,
"learning_rate": 0.0001,
"loss": 0.7269,
"step": 9700
},
{
"epoch": 0.5326319250369883,
"grad_norm": 1.6535717248916626,
"learning_rate": 0.0001,
"loss": 0.7315,
"step": 9720
},
{
"epoch": 0.5337278755000274,
"grad_norm": 1.5276830196380615,
"learning_rate": 0.0001,
"loss": 0.7382,
"step": 9740
},
{
"epoch": 0.5348238259630664,
"grad_norm": 1.8443965911865234,
"learning_rate": 0.0001,
"loss": 0.7471,
"step": 9760
},
{
"epoch": 0.5359197764261056,
"grad_norm": 2.0346148014068604,
"learning_rate": 0.0001,
"loss": 0.7268,
"step": 9780
},
{
"epoch": 0.5370157268891446,
"grad_norm": 1.750613808631897,
"learning_rate": 0.0001,
"loss": 0.8444,
"step": 9800
},
{
"epoch": 0.5381116773521837,
"grad_norm": 1.9546024799346924,
"learning_rate": 0.0001,
"loss": 0.6968,
"step": 9820
},
{
"epoch": 0.5392076278152228,
"grad_norm": 1.6618010997772217,
"learning_rate": 0.0001,
"loss": 0.7222,
"step": 9840
},
{
"epoch": 0.5403035782782618,
"grad_norm": 1.6404950618743896,
"learning_rate": 0.0001,
"loss": 0.6896,
"step": 9860
},
{
"epoch": 0.5413995287413009,
"grad_norm": 1.7741234302520752,
"learning_rate": 0.0001,
"loss": 0.7412,
"step": 9880
},
{
"epoch": 0.5424954792043399,
"grad_norm": 1.8278882503509521,
"learning_rate": 0.0001,
"loss": 0.7385,
"step": 9900
},
{
"epoch": 0.5435914296673791,
"grad_norm": 1.6102566719055176,
"learning_rate": 0.0001,
"loss": 0.7461,
"step": 9920
},
{
"epoch": 0.5446873801304181,
"grad_norm": 1.7899205684661865,
"learning_rate": 0.0001,
"loss": 0.6349,
"step": 9940
},
{
"epoch": 0.5457833305934572,
"grad_norm": 1.9663938283920288,
"learning_rate": 0.0001,
"loss": 0.8028,
"step": 9960
},
{
"epoch": 0.5468792810564962,
"grad_norm": 1.841476559638977,
"learning_rate": 0.0001,
"loss": 0.7503,
"step": 9980
},
{
"epoch": 0.5479752315195353,
"grad_norm": 1.9106056690216064,
"learning_rate": 0.0001,
"loss": 0.7097,
"step": 10000
},
{
"epoch": 0.5479752315195353,
"eval_loss": 0.7265371084213257,
"eval_runtime": 30536.1813,
"eval_samples_per_second": 2.125,
"eval_steps_per_second": 0.066,
"eval_wer": 42.517110448415295,
"step": 10000
},
{
"epoch": 0.5490711819825744,
"grad_norm": 1.680649995803833,
"learning_rate": 0.0001,
"loss": 0.6445,
"step": 10020
},
{
"epoch": 0.5501671324456134,
"grad_norm": 2.079050064086914,
"learning_rate": 0.0001,
"loss": 0.7648,
"step": 10040
},
{
"epoch": 0.5512630829086526,
"grad_norm": 1.4419294595718384,
"learning_rate": 0.0001,
"loss": 0.6953,
"step": 10060
},
{
"epoch": 0.5523590333716916,
"grad_norm": 1.9906927347183228,
"learning_rate": 0.0001,
"loss": 0.749,
"step": 10080
},
{
"epoch": 0.5534549838347307,
"grad_norm": 1.7384852170944214,
"learning_rate": 0.0001,
"loss": 0.745,
"step": 10100
},
{
"epoch": 0.5545509342977697,
"grad_norm": 1.7342479228973389,
"learning_rate": 0.0001,
"loss": 0.7687,
"step": 10120
},
{
"epoch": 0.5556468847608088,
"grad_norm": 1.887969970703125,
"learning_rate": 0.0001,
"loss": 0.7662,
"step": 10140
},
{
"epoch": 0.5567428352238479,
"grad_norm": 1.6345020532608032,
"learning_rate": 0.0001,
"loss": 0.7843,
"step": 10160
},
{
"epoch": 0.5578387856868869,
"grad_norm": 1.5596251487731934,
"learning_rate": 0.0001,
"loss": 0.6983,
"step": 10180
},
{
"epoch": 0.5589347361499261,
"grad_norm": 1.6423192024230957,
"learning_rate": 0.0001,
"loss": 0.6622,
"step": 10200
},
{
"epoch": 0.5600306866129651,
"grad_norm": 1.7268792390823364,
"learning_rate": 0.0001,
"loss": 0.8409,
"step": 10220
},
{
"epoch": 0.5611266370760042,
"grad_norm": 1.6870604753494263,
"learning_rate": 0.0001,
"loss": 0.7801,
"step": 10240
},
{
"epoch": 0.5622225875390432,
"grad_norm": 1.5945113897323608,
"learning_rate": 0.0001,
"loss": 0.6695,
"step": 10260
},
{
"epoch": 0.5633185380020823,
"grad_norm": 1.7995914220809937,
"learning_rate": 0.0001,
"loss": 0.7088,
"step": 10280
},
{
"epoch": 0.5644144884651213,
"grad_norm": 1.8924362659454346,
"learning_rate": 0.0001,
"loss": 0.7621,
"step": 10300
},
{
"epoch": 0.5655104389281604,
"grad_norm": 1.5099490880966187,
"learning_rate": 0.0001,
"loss": 0.6923,
"step": 10320
},
{
"epoch": 0.5666063893911996,
"grad_norm": 1.481195092201233,
"learning_rate": 0.0001,
"loss": 0.6801,
"step": 10340
},
{
"epoch": 0.5677023398542386,
"grad_norm": 1.9247808456420898,
"learning_rate": 0.0001,
"loss": 0.7247,
"step": 10360
},
{
"epoch": 0.5687982903172777,
"grad_norm": 1.721666693687439,
"learning_rate": 0.0001,
"loss": 0.85,
"step": 10380
},
{
"epoch": 0.5698942407803167,
"grad_norm": 1.981312870979309,
"learning_rate": 0.0001,
"loss": 0.6894,
"step": 10400
},
{
"epoch": 0.5709901912433558,
"grad_norm": 1.825363039970398,
"learning_rate": 0.0001,
"loss": 0.7017,
"step": 10420
},
{
"epoch": 0.5720861417063948,
"grad_norm": 2.021385669708252,
"learning_rate": 0.0001,
"loss": 0.7996,
"step": 10440
},
{
"epoch": 0.5731820921694339,
"grad_norm": 1.9287372827529907,
"learning_rate": 0.0001,
"loss": 0.7387,
"step": 10460
},
{
"epoch": 0.5742780426324731,
"grad_norm": 2.0109355449676514,
"learning_rate": 0.0001,
"loss": 0.7359,
"step": 10480
},
{
"epoch": 0.5753739930955121,
"grad_norm": 1.7715758085250854,
"learning_rate": 0.0001,
"loss": 0.7126,
"step": 10500
},
{
"epoch": 0.5764699435585512,
"grad_norm": 1.5866303443908691,
"learning_rate": 0.0001,
"loss": 0.6808,
"step": 10520
},
{
"epoch": 0.5775658940215902,
"grad_norm": 1.3831912279129028,
"learning_rate": 0.0001,
"loss": 0.7251,
"step": 10540
},
{
"epoch": 0.5786618444846293,
"grad_norm": 1.603388786315918,
"learning_rate": 0.0001,
"loss": 0.6497,
"step": 10560
},
{
"epoch": 0.5797577949476683,
"grad_norm": 1.8507051467895508,
"learning_rate": 0.0001,
"loss": 0.7247,
"step": 10580
},
{
"epoch": 0.5808537454107074,
"grad_norm": 2.240337610244751,
"learning_rate": 0.0001,
"loss": 0.7879,
"step": 10600
},
{
"epoch": 0.5819496958737465,
"grad_norm": 1.858344316482544,
"learning_rate": 0.0001,
"loss": 0.647,
"step": 10620
},
{
"epoch": 0.5830456463367856,
"grad_norm": 1.840640664100647,
"learning_rate": 0.0001,
"loss": 0.6924,
"step": 10640
},
{
"epoch": 0.5841415967998247,
"grad_norm": 2.0423295497894287,
"learning_rate": 0.0001,
"loss": 0.6762,
"step": 10660
},
{
"epoch": 0.5852375472628637,
"grad_norm": 1.7426679134368896,
"learning_rate": 0.0001,
"loss": 0.7824,
"step": 10680
},
{
"epoch": 0.5863334977259028,
"grad_norm": 1.5974029302597046,
"learning_rate": 0.0001,
"loss": 0.6874,
"step": 10700
},
{
"epoch": 0.5874294481889418,
"grad_norm": 1.6082810163497925,
"learning_rate": 0.0001,
"loss": 0.6916,
"step": 10720
},
{
"epoch": 0.588525398651981,
"grad_norm": 1.6124242544174194,
"learning_rate": 0.0001,
"loss": 0.676,
"step": 10740
},
{
"epoch": 0.58962134911502,
"grad_norm": 1.9140983819961548,
"learning_rate": 0.0001,
"loss": 0.6281,
"step": 10760
},
{
"epoch": 0.5907172995780591,
"grad_norm": 1.708742618560791,
"learning_rate": 0.0001,
"loss": 0.7245,
"step": 10780
},
{
"epoch": 0.5918132500410982,
"grad_norm": 2.36368989944458,
"learning_rate": 0.0001,
"loss": 0.6934,
"step": 10800
},
{
"epoch": 0.5929092005041372,
"grad_norm": 1.9806820154190063,
"learning_rate": 0.0001,
"loss": 0.677,
"step": 10820
},
{
"epoch": 0.5940051509671763,
"grad_norm": 1.893801212310791,
"learning_rate": 0.0001,
"loss": 0.7629,
"step": 10840
},
{
"epoch": 0.5951011014302153,
"grad_norm": 1.917204737663269,
"learning_rate": 0.0001,
"loss": 0.7836,
"step": 10860
},
{
"epoch": 0.5961970518932544,
"grad_norm": 1.5599673986434937,
"learning_rate": 0.0001,
"loss": 0.7132,
"step": 10880
},
{
"epoch": 0.5972930023562935,
"grad_norm": 1.9569772481918335,
"learning_rate": 0.0001,
"loss": 0.7466,
"step": 10900
},
{
"epoch": 0.5983889528193326,
"grad_norm": 1.8709198236465454,
"learning_rate": 0.0001,
"loss": 0.6456,
"step": 10920
},
{
"epoch": 0.5994849032823716,
"grad_norm": 1.8249480724334717,
"learning_rate": 0.0001,
"loss": 0.7159,
"step": 10940
},
{
"epoch": 0.6005808537454107,
"grad_norm": 1.7063779830932617,
"learning_rate": 0.0001,
"loss": 0.7093,
"step": 10960
},
{
"epoch": 0.6016768042084498,
"grad_norm": 1.681219220161438,
"learning_rate": 0.0001,
"loss": 0.6995,
"step": 10980
},
{
"epoch": 0.6027727546714888,
"grad_norm": 1.640663504600525,
"learning_rate": 0.0001,
"loss": 0.7045,
"step": 11000
},
{
"epoch": 0.6027727546714888,
"eval_loss": 0.7112395763397217,
"eval_runtime": 30901.8839,
"eval_samples_per_second": 2.1,
"eval_steps_per_second": 0.066,
"eval_wer": 47.33354332649714,
"step": 11000
},
{
"epoch": 0.603868705134528,
"grad_norm": 1.7204805612564087,
"learning_rate": 0.0001,
"loss": 0.7217,
"step": 11020
},
{
"epoch": 0.604964655597567,
"grad_norm": 1.507012128829956,
"learning_rate": 0.0001,
"loss": 0.7482,
"step": 11040
},
{
"epoch": 0.6060606060606061,
"grad_norm": 1.7084465026855469,
"learning_rate": 0.0001,
"loss": 0.645,
"step": 11060
},
{
"epoch": 0.6071565565236451,
"grad_norm": 1.73207426071167,
"learning_rate": 0.0001,
"loss": 0.7687,
"step": 11080
},
{
"epoch": 0.6082525069866842,
"grad_norm": 2.2146365642547607,
"learning_rate": 0.0001,
"loss": 0.7771,
"step": 11100
},
{
"epoch": 0.6093484574497233,
"grad_norm": 1.6794184446334839,
"learning_rate": 0.0001,
"loss": 0.6613,
"step": 11120
},
{
"epoch": 0.6104444079127623,
"grad_norm": 1.8254398107528687,
"learning_rate": 0.0001,
"loss": 0.6787,
"step": 11140
},
{
"epoch": 0.6115403583758015,
"grad_norm": 1.8397271633148193,
"learning_rate": 0.0001,
"loss": 0.7119,
"step": 11160
},
{
"epoch": 0.6126363088388405,
"grad_norm": 1.8676248788833618,
"learning_rate": 0.0001,
"loss": 0.7294,
"step": 11180
},
{
"epoch": 0.6137322593018796,
"grad_norm": 1.4971026182174683,
"learning_rate": 0.0001,
"loss": 0.6312,
"step": 11200
},
{
"epoch": 0.6148282097649186,
"grad_norm": 1.8128615617752075,
"learning_rate": 0.0001,
"loss": 0.653,
"step": 11220
},
{
"epoch": 0.6159241602279577,
"grad_norm": 1.426620364189148,
"learning_rate": 0.0001,
"loss": 0.7087,
"step": 11240
},
{
"epoch": 0.6170201106909967,
"grad_norm": 1.4840887784957886,
"learning_rate": 0.0001,
"loss": 0.6665,
"step": 11260
},
{
"epoch": 0.6181160611540358,
"grad_norm": 1.7882121801376343,
"learning_rate": 0.0001,
"loss": 0.7236,
"step": 11280
},
{
"epoch": 0.619212011617075,
"grad_norm": 1.8195546865463257,
"learning_rate": 0.0001,
"loss": 0.7998,
"step": 11300
},
{
"epoch": 0.620307962080114,
"grad_norm": 1.9482252597808838,
"learning_rate": 0.0001,
"loss": 0.751,
"step": 11320
},
{
"epoch": 0.6214039125431531,
"grad_norm": 2.1224782466888428,
"learning_rate": 0.0001,
"loss": 0.7518,
"step": 11340
},
{
"epoch": 0.6224998630061921,
"grad_norm": 1.811909556388855,
"learning_rate": 0.0001,
"loss": 0.679,
"step": 11360
},
{
"epoch": 0.6235958134692312,
"grad_norm": 2.0843353271484375,
"learning_rate": 0.0001,
"loss": 0.7381,
"step": 11380
},
{
"epoch": 0.6246917639322702,
"grad_norm": 1.5517933368682861,
"learning_rate": 0.0001,
"loss": 0.7318,
"step": 11400
},
{
"epoch": 0.6257877143953093,
"grad_norm": 1.3482716083526611,
"learning_rate": 0.0001,
"loss": 0.6999,
"step": 11420
},
{
"epoch": 0.6268836648583485,
"grad_norm": 1.548904299736023,
"learning_rate": 0.0001,
"loss": 0.8772,
"step": 11440
},
{
"epoch": 0.6279796153213875,
"grad_norm": 1.553775429725647,
"learning_rate": 0.0001,
"loss": 0.6479,
"step": 11460
},
{
"epoch": 0.6290755657844266,
"grad_norm": 2.0762696266174316,
"learning_rate": 0.0001,
"loss": 0.6457,
"step": 11480
},
{
"epoch": 0.6301715162474656,
"grad_norm": 1.9620105028152466,
"learning_rate": 0.0001,
"loss": 0.7098,
"step": 11500
},
{
"epoch": 0.6312674667105047,
"grad_norm": 1.382176399230957,
"learning_rate": 0.0001,
"loss": 0.686,
"step": 11520
},
{
"epoch": 0.6323634171735437,
"grad_norm": 1.9390108585357666,
"learning_rate": 0.0001,
"loss": 0.6882,
"step": 11540
},
{
"epoch": 0.6334593676365828,
"grad_norm": 1.7750768661499023,
"learning_rate": 0.0001,
"loss": 0.6942,
"step": 11560
},
{
"epoch": 0.6345553180996218,
"grad_norm": 1.8459293842315674,
"learning_rate": 0.0001,
"loss": 0.7118,
"step": 11580
},
{
"epoch": 0.635651268562661,
"grad_norm": 1.8210084438323975,
"learning_rate": 0.0001,
"loss": 0.7166,
"step": 11600
},
{
"epoch": 0.6367472190257001,
"grad_norm": 1.7728508710861206,
"learning_rate": 0.0001,
"loss": 0.692,
"step": 11620
},
{
"epoch": 0.6378431694887391,
"grad_norm": 1.7886627912521362,
"learning_rate": 0.0001,
"loss": 0.7185,
"step": 11640
},
{
"epoch": 0.6389391199517782,
"grad_norm": 1.895150065422058,
"learning_rate": 0.0001,
"loss": 0.7174,
"step": 11660
},
{
"epoch": 0.6400350704148172,
"grad_norm": 1.8740530014038086,
"learning_rate": 0.0001,
"loss": 0.6893,
"step": 11680
},
{
"epoch": 0.6411310208778563,
"grad_norm": 1.6588834524154663,
"learning_rate": 0.0001,
"loss": 0.7073,
"step": 11700
},
{
"epoch": 0.6422269713408953,
"grad_norm": 1.9573453664779663,
"learning_rate": 0.0001,
"loss": 0.671,
"step": 11720
},
{
"epoch": 0.6433229218039345,
"grad_norm": 1.7064661979675293,
"learning_rate": 0.0001,
"loss": 0.7401,
"step": 11740
},
{
"epoch": 0.6444188722669736,
"grad_norm": 1.8850706815719604,
"learning_rate": 0.0001,
"loss": 0.7397,
"step": 11760
},
{
"epoch": 0.6455148227300126,
"grad_norm": 1.7744836807250977,
"learning_rate": 0.0001,
"loss": 0.7204,
"step": 11780
},
{
"epoch": 0.6466107731930517,
"grad_norm": 1.5768756866455078,
"learning_rate": 0.0001,
"loss": 0.7868,
"step": 11800
},
{
"epoch": 0.6477067236560907,
"grad_norm": 2.0770552158355713,
"learning_rate": 0.0001,
"loss": 0.7434,
"step": 11820
},
{
"epoch": 0.6488026741191298,
"grad_norm": 2.0797810554504395,
"learning_rate": 0.0001,
"loss": 0.7342,
"step": 11840
},
{
"epoch": 0.6498986245821688,
"grad_norm": 1.8984261751174927,
"learning_rate": 0.0001,
"loss": 0.6642,
"step": 11860
},
{
"epoch": 0.650994575045208,
"grad_norm": 2.00124192237854,
"learning_rate": 0.0001,
"loss": 0.749,
"step": 11880
},
{
"epoch": 0.652090525508247,
"grad_norm": 1.5575506687164307,
"learning_rate": 0.0001,
"loss": 0.8315,
"step": 11900
},
{
"epoch": 0.6531864759712861,
"grad_norm": 2.6183197498321533,
"learning_rate": 0.0001,
"loss": 0.7533,
"step": 11920
},
{
"epoch": 0.6542824264343252,
"grad_norm": 1.7211464643478394,
"learning_rate": 0.0001,
"loss": 0.7073,
"step": 11940
},
{
"epoch": 0.6553783768973642,
"grad_norm": 1.9105095863342285,
"learning_rate": 0.0001,
"loss": 0.6526,
"step": 11960
},
{
"epoch": 0.6564743273604033,
"grad_norm": 1.9578741788864136,
"learning_rate": 0.0001,
"loss": 0.6746,
"step": 11980
},
{
"epoch": 0.6575702778234424,
"grad_norm": 1.8473331928253174,
"learning_rate": 0.0001,
"loss": 0.6808,
"step": 12000
},
{
"epoch": 0.6575702778234424,
"eval_loss": 0.6952778100967407,
"eval_runtime": 30743.2328,
"eval_samples_per_second": 2.111,
"eval_steps_per_second": 0.066,
"eval_wer": 51.05538683822195,
"step": 12000
},
{
"epoch": 0.6586662282864815,
"grad_norm": 1.7486096620559692,
"learning_rate": 0.0001,
"loss": 0.7417,
"step": 12020
},
{
"epoch": 0.6597621787495205,
"grad_norm": 1.6540303230285645,
"learning_rate": 0.0001,
"loss": 0.7118,
"step": 12040
},
{
"epoch": 0.6608581292125596,
"grad_norm": 1.89935302734375,
"learning_rate": 0.0001,
"loss": 0.6315,
"step": 12060
},
{
"epoch": 0.6619540796755987,
"grad_norm": 1.8266342878341675,
"learning_rate": 0.0001,
"loss": 0.7434,
"step": 12080
},
{
"epoch": 0.6630500301386377,
"grad_norm": 1.8254984617233276,
"learning_rate": 0.0001,
"loss": 0.7832,
"step": 12100
},
{
"epoch": 0.6641459806016768,
"grad_norm": 2.0791878700256348,
"learning_rate": 0.0001,
"loss": 0.6694,
"step": 12120
},
{
"epoch": 0.6652419310647159,
"grad_norm": 1.8277227878570557,
"learning_rate": 0.0001,
"loss": 0.6686,
"step": 12140
},
{
"epoch": 0.666337881527755,
"grad_norm": 1.697810411453247,
"learning_rate": 0.0001,
"loss": 0.6895,
"step": 12160
},
{
"epoch": 0.667433831990794,
"grad_norm": 1.6084686517715454,
"learning_rate": 0.0001,
"loss": 0.7431,
"step": 12180
},
{
"epoch": 0.6685297824538331,
"grad_norm": 1.7437437772750854,
"learning_rate": 0.0001,
"loss": 0.6851,
"step": 12200
},
{
"epoch": 0.6696257329168721,
"grad_norm": 1.849237322807312,
"learning_rate": 0.0001,
"loss": 0.6926,
"step": 12220
},
{
"epoch": 0.6707216833799112,
"grad_norm": 1.8398326635360718,
"learning_rate": 0.0001,
"loss": 0.7282,
"step": 12240
},
{
"epoch": 0.6718176338429503,
"grad_norm": 2.056136131286621,
"learning_rate": 0.0001,
"loss": 0.76,
"step": 12260
},
{
"epoch": 0.6729135843059894,
"grad_norm": 1.8255378007888794,
"learning_rate": 0.0001,
"loss": 0.7155,
"step": 12280
},
{
"epoch": 0.6740095347690285,
"grad_norm": 1.6555898189544678,
"learning_rate": 0.0001,
"loss": 0.6333,
"step": 12300
},
{
"epoch": 0.6751054852320675,
"grad_norm": 1.825000286102295,
"learning_rate": 0.0001,
"loss": 0.6603,
"step": 12320
},
{
"epoch": 0.6762014356951066,
"grad_norm": 1.5000559091567993,
"learning_rate": 0.0001,
"loss": 0.6861,
"step": 12340
},
{
"epoch": 0.6772973861581456,
"grad_norm": 1.826874017715454,
"learning_rate": 0.0001,
"loss": 0.7337,
"step": 12360
},
{
"epoch": 0.6783933366211847,
"grad_norm": 2.042325735092163,
"learning_rate": 0.0001,
"loss": 0.7566,
"step": 12380
},
{
"epoch": 0.6794892870842238,
"grad_norm": 1.6419124603271484,
"learning_rate": 0.0001,
"loss": 0.6825,
"step": 12400
},
{
"epoch": 0.6805852375472629,
"grad_norm": 2.1221911907196045,
"learning_rate": 0.0001,
"loss": 0.7013,
"step": 12420
},
{
"epoch": 0.681681188010302,
"grad_norm": 1.598191738128662,
"learning_rate": 0.0001,
"loss": 0.6976,
"step": 12440
},
{
"epoch": 0.682777138473341,
"grad_norm": 1.8890109062194824,
"learning_rate": 0.0001,
"loss": 0.6792,
"step": 12460
},
{
"epoch": 0.6838730889363801,
"grad_norm": 1.7647831439971924,
"learning_rate": 0.0001,
"loss": 0.6878,
"step": 12480
},
{
"epoch": 0.6849690393994191,
"grad_norm": 1.358193278312683,
"learning_rate": 0.0001,
"loss": 0.754,
"step": 12500
},
{
"epoch": 0.6860649898624582,
"grad_norm": 1.9739768505096436,
"learning_rate": 0.0001,
"loss": 0.6799,
"step": 12520
},
{
"epoch": 0.6871609403254972,
"grad_norm": 1.532867670059204,
"learning_rate": 0.0001,
"loss": 0.7063,
"step": 12540
},
{
"epoch": 0.6882568907885364,
"grad_norm": 1.3203604221343994,
"learning_rate": 0.0001,
"loss": 0.6725,
"step": 12560
},
{
"epoch": 0.6893528412515755,
"grad_norm": 1.835530161857605,
"learning_rate": 0.0001,
"loss": 0.6447,
"step": 12580
},
{
"epoch": 0.6904487917146145,
"grad_norm": 1.4508098363876343,
"learning_rate": 0.0001,
"loss": 0.7137,
"step": 12600
},
{
"epoch": 0.6915447421776536,
"grad_norm": 1.520942211151123,
"learning_rate": 0.0001,
"loss": 0.6454,
"step": 12620
},
{
"epoch": 0.6926406926406926,
"grad_norm": 1.655716061592102,
"learning_rate": 0.0001,
"loss": 0.6659,
"step": 12640
},
{
"epoch": 0.6937366431037317,
"grad_norm": 1.5934149026870728,
"learning_rate": 0.0001,
"loss": 0.7084,
"step": 12660
},
{
"epoch": 0.6948325935667707,
"grad_norm": 1.961393117904663,
"learning_rate": 0.0001,
"loss": 0.7553,
"step": 12680
},
{
"epoch": 0.6959285440298099,
"grad_norm": 1.4186025857925415,
"learning_rate": 0.0001,
"loss": 0.7205,
"step": 12700
},
{
"epoch": 0.697024494492849,
"grad_norm": 1.6756350994110107,
"learning_rate": 0.0001,
"loss": 0.6166,
"step": 12720
},
{
"epoch": 0.698120444955888,
"grad_norm": 1.8438879251480103,
"learning_rate": 0.0001,
"loss": 0.6837,
"step": 12740
},
{
"epoch": 0.6992163954189271,
"grad_norm": 1.5732409954071045,
"learning_rate": 0.0001,
"loss": 0.7777,
"step": 12760
},
{
"epoch": 0.7003123458819661,
"grad_norm": 1.8927737474441528,
"learning_rate": 0.0001,
"loss": 0.6978,
"step": 12780
},
{
"epoch": 0.7014082963450052,
"grad_norm": 1.4720592498779297,
"learning_rate": 0.0001,
"loss": 0.7043,
"step": 12800
},
{
"epoch": 0.7025042468080442,
"grad_norm": 1.8671678304672241,
"learning_rate": 0.0001,
"loss": 0.7245,
"step": 12820
},
{
"epoch": 0.7036001972710834,
"grad_norm": 1.5541017055511475,
"learning_rate": 0.0001,
"loss": 0.728,
"step": 12840
},
{
"epoch": 0.7046961477341224,
"grad_norm": 1.6623157262802124,
"learning_rate": 0.0001,
"loss": 0.7211,
"step": 12860
},
{
"epoch": 0.7057920981971615,
"grad_norm": 2.1644530296325684,
"learning_rate": 0.0001,
"loss": 0.7958,
"step": 12880
},
{
"epoch": 0.7068880486602006,
"grad_norm": 1.4526203870773315,
"learning_rate": 0.0001,
"loss": 0.6385,
"step": 12900
},
{
"epoch": 0.7079839991232396,
"grad_norm": 1.586296796798706,
"learning_rate": 0.0001,
"loss": 0.7759,
"step": 12920
},
{
"epoch": 0.7090799495862787,
"grad_norm": 1.8547158241271973,
"learning_rate": 0.0001,
"loss": 0.7478,
"step": 12940
},
{
"epoch": 0.7101759000493177,
"grad_norm": 1.46295964717865,
"learning_rate": 0.0001,
"loss": 0.5804,
"step": 12960
},
{
"epoch": 0.7112718505123569,
"grad_norm": 1.8653600215911865,
"learning_rate": 0.0001,
"loss": 0.6874,
"step": 12980
},
{
"epoch": 0.7123678009753959,
"grad_norm": 1.8301453590393066,
"learning_rate": 0.0001,
"loss": 0.6886,
"step": 13000
},
{
"epoch": 0.7123678009753959,
"eval_loss": 0.6790329217910767,
"eval_runtime": 30731.9197,
"eval_samples_per_second": 2.111,
"eval_steps_per_second": 0.066,
"eval_wer": 52.619379401724906,
"step": 13000
},
{
"epoch": 0.713463751438435,
"grad_norm": 2.6560330390930176,
"learning_rate": 0.0001,
"loss": 0.6422,
"step": 13020
},
{
"epoch": 0.714559701901474,
"grad_norm": 1.9559868574142456,
"learning_rate": 0.0001,
"loss": 0.7321,
"step": 13040
},
{
"epoch": 0.7156556523645131,
"grad_norm": 1.8091590404510498,
"learning_rate": 0.0001,
"loss": 0.6303,
"step": 13060
},
{
"epoch": 0.7167516028275522,
"grad_norm": 1.428688406944275,
"learning_rate": 0.0001,
"loss": 0.6489,
"step": 13080
},
{
"epoch": 0.7178475532905912,
"grad_norm": 1.6543529033660889,
"learning_rate": 0.0001,
"loss": 0.6793,
"step": 13100
},
{
"epoch": 0.7189435037536304,
"grad_norm": 2.012596368789673,
"learning_rate": 0.0001,
"loss": 0.6502,
"step": 13120
},
{
"epoch": 0.7200394542166694,
"grad_norm": 2.0701732635498047,
"learning_rate": 0.0001,
"loss": 0.6746,
"step": 13140
},
{
"epoch": 0.7211354046797085,
"grad_norm": 1.5318336486816406,
"learning_rate": 0.0001,
"loss": 0.6142,
"step": 13160
},
{
"epoch": 0.7222313551427475,
"grad_norm": 1.7924253940582275,
"learning_rate": 0.0001,
"loss": 0.6536,
"step": 13180
},
{
"epoch": 0.7233273056057866,
"grad_norm": 1.8197805881500244,
"learning_rate": 0.0001,
"loss": 0.7804,
"step": 13200
},
{
"epoch": 0.7244232560688257,
"grad_norm": 1.5444835424423218,
"learning_rate": 0.0001,
"loss": 0.6019,
"step": 13220
},
{
"epoch": 0.7255192065318647,
"grad_norm": 1.735474705696106,
"learning_rate": 0.0001,
"loss": 0.5891,
"step": 13240
},
{
"epoch": 0.7266151569949039,
"grad_norm": 1.9891881942749023,
"learning_rate": 0.0001,
"loss": 0.6498,
"step": 13260
},
{
"epoch": 0.7277111074579429,
"grad_norm": 1.6917784214019775,
"learning_rate": 0.0001,
"loss": 0.7044,
"step": 13280
},
{
"epoch": 0.728807057920982,
"grad_norm": 1.661033034324646,
"learning_rate": 0.0001,
"loss": 0.6185,
"step": 13300
},
{
"epoch": 0.729903008384021,
"grad_norm": 2.326937198638916,
"learning_rate": 0.0001,
"loss": 0.7274,
"step": 13320
},
{
"epoch": 0.7309989588470601,
"grad_norm": 1.6929740905761719,
"learning_rate": 0.0001,
"loss": 0.6452,
"step": 13340
},
{
"epoch": 0.7320949093100991,
"grad_norm": 1.7399369478225708,
"learning_rate": 0.0001,
"loss": 0.6243,
"step": 13360
},
{
"epoch": 0.7331908597731382,
"grad_norm": 1.7071975469589233,
"learning_rate": 0.0001,
"loss": 0.6578,
"step": 13380
},
{
"epoch": 0.7342868102361774,
"grad_norm": 1.7368084192276,
"learning_rate": 0.0001,
"loss": 0.6337,
"step": 13400
},
{
"epoch": 0.7353827606992164,
"grad_norm": 2.0973663330078125,
"learning_rate": 0.0001,
"loss": 0.7101,
"step": 13420
},
{
"epoch": 0.7364787111622555,
"grad_norm": 1.636421799659729,
"learning_rate": 0.0001,
"loss": 0.6958,
"step": 13440
},
{
"epoch": 0.7375746616252945,
"grad_norm": 1.6134982109069824,
"learning_rate": 0.0001,
"loss": 0.7336,
"step": 13460
},
{
"epoch": 0.7386706120883336,
"grad_norm": 1.8911906480789185,
"learning_rate": 0.0001,
"loss": 0.6723,
"step": 13480
},
{
"epoch": 0.7397665625513726,
"grad_norm": 1.8372421264648438,
"learning_rate": 0.0001,
"loss": 0.6748,
"step": 13500
},
{
"epoch": 0.7408625130144117,
"grad_norm": 1.8735203742980957,
"learning_rate": 0.0001,
"loss": 0.7481,
"step": 13520
},
{
"epoch": 0.7419584634774509,
"grad_norm": 1.7684818506240845,
"learning_rate": 0.0001,
"loss": 0.6395,
"step": 13540
},
{
"epoch": 0.7430544139404899,
"grad_norm": 1.8018254041671753,
"learning_rate": 0.0001,
"loss": 0.8523,
"step": 13560
},
{
"epoch": 0.744150364403529,
"grad_norm": 1.7703465223312378,
"learning_rate": 0.0001,
"loss": 0.6938,
"step": 13580
},
{
"epoch": 0.745246314866568,
"grad_norm": 1.6299625635147095,
"learning_rate": 0.0001,
"loss": 0.6341,
"step": 13600
},
{
"epoch": 0.7463422653296071,
"grad_norm": 2.0545101165771484,
"learning_rate": 0.0001,
"loss": 0.7248,
"step": 13620
},
{
"epoch": 0.7474382157926461,
"grad_norm": 1.5883153676986694,
"learning_rate": 0.0001,
"loss": 0.6917,
"step": 13640
},
{
"epoch": 0.7485341662556853,
"grad_norm": 1.5751030445098877,
"learning_rate": 0.0001,
"loss": 0.6789,
"step": 13660
},
{
"epoch": 0.7496301167187243,
"grad_norm": 1.53587806224823,
"learning_rate": 0.0001,
"loss": 0.6462,
"step": 13680
},
{
"epoch": 0.7507260671817634,
"grad_norm": 1.5108363628387451,
"learning_rate": 0.0001,
"loss": 0.7585,
"step": 13700
},
{
"epoch": 0.7518220176448025,
"grad_norm": 1.5622588396072388,
"learning_rate": 0.0001,
"loss": 0.6466,
"step": 13720
},
{
"epoch": 0.7529179681078415,
"grad_norm": 1.9326175451278687,
"learning_rate": 0.0001,
"loss": 0.7086,
"step": 13740
},
{
"epoch": 0.7540139185708806,
"grad_norm": 1.7847191095352173,
"learning_rate": 0.0001,
"loss": 0.661,
"step": 13760
},
{
"epoch": 0.7551098690339196,
"grad_norm": 2.1520116329193115,
"learning_rate": 0.0001,
"loss": 0.5861,
"step": 13780
},
{
"epoch": 0.7562058194969588,
"grad_norm": 1.9346301555633545,
"learning_rate": 0.0001,
"loss": 0.7735,
"step": 13800
},
{
"epoch": 0.7573017699599978,
"grad_norm": 1.5564959049224854,
"learning_rate": 0.0001,
"loss": 0.6997,
"step": 13820
},
{
"epoch": 0.7583977204230369,
"grad_norm": 1.848569393157959,
"learning_rate": 0.0001,
"loss": 0.6836,
"step": 13840
},
{
"epoch": 0.759493670886076,
"grad_norm": 1.5552887916564941,
"learning_rate": 0.0001,
"loss": 0.6014,
"step": 13860
},
{
"epoch": 0.760589621349115,
"grad_norm": 1.5576545000076294,
"learning_rate": 0.0001,
"loss": 0.7034,
"step": 13880
},
{
"epoch": 0.7616855718121541,
"grad_norm": 1.795949935913086,
"learning_rate": 0.0001,
"loss": 0.7322,
"step": 13900
},
{
"epoch": 0.7627815222751931,
"grad_norm": 1.498818039894104,
"learning_rate": 0.0001,
"loss": 0.697,
"step": 13920
},
{
"epoch": 0.7638774727382323,
"grad_norm": 1.7154011726379395,
"learning_rate": 0.0001,
"loss": 0.7057,
"step": 13940
},
{
"epoch": 0.7649734232012713,
"grad_norm": 1.693199872970581,
"learning_rate": 0.0001,
"loss": 0.722,
"step": 13960
},
{
"epoch": 0.7660693736643104,
"grad_norm": 1.7617517709732056,
"learning_rate": 0.0001,
"loss": 0.727,
"step": 13980
},
{
"epoch": 0.7671653241273494,
"grad_norm": 1.7693978548049927,
"learning_rate": 0.0001,
"loss": 0.6552,
"step": 14000
},
{
"epoch": 0.7671653241273494,
"eval_loss": 0.6688939929008484,
"eval_runtime": 30450.1084,
"eval_samples_per_second": 2.131,
"eval_steps_per_second": 0.067,
"eval_wer": 38.925884967114385,
"step": 14000
},
{
"epoch": 0.7682612745903885,
"grad_norm": 1.531043529510498,
"learning_rate": 0.0001,
"loss": 0.6237,
"step": 14020
},
{
"epoch": 0.7693572250534276,
"grad_norm": 1.7747310400009155,
"learning_rate": 0.0001,
"loss": 0.6769,
"step": 14040
},
{
"epoch": 0.7704531755164666,
"grad_norm": 1.457766056060791,
"learning_rate": 0.0001,
"loss": 0.6406,
"step": 14060
},
{
"epoch": 0.7715491259795058,
"grad_norm": 1.478061318397522,
"learning_rate": 0.0001,
"loss": 0.6797,
"step": 14080
},
{
"epoch": 0.7726450764425448,
"grad_norm": 1.462485909461975,
"learning_rate": 0.0001,
"loss": 0.654,
"step": 14100
},
{
"epoch": 0.7737410269055839,
"grad_norm": 2.1201417446136475,
"learning_rate": 0.0001,
"loss": 0.676,
"step": 14120
},
{
"epoch": 0.7748369773686229,
"grad_norm": 1.6672828197479248,
"learning_rate": 0.0001,
"loss": 0.6402,
"step": 14140
},
{
"epoch": 0.775932927831662,
"grad_norm": 1.848254680633545,
"learning_rate": 0.0001,
"loss": 0.6623,
"step": 14160
},
{
"epoch": 0.7770288782947011,
"grad_norm": 1.8868560791015625,
"learning_rate": 0.0001,
"loss": 0.6598,
"step": 14180
},
{
"epoch": 0.7781248287577401,
"grad_norm": 2.0615594387054443,
"learning_rate": 0.0001,
"loss": 0.6786,
"step": 14200
},
{
"epoch": 0.7792207792207793,
"grad_norm": 1.6596072912216187,
"learning_rate": 0.0001,
"loss": 0.6448,
"step": 14220
},
{
"epoch": 0.7803167296838183,
"grad_norm": 2.0829083919525146,
"learning_rate": 0.0001,
"loss": 0.6558,
"step": 14240
},
{
"epoch": 0.7814126801468574,
"grad_norm": 1.7660095691680908,
"learning_rate": 0.0001,
"loss": 0.7399,
"step": 14260
},
{
"epoch": 0.7825086306098964,
"grad_norm": 1.7068332433700562,
"learning_rate": 0.0001,
"loss": 0.637,
"step": 14280
},
{
"epoch": 0.7836045810729355,
"grad_norm": 1.5040172338485718,
"learning_rate": 0.0001,
"loss": 0.5708,
"step": 14300
},
{
"epoch": 0.7847005315359745,
"grad_norm": 1.7479969263076782,
"learning_rate": 0.0001,
"loss": 0.7348,
"step": 14320
},
{
"epoch": 0.7857964819990136,
"grad_norm": 1.7886347770690918,
"learning_rate": 0.0001,
"loss": 0.72,
"step": 14340
},
{
"epoch": 0.7868924324620528,
"grad_norm": 1.6001741886138916,
"learning_rate": 0.0001,
"loss": 0.6512,
"step": 14360
},
{
"epoch": 0.7879883829250918,
"grad_norm": 1.7489492893218994,
"learning_rate": 0.0001,
"loss": 0.6714,
"step": 14380
},
{
"epoch": 0.7890843333881309,
"grad_norm": 1.9967806339263916,
"learning_rate": 0.0001,
"loss": 0.6651,
"step": 14400
},
{
"epoch": 0.7901802838511699,
"grad_norm": 1.6555088758468628,
"learning_rate": 0.0001,
"loss": 0.6584,
"step": 14420
},
{
"epoch": 0.791276234314209,
"grad_norm": 1.589168667793274,
"learning_rate": 0.0001,
"loss": 0.6448,
"step": 14440
},
{
"epoch": 0.792372184777248,
"grad_norm": 1.2876309156417847,
"learning_rate": 0.0001,
"loss": 0.6612,
"step": 14460
},
{
"epoch": 0.7934681352402871,
"grad_norm": 1.6673985719680786,
"learning_rate": 0.0001,
"loss": 0.5769,
"step": 14480
},
{
"epoch": 0.7945640857033263,
"grad_norm": 1.6478184461593628,
"learning_rate": 0.0001,
"loss": 0.6457,
"step": 14500
},
{
"epoch": 0.7956600361663653,
"grad_norm": 1.5702099800109863,
"learning_rate": 0.0001,
"loss": 0.6866,
"step": 14520
},
{
"epoch": 0.7967559866294044,
"grad_norm": 1.850900411605835,
"learning_rate": 0.0001,
"loss": 0.6475,
"step": 14540
},
{
"epoch": 0.7978519370924434,
"grad_norm": 1.2784024477005005,
"learning_rate": 0.0001,
"loss": 0.6366,
"step": 14560
},
{
"epoch": 0.7989478875554825,
"grad_norm": 2.2533817291259766,
"learning_rate": 0.0001,
"loss": 0.6164,
"step": 14580
},
{
"epoch": 0.8000438380185215,
"grad_norm": 1.442713737487793,
"learning_rate": 0.0001,
"loss": 0.6853,
"step": 14600
},
{
"epoch": 0.8011397884815606,
"grad_norm": 1.594449520111084,
"learning_rate": 0.0001,
"loss": 0.6535,
"step": 14620
},
{
"epoch": 0.8022357389445997,
"grad_norm": 1.4961411952972412,
"learning_rate": 0.0001,
"loss": 0.6696,
"step": 14640
},
{
"epoch": 0.8033316894076388,
"grad_norm": 2.1010756492614746,
"learning_rate": 0.0001,
"loss": 0.6607,
"step": 14660
},
{
"epoch": 0.8044276398706779,
"grad_norm": 2.134493589401245,
"learning_rate": 0.0001,
"loss": 0.6512,
"step": 14680
},
{
"epoch": 0.8055235903337169,
"grad_norm": 1.6435072422027588,
"learning_rate": 0.0001,
"loss": 0.6094,
"step": 14700
},
{
"epoch": 0.806619540796756,
"grad_norm": 1.8982771635055542,
"learning_rate": 0.0001,
"loss": 0.6761,
"step": 14720
},
{
"epoch": 0.807715491259795,
"grad_norm": 1.968770146369934,
"learning_rate": 0.0001,
"loss": 0.7178,
"step": 14740
},
{
"epoch": 0.8088114417228341,
"grad_norm": 2.19568133354187,
"learning_rate": 0.0001,
"loss": 0.6246,
"step": 14760
},
{
"epoch": 0.8099073921858732,
"grad_norm": 1.6024566888809204,
"learning_rate": 0.0001,
"loss": 0.6174,
"step": 14780
},
{
"epoch": 0.8110033426489123,
"grad_norm": 1.4896485805511475,
"learning_rate": 0.0001,
"loss": 0.5843,
"step": 14800
},
{
"epoch": 0.8120992931119514,
"grad_norm": 1.502487301826477,
"learning_rate": 0.0001,
"loss": 0.7747,
"step": 14820
},
{
"epoch": 0.8131952435749904,
"grad_norm": 1.5037872791290283,
"learning_rate": 0.0001,
"loss": 0.6745,
"step": 14840
},
{
"epoch": 0.8142911940380295,
"grad_norm": 1.3984043598175049,
"learning_rate": 0.0001,
"loss": 0.6938,
"step": 14860
},
{
"epoch": 0.8153871445010685,
"grad_norm": 1.7627023458480835,
"learning_rate": 0.0001,
"loss": 0.6407,
"step": 14880
},
{
"epoch": 0.8164830949641076,
"grad_norm": 1.5276484489440918,
"learning_rate": 0.0001,
"loss": 0.6142,
"step": 14900
},
{
"epoch": 0.8175790454271467,
"grad_norm": 1.598743200302124,
"learning_rate": 0.0001,
"loss": 0.6534,
"step": 14920
},
{
"epoch": 0.8186749958901858,
"grad_norm": 1.5528680086135864,
"learning_rate": 0.0001,
"loss": 0.6272,
"step": 14940
},
{
"epoch": 0.8197709463532248,
"grad_norm": 1.71839439868927,
"learning_rate": 0.0001,
"loss": 0.7259,
"step": 14960
},
{
"epoch": 0.8208668968162639,
"grad_norm": 1.5527739524841309,
"learning_rate": 0.0001,
"loss": 0.6387,
"step": 14980
},
{
"epoch": 0.821962847279303,
"grad_norm": 1.7775479555130005,
"learning_rate": 0.0001,
"loss": 0.6963,
"step": 15000
},
{
"epoch": 0.821962847279303,
"eval_loss": 0.6593644618988037,
"eval_runtime": 30360.7703,
"eval_samples_per_second": 2.137,
"eval_steps_per_second": 0.067,
"eval_wer": 42.68128173436093,
"step": 15000
},
{
"epoch": 0.823058797742342,
"grad_norm": 1.772290825843811,
"learning_rate": 0.0001,
"loss": 0.6439,
"step": 15020
},
{
"epoch": 0.8241547482053811,
"grad_norm": 1.655604600906372,
"learning_rate": 0.0001,
"loss": 0.7521,
"step": 15040
},
{
"epoch": 0.8252506986684202,
"grad_norm": 1.5305246114730835,
"learning_rate": 0.0001,
"loss": 0.5623,
"step": 15060
},
{
"epoch": 0.8263466491314593,
"grad_norm": 1.399568796157837,
"learning_rate": 0.0001,
"loss": 0.634,
"step": 15080
},
{
"epoch": 0.8274425995944983,
"grad_norm": 1.412463903427124,
"learning_rate": 0.0001,
"loss": 0.7272,
"step": 15100
},
{
"epoch": 0.8285385500575374,
"grad_norm": 1.793396234512329,
"learning_rate": 0.0001,
"loss": 0.684,
"step": 15120
},
{
"epoch": 0.8296345005205765,
"grad_norm": 1.9623442888259888,
"learning_rate": 0.0001,
"loss": 0.6247,
"step": 15140
},
{
"epoch": 0.8307304509836155,
"grad_norm": 1.4576257467269897,
"learning_rate": 0.0001,
"loss": 0.675,
"step": 15160
},
{
"epoch": 0.8318264014466547,
"grad_norm": 1.6135623455047607,
"learning_rate": 0.0001,
"loss": 0.7046,
"step": 15180
},
{
"epoch": 0.8329223519096937,
"grad_norm": 1.5553112030029297,
"learning_rate": 0.0001,
"loss": 0.7246,
"step": 15200
},
{
"epoch": 0.8340183023727328,
"grad_norm": 1.4521915912628174,
"learning_rate": 0.0001,
"loss": 0.7288,
"step": 15220
},
{
"epoch": 0.8351142528357718,
"grad_norm": 1.429190754890442,
"learning_rate": 0.0001,
"loss": 0.6357,
"step": 15240
},
{
"epoch": 0.8362102032988109,
"grad_norm": 1.80194890499115,
"learning_rate": 0.0001,
"loss": 0.6462,
"step": 15260
},
{
"epoch": 0.8373061537618499,
"grad_norm": 1.833225131034851,
"learning_rate": 0.0001,
"loss": 0.6942,
"step": 15280
},
{
"epoch": 0.838402104224889,
"grad_norm": 1.8329098224639893,
"learning_rate": 0.0001,
"loss": 0.6525,
"step": 15300
},
{
"epoch": 0.8394980546879282,
"grad_norm": 1.5729244947433472,
"learning_rate": 0.0001,
"loss": 0.6721,
"step": 15320
},
{
"epoch": 0.8405940051509672,
"grad_norm": 1.8156899213790894,
"learning_rate": 0.0001,
"loss": 0.6777,
"step": 15340
},
{
"epoch": 0.8416899556140063,
"grad_norm": 1.7255985736846924,
"learning_rate": 0.0001,
"loss": 0.6653,
"step": 15360
},
{
"epoch": 0.8427859060770453,
"grad_norm": 1.8051388263702393,
"learning_rate": 0.0001,
"loss": 0.6429,
"step": 15380
},
{
"epoch": 0.8438818565400844,
"grad_norm": 1.4799489974975586,
"learning_rate": 0.0001,
"loss": 0.6219,
"step": 15400
},
{
"epoch": 0.8449778070031234,
"grad_norm": 1.5661497116088867,
"learning_rate": 0.0001,
"loss": 0.7042,
"step": 15420
},
{
"epoch": 0.8460737574661625,
"grad_norm": 1.7842859029769897,
"learning_rate": 0.0001,
"loss": 0.5657,
"step": 15440
},
{
"epoch": 0.8471697079292017,
"grad_norm": 2.036591053009033,
"learning_rate": 0.0001,
"loss": 0.7448,
"step": 15460
},
{
"epoch": 0.8482656583922407,
"grad_norm": 1.5923106670379639,
"learning_rate": 0.0001,
"loss": 0.6937,
"step": 15480
},
{
"epoch": 0.8493616088552798,
"grad_norm": 1.7609819173812866,
"learning_rate": 0.0001,
"loss": 0.6282,
"step": 15500
},
{
"epoch": 0.8504575593183188,
"grad_norm": 1.627193570137024,
"learning_rate": 0.0001,
"loss": 0.6593,
"step": 15520
},
{
"epoch": 0.8515535097813579,
"grad_norm": 1.5199600458145142,
"learning_rate": 0.0001,
"loss": 0.6493,
"step": 15540
},
{
"epoch": 0.8526494602443969,
"grad_norm": 1.8375046253204346,
"learning_rate": 0.0001,
"loss": 0.7139,
"step": 15560
},
{
"epoch": 0.853745410707436,
"grad_norm": 1.7061831951141357,
"learning_rate": 0.0001,
"loss": 0.645,
"step": 15580
},
{
"epoch": 0.854841361170475,
"grad_norm": 1.5046154260635376,
"learning_rate": 0.0001,
"loss": 0.7154,
"step": 15600
},
{
"epoch": 0.8559373116335142,
"grad_norm": 2.0937325954437256,
"learning_rate": 0.0001,
"loss": 0.716,
"step": 15620
},
{
"epoch": 0.8570332620965533,
"grad_norm": 1.502930760383606,
"learning_rate": 0.0001,
"loss": 0.6476,
"step": 15640
},
{
"epoch": 0.8581292125595923,
"grad_norm": 1.832287073135376,
"learning_rate": 0.0001,
"loss": 0.6279,
"step": 15660
},
{
"epoch": 0.8592251630226314,
"grad_norm": 1.9679219722747803,
"learning_rate": 0.0001,
"loss": 0.7048,
"step": 15680
},
{
"epoch": 0.8603211134856704,
"grad_norm": 1.4660624265670776,
"learning_rate": 0.0001,
"loss": 0.6217,
"step": 15700
},
{
"epoch": 0.8614170639487095,
"grad_norm": 1.6641209125518799,
"learning_rate": 0.0001,
"loss": 0.5692,
"step": 15720
},
{
"epoch": 0.8625130144117485,
"grad_norm": 1.6354645490646362,
"learning_rate": 0.0001,
"loss": 0.7019,
"step": 15740
},
{
"epoch": 0.8636089648747877,
"grad_norm": 1.5404868125915527,
"learning_rate": 0.0001,
"loss": 0.667,
"step": 15760
},
{
"epoch": 0.8647049153378268,
"grad_norm": 1.759466528892517,
"learning_rate": 0.0001,
"loss": 0.7413,
"step": 15780
},
{
"epoch": 0.8658008658008658,
"grad_norm": 1.289501667022705,
"learning_rate": 0.0001,
"loss": 0.6696,
"step": 15800
},
{
"epoch": 0.8668968162639049,
"grad_norm": 1.516506552696228,
"learning_rate": 0.0001,
"loss": 0.6385,
"step": 15820
},
{
"epoch": 0.8679927667269439,
"grad_norm": 1.602023959159851,
"learning_rate": 0.0001,
"loss": 0.6034,
"step": 15840
},
{
"epoch": 0.869088717189983,
"grad_norm": 1.6681197881698608,
"learning_rate": 0.0001,
"loss": 0.6524,
"step": 15860
},
{
"epoch": 0.870184667653022,
"grad_norm": 1.7448092699050903,
"learning_rate": 0.0001,
"loss": 0.5845,
"step": 15880
},
{
"epoch": 0.8712806181160612,
"grad_norm": 1.763609766960144,
"learning_rate": 0.0001,
"loss": 0.6078,
"step": 15900
},
{
"epoch": 0.8723765685791002,
"grad_norm": 1.8752708435058594,
"learning_rate": 0.0001,
"loss": 0.6415,
"step": 15920
},
{
"epoch": 0.8734725190421393,
"grad_norm": 1.4633687734603882,
"learning_rate": 0.0001,
"loss": 0.6509,
"step": 15940
},
{
"epoch": 0.8745684695051784,
"grad_norm": 1.630188226699829,
"learning_rate": 0.0001,
"loss": 0.678,
"step": 15960
},
{
"epoch": 0.8756644199682174,
"grad_norm": 1.746390461921692,
"learning_rate": 0.0001,
"loss": 0.6883,
"step": 15980
},
{
"epoch": 0.8767603704312565,
"grad_norm": 1.8357354402542114,
"learning_rate": 0.0001,
"loss": 0.5674,
"step": 16000
},
{
"epoch": 0.8767603704312565,
"eval_loss": 0.6495629549026489,
"eval_runtime": 30578.7623,
"eval_samples_per_second": 2.122,
"eval_steps_per_second": 0.066,
"eval_wer": 46.744478263995646,
"step": 16000
},
{
"epoch": 0.8778563208942955,
"grad_norm": 1.3959294557571411,
"learning_rate": 0.0001,
"loss": 0.7334,
"step": 16020
},
{
"epoch": 0.8789522713573347,
"grad_norm": 1.7587610483169556,
"learning_rate": 0.0001,
"loss": 0.6916,
"step": 16040
},
{
"epoch": 0.8800482218203737,
"grad_norm": 2.157567024230957,
"learning_rate": 0.0001,
"loss": 0.6288,
"step": 16060
},
{
"epoch": 0.8811441722834128,
"grad_norm": 1.927071452140808,
"learning_rate": 0.0001,
"loss": 0.6596,
"step": 16080
},
{
"epoch": 0.8822401227464519,
"grad_norm": 1.7229890823364258,
"learning_rate": 0.0001,
"loss": 0.6351,
"step": 16100
},
{
"epoch": 0.8833360732094909,
"grad_norm": 1.4584635496139526,
"learning_rate": 0.0001,
"loss": 0.6347,
"step": 16120
},
{
"epoch": 0.88443202367253,
"grad_norm": 1.4768098592758179,
"learning_rate": 0.0001,
"loss": 0.6326,
"step": 16140
},
{
"epoch": 0.885527974135569,
"grad_norm": 1.6411234140396118,
"learning_rate": 0.0001,
"loss": 0.6935,
"step": 16160
},
{
"epoch": 0.8866239245986082,
"grad_norm": 1.4742987155914307,
"learning_rate": 0.0001,
"loss": 0.6188,
"step": 16180
},
{
"epoch": 0.8877198750616472,
"grad_norm": 2.1708977222442627,
"learning_rate": 0.0001,
"loss": 0.6837,
"step": 16200
},
{
"epoch": 0.8888158255246863,
"grad_norm": 1.5142560005187988,
"learning_rate": 0.0001,
"loss": 0.6218,
"step": 16220
},
{
"epoch": 0.8899117759877253,
"grad_norm": 1.5650640726089478,
"learning_rate": 0.0001,
"loss": 0.5991,
"step": 16240
},
{
"epoch": 0.8910077264507644,
"grad_norm": 1.5553919076919556,
"learning_rate": 0.0001,
"loss": 0.6081,
"step": 16260
},
{
"epoch": 0.8921036769138035,
"grad_norm": 1.813482642173767,
"learning_rate": 0.0001,
"loss": 0.6599,
"step": 16280
},
{
"epoch": 0.8931996273768426,
"grad_norm": 1.6864385604858398,
"learning_rate": 0.0001,
"loss": 0.6337,
"step": 16300
},
{
"epoch": 0.8942955778398817,
"grad_norm": 1.5707799196243286,
"learning_rate": 0.0001,
"loss": 0.7029,
"step": 16320
},
{
"epoch": 0.8953915283029207,
"grad_norm": 1.3465133905410767,
"learning_rate": 0.0001,
"loss": 0.6519,
"step": 16340
},
{
"epoch": 0.8964874787659598,
"grad_norm": 1.5546880960464478,
"learning_rate": 0.0001,
"loss": 0.6111,
"step": 16360
},
{
"epoch": 0.8975834292289988,
"grad_norm": 1.6297564506530762,
"learning_rate": 0.0001,
"loss": 0.6825,
"step": 16380
},
{
"epoch": 0.8986793796920379,
"grad_norm": 1.5396370887756348,
"learning_rate": 0.0001,
"loss": 0.6454,
"step": 16400
},
{
"epoch": 0.8997753301550769,
"grad_norm": 1.3082808256149292,
"learning_rate": 0.0001,
"loss": 0.6489,
"step": 16420
},
{
"epoch": 0.900871280618116,
"grad_norm": 1.68564772605896,
"learning_rate": 0.0001,
"loss": 0.6688,
"step": 16440
},
{
"epoch": 0.9019672310811552,
"grad_norm": 1.6919423341751099,
"learning_rate": 0.0001,
"loss": 0.6353,
"step": 16460
},
{
"epoch": 0.9030631815441942,
"grad_norm": 1.4040336608886719,
"learning_rate": 0.0001,
"loss": 0.6286,
"step": 16480
},
{
"epoch": 0.9041591320072333,
"grad_norm": 1.5394583940505981,
"learning_rate": 0.0001,
"loss": 0.648,
"step": 16500
},
{
"epoch": 0.9052550824702723,
"grad_norm": 1.8135911226272583,
"learning_rate": 0.0001,
"loss": 0.6315,
"step": 16520
},
{
"epoch": 0.9063510329333114,
"grad_norm": 1.6827434301376343,
"learning_rate": 0.0001,
"loss": 0.637,
"step": 16540
},
{
"epoch": 0.9074469833963504,
"grad_norm": 1.3692152500152588,
"learning_rate": 0.0001,
"loss": 0.7015,
"step": 16560
},
{
"epoch": 0.9085429338593896,
"grad_norm": 1.6391196250915527,
"learning_rate": 0.0001,
"loss": 0.6479,
"step": 16580
},
{
"epoch": 0.9096388843224287,
"grad_norm": 2.5071117877960205,
"learning_rate": 0.0001,
"loss": 0.6746,
"step": 16600
},
{
"epoch": 0.9107348347854677,
"grad_norm": 1.7680779695510864,
"learning_rate": 0.0001,
"loss": 0.6543,
"step": 16620
},
{
"epoch": 0.9118307852485068,
"grad_norm": 1.487269639968872,
"learning_rate": 0.0001,
"loss": 0.618,
"step": 16640
},
{
"epoch": 0.9129267357115458,
"grad_norm": 1.322325348854065,
"learning_rate": 0.0001,
"loss": 0.635,
"step": 16660
},
{
"epoch": 0.9140226861745849,
"grad_norm": 2.054997682571411,
"learning_rate": 0.0001,
"loss": 0.645,
"step": 16680
},
{
"epoch": 0.9151186366376239,
"grad_norm": 1.7619165182113647,
"learning_rate": 0.0001,
"loss": 0.6405,
"step": 16700
},
{
"epoch": 0.9162145871006631,
"grad_norm": 1.3276571035385132,
"learning_rate": 0.0001,
"loss": 0.5797,
"step": 16720
},
{
"epoch": 0.9173105375637021,
"grad_norm": 2.2796542644500732,
"learning_rate": 0.0001,
"loss": 0.7342,
"step": 16740
},
{
"epoch": 0.9184064880267412,
"grad_norm": 1.637654423713684,
"learning_rate": 0.0001,
"loss": 0.6149,
"step": 16760
},
{
"epoch": 0.9195024384897803,
"grad_norm": 1.4013864994049072,
"learning_rate": 0.0001,
"loss": 0.627,
"step": 16780
},
{
"epoch": 0.9205983889528193,
"grad_norm": 1.5173211097717285,
"learning_rate": 0.0001,
"loss": 0.5449,
"step": 16800
},
{
"epoch": 0.9216943394158584,
"grad_norm": 1.5530805587768555,
"learning_rate": 0.0001,
"loss": 0.5416,
"step": 16820
},
{
"epoch": 0.9227902898788974,
"grad_norm": 1.5294363498687744,
"learning_rate": 0.0001,
"loss": 0.6146,
"step": 16840
},
{
"epoch": 0.9238862403419366,
"grad_norm": 1.7312266826629639,
"learning_rate": 0.0001,
"loss": 0.6952,
"step": 16860
},
{
"epoch": 0.9249821908049756,
"grad_norm": 1.301459789276123,
"learning_rate": 0.0001,
"loss": 0.6258,
"step": 16880
},
{
"epoch": 0.9260781412680147,
"grad_norm": 1.915128469467163,
"learning_rate": 0.0001,
"loss": 0.6955,
"step": 16900
},
{
"epoch": 0.9271740917310538,
"grad_norm": 1.3437505960464478,
"learning_rate": 0.0001,
"loss": 0.7044,
"step": 16920
},
{
"epoch": 0.9282700421940928,
"grad_norm": 1.5920603275299072,
"learning_rate": 0.0001,
"loss": 0.5871,
"step": 16940
},
{
"epoch": 0.9293659926571319,
"grad_norm": 1.2615900039672852,
"learning_rate": 0.0001,
"loss": 0.6311,
"step": 16960
},
{
"epoch": 0.9304619431201709,
"grad_norm": 1.6863378286361694,
"learning_rate": 0.0001,
"loss": 0.5746,
"step": 16980
},
{
"epoch": 0.9315578935832101,
"grad_norm": 1.3633450269699097,
"learning_rate": 0.0001,
"loss": 0.6354,
"step": 17000
},
{
"epoch": 0.9315578935832101,
"eval_loss": 0.6385661959648132,
"eval_runtime": 30462.2265,
"eval_samples_per_second": 2.13,
"eval_steps_per_second": 0.067,
"eval_wer": 36.07830918982583,
"step": 17000
},
{
"epoch": 0.9326538440462491,
"grad_norm": 1.7412103414535522,
"learning_rate": 0.0001,
"loss": 0.6635,
"step": 17020
},
{
"epoch": 0.9337497945092882,
"grad_norm": 2.0697691440582275,
"learning_rate": 0.0001,
"loss": 0.678,
"step": 17040
},
{
"epoch": 0.9348457449723272,
"grad_norm": 1.6238869428634644,
"learning_rate": 0.0001,
"loss": 0.6427,
"step": 17060
},
{
"epoch": 0.9359416954353663,
"grad_norm": 1.498334288597107,
"learning_rate": 0.0001,
"loss": 0.614,
"step": 17080
},
{
"epoch": 0.9370376458984054,
"grad_norm": 1.4905815124511719,
"learning_rate": 0.0001,
"loss": 0.5759,
"step": 17100
},
{
"epoch": 0.9381335963614444,
"grad_norm": 1.433747410774231,
"learning_rate": 0.0001,
"loss": 0.6754,
"step": 17120
},
{
"epoch": 0.9392295468244836,
"grad_norm": 1.8419586420059204,
"learning_rate": 0.0001,
"loss": 0.6392,
"step": 17140
},
{
"epoch": 0.9403254972875226,
"grad_norm": 1.5990883111953735,
"learning_rate": 0.0001,
"loss": 0.6204,
"step": 17160
},
{
"epoch": 0.9414214477505617,
"grad_norm": 1.482010841369629,
"learning_rate": 0.0001,
"loss": 0.6887,
"step": 17180
},
{
"epoch": 0.9425173982136007,
"grad_norm": 1.6629010438919067,
"learning_rate": 0.0001,
"loss": 0.6457,
"step": 17200
},
{
"epoch": 0.9436133486766398,
"grad_norm": 1.4538336992263794,
"learning_rate": 0.0001,
"loss": 0.6401,
"step": 17220
},
{
"epoch": 0.9447092991396789,
"grad_norm": 1.5684305429458618,
"learning_rate": 0.0001,
"loss": 0.676,
"step": 17240
},
{
"epoch": 0.945805249602718,
"grad_norm": 1.4637812376022339,
"learning_rate": 0.0001,
"loss": 0.5963,
"step": 17260
},
{
"epoch": 0.9469012000657571,
"grad_norm": 2.155348300933838,
"learning_rate": 0.0001,
"loss": 0.6624,
"step": 17280
},
{
"epoch": 0.9479971505287961,
"grad_norm": 1.6532953977584839,
"learning_rate": 0.0001,
"loss": 0.6784,
"step": 17300
},
{
"epoch": 0.9490931009918352,
"grad_norm": 1.934787392616272,
"learning_rate": 0.0001,
"loss": 0.6876,
"step": 17320
},
{
"epoch": 0.9501890514548742,
"grad_norm": 2.319920063018799,
"learning_rate": 0.0001,
"loss": 0.7676,
"step": 17340
},
{
"epoch": 0.9512850019179133,
"grad_norm": 1.5026947259902954,
"learning_rate": 0.0001,
"loss": 0.5634,
"step": 17360
},
{
"epoch": 0.9523809523809523,
"grad_norm": 1.9578672647476196,
"learning_rate": 0.0001,
"loss": 0.5959,
"step": 17380
},
{
"epoch": 0.9534769028439914,
"grad_norm": 1.9930877685546875,
"learning_rate": 0.0001,
"loss": 0.5947,
"step": 17400
},
{
"epoch": 0.9545728533070306,
"grad_norm": 1.6241062879562378,
"learning_rate": 0.0001,
"loss": 0.587,
"step": 17420
},
{
"epoch": 0.9556688037700696,
"grad_norm": 1.7155011892318726,
"learning_rate": 0.0001,
"loss": 0.6352,
"step": 17440
},
{
"epoch": 0.9567647542331087,
"grad_norm": 1.7239856719970703,
"learning_rate": 0.0001,
"loss": 0.647,
"step": 17460
},
{
"epoch": 0.9578607046961477,
"grad_norm": 1.6342066526412964,
"learning_rate": 0.0001,
"loss": 0.6017,
"step": 17480
},
{
"epoch": 0.9589566551591868,
"grad_norm": 1.4042915105819702,
"learning_rate": 0.0001,
"loss": 0.6479,
"step": 17500
},
{
"epoch": 0.9600526056222258,
"grad_norm": 1.5023634433746338,
"learning_rate": 0.0001,
"loss": 0.6008,
"step": 17520
},
{
"epoch": 0.961148556085265,
"grad_norm": 1.5713409185409546,
"learning_rate": 0.0001,
"loss": 0.6643,
"step": 17540
},
{
"epoch": 0.9622445065483041,
"grad_norm": 1.8917444944381714,
"learning_rate": 0.0001,
"loss": 0.6992,
"step": 17560
},
{
"epoch": 0.9633404570113431,
"grad_norm": 1.918900728225708,
"learning_rate": 0.0001,
"loss": 0.6004,
"step": 17580
},
{
"epoch": 0.9644364074743822,
"grad_norm": 1.7599738836288452,
"learning_rate": 0.0001,
"loss": 0.6479,
"step": 17600
},
{
"epoch": 0.9655323579374212,
"grad_norm": 1.4554500579833984,
"learning_rate": 0.0001,
"loss": 0.6367,
"step": 17620
},
{
"epoch": 0.9666283084004603,
"grad_norm": 1.6860467195510864,
"learning_rate": 0.0001,
"loss": 0.5769,
"step": 17640
},
{
"epoch": 0.9677242588634993,
"grad_norm": 1.6800360679626465,
"learning_rate": 0.0001,
"loss": 0.6402,
"step": 17660
},
{
"epoch": 0.9688202093265385,
"grad_norm": 1.3988690376281738,
"learning_rate": 0.0001,
"loss": 0.589,
"step": 17680
},
{
"epoch": 0.9699161597895775,
"grad_norm": 1.6789034605026245,
"learning_rate": 0.0001,
"loss": 0.6469,
"step": 17700
},
{
"epoch": 0.9710121102526166,
"grad_norm": 1.4465025663375854,
"learning_rate": 0.0001,
"loss": 0.6362,
"step": 17720
},
{
"epoch": 0.9721080607156557,
"grad_norm": 1.6466797590255737,
"learning_rate": 0.0001,
"loss": 0.5882,
"step": 17740
},
{
"epoch": 0.9732040111786947,
"grad_norm": 1.4487119913101196,
"learning_rate": 0.0001,
"loss": 0.6481,
"step": 17760
},
{
"epoch": 0.9742999616417338,
"grad_norm": 1.769286870956421,
"learning_rate": 0.0001,
"loss": 0.7198,
"step": 17780
},
{
"epoch": 0.9753959121047728,
"grad_norm": 1.8515903949737549,
"learning_rate": 0.0001,
"loss": 0.6257,
"step": 17800
},
{
"epoch": 0.976491862567812,
"grad_norm": 1.9811028242111206,
"learning_rate": 0.0001,
"loss": 0.6154,
"step": 17820
},
{
"epoch": 0.977587813030851,
"grad_norm": 1.6197538375854492,
"learning_rate": 0.0001,
"loss": 0.6196,
"step": 17840
},
{
"epoch": 0.9786837634938901,
"grad_norm": 1.605971336364746,
"learning_rate": 0.0001,
"loss": 0.5549,
"step": 17860
},
{
"epoch": 0.9797797139569292,
"grad_norm": 1.5246946811676025,
"learning_rate": 0.0001,
"loss": 0.5979,
"step": 17880
},
{
"epoch": 0.9808756644199682,
"grad_norm": 1.8534538745880127,
"learning_rate": 0.0001,
"loss": 0.663,
"step": 17900
},
{
"epoch": 0.9819716148830073,
"grad_norm": 1.6625508069992065,
"learning_rate": 0.0001,
"loss": 0.5863,
"step": 17920
},
{
"epoch": 0.9830675653460463,
"grad_norm": 1.704788088798523,
"learning_rate": 0.0001,
"loss": 0.6529,
"step": 17940
},
{
"epoch": 0.9841635158090855,
"grad_norm": 1.3650102615356445,
"learning_rate": 0.0001,
"loss": 0.6317,
"step": 17960
},
{
"epoch": 0.9852594662721245,
"grad_norm": 1.7923402786254883,
"learning_rate": 0.0001,
"loss": 0.6023,
"step": 17980
},
{
"epoch": 0.9863554167351636,
"grad_norm": 2.0028254985809326,
"learning_rate": 0.0001,
"loss": 0.6569,
"step": 18000
},
{
"epoch": 0.9863554167351636,
"eval_loss": 0.6309429407119751,
"eval_runtime": 30654.7333,
"eval_samples_per_second": 2.117,
"eval_steps_per_second": 0.066,
"eval_wer": 36.64375002139876,
"step": 18000
},
{
"epoch": 0.9874513671982026,
"grad_norm": 1.66196870803833,
"learning_rate": 0.0001,
"loss": 0.6687,
"step": 18020
},
{
"epoch": 0.9885473176612417,
"grad_norm": 2.031445264816284,
"learning_rate": 0.0001,
"loss": 0.6032,
"step": 18040
},
{
"epoch": 0.9896432681242808,
"grad_norm": 1.3204675912857056,
"learning_rate": 0.0001,
"loss": 0.5913,
"step": 18060
},
{
"epoch": 0.9907392185873198,
"grad_norm": 1.7389861345291138,
"learning_rate": 0.0001,
"loss": 0.6678,
"step": 18080
},
{
"epoch": 0.991835169050359,
"grad_norm": 1.9815995693206787,
"learning_rate": 0.0001,
"loss": 0.6566,
"step": 18100
},
{
"epoch": 0.992931119513398,
"grad_norm": 1.5902196168899536,
"learning_rate": 0.0001,
"loss": 0.617,
"step": 18120
},
{
"epoch": 0.9940270699764371,
"grad_norm": 1.4741644859313965,
"learning_rate": 0.0001,
"loss": 0.5921,
"step": 18140
},
{
"epoch": 0.9951230204394761,
"grad_norm": 1.419965147972107,
"learning_rate": 0.0001,
"loss": 0.6031,
"step": 18160
},
{
"epoch": 0.9962189709025152,
"grad_norm": 1.5964018106460571,
"learning_rate": 0.0001,
"loss": 0.6015,
"step": 18180
},
{
"epoch": 0.9973149213655543,
"grad_norm": 1.4470981359481812,
"learning_rate": 0.0001,
"loss": 0.6898,
"step": 18200
},
{
"epoch": 0.9984108718285933,
"grad_norm": 1.54426908493042,
"learning_rate": 0.0001,
"loss": 0.6328,
"step": 18220
},
{
"epoch": 0.9995068222916325,
"grad_norm": 1.778437614440918,
"learning_rate": 0.0001,
"loss": 0.6381,
"step": 18240
},
{
"epoch": 1.0006027727546716,
"grad_norm": 1.5332226753234863,
"learning_rate": 0.0001,
"loss": 0.5782,
"step": 18260
},
{
"epoch": 1.0016987232177106,
"grad_norm": 1.785762906074524,
"learning_rate": 0.0001,
"loss": 0.6779,
"step": 18280
},
{
"epoch": 1.0027946736807496,
"grad_norm": 1.684708595275879,
"learning_rate": 0.0001,
"loss": 0.5786,
"step": 18300
},
{
"epoch": 1.0038906241437886,
"grad_norm": 1.5112040042877197,
"learning_rate": 0.0001,
"loss": 0.5989,
"step": 18320
},
{
"epoch": 1.0049865746068278,
"grad_norm": 1.5711543560028076,
"learning_rate": 0.0001,
"loss": 0.6525,
"step": 18340
},
{
"epoch": 1.0060825250698668,
"grad_norm": 1.4053367376327515,
"learning_rate": 0.0001,
"loss": 0.5913,
"step": 18360
},
{
"epoch": 1.0071784755329058,
"grad_norm": 1.7791050672531128,
"learning_rate": 0.0001,
"loss": 0.6212,
"step": 18380
},
{
"epoch": 1.008274425995945,
"grad_norm": 1.4202812910079956,
"learning_rate": 0.0001,
"loss": 0.6168,
"step": 18400
},
{
"epoch": 1.009370376458984,
"grad_norm": 1.7469732761383057,
"learning_rate": 0.0001,
"loss": 0.6425,
"step": 18420
},
{
"epoch": 1.010466326922023,
"grad_norm": 2.1195449829101562,
"learning_rate": 0.0001,
"loss": 0.6564,
"step": 18440
},
{
"epoch": 1.011562277385062,
"grad_norm": 1.4056214094161987,
"learning_rate": 0.0001,
"loss": 0.6809,
"step": 18460
},
{
"epoch": 1.0126582278481013,
"grad_norm": 2.07029128074646,
"learning_rate": 0.0001,
"loss": 0.6119,
"step": 18480
},
{
"epoch": 1.0137541783111403,
"grad_norm": 1.6518419981002808,
"learning_rate": 0.0001,
"loss": 0.5954,
"step": 18500
},
{
"epoch": 1.0148501287741793,
"grad_norm": 1.7785189151763916,
"learning_rate": 0.0001,
"loss": 0.5959,
"step": 18520
},
{
"epoch": 1.0159460792372186,
"grad_norm": 1.4902641773223877,
"learning_rate": 0.0001,
"loss": 0.6066,
"step": 18540
},
{
"epoch": 1.0170420297002576,
"grad_norm": 1.6291300058364868,
"learning_rate": 0.0001,
"loss": 0.5781,
"step": 18560
},
{
"epoch": 1.0181379801632966,
"grad_norm": 1.5571300983428955,
"learning_rate": 0.0001,
"loss": 0.5622,
"step": 18580
},
{
"epoch": 1.0192339306263356,
"grad_norm": 1.5963464975357056,
"learning_rate": 0.0001,
"loss": 0.6797,
"step": 18600
},
{
"epoch": 1.0203298810893748,
"grad_norm": 1.4604226350784302,
"learning_rate": 0.0001,
"loss": 0.6712,
"step": 18620
},
{
"epoch": 1.0214258315524138,
"grad_norm": 1.6052221059799194,
"learning_rate": 0.0001,
"loss": 0.595,
"step": 18640
},
{
"epoch": 1.0225217820154529,
"grad_norm": 1.4075971841812134,
"learning_rate": 0.0001,
"loss": 0.6123,
"step": 18660
},
{
"epoch": 1.0236177324784919,
"grad_norm": 1.910475730895996,
"learning_rate": 0.0001,
"loss": 0.7016,
"step": 18680
},
{
"epoch": 1.024713682941531,
"grad_norm": 1.946268081665039,
"learning_rate": 0.0001,
"loss": 0.6178,
"step": 18700
},
{
"epoch": 1.02580963340457,
"grad_norm": 1.5547478199005127,
"learning_rate": 0.0001,
"loss": 0.6068,
"step": 18720
},
{
"epoch": 1.026905583867609,
"grad_norm": 1.5006910562515259,
"learning_rate": 0.0001,
"loss": 0.5818,
"step": 18740
},
{
"epoch": 1.0280015343306483,
"grad_norm": 1.5395736694335938,
"learning_rate": 0.0001,
"loss": 0.6614,
"step": 18760
},
{
"epoch": 1.0290974847936873,
"grad_norm": 1.5935709476470947,
"learning_rate": 0.0001,
"loss": 0.5593,
"step": 18780
},
{
"epoch": 1.0301934352567264,
"grad_norm": 1.6643317937850952,
"learning_rate": 0.0001,
"loss": 0.6441,
"step": 18800
},
{
"epoch": 1.0312893857197654,
"grad_norm": 1.6811660528182983,
"learning_rate": 0.0001,
"loss": 0.5663,
"step": 18820
},
{
"epoch": 1.0323853361828046,
"grad_norm": 1.4203201532363892,
"learning_rate": 0.0001,
"loss": 0.6622,
"step": 18840
},
{
"epoch": 1.0334812866458436,
"grad_norm": 1.9712319374084473,
"learning_rate": 0.0001,
"loss": 0.5818,
"step": 18860
},
{
"epoch": 1.0345772371088826,
"grad_norm": 2.0921614170074463,
"learning_rate": 0.0001,
"loss": 0.6489,
"step": 18880
},
{
"epoch": 1.0356731875719218,
"grad_norm": 1.3215636014938354,
"learning_rate": 0.0001,
"loss": 0.5784,
"step": 18900
},
{
"epoch": 1.0367691380349608,
"grad_norm": 1.6520031690597534,
"learning_rate": 0.0001,
"loss": 0.6069,
"step": 18920
},
{
"epoch": 1.0378650884979999,
"grad_norm": 1.8051795959472656,
"learning_rate": 0.0001,
"loss": 0.6596,
"step": 18940
},
{
"epoch": 1.0389610389610389,
"grad_norm": 1.7375438213348389,
"learning_rate": 0.0001,
"loss": 0.5161,
"step": 18960
},
{
"epoch": 1.040056989424078,
"grad_norm": 1.4865177869796753,
"learning_rate": 0.0001,
"loss": 0.5436,
"step": 18980
},
{
"epoch": 1.041152939887117,
"grad_norm": 1.4444303512573242,
"learning_rate": 0.0001,
"loss": 0.5906,
"step": 19000
},
{
"epoch": 1.041152939887117,
"eval_loss": 0.622921347618103,
"eval_runtime": 30997.4799,
"eval_samples_per_second": 2.093,
"eval_steps_per_second": 0.065,
"eval_wer": 37.84105343527132,
"step": 19000
},
{
"epoch": 1.042248890350156,
"grad_norm": 1.4964603185653687,
"learning_rate": 0.0001,
"loss": 0.6014,
"step": 19020
},
{
"epoch": 1.0433448408131953,
"grad_norm": 2.2886295318603516,
"learning_rate": 0.0001,
"loss": 0.6472,
"step": 19040
},
{
"epoch": 1.0444407912762343,
"grad_norm": 1.7065175771713257,
"learning_rate": 0.0001,
"loss": 0.5561,
"step": 19060
},
{
"epoch": 1.0455367417392734,
"grad_norm": 1.6928189992904663,
"learning_rate": 0.0001,
"loss": 0.5224,
"step": 19080
},
{
"epoch": 1.0466326922023124,
"grad_norm": 1.4461798667907715,
"learning_rate": 0.0001,
"loss": 0.5549,
"step": 19100
},
{
"epoch": 1.0477286426653516,
"grad_norm": 3.3619306087493896,
"learning_rate": 0.0001,
"loss": 0.6065,
"step": 19120
},
{
"epoch": 1.0488245931283906,
"grad_norm": 1.8468629121780396,
"learning_rate": 0.0001,
"loss": 0.716,
"step": 19140
},
{
"epoch": 1.0499205435914296,
"grad_norm": 1.7207484245300293,
"learning_rate": 0.0001,
"loss": 0.6078,
"step": 19160
},
{
"epoch": 1.0510164940544688,
"grad_norm": 1.6650727987289429,
"learning_rate": 0.0001,
"loss": 0.6347,
"step": 19180
},
{
"epoch": 1.0521124445175078,
"grad_norm": 1.3957616090774536,
"learning_rate": 0.0001,
"loss": 0.6265,
"step": 19200
},
{
"epoch": 1.0532083949805469,
"grad_norm": 1.4066413640975952,
"learning_rate": 0.0001,
"loss": 0.6118,
"step": 19220
},
{
"epoch": 1.0543043454435859,
"grad_norm": 1.8007709980010986,
"learning_rate": 0.0001,
"loss": 0.6989,
"step": 19240
},
{
"epoch": 1.055400295906625,
"grad_norm": 1.4759665727615356,
"learning_rate": 0.0001,
"loss": 0.621,
"step": 19260
},
{
"epoch": 1.056496246369664,
"grad_norm": 1.7062383890151978,
"learning_rate": 0.0001,
"loss": 0.6413,
"step": 19280
},
{
"epoch": 1.0575921968327031,
"grad_norm": 1.5822961330413818,
"learning_rate": 0.0001,
"loss": 0.5877,
"step": 19300
},
{
"epoch": 1.0586881472957423,
"grad_norm": 1.7339930534362793,
"learning_rate": 0.0001,
"loss": 0.6121,
"step": 19320
},
{
"epoch": 1.0597840977587814,
"grad_norm": 1.3066824674606323,
"learning_rate": 0.0001,
"loss": 0.6278,
"step": 19340
},
{
"epoch": 1.0608800482218204,
"grad_norm": 1.6953777074813843,
"learning_rate": 0.0001,
"loss": 0.624,
"step": 19360
},
{
"epoch": 1.0619759986848594,
"grad_norm": 1.5192081928253174,
"learning_rate": 0.0001,
"loss": 0.6078,
"step": 19380
},
{
"epoch": 1.0630719491478986,
"grad_norm": 1.5474629402160645,
"learning_rate": 0.0001,
"loss": 0.7147,
"step": 19400
},
{
"epoch": 1.0641678996109376,
"grad_norm": 1.5060781240463257,
"learning_rate": 0.0001,
"loss": 0.6342,
"step": 19420
},
{
"epoch": 1.0652638500739766,
"grad_norm": 1.4227863550186157,
"learning_rate": 0.0001,
"loss": 0.5967,
"step": 19440
},
{
"epoch": 1.0663598005370156,
"grad_norm": 1.4965442419052124,
"learning_rate": 0.0001,
"loss": 0.6344,
"step": 19460
},
{
"epoch": 1.0674557510000549,
"grad_norm": 1.4466912746429443,
"learning_rate": 0.0001,
"loss": 0.6276,
"step": 19480
},
{
"epoch": 1.0685517014630939,
"grad_norm": 1.7357358932495117,
"learning_rate": 0.0001,
"loss": 0.5664,
"step": 19500
},
{
"epoch": 1.0696476519261329,
"grad_norm": 1.6092090606689453,
"learning_rate": 0.0001,
"loss": 0.6207,
"step": 19520
},
{
"epoch": 1.070743602389172,
"grad_norm": 1.5991522073745728,
"learning_rate": 0.0001,
"loss": 0.6113,
"step": 19540
},
{
"epoch": 1.071839552852211,
"grad_norm": 1.320917010307312,
"learning_rate": 0.0001,
"loss": 0.577,
"step": 19560
},
{
"epoch": 1.0729355033152501,
"grad_norm": 1.4419164657592773,
"learning_rate": 0.0001,
"loss": 0.6622,
"step": 19580
},
{
"epoch": 1.0740314537782891,
"grad_norm": 1.7298556566238403,
"learning_rate": 0.0001,
"loss": 0.5721,
"step": 19600
},
{
"epoch": 1.0751274042413284,
"grad_norm": 1.8013224601745605,
"learning_rate": 0.0001,
"loss": 0.6079,
"step": 19620
},
{
"epoch": 1.0762233547043674,
"grad_norm": 1.5226448774337769,
"learning_rate": 0.0001,
"loss": 0.6681,
"step": 19640
},
{
"epoch": 1.0773193051674064,
"grad_norm": 1.894225835800171,
"learning_rate": 0.0001,
"loss": 0.6599,
"step": 19660
},
{
"epoch": 1.0784152556304456,
"grad_norm": 1.58738112449646,
"learning_rate": 0.0001,
"loss": 0.5796,
"step": 19680
},
{
"epoch": 1.0795112060934846,
"grad_norm": 1.880391240119934,
"learning_rate": 0.0001,
"loss": 0.6305,
"step": 19700
},
{
"epoch": 1.0806071565565236,
"grad_norm": 1.4821720123291016,
"learning_rate": 0.0001,
"loss": 0.5675,
"step": 19720
},
{
"epoch": 1.0817031070195626,
"grad_norm": 1.504714846611023,
"learning_rate": 0.0001,
"loss": 0.5888,
"step": 19740
},
{
"epoch": 1.0827990574826019,
"grad_norm": 1.6745513677597046,
"learning_rate": 0.0001,
"loss": 0.6199,
"step": 19760
},
{
"epoch": 1.0838950079456409,
"grad_norm": 1.827014684677124,
"learning_rate": 0.0001,
"loss": 0.5889,
"step": 19780
},
{
"epoch": 1.0849909584086799,
"grad_norm": 1.6627857685089111,
"learning_rate": 0.0001,
"loss": 0.5649,
"step": 19800
},
{
"epoch": 1.086086908871719,
"grad_norm": 1.5660628080368042,
"learning_rate": 0.0001,
"loss": 0.6814,
"step": 19820
},
{
"epoch": 1.0871828593347581,
"grad_norm": 1.4713698625564575,
"learning_rate": 0.0001,
"loss": 0.5789,
"step": 19840
},
{
"epoch": 1.0882788097977971,
"grad_norm": 1.5290462970733643,
"learning_rate": 0.0001,
"loss": 0.5894,
"step": 19860
},
{
"epoch": 1.0893747602608361,
"grad_norm": 1.7101823091506958,
"learning_rate": 0.0001,
"loss": 0.5732,
"step": 19880
},
{
"epoch": 1.0904707107238754,
"grad_norm": 1.8752964735031128,
"learning_rate": 0.0001,
"loss": 0.6358,
"step": 19900
},
{
"epoch": 1.0915666611869144,
"grad_norm": 1.7808418273925781,
"learning_rate": 0.0001,
"loss": 0.5942,
"step": 19920
},
{
"epoch": 1.0926626116499534,
"grad_norm": 1.4797093868255615,
"learning_rate": 0.0001,
"loss": 0.605,
"step": 19940
},
{
"epoch": 1.0937585621129924,
"grad_norm": 2.443544387817383,
"learning_rate": 0.0001,
"loss": 0.7019,
"step": 19960
},
{
"epoch": 1.0948545125760316,
"grad_norm": 1.4167999029159546,
"learning_rate": 0.0001,
"loss": 0.5969,
"step": 19980
},
{
"epoch": 1.0959504630390706,
"grad_norm": 1.3823322057724,
"learning_rate": 0.0001,
"loss": 0.634,
"step": 20000
},
{
"epoch": 1.0959504630390706,
"eval_loss": 0.6163960099220276,
"eval_runtime": 30555.9795,
"eval_samples_per_second": 2.124,
"eval_steps_per_second": 0.066,
"eval_wer": 36.777449473248126,
"step": 20000
},
{
"epoch": 1.0970464135021096,
"grad_norm": 1.5388779640197754,
"learning_rate": 0.0001,
"loss": 0.5836,
"step": 20020
},
{
"epoch": 1.0981423639651489,
"grad_norm": 1.9624749422073364,
"learning_rate": 0.0001,
"loss": 0.5574,
"step": 20040
},
{
"epoch": 1.0992383144281879,
"grad_norm": 1.4712483882904053,
"learning_rate": 0.0001,
"loss": 0.6284,
"step": 20060
},
{
"epoch": 1.1003342648912269,
"grad_norm": 1.4846380949020386,
"learning_rate": 0.0001,
"loss": 0.5815,
"step": 20080
},
{
"epoch": 1.1014302153542659,
"grad_norm": 1.4635918140411377,
"learning_rate": 0.0001,
"loss": 0.6003,
"step": 20100
},
{
"epoch": 1.1025261658173051,
"grad_norm": 1.775586485862732,
"learning_rate": 0.0001,
"loss": 0.5997,
"step": 20120
},
{
"epoch": 1.1036221162803441,
"grad_norm": 1.5434575080871582,
"learning_rate": 0.0001,
"loss": 0.5924,
"step": 20140
},
{
"epoch": 1.1047180667433831,
"grad_norm": 1.661482572555542,
"learning_rate": 0.0001,
"loss": 0.6288,
"step": 20160
},
{
"epoch": 1.1058140172064224,
"grad_norm": 1.706123948097229,
"learning_rate": 0.0001,
"loss": 0.5624,
"step": 20180
},
{
"epoch": 1.1069099676694614,
"grad_norm": 1.9980905055999756,
"learning_rate": 0.0001,
"loss": 0.5577,
"step": 20200
},
{
"epoch": 1.1080059181325004,
"grad_norm": 1.60072922706604,
"learning_rate": 0.0001,
"loss": 0.5947,
"step": 20220
},
{
"epoch": 1.1091018685955394,
"grad_norm": 1.6381016969680786,
"learning_rate": 0.0001,
"loss": 0.5183,
"step": 20240
},
{
"epoch": 1.1101978190585786,
"grad_norm": 1.7384517192840576,
"learning_rate": 0.0001,
"loss": 0.5752,
"step": 20260
},
{
"epoch": 1.1112937695216176,
"grad_norm": 1.9785683155059814,
"learning_rate": 0.0001,
"loss": 0.576,
"step": 20280
},
{
"epoch": 1.1123897199846566,
"grad_norm": 1.2318958044052124,
"learning_rate": 0.0001,
"loss": 0.5853,
"step": 20300
},
{
"epoch": 1.1134856704476959,
"grad_norm": 1.5406831502914429,
"learning_rate": 0.0001,
"loss": 0.6059,
"step": 20320
},
{
"epoch": 1.1145816209107349,
"grad_norm": 1.6655981540679932,
"learning_rate": 0.0001,
"loss": 0.6834,
"step": 20340
},
{
"epoch": 1.1156775713737739,
"grad_norm": 1.513757586479187,
"learning_rate": 0.0001,
"loss": 0.6158,
"step": 20360
},
{
"epoch": 1.1167735218368129,
"grad_norm": 2.1243133544921875,
"learning_rate": 0.0001,
"loss": 0.5769,
"step": 20380
},
{
"epoch": 1.1178694722998521,
"grad_norm": 1.6118996143341064,
"learning_rate": 0.0001,
"loss": 0.5197,
"step": 20400
},
{
"epoch": 1.1189654227628911,
"grad_norm": 1.8882734775543213,
"learning_rate": 0.0001,
"loss": 0.5459,
"step": 20420
},
{
"epoch": 1.1200613732259301,
"grad_norm": 1.8385454416275024,
"learning_rate": 0.0001,
"loss": 0.5589,
"step": 20440
},
{
"epoch": 1.1211573236889691,
"grad_norm": 1.7789567708969116,
"learning_rate": 0.0001,
"loss": 0.589,
"step": 20460
},
{
"epoch": 1.1222532741520084,
"grad_norm": 1.839416265487671,
"learning_rate": 0.0001,
"loss": 0.5979,
"step": 20480
},
{
"epoch": 1.1233492246150474,
"grad_norm": 1.8148174285888672,
"learning_rate": 0.0001,
"loss": 0.6179,
"step": 20500
},
{
"epoch": 1.1244451750780864,
"grad_norm": 1.594193696975708,
"learning_rate": 0.0001,
"loss": 0.6427,
"step": 20520
},
{
"epoch": 1.1255411255411256,
"grad_norm": 1.5754518508911133,
"learning_rate": 0.0001,
"loss": 0.5933,
"step": 20540
},
{
"epoch": 1.1266370760041646,
"grad_norm": 1.7265543937683105,
"learning_rate": 0.0001,
"loss": 0.6021,
"step": 20560
},
{
"epoch": 1.1277330264672036,
"grad_norm": 1.4708410501480103,
"learning_rate": 0.0001,
"loss": 0.5824,
"step": 20580
},
{
"epoch": 1.1288289769302429,
"grad_norm": 1.7831743955612183,
"learning_rate": 0.0001,
"loss": 0.5806,
"step": 20600
},
{
"epoch": 1.1299249273932819,
"grad_norm": 1.530446171760559,
"learning_rate": 0.0001,
"loss": 0.6523,
"step": 20620
},
{
"epoch": 1.1310208778563209,
"grad_norm": 2.154409408569336,
"learning_rate": 0.0001,
"loss": 0.5898,
"step": 20640
},
{
"epoch": 1.13211682831936,
"grad_norm": 1.3791108131408691,
"learning_rate": 0.0001,
"loss": 0.6321,
"step": 20660
},
{
"epoch": 1.1332127787823991,
"grad_norm": 2.259727954864502,
"learning_rate": 0.0001,
"loss": 0.67,
"step": 20680
},
{
"epoch": 1.1343087292454381,
"grad_norm": 1.7098194360733032,
"learning_rate": 0.0001,
"loss": 0.5554,
"step": 20700
},
{
"epoch": 1.1354046797084771,
"grad_norm": 1.5874308347702026,
"learning_rate": 0.0001,
"loss": 0.5596,
"step": 20720
},
{
"epoch": 1.1365006301715161,
"grad_norm": 1.9818806648254395,
"learning_rate": 0.0001,
"loss": 0.6618,
"step": 20740
},
{
"epoch": 1.1375965806345554,
"grad_norm": 1.4672831296920776,
"learning_rate": 0.0001,
"loss": 0.5771,
"step": 20760
},
{
"epoch": 1.1386925310975944,
"grad_norm": 1.746772050857544,
"learning_rate": 0.0001,
"loss": 0.6085,
"step": 20780
},
{
"epoch": 1.1397884815606334,
"grad_norm": 1.7367818355560303,
"learning_rate": 0.0001,
"loss": 0.6401,
"step": 20800
},
{
"epoch": 1.1408844320236726,
"grad_norm": 1.5269123315811157,
"learning_rate": 0.0001,
"loss": 0.678,
"step": 20820
},
{
"epoch": 1.1419803824867116,
"grad_norm": 1.6921030282974243,
"learning_rate": 0.0001,
"loss": 0.6292,
"step": 20840
},
{
"epoch": 1.1430763329497506,
"grad_norm": 1.4888218641281128,
"learning_rate": 0.0001,
"loss": 0.6034,
"step": 20860
},
{
"epoch": 1.1441722834127896,
"grad_norm": 1.9028393030166626,
"learning_rate": 0.0001,
"loss": 0.604,
"step": 20880
},
{
"epoch": 1.1452682338758289,
"grad_norm": 1.4702014923095703,
"learning_rate": 0.0001,
"loss": 0.6375,
"step": 20900
},
{
"epoch": 1.1463641843388679,
"grad_norm": 1.9169687032699585,
"learning_rate": 0.0001,
"loss": 0.6173,
"step": 20920
},
{
"epoch": 1.147460134801907,
"grad_norm": 1.6540066003799438,
"learning_rate": 0.0001,
"loss": 0.5366,
"step": 20940
},
{
"epoch": 1.148556085264946,
"grad_norm": 1.564468502998352,
"learning_rate": 0.0001,
"loss": 0.5614,
"step": 20960
},
{
"epoch": 1.1496520357279851,
"grad_norm": 1.75001060962677,
"learning_rate": 0.0001,
"loss": 0.6408,
"step": 20980
},
{
"epoch": 1.1507479861910241,
"grad_norm": 1.759342074394226,
"learning_rate": 0.0001,
"loss": 0.6267,
"step": 21000
},
{
"epoch": 1.1507479861910241,
"eval_loss": 0.6101906895637512,
"eval_runtime": 30794.7182,
"eval_samples_per_second": 2.107,
"eval_steps_per_second": 0.066,
"eval_wer": 46.360156536208414,
"step": 21000
},
{
"epoch": 1.1518439366540631,
"grad_norm": 1.5255391597747803,
"learning_rate": 0.0001,
"loss": 0.6105,
"step": 21020
},
{
"epoch": 1.1529398871171024,
"grad_norm": 1.8833086490631104,
"learning_rate": 0.0001,
"loss": 0.628,
"step": 21040
},
{
"epoch": 1.1540358375801414,
"grad_norm": 1.6667803525924683,
"learning_rate": 0.0001,
"loss": 0.5758,
"step": 21060
},
{
"epoch": 1.1551317880431804,
"grad_norm": 1.6798675060272217,
"learning_rate": 0.0001,
"loss": 0.5869,
"step": 21080
},
{
"epoch": 1.1562277385062196,
"grad_norm": 1.3532921075820923,
"learning_rate": 0.0001,
"loss": 0.5978,
"step": 21100
},
{
"epoch": 1.1573236889692586,
"grad_norm": 1.9075069427490234,
"learning_rate": 0.0001,
"loss": 0.636,
"step": 21120
},
{
"epoch": 1.1584196394322976,
"grad_norm": 1.6051623821258545,
"learning_rate": 0.0001,
"loss": 0.6624,
"step": 21140
},
{
"epoch": 1.1595155898953367,
"grad_norm": 1.6509668827056885,
"learning_rate": 0.0001,
"loss": 0.6098,
"step": 21160
},
{
"epoch": 1.1606115403583759,
"grad_norm": 1.8513017892837524,
"learning_rate": 0.0001,
"loss": 0.5753,
"step": 21180
},
{
"epoch": 1.161707490821415,
"grad_norm": 1.7680573463439941,
"learning_rate": 0.0001,
"loss": 0.5971,
"step": 21200
},
{
"epoch": 1.162803441284454,
"grad_norm": 1.7778613567352295,
"learning_rate": 0.0001,
"loss": 0.6127,
"step": 21220
},
{
"epoch": 1.163899391747493,
"grad_norm": 1.524961233139038,
"learning_rate": 0.0001,
"loss": 0.6181,
"step": 21240
},
{
"epoch": 1.1649953422105321,
"grad_norm": 1.7401758432388306,
"learning_rate": 0.0001,
"loss": 0.6575,
"step": 21260
},
{
"epoch": 1.1660912926735711,
"grad_norm": 1.7773276567459106,
"learning_rate": 0.0001,
"loss": 0.6264,
"step": 21280
},
{
"epoch": 1.1671872431366102,
"grad_norm": 1.987033486366272,
"learning_rate": 0.0001,
"loss": 0.5309,
"step": 21300
},
{
"epoch": 1.1682831935996494,
"grad_norm": 1.7425816059112549,
"learning_rate": 0.0001,
"loss": 0.6644,
"step": 21320
},
{
"epoch": 1.1693791440626884,
"grad_norm": 1.6333574056625366,
"learning_rate": 0.0001,
"loss": 0.586,
"step": 21340
},
{
"epoch": 1.1704750945257274,
"grad_norm": 1.6959367990493774,
"learning_rate": 0.0001,
"loss": 0.566,
"step": 21360
},
{
"epoch": 1.1715710449887666,
"grad_norm": 1.6892461776733398,
"learning_rate": 0.0001,
"loss": 0.5788,
"step": 21380
},
{
"epoch": 1.1726669954518056,
"grad_norm": 1.805227279663086,
"learning_rate": 0.0001,
"loss": 0.5984,
"step": 21400
},
{
"epoch": 1.1737629459148446,
"grad_norm": 1.5054594278335571,
"learning_rate": 0.0001,
"loss": 0.5851,
"step": 21420
},
{
"epoch": 1.1748588963778837,
"grad_norm": 1.7826286554336548,
"learning_rate": 0.0001,
"loss": 0.5872,
"step": 21440
},
{
"epoch": 1.1759548468409229,
"grad_norm": 1.1451081037521362,
"learning_rate": 0.0001,
"loss": 0.5533,
"step": 21460
},
{
"epoch": 1.177050797303962,
"grad_norm": 1.425122618675232,
"learning_rate": 0.0001,
"loss": 0.5934,
"step": 21480
},
{
"epoch": 1.178146747767001,
"grad_norm": 1.6452502012252808,
"learning_rate": 0.0001,
"loss": 0.6008,
"step": 21500
},
{
"epoch": 1.17924269823004,
"grad_norm": 1.6565967798233032,
"learning_rate": 0.0001,
"loss": 0.5945,
"step": 21520
},
{
"epoch": 1.1803386486930791,
"grad_norm": 1.7541433572769165,
"learning_rate": 0.0001,
"loss": 0.6243,
"step": 21540
},
{
"epoch": 1.1814345991561181,
"grad_norm": 1.5369079113006592,
"learning_rate": 0.0001,
"loss": 0.5968,
"step": 21560
},
{
"epoch": 1.1825305496191572,
"grad_norm": 2.0941789150238037,
"learning_rate": 0.0001,
"loss": 0.599,
"step": 21580
},
{
"epoch": 1.1836265000821964,
"grad_norm": 1.6641647815704346,
"learning_rate": 0.0001,
"loss": 0.634,
"step": 21600
},
{
"epoch": 1.1847224505452354,
"grad_norm": 1.5256268978118896,
"learning_rate": 0.0001,
"loss": 0.5939,
"step": 21620
},
{
"epoch": 1.1858184010082744,
"grad_norm": 1.5720540285110474,
"learning_rate": 0.0001,
"loss": 0.6005,
"step": 21640
},
{
"epoch": 1.1869143514713134,
"grad_norm": 1.773973822593689,
"learning_rate": 0.0001,
"loss": 0.5607,
"step": 21660
},
{
"epoch": 1.1880103019343526,
"grad_norm": 1.6868877410888672,
"learning_rate": 0.0001,
"loss": 0.6013,
"step": 21680
},
{
"epoch": 1.1891062523973916,
"grad_norm": 1.518250584602356,
"learning_rate": 0.0001,
"loss": 0.612,
"step": 21700
},
{
"epoch": 1.1902022028604307,
"grad_norm": 1.4469574689865112,
"learning_rate": 0.0001,
"loss": 0.5684,
"step": 21720
},
{
"epoch": 1.1912981533234697,
"grad_norm": 1.3651134967803955,
"learning_rate": 0.0001,
"loss": 0.6275,
"step": 21740
},
{
"epoch": 1.192394103786509,
"grad_norm": 1.1910673379898071,
"learning_rate": 0.0001,
"loss": 0.6281,
"step": 21760
},
{
"epoch": 1.193490054249548,
"grad_norm": 1.5071038007736206,
"learning_rate": 0.0001,
"loss": 0.5909,
"step": 21780
},
{
"epoch": 1.194586004712587,
"grad_norm": 1.3401362895965576,
"learning_rate": 0.0001,
"loss": 0.6273,
"step": 21800
},
{
"epoch": 1.1956819551756261,
"grad_norm": 1.3563752174377441,
"learning_rate": 0.0001,
"loss": 0.5634,
"step": 21820
},
{
"epoch": 1.1967779056386652,
"grad_norm": 1.5860759019851685,
"learning_rate": 0.0001,
"loss": 0.5996,
"step": 21840
},
{
"epoch": 1.1978738561017042,
"grad_norm": 1.6106479167938232,
"learning_rate": 0.0001,
"loss": 0.6038,
"step": 21860
},
{
"epoch": 1.1989698065647434,
"grad_norm": 1.2792236804962158,
"learning_rate": 0.0001,
"loss": 0.569,
"step": 21880
},
{
"epoch": 1.2000657570277824,
"grad_norm": 1.7770174741744995,
"learning_rate": 0.0001,
"loss": 0.6159,
"step": 21900
},
{
"epoch": 1.2011617074908214,
"grad_norm": 1.522647738456726,
"learning_rate": 0.0001,
"loss": 0.5756,
"step": 21920
},
{
"epoch": 1.2022576579538604,
"grad_norm": 1.4393162727355957,
"learning_rate": 0.0001,
"loss": 0.598,
"step": 21940
},
{
"epoch": 1.2033536084168996,
"grad_norm": 1.9221006631851196,
"learning_rate": 0.0001,
"loss": 0.5736,
"step": 21960
},
{
"epoch": 1.2044495588799387,
"grad_norm": 1.4930051565170288,
"learning_rate": 0.0001,
"loss": 0.591,
"step": 21980
},
{
"epoch": 1.2055455093429777,
"grad_norm": 1.1012686491012573,
"learning_rate": 0.0001,
"loss": 0.6139,
"step": 22000
},
{
"epoch": 1.2055455093429777,
"eval_loss": 0.6032226085662842,
"eval_runtime": 30533.3597,
"eval_samples_per_second": 2.125,
"eval_steps_per_second": 0.066,
"eval_wer": 36.777449473248126,
"step": 22000
},
{
"epoch": 1.2066414598060167,
"grad_norm": 1.501633644104004,
"learning_rate": 0.0001,
"loss": 0.55,
"step": 22020
},
{
"epoch": 1.207737410269056,
"grad_norm": 1.7444618940353394,
"learning_rate": 0.0001,
"loss": 0.62,
"step": 22040
},
{
"epoch": 1.208833360732095,
"grad_norm": 1.3666551113128662,
"learning_rate": 0.0001,
"loss": 0.6087,
"step": 22060
},
{
"epoch": 1.209929311195134,
"grad_norm": 1.5576726198196411,
"learning_rate": 0.0001,
"loss": 0.5958,
"step": 22080
},
{
"epoch": 1.2110252616581731,
"grad_norm": 1.457824945449829,
"learning_rate": 0.0001,
"loss": 0.5588,
"step": 22100
},
{
"epoch": 1.2121212121212122,
"grad_norm": 1.621120810508728,
"learning_rate": 0.0001,
"loss": 0.5574,
"step": 22120
},
{
"epoch": 1.2132171625842512,
"grad_norm": 1.1742050647735596,
"learning_rate": 0.0001,
"loss": 0.5498,
"step": 22140
},
{
"epoch": 1.2143131130472902,
"grad_norm": 1.3734312057495117,
"learning_rate": 0.0001,
"loss": 0.5899,
"step": 22160
},
{
"epoch": 1.2154090635103294,
"grad_norm": 2.046262741088867,
"learning_rate": 0.0001,
"loss": 0.6574,
"step": 22180
},
{
"epoch": 1.2165050139733684,
"grad_norm": 1.3114126920700073,
"learning_rate": 0.0001,
"loss": 0.607,
"step": 22200
},
{
"epoch": 1.2176009644364074,
"grad_norm": 1.5335580110549927,
"learning_rate": 0.0001,
"loss": 0.5828,
"step": 22220
},
{
"epoch": 1.2186969148994464,
"grad_norm": 1.7492777109146118,
"learning_rate": 0.0001,
"loss": 0.557,
"step": 22240
},
{
"epoch": 1.2197928653624857,
"grad_norm": 1.3159027099609375,
"learning_rate": 0.0001,
"loss": 0.6923,
"step": 22260
},
{
"epoch": 1.2208888158255247,
"grad_norm": 1.5170766115188599,
"learning_rate": 0.0001,
"loss": 0.7194,
"step": 22280
},
{
"epoch": 1.2219847662885637,
"grad_norm": 1.336846113204956,
"learning_rate": 0.0001,
"loss": 0.583,
"step": 22300
},
{
"epoch": 1.223080716751603,
"grad_norm": 1.768999457359314,
"learning_rate": 0.0001,
"loss": 0.7009,
"step": 22320
},
{
"epoch": 1.224176667214642,
"grad_norm": 1.6113883256912231,
"learning_rate": 0.0001,
"loss": 0.5747,
"step": 22340
},
{
"epoch": 1.225272617677681,
"grad_norm": 1.4983850717544556,
"learning_rate": 0.0001,
"loss": 0.5477,
"step": 22360
},
{
"epoch": 1.2263685681407201,
"grad_norm": 1.380181908607483,
"learning_rate": 0.0001,
"loss": 0.6003,
"step": 22380
},
{
"epoch": 1.2274645186037592,
"grad_norm": 1.4921499490737915,
"learning_rate": 0.0001,
"loss": 0.586,
"step": 22400
},
{
"epoch": 1.2285604690667982,
"grad_norm": 1.3056907653808594,
"learning_rate": 0.0001,
"loss": 0.5393,
"step": 22420
},
{
"epoch": 1.2296564195298372,
"grad_norm": 1.702541470527649,
"learning_rate": 0.0001,
"loss": 0.6474,
"step": 22440
},
{
"epoch": 1.2307523699928764,
"grad_norm": 1.703065276145935,
"learning_rate": 0.0001,
"loss": 0.6102,
"step": 22460
},
{
"epoch": 1.2318483204559154,
"grad_norm": 1.7823582887649536,
"learning_rate": 0.0001,
"loss": 0.529,
"step": 22480
},
{
"epoch": 1.2329442709189544,
"grad_norm": 1.5001643896102905,
"learning_rate": 0.0001,
"loss": 0.5936,
"step": 22500
},
{
"epoch": 1.2340402213819934,
"grad_norm": 1.4515180587768555,
"learning_rate": 0.0001,
"loss": 0.5817,
"step": 22520
},
{
"epoch": 1.2351361718450327,
"grad_norm": 1.6166529655456543,
"learning_rate": 0.0001,
"loss": 0.5901,
"step": 22540
},
{
"epoch": 1.2362321223080717,
"grad_norm": 1.6117253303527832,
"learning_rate": 0.0001,
"loss": 0.668,
"step": 22560
},
{
"epoch": 1.2373280727711107,
"grad_norm": 1.4674168825149536,
"learning_rate": 0.0001,
"loss": 0.5783,
"step": 22580
},
{
"epoch": 1.23842402323415,
"grad_norm": 1.5282671451568604,
"learning_rate": 0.0001,
"loss": 0.6069,
"step": 22600
},
{
"epoch": 1.239519973697189,
"grad_norm": 1.446772575378418,
"learning_rate": 0.0001,
"loss": 0.5645,
"step": 22620
},
{
"epoch": 1.240615924160228,
"grad_norm": 1.7833497524261475,
"learning_rate": 0.0001,
"loss": 0.5555,
"step": 22640
},
{
"epoch": 1.241711874623267,
"grad_norm": 1.8573659658432007,
"learning_rate": 0.0001,
"loss": 0.6258,
"step": 22660
},
{
"epoch": 1.2428078250863062,
"grad_norm": 1.375735878944397,
"learning_rate": 0.0001,
"loss": 0.5316,
"step": 22680
},
{
"epoch": 1.2439037755493452,
"grad_norm": 1.4545280933380127,
"learning_rate": 0.0001,
"loss": 0.609,
"step": 22700
},
{
"epoch": 1.2449997260123842,
"grad_norm": 1.499182105064392,
"learning_rate": 0.0001,
"loss": 0.6205,
"step": 22720
},
{
"epoch": 1.2460956764754232,
"grad_norm": 1.418739914894104,
"learning_rate": 0.0001,
"loss": 0.5038,
"step": 22740
},
{
"epoch": 1.2471916269384624,
"grad_norm": 1.4958001375198364,
"learning_rate": 0.0001,
"loss": 0.5607,
"step": 22760
},
{
"epoch": 1.2482875774015014,
"grad_norm": 1.7422837018966675,
"learning_rate": 0.0001,
"loss": 0.5593,
"step": 22780
},
{
"epoch": 1.2493835278645404,
"grad_norm": 1.536526083946228,
"learning_rate": 0.0001,
"loss": 0.5518,
"step": 22800
},
{
"epoch": 1.2504794783275797,
"grad_norm": 1.2415670156478882,
"learning_rate": 0.0001,
"loss": 0.625,
"step": 22820
},
{
"epoch": 1.2515754287906187,
"grad_norm": 1.2609211206436157,
"learning_rate": 0.0001,
"loss": 0.5213,
"step": 22840
},
{
"epoch": 1.2526713792536577,
"grad_norm": 1.4843876361846924,
"learning_rate": 0.0001,
"loss": 0.5618,
"step": 22860
},
{
"epoch": 1.253767329716697,
"grad_norm": 1.7089099884033203,
"learning_rate": 0.0001,
"loss": 0.587,
"step": 22880
},
{
"epoch": 1.254863280179736,
"grad_norm": 1.8894917964935303,
"learning_rate": 0.0001,
"loss": 0.5952,
"step": 22900
},
{
"epoch": 1.255959230642775,
"grad_norm": 1.3892401456832886,
"learning_rate": 0.0001,
"loss": 0.5949,
"step": 22920
},
{
"epoch": 1.257055181105814,
"grad_norm": 1.8688722848892212,
"learning_rate": 0.0001,
"loss": 0.6524,
"step": 22940
},
{
"epoch": 1.2581511315688532,
"grad_norm": 1.8726931810379028,
"learning_rate": 0.0001,
"loss": 0.5547,
"step": 22960
},
{
"epoch": 1.2592470820318922,
"grad_norm": 1.9214690923690796,
"learning_rate": 0.0001,
"loss": 0.618,
"step": 22980
},
{
"epoch": 1.2603430324949312,
"grad_norm": 1.6148467063903809,
"learning_rate": 0.0001,
"loss": 0.4822,
"step": 23000
},
{
"epoch": 1.2603430324949312,
"eval_loss": 0.599087655544281,
"eval_runtime": 30610.1389,
"eval_samples_per_second": 2.12,
"eval_steps_per_second": 0.066,
"eval_wer": 40.07337206794192,
"step": 23000
},
{
"epoch": 1.2614389829579702,
"grad_norm": 1.536855936050415,
"learning_rate": 0.0001,
"loss": 0.5852,
"step": 23020
},
{
"epoch": 1.2625349334210094,
"grad_norm": 1.884334683418274,
"learning_rate": 0.0001,
"loss": 0.6474,
"step": 23040
},
{
"epoch": 1.2636308838840484,
"grad_norm": 1.359174132347107,
"learning_rate": 0.0001,
"loss": 0.6204,
"step": 23060
},
{
"epoch": 1.2647268343470874,
"grad_norm": 1.7376055717468262,
"learning_rate": 0.0001,
"loss": 0.5524,
"step": 23080
},
{
"epoch": 1.2658227848101267,
"grad_norm": 1.6594703197479248,
"learning_rate": 0.0001,
"loss": 0.5396,
"step": 23100
},
{
"epoch": 1.2669187352731657,
"grad_norm": 1.4215294122695923,
"learning_rate": 0.0001,
"loss": 0.5842,
"step": 23120
},
{
"epoch": 1.2680146857362047,
"grad_norm": 1.7680842876434326,
"learning_rate": 0.0001,
"loss": 0.5625,
"step": 23140
},
{
"epoch": 1.269110636199244,
"grad_norm": 1.3353180885314941,
"learning_rate": 0.0001,
"loss": 0.5775,
"step": 23160
},
{
"epoch": 1.270206586662283,
"grad_norm": 1.450649619102478,
"learning_rate": 0.0001,
"loss": 0.5319,
"step": 23180
},
{
"epoch": 1.271302537125322,
"grad_norm": 1.7398178577423096,
"learning_rate": 0.0001,
"loss": 0.5615,
"step": 23200
},
{
"epoch": 1.272398487588361,
"grad_norm": 1.6940994262695312,
"learning_rate": 0.0001,
"loss": 0.6069,
"step": 23220
},
{
"epoch": 1.2734944380514,
"grad_norm": 1.899994969367981,
"learning_rate": 0.0001,
"loss": 0.6491,
"step": 23240
},
{
"epoch": 1.2745903885144392,
"grad_norm": 1.457036018371582,
"learning_rate": 0.0001,
"loss": 0.5762,
"step": 23260
},
{
"epoch": 1.2756863389774782,
"grad_norm": 1.4215611219406128,
"learning_rate": 0.0001,
"loss": 0.5966,
"step": 23280
},
{
"epoch": 1.2767822894405172,
"grad_norm": 1.7165329456329346,
"learning_rate": 0.0001,
"loss": 0.5897,
"step": 23300
},
{
"epoch": 1.2778782399035564,
"grad_norm": 1.524688720703125,
"learning_rate": 0.0001,
"loss": 0.6541,
"step": 23320
},
{
"epoch": 1.2789741903665954,
"grad_norm": 1.3125251531600952,
"learning_rate": 0.0001,
"loss": 0.521,
"step": 23340
},
{
"epoch": 1.2800701408296344,
"grad_norm": 1.5787118673324585,
"learning_rate": 0.0001,
"loss": 0.5453,
"step": 23360
},
{
"epoch": 1.2811660912926737,
"grad_norm": 1.636098027229309,
"learning_rate": 0.0001,
"loss": 0.5606,
"step": 23380
},
{
"epoch": 1.2822620417557127,
"grad_norm": 1.4963462352752686,
"learning_rate": 0.0001,
"loss": 0.6251,
"step": 23400
},
{
"epoch": 1.2833579922187517,
"grad_norm": 1.3213664293289185,
"learning_rate": 0.0001,
"loss": 0.6293,
"step": 23420
},
{
"epoch": 1.284453942681791,
"grad_norm": 1.6737782955169678,
"learning_rate": 0.0001,
"loss": 0.6499,
"step": 23440
},
{
"epoch": 1.28554989314483,
"grad_norm": 1.525976300239563,
"learning_rate": 0.0001,
"loss": 0.6073,
"step": 23460
},
{
"epoch": 1.286645843607869,
"grad_norm": 1.3534733057022095,
"learning_rate": 0.0001,
"loss": 0.5434,
"step": 23480
},
{
"epoch": 1.287741794070908,
"grad_norm": 1.8090375661849976,
"learning_rate": 0.0001,
"loss": 0.6806,
"step": 23500
},
{
"epoch": 1.288837744533947,
"grad_norm": 1.7110000848770142,
"learning_rate": 0.0001,
"loss": 0.5678,
"step": 23520
},
{
"epoch": 1.2899336949969862,
"grad_norm": 1.6300121545791626,
"learning_rate": 0.0001,
"loss": 0.6674,
"step": 23540
},
{
"epoch": 1.2910296454600252,
"grad_norm": 1.4068278074264526,
"learning_rate": 0.0001,
"loss": 0.5294,
"step": 23560
},
{
"epoch": 1.2921255959230642,
"grad_norm": 1.6015020608901978,
"learning_rate": 0.0001,
"loss": 0.5791,
"step": 23580
},
{
"epoch": 1.2932215463861034,
"grad_norm": 1.7282171249389648,
"learning_rate": 0.0001,
"loss": 0.6358,
"step": 23600
},
{
"epoch": 1.2943174968491424,
"grad_norm": 1.3395479917526245,
"learning_rate": 0.0001,
"loss": 0.5972,
"step": 23620
},
{
"epoch": 1.2954134473121814,
"grad_norm": 1.5393882989883423,
"learning_rate": 0.0001,
"loss": 0.6243,
"step": 23640
},
{
"epoch": 1.2965093977752207,
"grad_norm": 2.0010182857513428,
"learning_rate": 0.0001,
"loss": 0.6047,
"step": 23660
},
{
"epoch": 1.2976053482382597,
"grad_norm": 1.742031455039978,
"learning_rate": 0.0001,
"loss": 0.586,
"step": 23680
},
{
"epoch": 1.2987012987012987,
"grad_norm": 1.5596591234207153,
"learning_rate": 0.0001,
"loss": 0.5545,
"step": 23700
},
{
"epoch": 1.2997972491643377,
"grad_norm": 1.2098394632339478,
"learning_rate": 0.0001,
"loss": 0.6264,
"step": 23720
},
{
"epoch": 1.3008931996273767,
"grad_norm": 1.5962443351745605,
"learning_rate": 0.0001,
"loss": 0.5827,
"step": 23740
},
{
"epoch": 1.301989150090416,
"grad_norm": 1.7482990026474,
"learning_rate": 0.0001,
"loss": 0.6113,
"step": 23760
},
{
"epoch": 1.303085100553455,
"grad_norm": 1.6832690238952637,
"learning_rate": 0.0001,
"loss": 0.5231,
"step": 23780
},
{
"epoch": 1.304181051016494,
"grad_norm": 1.2904006242752075,
"learning_rate": 0.0001,
"loss": 0.5472,
"step": 23800
},
{
"epoch": 1.3052770014795332,
"grad_norm": 1.260377287864685,
"learning_rate": 0.0001,
"loss": 0.5555,
"step": 23820
},
{
"epoch": 1.3063729519425722,
"grad_norm": 1.6346126794815063,
"learning_rate": 0.0001,
"loss": 0.5247,
"step": 23840
},
{
"epoch": 1.3074689024056112,
"grad_norm": 1.5854252576828003,
"learning_rate": 0.0001,
"loss": 0.5784,
"step": 23860
},
{
"epoch": 1.3085648528686504,
"grad_norm": 1.754293441772461,
"learning_rate": 0.0001,
"loss": 0.6555,
"step": 23880
},
{
"epoch": 1.3096608033316894,
"grad_norm": 1.642504096031189,
"learning_rate": 0.0001,
"loss": 0.5822,
"step": 23900
},
{
"epoch": 1.3107567537947284,
"grad_norm": 1.4986265897750854,
"learning_rate": 0.0001,
"loss": 0.6246,
"step": 23920
},
{
"epoch": 1.3118527042577677,
"grad_norm": 1.6175062656402588,
"learning_rate": 0.0001,
"loss": 0.5616,
"step": 23940
},
{
"epoch": 1.3129486547208067,
"grad_norm": 1.6189128160476685,
"learning_rate": 0.0001,
"loss": 0.6108,
"step": 23960
},
{
"epoch": 1.3140446051838457,
"grad_norm": 1.6187801361083984,
"learning_rate": 0.0001,
"loss": 0.5992,
"step": 23980
},
{
"epoch": 1.3151405556468847,
"grad_norm": 1.396136999130249,
"learning_rate": 0.0001,
"loss": 0.5409,
"step": 24000
},
{
"epoch": 1.3151405556468847,
"eval_loss": 0.5925264358520508,
"eval_runtime": 30722.0761,
"eval_samples_per_second": 2.112,
"eval_steps_per_second": 0.066,
"eval_wer": 50.53154519589281,
"step": 24000
},
{
"epoch": 1.3162365061099237,
"grad_norm": 1.4604203701019287,
"learning_rate": 0.0001,
"loss": 0.5795,
"step": 24020
},
{
"epoch": 1.317332456572963,
"grad_norm": 1.667830228805542,
"learning_rate": 0.0001,
"loss": 0.5799,
"step": 24040
},
{
"epoch": 1.318428407036002,
"grad_norm": 1.4066877365112305,
"learning_rate": 0.0001,
"loss": 0.603,
"step": 24060
},
{
"epoch": 1.319524357499041,
"grad_norm": 1.6986689567565918,
"learning_rate": 0.0001,
"loss": 0.5943,
"step": 24080
},
{
"epoch": 1.3206203079620802,
"grad_norm": 2.00864839553833,
"learning_rate": 0.0001,
"loss": 0.646,
"step": 24100
},
{
"epoch": 1.3217162584251192,
"grad_norm": 1.3557894229888916,
"learning_rate": 0.0001,
"loss": 0.6324,
"step": 24120
},
{
"epoch": 1.3228122088881582,
"grad_norm": 1.442983865737915,
"learning_rate": 0.0001,
"loss": 0.5326,
"step": 24140
},
{
"epoch": 1.3239081593511974,
"grad_norm": 1.4924156665802002,
"learning_rate": 0.0001,
"loss": 0.5464,
"step": 24160
},
{
"epoch": 1.3250041098142364,
"grad_norm": 1.408599615097046,
"learning_rate": 0.0001,
"loss": 0.5989,
"step": 24180
},
{
"epoch": 1.3261000602772754,
"grad_norm": 1.6432658433914185,
"learning_rate": 0.0001,
"loss": 0.6034,
"step": 24200
},
{
"epoch": 1.3271960107403145,
"grad_norm": 1.5687427520751953,
"learning_rate": 0.0001,
"loss": 0.5222,
"step": 24220
},
{
"epoch": 1.3282919612033537,
"grad_norm": 1.3878777027130127,
"learning_rate": 0.0001,
"loss": 0.5913,
"step": 24240
},
{
"epoch": 1.3293879116663927,
"grad_norm": 1.276931643486023,
"learning_rate": 0.0001,
"loss": 0.5509,
"step": 24260
},
{
"epoch": 1.3304838621294317,
"grad_norm": 1.5601953268051147,
"learning_rate": 0.0001,
"loss": 0.6077,
"step": 24280
},
{
"epoch": 1.3315798125924707,
"grad_norm": 1.9250099658966064,
"learning_rate": 0.0001,
"loss": 0.5629,
"step": 24300
},
{
"epoch": 1.33267576305551,
"grad_norm": 1.314794659614563,
"learning_rate": 0.0001,
"loss": 0.6085,
"step": 24320
},
{
"epoch": 1.333771713518549,
"grad_norm": 1.4445682764053345,
"learning_rate": 0.0001,
"loss": 0.5725,
"step": 24340
},
{
"epoch": 1.334867663981588,
"grad_norm": 1.6029905080795288,
"learning_rate": 0.0001,
"loss": 0.6448,
"step": 24360
},
{
"epoch": 1.3359636144446272,
"grad_norm": 1.969078540802002,
"learning_rate": 0.0001,
"loss": 0.5397,
"step": 24380
},
{
"epoch": 1.3370595649076662,
"grad_norm": 1.900762677192688,
"learning_rate": 0.0001,
"loss": 0.6243,
"step": 24400
},
{
"epoch": 1.3381555153707052,
"grad_norm": 1.8829255104064941,
"learning_rate": 0.0001,
"loss": 0.5744,
"step": 24420
},
{
"epoch": 1.3392514658337444,
"grad_norm": 1.4927318096160889,
"learning_rate": 0.0001,
"loss": 0.6023,
"step": 24440
},
{
"epoch": 1.3403474162967834,
"grad_norm": 1.7608602046966553,
"learning_rate": 0.0001,
"loss": 0.5654,
"step": 24460
},
{
"epoch": 1.3414433667598225,
"grad_norm": 1.2257969379425049,
"learning_rate": 0.0001,
"loss": 0.5953,
"step": 24480
},
{
"epoch": 1.3425393172228615,
"grad_norm": 1.5768262147903442,
"learning_rate": 0.0001,
"loss": 0.5972,
"step": 24500
},
{
"epoch": 1.3436352676859005,
"grad_norm": 1.5148476362228394,
"learning_rate": 0.0001,
"loss": 0.6313,
"step": 24520
},
{
"epoch": 1.3447312181489397,
"grad_norm": 1.347442865371704,
"learning_rate": 0.0001,
"loss": 0.5867,
"step": 24540
},
{
"epoch": 1.3458271686119787,
"grad_norm": 1.3003042936325073,
"learning_rate": 0.0001,
"loss": 0.5709,
"step": 24560
},
{
"epoch": 1.3469231190750177,
"grad_norm": 1.5577054023742676,
"learning_rate": 0.0001,
"loss": 0.5719,
"step": 24580
},
{
"epoch": 1.348019069538057,
"grad_norm": 1.6141449213027954,
"learning_rate": 0.0001,
"loss": 0.5684,
"step": 24600
},
{
"epoch": 1.349115020001096,
"grad_norm": 1.515576958656311,
"learning_rate": 0.0001,
"loss": 0.5421,
"step": 24620
},
{
"epoch": 1.350210970464135,
"grad_norm": 1.62236487865448,
"learning_rate": 0.0001,
"loss": 0.6338,
"step": 24640
},
{
"epoch": 1.3513069209271742,
"grad_norm": 1.5557656288146973,
"learning_rate": 0.0001,
"loss": 0.55,
"step": 24660
},
{
"epoch": 1.3524028713902132,
"grad_norm": 1.9079481363296509,
"learning_rate": 0.0001,
"loss": 0.6352,
"step": 24680
},
{
"epoch": 1.3534988218532522,
"grad_norm": 1.3640868663787842,
"learning_rate": 0.0001,
"loss": 0.6484,
"step": 24700
},
{
"epoch": 1.3545947723162914,
"grad_norm": 1.831858515739441,
"learning_rate": 0.0001,
"loss": 0.5577,
"step": 24720
},
{
"epoch": 1.3556907227793304,
"grad_norm": 1.5431773662567139,
"learning_rate": 0.0001,
"loss": 0.5918,
"step": 24740
},
{
"epoch": 1.3567866732423695,
"grad_norm": 1.1655539274215698,
"learning_rate": 0.0001,
"loss": 0.5892,
"step": 24760
},
{
"epoch": 1.3578826237054085,
"grad_norm": 1.7395954132080078,
"learning_rate": 0.0001,
"loss": 0.5416,
"step": 24780
},
{
"epoch": 1.3589785741684475,
"grad_norm": 1.89164400100708,
"learning_rate": 0.0001,
"loss": 0.5708,
"step": 24800
},
{
"epoch": 1.3600745246314867,
"grad_norm": 1.5013233423233032,
"learning_rate": 0.0001,
"loss": 0.5234,
"step": 24820
},
{
"epoch": 1.3611704750945257,
"grad_norm": 1.4959980249404907,
"learning_rate": 0.0001,
"loss": 0.7578,
"step": 24840
},
{
"epoch": 1.3622664255575647,
"grad_norm": 1.3962562084197998,
"learning_rate": 0.0001,
"loss": 0.564,
"step": 24860
},
{
"epoch": 1.363362376020604,
"grad_norm": 1.370695948600769,
"learning_rate": 0.0001,
"loss": 0.5533,
"step": 24880
},
{
"epoch": 1.364458326483643,
"grad_norm": 1.629671573638916,
"learning_rate": 0.0001,
"loss": 0.5463,
"step": 24900
},
{
"epoch": 1.365554276946682,
"grad_norm": 1.6115111112594604,
"learning_rate": 0.0001,
"loss": 0.5837,
"step": 24920
},
{
"epoch": 1.3666502274097212,
"grad_norm": 1.756898283958435,
"learning_rate": 0.0001,
"loss": 0.5784,
"step": 24940
},
{
"epoch": 1.3677461778727602,
"grad_norm": 1.3999930620193481,
"learning_rate": 0.0001,
"loss": 0.6007,
"step": 24960
},
{
"epoch": 1.3688421283357992,
"grad_norm": 1.3553202152252197,
"learning_rate": 0.0001,
"loss": 0.5996,
"step": 24980
},
{
"epoch": 1.3699380787988382,
"grad_norm": 1.3020998239517212,
"learning_rate": 0.0001,
"loss": 0.5006,
"step": 25000
},
{
"epoch": 1.3699380787988382,
"eval_loss": 0.5878660678863525,
"eval_runtime": 30562.5187,
"eval_samples_per_second": 2.123,
"eval_steps_per_second": 0.066,
"eval_wer": 44.19117823283905,
"step": 25000
},
{
"epoch": 1.3710340292618772,
"grad_norm": 1.622175931930542,
"learning_rate": 0.0001,
"loss": 0.6049,
"step": 25020
},
{
"epoch": 1.3721299797249165,
"grad_norm": 1.6178827285766602,
"learning_rate": 0.0001,
"loss": 0.6272,
"step": 25040
},
{
"epoch": 1.3732259301879555,
"grad_norm": 1.9687530994415283,
"learning_rate": 0.0001,
"loss": 0.6194,
"step": 25060
},
{
"epoch": 1.3743218806509945,
"grad_norm": 1.5638937950134277,
"learning_rate": 0.0001,
"loss": 0.6033,
"step": 25080
},
{
"epoch": 1.3754178311140337,
"grad_norm": 1.5316487550735474,
"learning_rate": 0.0001,
"loss": 0.6847,
"step": 25100
},
{
"epoch": 1.3765137815770727,
"grad_norm": 2.431041717529297,
"learning_rate": 0.0001,
"loss": 0.5975,
"step": 25120
},
{
"epoch": 1.3776097320401117,
"grad_norm": 1.2522825002670288,
"learning_rate": 0.0001,
"loss": 0.6188,
"step": 25140
},
{
"epoch": 1.378705682503151,
"grad_norm": 1.428832769393921,
"learning_rate": 0.0001,
"loss": 0.4743,
"step": 25160
},
{
"epoch": 1.37980163296619,
"grad_norm": 1.6147892475128174,
"learning_rate": 0.0001,
"loss": 0.6463,
"step": 25180
},
{
"epoch": 1.380897583429229,
"grad_norm": 1.6648330688476562,
"learning_rate": 0.0001,
"loss": 0.556,
"step": 25200
},
{
"epoch": 1.3819935338922682,
"grad_norm": 1.4745285511016846,
"learning_rate": 0.0001,
"loss": 0.5997,
"step": 25220
},
{
"epoch": 1.3830894843553072,
"grad_norm": 1.433664321899414,
"learning_rate": 0.0001,
"loss": 0.5288,
"step": 25240
},
{
"epoch": 1.3841854348183462,
"grad_norm": 1.264054775238037,
"learning_rate": 0.0001,
"loss": 0.6075,
"step": 25260
},
{
"epoch": 1.3852813852813852,
"grad_norm": 1.7683794498443604,
"learning_rate": 0.0001,
"loss": 0.6011,
"step": 25280
},
{
"epoch": 1.3863773357444242,
"grad_norm": 1.5316460132598877,
"learning_rate": 0.0001,
"loss": 0.5799,
"step": 25300
},
{
"epoch": 1.3874732862074635,
"grad_norm": 3.361269950866699,
"learning_rate": 0.0001,
"loss": 0.6577,
"step": 25320
},
{
"epoch": 1.3885692366705025,
"grad_norm": 1.7918739318847656,
"learning_rate": 0.0001,
"loss": 0.6106,
"step": 25340
},
{
"epoch": 1.3896651871335415,
"grad_norm": 1.5828975439071655,
"learning_rate": 0.0001,
"loss": 0.5683,
"step": 25360
},
{
"epoch": 1.3907611375965807,
"grad_norm": 1.9565653800964355,
"learning_rate": 0.0001,
"loss": 0.5368,
"step": 25380
},
{
"epoch": 1.3918570880596197,
"grad_norm": 1.446603775024414,
"learning_rate": 0.0001,
"loss": 0.551,
"step": 25400
},
{
"epoch": 1.3929530385226587,
"grad_norm": 1.3377403020858765,
"learning_rate": 0.0001,
"loss": 0.5889,
"step": 25420
},
{
"epoch": 1.394048988985698,
"grad_norm": 1.5956981182098389,
"learning_rate": 0.0001,
"loss": 0.5962,
"step": 25440
},
{
"epoch": 1.395144939448737,
"grad_norm": 2.122570276260376,
"learning_rate": 0.0001,
"loss": 0.6044,
"step": 25460
},
{
"epoch": 1.396240889911776,
"grad_norm": 1.8314157724380493,
"learning_rate": 0.0001,
"loss": 0.5612,
"step": 25480
},
{
"epoch": 1.397336840374815,
"grad_norm": 1.327453851699829,
"learning_rate": 0.0001,
"loss": 0.5214,
"step": 25500
},
{
"epoch": 1.3984327908378542,
"grad_norm": 2.1016182899475098,
"learning_rate": 0.0001,
"loss": 0.594,
"step": 25520
},
{
"epoch": 1.3995287413008932,
"grad_norm": 1.5892709493637085,
"learning_rate": 0.0001,
"loss": 0.5927,
"step": 25540
},
{
"epoch": 1.4006246917639322,
"grad_norm": 1.8178175687789917,
"learning_rate": 0.0001,
"loss": 0.5204,
"step": 25560
},
{
"epoch": 1.4017206422269712,
"grad_norm": 1.3808900117874146,
"learning_rate": 0.0001,
"loss": 0.5434,
"step": 25580
},
{
"epoch": 1.4028165926900105,
"grad_norm": 1.4849821329116821,
"learning_rate": 0.0001,
"loss": 0.6208,
"step": 25600
},
{
"epoch": 1.4039125431530495,
"grad_norm": 1.5404740571975708,
"learning_rate": 0.0001,
"loss": 0.5492,
"step": 25620
},
{
"epoch": 1.4050084936160885,
"grad_norm": 1.4663268327713013,
"learning_rate": 0.0001,
"loss": 0.5275,
"step": 25640
},
{
"epoch": 1.4061044440791277,
"grad_norm": 1.5518896579742432,
"learning_rate": 0.0001,
"loss": 0.5966,
"step": 25660
},
{
"epoch": 1.4072003945421667,
"grad_norm": 1.5495002269744873,
"learning_rate": 0.0001,
"loss": 0.5451,
"step": 25680
},
{
"epoch": 1.4082963450052057,
"grad_norm": 1.5400805473327637,
"learning_rate": 0.0001,
"loss": 0.6543,
"step": 25700
},
{
"epoch": 1.409392295468245,
"grad_norm": 1.5806297063827515,
"learning_rate": 0.0001,
"loss": 0.5849,
"step": 25720
},
{
"epoch": 1.410488245931284,
"grad_norm": 1.574846863746643,
"learning_rate": 0.0001,
"loss": 0.5653,
"step": 25740
},
{
"epoch": 1.411584196394323,
"grad_norm": 1.4742170572280884,
"learning_rate": 0.0001,
"loss": 0.5173,
"step": 25760
},
{
"epoch": 1.412680146857362,
"grad_norm": 2.009706735610962,
"learning_rate": 0.0001,
"loss": 0.5467,
"step": 25780
},
{
"epoch": 1.413776097320401,
"grad_norm": 1.9192876815795898,
"learning_rate": 0.0001,
"loss": 0.6211,
"step": 25800
},
{
"epoch": 1.4148720477834402,
"grad_norm": 1.2425312995910645,
"learning_rate": 0.0001,
"loss": 0.5398,
"step": 25820
},
{
"epoch": 1.4159679982464792,
"grad_norm": 1.8032817840576172,
"learning_rate": 0.0001,
"loss": 0.6143,
"step": 25840
},
{
"epoch": 1.4170639487095182,
"grad_norm": 1.5889620780944824,
"learning_rate": 0.0001,
"loss": 0.6361,
"step": 25860
},
{
"epoch": 1.4181598991725575,
"grad_norm": 1.3312866687774658,
"learning_rate": 0.0001,
"loss": 0.637,
"step": 25880
},
{
"epoch": 1.4192558496355965,
"grad_norm": 1.4662201404571533,
"learning_rate": 0.0001,
"loss": 0.6206,
"step": 25900
},
{
"epoch": 1.4203518000986355,
"grad_norm": 1.7590441703796387,
"learning_rate": 0.0001,
"loss": 0.5637,
"step": 25920
},
{
"epoch": 1.4214477505616747,
"grad_norm": 1.5360428094863892,
"learning_rate": 0.0001,
"loss": 0.6033,
"step": 25940
},
{
"epoch": 1.4225437010247137,
"grad_norm": 1.6273736953735352,
"learning_rate": 0.0001,
"loss": 0.559,
"step": 25960
},
{
"epoch": 1.4236396514877527,
"grad_norm": 1.3700859546661377,
"learning_rate": 0.0001,
"loss": 0.637,
"step": 25980
},
{
"epoch": 1.4247356019507917,
"grad_norm": 1.419872522354126,
"learning_rate": 0.0001,
"loss": 0.5674,
"step": 26000
},
{
"epoch": 1.4247356019507917,
"eval_loss": 0.5845187306404114,
"eval_runtime": 30268.7098,
"eval_samples_per_second": 2.144,
"eval_steps_per_second": 0.067,
"eval_wer": 33.472796184515516,
"step": 26000
}
],
"logging_steps": 20,
"max_steps": 54747,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.278115609397166e+21,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}