|
{ |
|
"best_metric": 33.472796184515516, |
|
"best_model_checkpoint": "./whisper-distil-v3/checkpoint-26000", |
|
"epoch": 1.4247356019507917, |
|
"eval_steps": 1000, |
|
"global_step": 26000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010959504630390707, |
|
"grad_norm": 5.807405471801758, |
|
"learning_rate": 3.6e-06, |
|
"loss": 8.6037, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0021919009260781414, |
|
"grad_norm": 5.726860523223877, |
|
"learning_rate": 7.6e-06, |
|
"loss": 8.4874, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003287851389117212, |
|
"grad_norm": 7.615314960479736, |
|
"learning_rate": 1.16e-05, |
|
"loss": 8.0934, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004383801852156283, |
|
"grad_norm": 7.089465618133545, |
|
"learning_rate": 1.56e-05, |
|
"loss": 7.4227, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005479752315195353, |
|
"grad_norm": 5.158086776733398, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 6.3418, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.006575702778234424, |
|
"grad_norm": 3.288583278656006, |
|
"learning_rate": 2.3400000000000003e-05, |
|
"loss": 5.2747, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.007671653241273495, |
|
"grad_norm": 3.1715681552886963, |
|
"learning_rate": 2.7400000000000002e-05, |
|
"loss": 4.3075, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.008767603704312565, |
|
"grad_norm": 3.033198833465576, |
|
"learning_rate": 3.1400000000000004e-05, |
|
"loss": 3.631, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.009863554167351636, |
|
"grad_norm": 3.027251720428467, |
|
"learning_rate": 3.54e-05, |
|
"loss": 3.2186, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.010959504630390707, |
|
"grad_norm": 2.9063901901245117, |
|
"learning_rate": 3.94e-05, |
|
"loss": 2.9226, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.012055455093429777, |
|
"grad_norm": 3.1165690422058105, |
|
"learning_rate": 4.3400000000000005e-05, |
|
"loss": 2.8402, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.013151405556468848, |
|
"grad_norm": 2.7977383136749268, |
|
"learning_rate": 4.74e-05, |
|
"loss": 2.613, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.014247356019507919, |
|
"grad_norm": 3.7818286418914795, |
|
"learning_rate": 5.14e-05, |
|
"loss": 2.377, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01534330648254699, |
|
"grad_norm": 3.2088804244995117, |
|
"learning_rate": 5.5400000000000005e-05, |
|
"loss": 2.3204, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01643925694558606, |
|
"grad_norm": 3.2518157958984375, |
|
"learning_rate": 5.94e-05, |
|
"loss": 2.1812, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01753520740862513, |
|
"grad_norm": 3.725226640701294, |
|
"learning_rate": 6.340000000000001e-05, |
|
"loss": 2.158, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.0186311578716642, |
|
"grad_norm": 3.5929486751556396, |
|
"learning_rate": 6.740000000000001e-05, |
|
"loss": 2.1241, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.019727108334703272, |
|
"grad_norm": 4.1317572593688965, |
|
"learning_rate": 7.14e-05, |
|
"loss": 2.2284, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.020823058797742343, |
|
"grad_norm": 3.4276161193847656, |
|
"learning_rate": 7.54e-05, |
|
"loss": 1.9655, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.021919009260781414, |
|
"grad_norm": 3.9775540828704834, |
|
"learning_rate": 7.94e-05, |
|
"loss": 1.9407, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.023014959723820484, |
|
"grad_norm": 3.67511248588562, |
|
"learning_rate": 8.34e-05, |
|
"loss": 1.9312, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.024110910186859555, |
|
"grad_norm": 4.781565189361572, |
|
"learning_rate": 8.740000000000001e-05, |
|
"loss": 1.9218, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.025206860649898626, |
|
"grad_norm": 5.2797698974609375, |
|
"learning_rate": 9.140000000000001e-05, |
|
"loss": 1.8729, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.026302811112937696, |
|
"grad_norm": 6.1737284660339355, |
|
"learning_rate": 9.54e-05, |
|
"loss": 1.6848, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.027398761575976767, |
|
"grad_norm": 4.926702976226807, |
|
"learning_rate": 9.94e-05, |
|
"loss": 1.8866, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.028494712039015838, |
|
"grad_norm": 4.043098449707031, |
|
"learning_rate": 0.0001, |
|
"loss": 1.749, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.02959066250205491, |
|
"grad_norm": 4.022521495819092, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7654, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.03068661296509398, |
|
"grad_norm": 3.1964547634124756, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7496, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.03178256342813305, |
|
"grad_norm": 3.5182583332061768, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7312, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.03287851389117212, |
|
"grad_norm": 3.529665231704712, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6307, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03397446435421119, |
|
"grad_norm": 3.329401969909668, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7613, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.03507041481725026, |
|
"grad_norm": 3.4481399059295654, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6204, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03616636528028933, |
|
"grad_norm": 3.3551902770996094, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5846, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.0372623157433284, |
|
"grad_norm": 3.591031074523926, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6077, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.038358266206367474, |
|
"grad_norm": 3.8630764484405518, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5275, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.039454216669406544, |
|
"grad_norm": 3.77461838722229, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5386, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.040550167132445615, |
|
"grad_norm": 2.9158153533935547, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5536, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.041646117595484686, |
|
"grad_norm": 3.761077642440796, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5607, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.042742068058523756, |
|
"grad_norm": 3.5758230686187744, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5145, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.04383801852156283, |
|
"grad_norm": 3.01175856590271, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5639, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0449339689846019, |
|
"grad_norm": 3.8395230770111084, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6478, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.04602991944764097, |
|
"grad_norm": 2.9240541458129883, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5303, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.04712586991068004, |
|
"grad_norm": 3.603835344314575, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4436, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.04822182037371911, |
|
"grad_norm": 3.1701183319091797, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5622, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.04931777083675818, |
|
"grad_norm": 4.054835796356201, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4354, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05041372129979725, |
|
"grad_norm": 2.9651615619659424, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4676, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.05150967176283632, |
|
"grad_norm": 3.2480218410491943, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3769, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.05260562222587539, |
|
"grad_norm": 3.494356155395508, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4928, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05370157268891446, |
|
"grad_norm": 3.394205331802368, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4045, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.054797523151953534, |
|
"grad_norm": 3.333587646484375, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4953, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.054797523151953534, |
|
"eval_loss": 1.4313914775848389, |
|
"eval_runtime": 30911.9498, |
|
"eval_samples_per_second": 2.099, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 70.88587442180551, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.055893473614992605, |
|
"grad_norm": 3.2317609786987305, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4376, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.056989424078031675, |
|
"grad_norm": 2.9077706336975098, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4398, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.058085374541070746, |
|
"grad_norm": 3.0054707527160645, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4326, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.05918132500410982, |
|
"grad_norm": 3.7243480682373047, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4915, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.06027727546714889, |
|
"grad_norm": 2.9608402252197266, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4595, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06137322593018796, |
|
"grad_norm": 2.652829885482788, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4378, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.06246917639322703, |
|
"grad_norm": 2.9017295837402344, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4257, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.0635651268562661, |
|
"grad_norm": 3.0610859394073486, |
|
"learning_rate": 0.0001, |
|
"loss": 1.348, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.06466107731930516, |
|
"grad_norm": 2.701765775680542, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3853, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.06575702778234424, |
|
"grad_norm": 3.83376145362854, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4708, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.0668529782453833, |
|
"grad_norm": 3.178449869155884, |
|
"learning_rate": 0.0001, |
|
"loss": 1.362, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.06794892870842238, |
|
"grad_norm": 3.796205997467041, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4331, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.06904487917146145, |
|
"grad_norm": 2.8163928985595703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2835, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.07014082963450052, |
|
"grad_norm": 2.698793888092041, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3444, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.07123678009753959, |
|
"grad_norm": 2.584484815597534, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2145, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.07233273056057866, |
|
"grad_norm": 2.696967363357544, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2855, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.07342868102361773, |
|
"grad_norm": 3.382924795150757, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2164, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.0745246314866568, |
|
"grad_norm": 2.8127260208129883, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2873, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.07562058194969587, |
|
"grad_norm": 2.631011724472046, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3759, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.07671653241273495, |
|
"grad_norm": 2.913276433944702, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2688, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.07781248287577401, |
|
"grad_norm": 2.811455488204956, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2179, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.07890843333881309, |
|
"grad_norm": 2.8242247104644775, |
|
"learning_rate": 0.0001, |
|
"loss": 1.142, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.08000438380185215, |
|
"grad_norm": 3.1733341217041016, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2934, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.08110033426489123, |
|
"grad_norm": 2.491945743560791, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3274, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.0821962847279303, |
|
"grad_norm": 2.717165470123291, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2484, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08329223519096937, |
|
"grad_norm": 2.3187918663024902, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2038, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.08438818565400844, |
|
"grad_norm": 2.9296529293060303, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1962, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.08548413611704751, |
|
"grad_norm": 2.5763330459594727, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2122, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.08658008658008658, |
|
"grad_norm": 3.4159390926361084, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2302, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.08767603704312565, |
|
"grad_norm": 2.893261432647705, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2106, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.08877198750616472, |
|
"grad_norm": 2.1891727447509766, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2282, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.0898679379692038, |
|
"grad_norm": 2.4100029468536377, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2039, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.09096388843224286, |
|
"grad_norm": 2.5420494079589844, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2201, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.09205983889528194, |
|
"grad_norm": 3.1885313987731934, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2446, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.093155789358321, |
|
"grad_norm": 3.120586633682251, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2308, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.09425173982136008, |
|
"grad_norm": 2.4548628330230713, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1777, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.09534769028439914, |
|
"grad_norm": 3.101803779602051, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2123, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.09644364074743822, |
|
"grad_norm": 2.536121368408203, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1914, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.09753959121047728, |
|
"grad_norm": 2.3796801567077637, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1848, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.09863554167351636, |
|
"grad_norm": 2.67964243888855, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1973, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.09973149213655542, |
|
"grad_norm": 3.160212755203247, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2472, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.1008274425995945, |
|
"grad_norm": 2.7035927772521973, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1844, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.10192339306263357, |
|
"grad_norm": 2.7725090980529785, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1262, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.10301934352567264, |
|
"grad_norm": 2.2705016136169434, |
|
"learning_rate": 0.0001, |
|
"loss": 1.182, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.10411529398871171, |
|
"grad_norm": 3.0717403888702393, |
|
"learning_rate": 0.0001, |
|
"loss": 1.137, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.10521124445175078, |
|
"grad_norm": 2.9270904064178467, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2556, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.10630719491478985, |
|
"grad_norm": 2.4564895629882812, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1812, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.10740314537782893, |
|
"grad_norm": 2.983851909637451, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1445, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.10849909584086799, |
|
"grad_norm": 2.772733688354492, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1968, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.10959504630390707, |
|
"grad_norm": 2.9768126010894775, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0942, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.10959504630390707, |
|
"eval_loss": 1.1446514129638672, |
|
"eval_runtime": 30634.8587, |
|
"eval_samples_per_second": 2.118, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 61.28519240053001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.11069099676694613, |
|
"grad_norm": 2.806312322616577, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1924, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.11178694722998521, |
|
"grad_norm": 2.639443874359131, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0572, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.11288289769302427, |
|
"grad_norm": 2.2005367279052734, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1337, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.11397884815606335, |
|
"grad_norm": 2.4102020263671875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1297, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.11507479861910241, |
|
"grad_norm": 3.410691976547241, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1354, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.11617074908214149, |
|
"grad_norm": 2.1337172985076904, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1725, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.11726669954518056, |
|
"grad_norm": 2.627319097518921, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1006, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.11836265000821963, |
|
"grad_norm": 2.6450726985931396, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0985, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.1194586004712587, |
|
"grad_norm": 2.3205084800720215, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1634, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.12055455093429777, |
|
"grad_norm": 2.51177978515625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1697, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.12165050139733684, |
|
"grad_norm": 2.6632323265075684, |
|
"learning_rate": 0.0001, |
|
"loss": 1.071, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.12274645186037592, |
|
"grad_norm": 2.8322274684906006, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0983, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.12384240232341498, |
|
"grad_norm": 2.547708749771118, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0629, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.12493835278645406, |
|
"grad_norm": 2.6638150215148926, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1985, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.12603430324949313, |
|
"grad_norm": 2.980463743209839, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1885, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.1271302537125322, |
|
"grad_norm": 1.9924368858337402, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0971, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.12822620417557126, |
|
"grad_norm": 2.2847180366516113, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1149, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.12932215463861033, |
|
"grad_norm": 2.4860479831695557, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0927, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.13041810510164942, |
|
"grad_norm": 2.3988494873046875, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1918, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.13151405556468848, |
|
"grad_norm": 2.5361902713775635, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0603, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.13261000602772754, |
|
"grad_norm": 2.4060215950012207, |
|
"learning_rate": 0.0001, |
|
"loss": 1.056, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.1337059564907666, |
|
"grad_norm": 2.4094231128692627, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0787, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.1348019069538057, |
|
"grad_norm": 2.5207912921905518, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0901, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.13589785741684476, |
|
"grad_norm": 2.1340293884277344, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1691, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.13699380787988383, |
|
"grad_norm": 2.312554359436035, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9791, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1380897583429229, |
|
"grad_norm": 2.2881298065185547, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9998, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.13918570880596198, |
|
"grad_norm": 2.2146573066711426, |
|
"learning_rate": 0.0001, |
|
"loss": 1.094, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.14028165926900105, |
|
"grad_norm": 2.3992650508880615, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0667, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.1413776097320401, |
|
"grad_norm": 2.7630209922790527, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1541, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.14247356019507917, |
|
"grad_norm": 2.9216675758361816, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0463, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.14356951065811827, |
|
"grad_norm": 2.366373062133789, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0557, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.14466546112115733, |
|
"grad_norm": 2.7161865234375, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1066, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.1457614115841964, |
|
"grad_norm": 2.046992778778076, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9786, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.14685736204723546, |
|
"grad_norm": 2.6320793628692627, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9564, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.14795331251027455, |
|
"grad_norm": 2.485445737838745, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0283, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.1490492629733136, |
|
"grad_norm": 2.267420768737793, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0092, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.15014521343635268, |
|
"grad_norm": 2.618067502975464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0369, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.15124116389939174, |
|
"grad_norm": 2.502471685409546, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9982, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.15233711436243083, |
|
"grad_norm": 2.936964273452759, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1122, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.1534330648254699, |
|
"grad_norm": 2.5342159271240234, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0409, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.15452901528850896, |
|
"grad_norm": 2.88598895072937, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0259, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.15562496575154802, |
|
"grad_norm": 2.6327946186065674, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9829, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.1567209162145871, |
|
"grad_norm": 2.4873671531677246, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0472, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.15781686667762618, |
|
"grad_norm": 2.1543166637420654, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0157, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.15891281714066524, |
|
"grad_norm": 1.9687381982803345, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0465, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.1600087676037043, |
|
"grad_norm": 2.868544816970825, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9835, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.1611047180667434, |
|
"grad_norm": 2.3211984634399414, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1204, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.16220066852978246, |
|
"grad_norm": 2.631458282470703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0175, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.16329661899282152, |
|
"grad_norm": 2.7994022369384766, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0828, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.1643925694558606, |
|
"grad_norm": 2.051626443862915, |
|
"learning_rate": 0.0001, |
|
"loss": 0.97, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1643925694558606, |
|
"eval_loss": 1.0072325468063354, |
|
"eval_runtime": 30710.9249, |
|
"eval_samples_per_second": 2.113, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 55.08434535201816, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16548851991889968, |
|
"grad_norm": 2.6088364124298096, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9803, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.16658447038193874, |
|
"grad_norm": 2.234034299850464, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0757, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.1676804208449778, |
|
"grad_norm": 2.3472328186035156, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9408, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.16877637130801687, |
|
"grad_norm": 2.5871200561523438, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9269, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.16987232177105596, |
|
"grad_norm": 2.0150465965270996, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.17096827223409503, |
|
"grad_norm": 2.5823395252227783, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0559, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.1720642226971341, |
|
"grad_norm": 2.8252885341644287, |
|
"learning_rate": 0.0001, |
|
"loss": 1.1219, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.17316017316017315, |
|
"grad_norm": 2.1086535453796387, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0089, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.17425612362321224, |
|
"grad_norm": 2.2288014888763428, |
|
"learning_rate": 0.0001, |
|
"loss": 1.136, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.1753520740862513, |
|
"grad_norm": 2.6622703075408936, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0395, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.17644802454929037, |
|
"grad_norm": 1.9478541612625122, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0658, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.17754397501232944, |
|
"grad_norm": 2.55828857421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9904, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.1786399254753685, |
|
"grad_norm": 2.533651828765869, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9733, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.1797358759384076, |
|
"grad_norm": 1.8745101690292358, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9903, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.18083182640144665, |
|
"grad_norm": 1.8459206819534302, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9095, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.18192777686448572, |
|
"grad_norm": 2.6654012203216553, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9854, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.18302372732752478, |
|
"grad_norm": 2.6444480419158936, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8857, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.18411967779056387, |
|
"grad_norm": 2.190462827682495, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9375, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.18521562825360294, |
|
"grad_norm": 2.8208882808685303, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9646, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.186311578716642, |
|
"grad_norm": 2.4978795051574707, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9724, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.18740752917968106, |
|
"grad_norm": 2.4202938079833984, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9659, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.18850347964272016, |
|
"grad_norm": 1.9026118516921997, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0321, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.18959943010575922, |
|
"grad_norm": 2.6031651496887207, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9622, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.19069538056879828, |
|
"grad_norm": 1.962509274482727, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0262, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.19179133103183735, |
|
"grad_norm": 2.794633626937866, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0626, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.19288728149487644, |
|
"grad_norm": 2.4276185035705566, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9961, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.1939832319579155, |
|
"grad_norm": 2.0747737884521484, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8945, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.19507918242095457, |
|
"grad_norm": 1.9151681661605835, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0664, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.19617513288399363, |
|
"grad_norm": 2.11547589302063, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9865, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.19727108334703272, |
|
"grad_norm": 2.359848737716675, |
|
"learning_rate": 0.0001, |
|
"loss": 0.95, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.19836703381007179, |
|
"grad_norm": 1.9854378700256348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9992, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.19946298427311085, |
|
"grad_norm": 2.476423978805542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9097, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2005589347361499, |
|
"grad_norm": 2.420011281967163, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0167, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.201654885199189, |
|
"grad_norm": 2.12312388420105, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9298, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.20275083566222807, |
|
"grad_norm": 1.9679986238479614, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0064, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.20384678612526713, |
|
"grad_norm": 2.608135461807251, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9396, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.2049427365883062, |
|
"grad_norm": 2.542102098464966, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0868, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.2060386870513453, |
|
"grad_norm": 2.5252091884613037, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0417, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.20713463751438435, |
|
"grad_norm": 1.98774254322052, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9949, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.20823058797742341, |
|
"grad_norm": 1.9502965211868286, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9862, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.20932653844046248, |
|
"grad_norm": 2.2537944316864014, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9087, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.21042248890350157, |
|
"grad_norm": 2.2866523265838623, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0128, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.21151843936654063, |
|
"grad_norm": 2.2907001972198486, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9654, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.2126143898295797, |
|
"grad_norm": 2.5648560523986816, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0269, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.21371034029261876, |
|
"grad_norm": 2.198974847793579, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9823, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.21480629075565785, |
|
"grad_norm": 2.1045591831207275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9139, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.21590224121869692, |
|
"grad_norm": 2.1462857723236084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9406, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.21699819168173598, |
|
"grad_norm": 2.3216285705566406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8597, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.21809414214477504, |
|
"grad_norm": 1.867150068283081, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9776, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.21919009260781414, |
|
"grad_norm": 2.3432791233062744, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9546, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.21919009260781414, |
|
"eval_loss": 0.9323587417602539, |
|
"eval_runtime": 30935.2713, |
|
"eval_samples_per_second": 2.098, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 63.836951720973865, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2202860430708532, |
|
"grad_norm": 1.9426536560058594, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9291, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.22138199353389226, |
|
"grad_norm": 2.693723201751709, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9072, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.22247794399693133, |
|
"grad_norm": 2.237900972366333, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8571, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.22357389445997042, |
|
"grad_norm": 2.739129066467285, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9132, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.22466984492300948, |
|
"grad_norm": 1.886438012123108, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9646, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.22576579538604855, |
|
"grad_norm": 2.3505897521972656, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0479, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.2268617458490876, |
|
"grad_norm": 2.4302868843078613, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9956, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.2279576963121267, |
|
"grad_norm": 2.2747528553009033, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9621, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.22905364677516576, |
|
"grad_norm": 2.312248945236206, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9292, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.23014959723820483, |
|
"grad_norm": 2.0439066886901855, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8804, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2312455477012439, |
|
"grad_norm": 2.615898609161377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9302, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.23234149816428298, |
|
"grad_norm": 2.306796073913574, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0401, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.23343744862732205, |
|
"grad_norm": 2.4527432918548584, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9195, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.2345333990903611, |
|
"grad_norm": 1.8589290380477905, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9284, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.23562934955340017, |
|
"grad_norm": 1.8492025136947632, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8898, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.23672530001643927, |
|
"grad_norm": 2.574871063232422, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0026, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.23782125047947833, |
|
"grad_norm": 2.2600936889648438, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0738, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.2389172009425174, |
|
"grad_norm": 2.35066556930542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8573, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.24001315140555646, |
|
"grad_norm": 2.165745496749878, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8989, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.24110910186859555, |
|
"grad_norm": 2.1494085788726807, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8292, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.2422050523316346, |
|
"grad_norm": 2.185359239578247, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8954, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.24330100279467368, |
|
"grad_norm": 2.193904161453247, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8944, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.24439695325771274, |
|
"grad_norm": 2.1101438999176025, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9059, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.24549290372075183, |
|
"grad_norm": 2.026642084121704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8978, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2465888541837909, |
|
"grad_norm": 2.0481228828430176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8835, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.24768480464682996, |
|
"grad_norm": 2.201350688934326, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9519, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.24878075510986902, |
|
"grad_norm": 1.852100133895874, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8458, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.24987670557290811, |
|
"grad_norm": 2.1303794384002686, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9092, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.25097265603594715, |
|
"grad_norm": 2.2715415954589844, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8931, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.25206860649898627, |
|
"grad_norm": 2.091785192489624, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8645, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 2.108103036880493, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8387, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.2542605074250644, |
|
"grad_norm": 2.083848237991333, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8315, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.25535645788810346, |
|
"grad_norm": 1.570475459098816, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9355, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.2564524083511425, |
|
"grad_norm": 1.90199875831604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8308, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.2575483588141816, |
|
"grad_norm": 2.1952812671661377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8618, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.25864430927722065, |
|
"grad_norm": 2.0530431270599365, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7951, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.2597402597402597, |
|
"grad_norm": 2.202252149581909, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8858, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.26083621020329883, |
|
"grad_norm": 1.9541796445846558, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8466, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.2619321606663379, |
|
"grad_norm": 1.9440534114837646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8488, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.26302811112937696, |
|
"grad_norm": 2.569821834564209, |
|
"learning_rate": 0.0001, |
|
"loss": 0.963, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.264124061592416, |
|
"grad_norm": 1.8896031379699707, |
|
"learning_rate": 0.0001, |
|
"loss": 0.837, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.2652200120554551, |
|
"grad_norm": 1.9390859603881836, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8855, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.26631596251849415, |
|
"grad_norm": 2.2261974811553955, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8901, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.2674119129815332, |
|
"grad_norm": 2.0486056804656982, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8073, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.2685078634445723, |
|
"grad_norm": 2.292015314102173, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9492, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.2696038139076114, |
|
"grad_norm": 2.0762240886688232, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8528, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.27069976437065046, |
|
"grad_norm": 1.870642066001892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9482, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.27179571483368953, |
|
"grad_norm": 2.436768054962158, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9299, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.2728916652967286, |
|
"grad_norm": 2.505880832672119, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9259, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.27398761575976766, |
|
"grad_norm": 1.717252492904663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8134, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.27398761575976766, |
|
"eval_loss": 0.8726964592933655, |
|
"eval_runtime": 30710.3822, |
|
"eval_samples_per_second": 2.113, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 52.213316533880224, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2750835662228067, |
|
"grad_norm": 2.28765869140625, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0229, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.2761795166858458, |
|
"grad_norm": 2.2264580726623535, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8291, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.27727546714888485, |
|
"grad_norm": 1.9387757778167725, |
|
"learning_rate": 0.0001, |
|
"loss": 0.821, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.27837141761192397, |
|
"grad_norm": 2.8628933429718018, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9521, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.27946736807496303, |
|
"grad_norm": 2.2691447734832764, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8182, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.2805633185380021, |
|
"grad_norm": 1.9515260457992554, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9342, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.28165926900104116, |
|
"grad_norm": 2.1714837551116943, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9663, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.2827552194640802, |
|
"grad_norm": 2.0159664154052734, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8294, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.2838511699271193, |
|
"grad_norm": 2.024634599685669, |
|
"learning_rate": 0.0001, |
|
"loss": 0.896, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.28494712039015835, |
|
"grad_norm": 2.0035595893859863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8446, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.2860430708531974, |
|
"grad_norm": 2.4142866134643555, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8835, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.28713902131623653, |
|
"grad_norm": 2.070338010787964, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8687, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.2882349717792756, |
|
"grad_norm": 1.9818578958511353, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8296, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.28933092224231466, |
|
"grad_norm": 1.8923412561416626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8999, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.2904268727053537, |
|
"grad_norm": 2.200206995010376, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8662, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.2915228231683928, |
|
"grad_norm": 1.982446551322937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8301, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.29261877363143185, |
|
"grad_norm": 1.934844732284546, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8219, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.2937147240944709, |
|
"grad_norm": 2.2790510654449463, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8666, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.29481067455751, |
|
"grad_norm": 1.771672248840332, |
|
"learning_rate": 0.0001, |
|
"loss": 0.843, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.2959066250205491, |
|
"grad_norm": 2.3459877967834473, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8516, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.29700257548358816, |
|
"grad_norm": 2.156458854675293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8425, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.2980985259466272, |
|
"grad_norm": 1.9492950439453125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8445, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.2991944764096663, |
|
"grad_norm": 2.1061997413635254, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8858, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.30029042687270535, |
|
"grad_norm": 2.3567299842834473, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8376, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.3013863773357444, |
|
"grad_norm": 2.1302335262298584, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8272, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.3024823277987835, |
|
"grad_norm": 2.2098424434661865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8742, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.30357827826182254, |
|
"grad_norm": 1.7558562755584717, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8863, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.30467422872486166, |
|
"grad_norm": 1.8461397886276245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8792, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.3057701791879007, |
|
"grad_norm": 2.0006344318389893, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8263, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.3068661296509398, |
|
"grad_norm": 1.6772565841674805, |
|
"learning_rate": 0.0001, |
|
"loss": 0.789, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.30796208011397885, |
|
"grad_norm": 1.9263228178024292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.842, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.3090580305770179, |
|
"grad_norm": 1.8888592720031738, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8475, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.310153981040057, |
|
"grad_norm": 2.2354602813720703, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0036, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.31124993150309604, |
|
"grad_norm": 1.9634332656860352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8517, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.3123458819661351, |
|
"grad_norm": 2.348825216293335, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8731, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.3134418324291742, |
|
"grad_norm": 2.487741708755493, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8556, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.3145377828922133, |
|
"grad_norm": 1.999516248703003, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7969, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.31563373335525235, |
|
"grad_norm": 1.9654616117477417, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7843, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.3167296838182914, |
|
"grad_norm": 2.1070950031280518, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8399, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.3178256342813305, |
|
"grad_norm": 2.257129192352295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8224, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.31892158474436955, |
|
"grad_norm": 1.8256118297576904, |
|
"learning_rate": 0.0001, |
|
"loss": 0.794, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.3200175352074086, |
|
"grad_norm": 1.8899625539779663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8614, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.3211134856704477, |
|
"grad_norm": 2.221484661102295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.765, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.3222094361334868, |
|
"grad_norm": 1.796877384185791, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8359, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.32330538659652586, |
|
"grad_norm": 1.7495447397232056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8688, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.3244013370595649, |
|
"grad_norm": 2.136664628982544, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9163, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.325497287522604, |
|
"grad_norm": 1.8508238792419434, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7975, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.32659323798564305, |
|
"grad_norm": 2.144523859024048, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7749, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.3276891884486821, |
|
"grad_norm": 2.208815336227417, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8148, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.3287851389117212, |
|
"grad_norm": 2.0617401599884033, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8884, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3287851389117212, |
|
"eval_loss": 0.8316722512245178, |
|
"eval_runtime": 30850.8589, |
|
"eval_samples_per_second": 2.103, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 45.9960352377659, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.32988108937476024, |
|
"grad_norm": 2.0406434535980225, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8504, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.33097703983779936, |
|
"grad_norm": 2.1899139881134033, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7782, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.3320729903008384, |
|
"grad_norm": 2.650421380996704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7823, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.3331689407638775, |
|
"grad_norm": 2.085683584213257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.754, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.33426489122691655, |
|
"grad_norm": 2.1783502101898193, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8819, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.3353608416899556, |
|
"grad_norm": 2.096208333969116, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8702, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.3364567921529947, |
|
"grad_norm": 2.005629062652588, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8827, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.33755274261603374, |
|
"grad_norm": 2.1545634269714355, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8496, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.3386486930790728, |
|
"grad_norm": 1.8190851211547852, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7622, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.3397446435421119, |
|
"grad_norm": 1.9555623531341553, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8338, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.340840594005151, |
|
"grad_norm": 1.8530341386795044, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8017, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.34193654446819005, |
|
"grad_norm": 1.8724114894866943, |
|
"learning_rate": 0.0001, |
|
"loss": 0.848, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.3430324949312291, |
|
"grad_norm": 1.8598796129226685, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8074, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.3441284453942682, |
|
"grad_norm": 2.1442923545837402, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8473, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.34522439585730724, |
|
"grad_norm": 2.3083174228668213, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9016, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.3463203463203463, |
|
"grad_norm": 1.8194735050201416, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8267, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.34741629678338537, |
|
"grad_norm": 2.063523054122925, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7841, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.3485122472464245, |
|
"grad_norm": 2.17594051361084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8318, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.34960819770946355, |
|
"grad_norm": 1.665189504623413, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7983, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.3507041481725026, |
|
"grad_norm": 2.2596445083618164, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8421, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.3518000986355417, |
|
"grad_norm": 1.7096545696258545, |
|
"learning_rate": 0.0001, |
|
"loss": 0.889, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.35289604909858074, |
|
"grad_norm": 1.7475535869598389, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8006, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.3539919995616198, |
|
"grad_norm": 1.8176007270812988, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8632, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.35508795002465887, |
|
"grad_norm": 2.6806535720825195, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8427, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.35618390048769794, |
|
"grad_norm": 2.094172477722168, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7812, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.357279850950737, |
|
"grad_norm": 1.8341765403747559, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8051, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.3583758014137761, |
|
"grad_norm": 2.2341349124908447, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8001, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.3594717518768152, |
|
"grad_norm": 2.1017801761627197, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8142, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.36056770233985425, |
|
"grad_norm": 1.9903994798660278, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8117, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.3616636528028933, |
|
"grad_norm": 2.273465394973755, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8864, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.3627596032659324, |
|
"grad_norm": 2.0767428874969482, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7687, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.36385555372897144, |
|
"grad_norm": 2.559774398803711, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8181, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.3649515041920105, |
|
"grad_norm": 2.1393582820892334, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7936, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.36604745465504956, |
|
"grad_norm": 2.06675386428833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8263, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.3671434051180887, |
|
"grad_norm": 1.7674784660339355, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7818, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.36823935558112775, |
|
"grad_norm": 1.765442132949829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8335, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3693353060441668, |
|
"grad_norm": 2.044288158416748, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8742, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.3704312565072059, |
|
"grad_norm": 1.9821726083755493, |
|
"learning_rate": 0.0001, |
|
"loss": 0.928, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.37152720697024494, |
|
"grad_norm": 2.0798370838165283, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7627, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.372623157433284, |
|
"grad_norm": 1.6817582845687866, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7985, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.37371910789632307, |
|
"grad_norm": 1.872247576713562, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8102, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.37481505835936213, |
|
"grad_norm": 1.7761516571044922, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8435, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.37591100882240125, |
|
"grad_norm": 1.739585518836975, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8706, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.3770069592854403, |
|
"grad_norm": 2.0503687858581543, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8354, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.3781029097484794, |
|
"grad_norm": 2.283393621444702, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7476, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.37919886021151844, |
|
"grad_norm": 1.801018238067627, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7817, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.3802948106745575, |
|
"grad_norm": 2.5343267917633057, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7628, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.38139076113759657, |
|
"grad_norm": 2.010507822036743, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7931, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.38248671160063563, |
|
"grad_norm": 1.7228796482086182, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7517, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.3835826620636747, |
|
"grad_norm": 1.967822551727295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.804, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3835826620636747, |
|
"eval_loss": 0.7978512644767761, |
|
"eval_runtime": 30977.7517, |
|
"eval_samples_per_second": 2.095, |
|
"eval_steps_per_second": 0.065, |
|
"eval_wer": 61.261910549759826, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3846786125267138, |
|
"grad_norm": 1.9999229907989502, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7634, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.3857745629897529, |
|
"grad_norm": 1.956128716468811, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8102, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.38687051345279194, |
|
"grad_norm": 2.0134966373443604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7957, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.387966463915831, |
|
"grad_norm": 2.0373167991638184, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8251, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.38906241437887007, |
|
"grad_norm": 1.7772964239120483, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8128, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.39015836484190913, |
|
"grad_norm": 1.7618379592895508, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8345, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.3912543153049482, |
|
"grad_norm": 2.181671380996704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8345, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.39235026576798726, |
|
"grad_norm": 1.8794726133346558, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7615, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.3934462162310264, |
|
"grad_norm": 1.9297798871994019, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7618, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.39454216669406544, |
|
"grad_norm": 1.9441471099853516, |
|
"learning_rate": 0.0001, |
|
"loss": 0.859, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.3956381171571045, |
|
"grad_norm": 2.2561404705047607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7877, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.39673406762014357, |
|
"grad_norm": 1.8441416025161743, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7734, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.39783001808318263, |
|
"grad_norm": 1.686120867729187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7066, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.3989259685462217, |
|
"grad_norm": 1.9456263780593872, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7469, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.40002191900926076, |
|
"grad_norm": 1.9112725257873535, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7607, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.4011178694722998, |
|
"grad_norm": 2.5668513774871826, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7859, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.40221381993533895, |
|
"grad_norm": 1.9502942562103271, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7607, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.403309770398378, |
|
"grad_norm": 1.6973525285720825, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8313, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.4044057208614171, |
|
"grad_norm": 2.3962297439575195, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7806, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.40550167132445614, |
|
"grad_norm": 1.887536883354187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7524, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.4065976217874952, |
|
"grad_norm": 1.999687910079956, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7349, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.40769357225053426, |
|
"grad_norm": 1.7444576025009155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8156, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.40878952271357333, |
|
"grad_norm": 1.7175132036209106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7419, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.4098854731766124, |
|
"grad_norm": 2.23638653755188, |
|
"learning_rate": 0.0001, |
|
"loss": 0.666, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.4109814236396515, |
|
"grad_norm": 2.024102210998535, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7541, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4120773741026906, |
|
"grad_norm": 2.042541265487671, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7915, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.41317332456572964, |
|
"grad_norm": 1.9140897989273071, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8712, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.4142692750287687, |
|
"grad_norm": 1.8435416221618652, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8241, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.41536522549180777, |
|
"grad_norm": 2.027944803237915, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9422, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.41646117595484683, |
|
"grad_norm": 2.07381534576416, |
|
"learning_rate": 0.0001, |
|
"loss": 0.812, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.4175571264178859, |
|
"grad_norm": 1.9762136936187744, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7852, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.41865307688092496, |
|
"grad_norm": 1.8222426176071167, |
|
"learning_rate": 0.0001, |
|
"loss": 0.752, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.4197490273439641, |
|
"grad_norm": 2.0519089698791504, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8031, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.42084497780700314, |
|
"grad_norm": 1.8777110576629639, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8173, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.4219409282700422, |
|
"grad_norm": 2.323411703109741, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8479, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.42303687873308127, |
|
"grad_norm": 1.6403400897979736, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7567, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.42413282919612033, |
|
"grad_norm": 1.6627925634384155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7734, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.4252287796591594, |
|
"grad_norm": 1.8771709203720093, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7652, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.42632473012219846, |
|
"grad_norm": 1.9806597232818604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7699, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.4274206805852375, |
|
"grad_norm": 2.1376988887786865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7825, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.42851663104827664, |
|
"grad_norm": 1.5566449165344238, |
|
"learning_rate": 0.0001, |
|
"loss": 0.704, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.4296125815113157, |
|
"grad_norm": 2.1835947036743164, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8101, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.43070853197435477, |
|
"grad_norm": 2.055119037628174, |
|
"learning_rate": 0.0001, |
|
"loss": 0.703, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.43180448243739383, |
|
"grad_norm": 1.9324967861175537, |
|
"learning_rate": 0.0001, |
|
"loss": 0.81, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.4329004329004329, |
|
"grad_norm": 2.1087846755981445, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7676, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.43399638336347196, |
|
"grad_norm": 1.8521897792816162, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7546, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.435092333826511, |
|
"grad_norm": 2.145947217941284, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7992, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.4361882842895501, |
|
"grad_norm": 1.7739931344985962, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7133, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.4372842347525892, |
|
"grad_norm": 1.6032921075820923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8207, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.43838018521562827, |
|
"grad_norm": 2.1895668506622314, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7638, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.43838018521562827, |
|
"eval_loss": 0.770411491394043, |
|
"eval_runtime": 30675.7059, |
|
"eval_samples_per_second": 2.115, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 43.10069742838263, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.43947613567866733, |
|
"grad_norm": 1.9759962558746338, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7792, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.4405720861417064, |
|
"grad_norm": 1.845012903213501, |
|
"learning_rate": 0.0001, |
|
"loss": 0.847, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.44166803660474546, |
|
"grad_norm": 1.9666188955307007, |
|
"learning_rate": 0.0001, |
|
"loss": 0.767, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.4427639870677845, |
|
"grad_norm": 2.1448235511779785, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7924, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.4438599375308236, |
|
"grad_norm": 1.9017919301986694, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7239, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.44495588799386265, |
|
"grad_norm": 1.8005828857421875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7202, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.4460518384569018, |
|
"grad_norm": 1.7341022491455078, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7045, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.44714778891994084, |
|
"grad_norm": 2.094618320465088, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8067, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.4482437393829799, |
|
"grad_norm": 2.0414187908172607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6888, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.44933968984601896, |
|
"grad_norm": 1.8842118978500366, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7125, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.450435640309058, |
|
"grad_norm": 1.9878696203231812, |
|
"learning_rate": 0.0001, |
|
"loss": 0.723, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.4515315907720971, |
|
"grad_norm": 1.94351065158844, |
|
"learning_rate": 0.0001, |
|
"loss": 0.727, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.45262754123513615, |
|
"grad_norm": 1.900718331336975, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7306, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.4537234916981752, |
|
"grad_norm": 2.5974204540252686, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7968, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.45481944216121434, |
|
"grad_norm": 1.9214075803756714, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7767, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.4559153926242534, |
|
"grad_norm": 2.6079931259155273, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7787, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.45701134308729247, |
|
"grad_norm": 1.8398691415786743, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7941, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.45810729355033153, |
|
"grad_norm": 1.740376591682434, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7714, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.4592032440133706, |
|
"grad_norm": 2.109416961669922, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8015, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.46029919447640966, |
|
"grad_norm": 1.9565001726150513, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7473, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.4613951449394487, |
|
"grad_norm": 1.88534414768219, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7828, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.4624910954024878, |
|
"grad_norm": 1.7713934183120728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7289, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.4635870458655269, |
|
"grad_norm": 1.9173312187194824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7478, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.46468299632856597, |
|
"grad_norm": 1.6866717338562012, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8235, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.46577894679160503, |
|
"grad_norm": 1.6713476181030273, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7216, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.4668748972546441, |
|
"grad_norm": 1.9601606130599976, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6994, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.46797084771768316, |
|
"grad_norm": 1.7472949028015137, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7694, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.4690667981807222, |
|
"grad_norm": 1.8540037870407104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7253, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.4701627486437613, |
|
"grad_norm": 2.0671746730804443, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7514, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.47125869910680035, |
|
"grad_norm": 1.900918960571289, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7871, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.47235464956983947, |
|
"grad_norm": 1.7465757131576538, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8009, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.47345060003287853, |
|
"grad_norm": 2.3400652408599854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7741, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.4745465504959176, |
|
"grad_norm": 2.1384716033935547, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7577, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.47564250095895666, |
|
"grad_norm": 2.7113006114959717, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6968, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.4767384514219957, |
|
"grad_norm": 1.6666728258132935, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7307, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.4778344018850348, |
|
"grad_norm": 1.8394851684570312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7353, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.47893035234807385, |
|
"grad_norm": 2.0569512844085693, |
|
"learning_rate": 0.0001, |
|
"loss": 0.814, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.4800263028111129, |
|
"grad_norm": 1.6457910537719727, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7521, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.48112225327415203, |
|
"grad_norm": 2.010711908340454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7101, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.4822182037371911, |
|
"grad_norm": 2.422718048095703, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7867, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.48331415420023016, |
|
"grad_norm": 1.5170652866363525, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8042, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.4844101046632692, |
|
"grad_norm": 1.9751352071762085, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7408, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.4855060551263083, |
|
"grad_norm": 1.8477592468261719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7675, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.48660200558934735, |
|
"grad_norm": 1.9999114274978638, |
|
"learning_rate": 0.0001, |
|
"loss": 0.745, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.4876979560523864, |
|
"grad_norm": 1.7456104755401611, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7713, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.4887939065154255, |
|
"grad_norm": 1.9687026739120483, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7349, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.4898898569784646, |
|
"grad_norm": 1.8585296869277954, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7369, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.49098580744150366, |
|
"grad_norm": 2.7875003814697266, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7002, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.4920817579045427, |
|
"grad_norm": 2.01347017288208, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7598, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.4931777083675818, |
|
"grad_norm": 1.8863261938095093, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7617, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4931777083675818, |
|
"eval_loss": 0.7443549036979675, |
|
"eval_runtime": 31204.044, |
|
"eval_samples_per_second": 2.079, |
|
"eval_steps_per_second": 0.065, |
|
"eval_wer": 44.168238762227254, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.49427365883062085, |
|
"grad_norm": 2.0402464866638184, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8118, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.4953696092936599, |
|
"grad_norm": 2.072380304336548, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6703, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.496465559756699, |
|
"grad_norm": 1.9627012014389038, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7547, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.49756151021973805, |
|
"grad_norm": 1.904860496520996, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8141, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.49865746068277716, |
|
"grad_norm": 2.153672933578491, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8167, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.49975341114581623, |
|
"grad_norm": 2.0599303245544434, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8632, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.5008493616088553, |
|
"grad_norm": 1.9562146663665771, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7477, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.5019453120718943, |
|
"grad_norm": 2.086508274078369, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7973, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.5030412625349334, |
|
"grad_norm": 1.9192993640899658, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8359, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.5041372129979725, |
|
"grad_norm": 1.9085866212844849, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7942, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.5052331634610115, |
|
"grad_norm": 1.901637315750122, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7608, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.5063291139240507, |
|
"grad_norm": 2.145914316177368, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6919, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.5074250643870897, |
|
"grad_norm": 1.9005271196365356, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7506, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.5085210148501288, |
|
"grad_norm": 1.6468952894210815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7843, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.5096169653131678, |
|
"grad_norm": 1.7703279256820679, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7192, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.5107129157762069, |
|
"grad_norm": 2.0094175338745117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.847, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.511808866239246, |
|
"grad_norm": 2.0970561504364014, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7679, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.512904816702285, |
|
"grad_norm": 1.757664680480957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7391, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.5140007671653242, |
|
"grad_norm": 1.8297368288040161, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8382, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.5150967176283632, |
|
"grad_norm": 1.9832725524902344, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7226, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.5161926680914023, |
|
"grad_norm": 1.7083086967468262, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7798, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.5172886185544413, |
|
"grad_norm": 1.7105575799942017, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7118, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.5183845690174804, |
|
"grad_norm": 1.547608494758606, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7259, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.5194805194805194, |
|
"grad_norm": 2.0215799808502197, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7648, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.5205764699435586, |
|
"grad_norm": 1.5999863147735596, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7863, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5216724204065977, |
|
"grad_norm": 2.0813591480255127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.758, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.5227683708696367, |
|
"grad_norm": 1.6513686180114746, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7735, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.5238643213326758, |
|
"grad_norm": 1.49434232711792, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6547, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.5249602717957148, |
|
"grad_norm": 1.8316184282302856, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7428, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.5260562222587539, |
|
"grad_norm": 2.0041682720184326, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7058, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.5271521727217929, |
|
"grad_norm": 1.9916651248931885, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7049, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.528248123184832, |
|
"grad_norm": 1.8289718627929688, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7179, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.5293440736478712, |
|
"grad_norm": 1.7447452545166016, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7432, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.5304400241109102, |
|
"grad_norm": 2.375234365463257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6676, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.5315359745739493, |
|
"grad_norm": 1.683435320854187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7269, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.5326319250369883, |
|
"grad_norm": 1.6535717248916626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7315, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.5337278755000274, |
|
"grad_norm": 1.5276830196380615, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7382, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.5348238259630664, |
|
"grad_norm": 1.8443965911865234, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7471, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.5359197764261056, |
|
"grad_norm": 2.0346148014068604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7268, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.5370157268891446, |
|
"grad_norm": 1.750613808631897, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8444, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.5381116773521837, |
|
"grad_norm": 1.9546024799346924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6968, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.5392076278152228, |
|
"grad_norm": 1.6618010997772217, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7222, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.5403035782782618, |
|
"grad_norm": 1.6404950618743896, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6896, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.5413995287413009, |
|
"grad_norm": 1.7741234302520752, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7412, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.5424954792043399, |
|
"grad_norm": 1.8278882503509521, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7385, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.5435914296673791, |
|
"grad_norm": 1.6102566719055176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7461, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.5446873801304181, |
|
"grad_norm": 1.7899205684661865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6349, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.5457833305934572, |
|
"grad_norm": 1.9663938283920288, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8028, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.5468792810564962, |
|
"grad_norm": 1.841476559638977, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7503, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.5479752315195353, |
|
"grad_norm": 1.9106056690216064, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7097, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5479752315195353, |
|
"eval_loss": 0.7265371084213257, |
|
"eval_runtime": 30536.1813, |
|
"eval_samples_per_second": 2.125, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 42.517110448415295, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5490711819825744, |
|
"grad_norm": 1.680649995803833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6445, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.5501671324456134, |
|
"grad_norm": 2.079050064086914, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7648, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.5512630829086526, |
|
"grad_norm": 1.4419294595718384, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6953, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.5523590333716916, |
|
"grad_norm": 1.9906927347183228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.749, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.5534549838347307, |
|
"grad_norm": 1.7384852170944214, |
|
"learning_rate": 0.0001, |
|
"loss": 0.745, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.5545509342977697, |
|
"grad_norm": 1.7342479228973389, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7687, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.5556468847608088, |
|
"grad_norm": 1.887969970703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7662, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.5567428352238479, |
|
"grad_norm": 1.6345020532608032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7843, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.5578387856868869, |
|
"grad_norm": 1.5596251487731934, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6983, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.5589347361499261, |
|
"grad_norm": 1.6423192024230957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6622, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.5600306866129651, |
|
"grad_norm": 1.7268792390823364, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8409, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.5611266370760042, |
|
"grad_norm": 1.6870604753494263, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7801, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.5622225875390432, |
|
"grad_norm": 1.5945113897323608, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6695, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.5633185380020823, |
|
"grad_norm": 1.7995914220809937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7088, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.5644144884651213, |
|
"grad_norm": 1.8924362659454346, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7621, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.5655104389281604, |
|
"grad_norm": 1.5099490880966187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6923, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.5666063893911996, |
|
"grad_norm": 1.481195092201233, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6801, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.5677023398542386, |
|
"grad_norm": 1.9247808456420898, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7247, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.5687982903172777, |
|
"grad_norm": 1.721666693687439, |
|
"learning_rate": 0.0001, |
|
"loss": 0.85, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.5698942407803167, |
|
"grad_norm": 1.981312870979309, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6894, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.5709901912433558, |
|
"grad_norm": 1.825363039970398, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7017, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.5720861417063948, |
|
"grad_norm": 2.021385669708252, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7996, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.5731820921694339, |
|
"grad_norm": 1.9287372827529907, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7387, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.5742780426324731, |
|
"grad_norm": 2.0109355449676514, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7359, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.5753739930955121, |
|
"grad_norm": 1.7715758085250854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7126, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.5764699435585512, |
|
"grad_norm": 1.5866303443908691, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6808, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.5775658940215902, |
|
"grad_norm": 1.3831912279129028, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7251, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.5786618444846293, |
|
"grad_norm": 1.603388786315918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6497, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.5797577949476683, |
|
"grad_norm": 1.8507051467895508, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7247, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.5808537454107074, |
|
"grad_norm": 2.240337610244751, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7879, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.5819496958737465, |
|
"grad_norm": 1.858344316482544, |
|
"learning_rate": 0.0001, |
|
"loss": 0.647, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.5830456463367856, |
|
"grad_norm": 1.840640664100647, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6924, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.5841415967998247, |
|
"grad_norm": 2.0423295497894287, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6762, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.5852375472628637, |
|
"grad_norm": 1.7426679134368896, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7824, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.5863334977259028, |
|
"grad_norm": 1.5974029302597046, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6874, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.5874294481889418, |
|
"grad_norm": 1.6082810163497925, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6916, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.588525398651981, |
|
"grad_norm": 1.6124242544174194, |
|
"learning_rate": 0.0001, |
|
"loss": 0.676, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.58962134911502, |
|
"grad_norm": 1.9140983819961548, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6281, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.5907172995780591, |
|
"grad_norm": 1.708742618560791, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7245, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.5918132500410982, |
|
"grad_norm": 2.36368989944458, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6934, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.5929092005041372, |
|
"grad_norm": 1.9806820154190063, |
|
"learning_rate": 0.0001, |
|
"loss": 0.677, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.5940051509671763, |
|
"grad_norm": 1.893801212310791, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7629, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.5951011014302153, |
|
"grad_norm": 1.917204737663269, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7836, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.5961970518932544, |
|
"grad_norm": 1.5599673986434937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7132, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.5972930023562935, |
|
"grad_norm": 1.9569772481918335, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7466, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.5983889528193326, |
|
"grad_norm": 1.8709198236465454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6456, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.5994849032823716, |
|
"grad_norm": 1.8249480724334717, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7159, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.6005808537454107, |
|
"grad_norm": 1.7063779830932617, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7093, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.6016768042084498, |
|
"grad_norm": 1.681219220161438, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6995, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.6027727546714888, |
|
"grad_norm": 1.640663504600525, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7045, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.6027727546714888, |
|
"eval_loss": 0.7112395763397217, |
|
"eval_runtime": 30901.8839, |
|
"eval_samples_per_second": 2.1, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 47.33354332649714, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.603868705134528, |
|
"grad_norm": 1.7204805612564087, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7217, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.604964655597567, |
|
"grad_norm": 1.507012128829956, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7482, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 1.7084465026855469, |
|
"learning_rate": 0.0001, |
|
"loss": 0.645, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.6071565565236451, |
|
"grad_norm": 1.73207426071167, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7687, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.6082525069866842, |
|
"grad_norm": 2.2146365642547607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7771, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.6093484574497233, |
|
"grad_norm": 1.6794184446334839, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6613, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.6104444079127623, |
|
"grad_norm": 1.8254398107528687, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6787, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.6115403583758015, |
|
"grad_norm": 1.8397271633148193, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7119, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.6126363088388405, |
|
"grad_norm": 1.8676248788833618, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7294, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.6137322593018796, |
|
"grad_norm": 1.4971026182174683, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6312, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.6148282097649186, |
|
"grad_norm": 1.8128615617752075, |
|
"learning_rate": 0.0001, |
|
"loss": 0.653, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.6159241602279577, |
|
"grad_norm": 1.426620364189148, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7087, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.6170201106909967, |
|
"grad_norm": 1.4840887784957886, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6665, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.6181160611540358, |
|
"grad_norm": 1.7882121801376343, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7236, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.619212011617075, |
|
"grad_norm": 1.8195546865463257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7998, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.620307962080114, |
|
"grad_norm": 1.9482252597808838, |
|
"learning_rate": 0.0001, |
|
"loss": 0.751, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.6214039125431531, |
|
"grad_norm": 2.1224782466888428, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7518, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.6224998630061921, |
|
"grad_norm": 1.811909556388855, |
|
"learning_rate": 0.0001, |
|
"loss": 0.679, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.6235958134692312, |
|
"grad_norm": 2.0843353271484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7381, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.6246917639322702, |
|
"grad_norm": 1.5517933368682861, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7318, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.6257877143953093, |
|
"grad_norm": 1.3482716083526611, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6999, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.6268836648583485, |
|
"grad_norm": 1.548904299736023, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8772, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.6279796153213875, |
|
"grad_norm": 1.553775429725647, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6479, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.6290755657844266, |
|
"grad_norm": 2.0762696266174316, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6457, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.6301715162474656, |
|
"grad_norm": 1.9620105028152466, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7098, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.6312674667105047, |
|
"grad_norm": 1.382176399230957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.686, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.6323634171735437, |
|
"grad_norm": 1.9390108585357666, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6882, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.6334593676365828, |
|
"grad_norm": 1.7750768661499023, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6942, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.6345553180996218, |
|
"grad_norm": 1.8459293842315674, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7118, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.635651268562661, |
|
"grad_norm": 1.8210084438323975, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7166, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.6367472190257001, |
|
"grad_norm": 1.7728508710861206, |
|
"learning_rate": 0.0001, |
|
"loss": 0.692, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.6378431694887391, |
|
"grad_norm": 1.7886627912521362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7185, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.6389391199517782, |
|
"grad_norm": 1.895150065422058, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7174, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.6400350704148172, |
|
"grad_norm": 1.8740530014038086, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6893, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.6411310208778563, |
|
"grad_norm": 1.6588834524154663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7073, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.6422269713408953, |
|
"grad_norm": 1.9573453664779663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.671, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.6433229218039345, |
|
"grad_norm": 1.7064661979675293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7401, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.6444188722669736, |
|
"grad_norm": 1.8850706815719604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7397, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.6455148227300126, |
|
"grad_norm": 1.7744836807250977, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7204, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.6466107731930517, |
|
"grad_norm": 1.5768756866455078, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7868, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.6477067236560907, |
|
"grad_norm": 2.0770552158355713, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7434, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.6488026741191298, |
|
"grad_norm": 2.0797810554504395, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7342, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.6498986245821688, |
|
"grad_norm": 1.8984261751174927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6642, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.650994575045208, |
|
"grad_norm": 2.00124192237854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.749, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.652090525508247, |
|
"grad_norm": 1.5575506687164307, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8315, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.6531864759712861, |
|
"grad_norm": 2.6183197498321533, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7533, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.6542824264343252, |
|
"grad_norm": 1.7211464643478394, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7073, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.6553783768973642, |
|
"grad_norm": 1.9105095863342285, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6526, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.6564743273604033, |
|
"grad_norm": 1.9578741788864136, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6746, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.6575702778234424, |
|
"grad_norm": 1.8473331928253174, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6808, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6575702778234424, |
|
"eval_loss": 0.6952778100967407, |
|
"eval_runtime": 30743.2328, |
|
"eval_samples_per_second": 2.111, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 51.05538683822195, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6586662282864815, |
|
"grad_norm": 1.7486096620559692, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7417, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.6597621787495205, |
|
"grad_norm": 1.6540303230285645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7118, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.6608581292125596, |
|
"grad_norm": 1.89935302734375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6315, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.6619540796755987, |
|
"grad_norm": 1.8266342878341675, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7434, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.6630500301386377, |
|
"grad_norm": 1.8254984617233276, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7832, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.6641459806016768, |
|
"grad_norm": 2.0791878700256348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6694, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.6652419310647159, |
|
"grad_norm": 1.8277227878570557, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6686, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.666337881527755, |
|
"grad_norm": 1.697810411453247, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6895, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.667433831990794, |
|
"grad_norm": 1.6084686517715454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7431, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.6685297824538331, |
|
"grad_norm": 1.7437437772750854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6851, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.6696257329168721, |
|
"grad_norm": 1.849237322807312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6926, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.6707216833799112, |
|
"grad_norm": 1.8398326635360718, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7282, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.6718176338429503, |
|
"grad_norm": 2.056136131286621, |
|
"learning_rate": 0.0001, |
|
"loss": 0.76, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.6729135843059894, |
|
"grad_norm": 1.8255378007888794, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7155, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.6740095347690285, |
|
"grad_norm": 1.6555898189544678, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6333, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.6751054852320675, |
|
"grad_norm": 1.825000286102295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6603, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.6762014356951066, |
|
"grad_norm": 1.5000559091567993, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6861, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.6772973861581456, |
|
"grad_norm": 1.826874017715454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7337, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.6783933366211847, |
|
"grad_norm": 2.042325735092163, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7566, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.6794892870842238, |
|
"grad_norm": 1.6419124603271484, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6825, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.6805852375472629, |
|
"grad_norm": 2.1221911907196045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7013, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.681681188010302, |
|
"grad_norm": 1.598191738128662, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6976, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.682777138473341, |
|
"grad_norm": 1.8890109062194824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6792, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.6838730889363801, |
|
"grad_norm": 1.7647831439971924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6878, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.6849690393994191, |
|
"grad_norm": 1.358193278312683, |
|
"learning_rate": 0.0001, |
|
"loss": 0.754, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.6860649898624582, |
|
"grad_norm": 1.9739768505096436, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6799, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.6871609403254972, |
|
"grad_norm": 1.532867670059204, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7063, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.6882568907885364, |
|
"grad_norm": 1.3203604221343994, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6725, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.6893528412515755, |
|
"grad_norm": 1.835530161857605, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6447, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.6904487917146145, |
|
"grad_norm": 1.4508098363876343, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7137, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.6915447421776536, |
|
"grad_norm": 1.520942211151123, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6454, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.6926406926406926, |
|
"grad_norm": 1.655716061592102, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6659, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.6937366431037317, |
|
"grad_norm": 1.5934149026870728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7084, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.6948325935667707, |
|
"grad_norm": 1.961393117904663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7553, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.6959285440298099, |
|
"grad_norm": 1.4186025857925415, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7205, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.697024494492849, |
|
"grad_norm": 1.6756350994110107, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6166, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.698120444955888, |
|
"grad_norm": 1.8438879251480103, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6837, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.6992163954189271, |
|
"grad_norm": 1.5732409954071045, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7777, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.7003123458819661, |
|
"grad_norm": 1.8927737474441528, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6978, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.7014082963450052, |
|
"grad_norm": 1.4720592498779297, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7043, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.7025042468080442, |
|
"grad_norm": 1.8671678304672241, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7245, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.7036001972710834, |
|
"grad_norm": 1.5541017055511475, |
|
"learning_rate": 0.0001, |
|
"loss": 0.728, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.7046961477341224, |
|
"grad_norm": 1.6623157262802124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7211, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.7057920981971615, |
|
"grad_norm": 2.1644530296325684, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7958, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.7068880486602006, |
|
"grad_norm": 1.4526203870773315, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6385, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.7079839991232396, |
|
"grad_norm": 1.586296796798706, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7759, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.7090799495862787, |
|
"grad_norm": 1.8547158241271973, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7478, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.7101759000493177, |
|
"grad_norm": 1.46295964717865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5804, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.7112718505123569, |
|
"grad_norm": 1.8653600215911865, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6874, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.7123678009753959, |
|
"grad_norm": 1.8301453590393066, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6886, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.7123678009753959, |
|
"eval_loss": 0.6790329217910767, |
|
"eval_runtime": 30731.9197, |
|
"eval_samples_per_second": 2.111, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 52.619379401724906, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.713463751438435, |
|
"grad_norm": 2.6560330390930176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6422, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.714559701901474, |
|
"grad_norm": 1.9559868574142456, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7321, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.7156556523645131, |
|
"grad_norm": 1.8091590404510498, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6303, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.7167516028275522, |
|
"grad_norm": 1.428688406944275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6489, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.7178475532905912, |
|
"grad_norm": 1.6543529033660889, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6793, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.7189435037536304, |
|
"grad_norm": 2.012596368789673, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6502, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.7200394542166694, |
|
"grad_norm": 2.0701732635498047, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6746, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.7211354046797085, |
|
"grad_norm": 1.5318336486816406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6142, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.7222313551427475, |
|
"grad_norm": 1.7924253940582275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6536, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.7233273056057866, |
|
"grad_norm": 1.8197805881500244, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7804, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.7244232560688257, |
|
"grad_norm": 1.5444835424423218, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6019, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.7255192065318647, |
|
"grad_norm": 1.735474705696106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5891, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.7266151569949039, |
|
"grad_norm": 1.9891881942749023, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6498, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.7277111074579429, |
|
"grad_norm": 1.6917784214019775, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7044, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.728807057920982, |
|
"grad_norm": 1.661033034324646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6185, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.729903008384021, |
|
"grad_norm": 2.326937198638916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7274, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.7309989588470601, |
|
"grad_norm": 1.6929740905761719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6452, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.7320949093100991, |
|
"grad_norm": 1.7399369478225708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6243, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.7331908597731382, |
|
"grad_norm": 1.7071975469589233, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6578, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.7342868102361774, |
|
"grad_norm": 1.7368084192276, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6337, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.7353827606992164, |
|
"grad_norm": 2.0973663330078125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7101, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.7364787111622555, |
|
"grad_norm": 1.636421799659729, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6958, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.7375746616252945, |
|
"grad_norm": 1.6134982109069824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7336, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.7386706120883336, |
|
"grad_norm": 1.8911906480789185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6723, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.7397665625513726, |
|
"grad_norm": 1.8372421264648438, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6748, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.7408625130144117, |
|
"grad_norm": 1.8735203742980957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7481, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.7419584634774509, |
|
"grad_norm": 1.7684818506240845, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6395, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 0.7430544139404899, |
|
"grad_norm": 1.8018254041671753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8523, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 0.744150364403529, |
|
"grad_norm": 1.7703465223312378, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6938, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 0.745246314866568, |
|
"grad_norm": 1.6299625635147095, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6341, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.7463422653296071, |
|
"grad_norm": 2.0545101165771484, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7248, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 0.7474382157926461, |
|
"grad_norm": 1.5883153676986694, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6917, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.7485341662556853, |
|
"grad_norm": 1.5751030445098877, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6789, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 0.7496301167187243, |
|
"grad_norm": 1.53587806224823, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6462, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 0.7507260671817634, |
|
"grad_norm": 1.5108363628387451, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7585, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.7518220176448025, |
|
"grad_norm": 1.5622588396072388, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6466, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 0.7529179681078415, |
|
"grad_norm": 1.9326175451278687, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7086, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 0.7540139185708806, |
|
"grad_norm": 1.7847191095352173, |
|
"learning_rate": 0.0001, |
|
"loss": 0.661, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.7551098690339196, |
|
"grad_norm": 2.1520116329193115, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5861, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 0.7562058194969588, |
|
"grad_norm": 1.9346301555633545, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7735, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.7573017699599978, |
|
"grad_norm": 1.5564959049224854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6997, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 0.7583977204230369, |
|
"grad_norm": 1.848569393157959, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6836, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 0.759493670886076, |
|
"grad_norm": 1.5552887916564941, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6014, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 0.760589621349115, |
|
"grad_norm": 1.5576545000076294, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7034, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 0.7616855718121541, |
|
"grad_norm": 1.795949935913086, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7322, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.7627815222751931, |
|
"grad_norm": 1.498818039894104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.697, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 0.7638774727382323, |
|
"grad_norm": 1.7154011726379395, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7057, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 0.7649734232012713, |
|
"grad_norm": 1.693199872970581, |
|
"learning_rate": 0.0001, |
|
"loss": 0.722, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 0.7660693736643104, |
|
"grad_norm": 1.7617517709732056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.727, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 0.7671653241273494, |
|
"grad_norm": 1.7693978548049927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6552, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7671653241273494, |
|
"eval_loss": 0.6688939929008484, |
|
"eval_runtime": 30450.1084, |
|
"eval_samples_per_second": 2.131, |
|
"eval_steps_per_second": 0.067, |
|
"eval_wer": 38.925884967114385, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7682612745903885, |
|
"grad_norm": 1.531043529510498, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6237, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 0.7693572250534276, |
|
"grad_norm": 1.7747310400009155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6769, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 0.7704531755164666, |
|
"grad_norm": 1.457766056060791, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6406, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 0.7715491259795058, |
|
"grad_norm": 1.478061318397522, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6797, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.7726450764425448, |
|
"grad_norm": 1.462485909461975, |
|
"learning_rate": 0.0001, |
|
"loss": 0.654, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.7737410269055839, |
|
"grad_norm": 2.1201417446136475, |
|
"learning_rate": 0.0001, |
|
"loss": 0.676, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 0.7748369773686229, |
|
"grad_norm": 1.6672828197479248, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6402, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 0.775932927831662, |
|
"grad_norm": 1.848254680633545, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6623, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 0.7770288782947011, |
|
"grad_norm": 1.8868560791015625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6598, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 0.7781248287577401, |
|
"grad_norm": 2.0615594387054443, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6786, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.7792207792207793, |
|
"grad_norm": 1.6596072912216187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6448, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 0.7803167296838183, |
|
"grad_norm": 2.0829083919525146, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6558, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 0.7814126801468574, |
|
"grad_norm": 1.7660095691680908, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7399, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.7825086306098964, |
|
"grad_norm": 1.7068332433700562, |
|
"learning_rate": 0.0001, |
|
"loss": 0.637, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 0.7836045810729355, |
|
"grad_norm": 1.5040172338485718, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5708, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.7847005315359745, |
|
"grad_norm": 1.7479969263076782, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7348, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 0.7857964819990136, |
|
"grad_norm": 1.7886347770690918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.72, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 0.7868924324620528, |
|
"grad_norm": 1.6001741886138916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6512, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 0.7879883829250918, |
|
"grad_norm": 1.7489492893218994, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6714, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 0.7890843333881309, |
|
"grad_norm": 1.9967806339263916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6651, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.7901802838511699, |
|
"grad_norm": 1.6555088758468628, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6584, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 0.791276234314209, |
|
"grad_norm": 1.589168667793274, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6448, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 0.792372184777248, |
|
"grad_norm": 1.2876309156417847, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6612, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 0.7934681352402871, |
|
"grad_norm": 1.6673985719680786, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5769, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 0.7945640857033263, |
|
"grad_norm": 1.6478184461593628, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6457, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.7956600361663653, |
|
"grad_norm": 1.5702099800109863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6866, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 0.7967559866294044, |
|
"grad_norm": 1.850900411605835, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6475, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 0.7978519370924434, |
|
"grad_norm": 1.2784024477005005, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6366, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 0.7989478875554825, |
|
"grad_norm": 2.2533817291259766, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6164, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 0.8000438380185215, |
|
"grad_norm": 1.442713737487793, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6853, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.8011397884815606, |
|
"grad_norm": 1.594449520111084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6535, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 0.8022357389445997, |
|
"grad_norm": 1.4961411952972412, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6696, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 0.8033316894076388, |
|
"grad_norm": 2.1010756492614746, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6607, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 0.8044276398706779, |
|
"grad_norm": 2.134493589401245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6512, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 0.8055235903337169, |
|
"grad_norm": 1.6435072422027588, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6094, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.806619540796756, |
|
"grad_norm": 1.8982771635055542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6761, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.807715491259795, |
|
"grad_norm": 1.968770146369934, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7178, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 0.8088114417228341, |
|
"grad_norm": 2.19568133354187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6246, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 0.8099073921858732, |
|
"grad_norm": 1.6024566888809204, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6174, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 0.8110033426489123, |
|
"grad_norm": 1.4896485805511475, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5843, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.8120992931119514, |
|
"grad_norm": 1.502487301826477, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7747, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 0.8131952435749904, |
|
"grad_norm": 1.5037872791290283, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6745, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 0.8142911940380295, |
|
"grad_norm": 1.3984043598175049, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6938, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 0.8153871445010685, |
|
"grad_norm": 1.7627023458480835, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6407, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.8164830949641076, |
|
"grad_norm": 1.5276484489440918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6142, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.8175790454271467, |
|
"grad_norm": 1.598743200302124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6534, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 0.8186749958901858, |
|
"grad_norm": 1.5528680086135864, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6272, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 0.8197709463532248, |
|
"grad_norm": 1.71839439868927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7259, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 0.8208668968162639, |
|
"grad_norm": 1.5527739524841309, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6387, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 0.821962847279303, |
|
"grad_norm": 1.7775479555130005, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6963, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.821962847279303, |
|
"eval_loss": 0.6593644618988037, |
|
"eval_runtime": 30360.7703, |
|
"eval_samples_per_second": 2.137, |
|
"eval_steps_per_second": 0.067, |
|
"eval_wer": 42.68128173436093, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.823058797742342, |
|
"grad_norm": 1.772290825843811, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6439, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 0.8241547482053811, |
|
"grad_norm": 1.655604600906372, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7521, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.8252506986684202, |
|
"grad_norm": 1.5305246114730835, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5623, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 0.8263466491314593, |
|
"grad_norm": 1.399568796157837, |
|
"learning_rate": 0.0001, |
|
"loss": 0.634, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 0.8274425995944983, |
|
"grad_norm": 1.412463903427124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7272, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.8285385500575374, |
|
"grad_norm": 1.793396234512329, |
|
"learning_rate": 0.0001, |
|
"loss": 0.684, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 0.8296345005205765, |
|
"grad_norm": 1.9623442888259888, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6247, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 0.8307304509836155, |
|
"grad_norm": 1.4576257467269897, |
|
"learning_rate": 0.0001, |
|
"loss": 0.675, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 0.8318264014466547, |
|
"grad_norm": 1.6135623455047607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7046, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 0.8329223519096937, |
|
"grad_norm": 1.5553112030029297, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7246, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.8340183023727328, |
|
"grad_norm": 1.4521915912628174, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7288, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 0.8351142528357718, |
|
"grad_norm": 1.429190754890442, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6357, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 0.8362102032988109, |
|
"grad_norm": 1.80194890499115, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6462, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 0.8373061537618499, |
|
"grad_norm": 1.833225131034851, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6942, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 0.838402104224889, |
|
"grad_norm": 1.8329098224639893, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6525, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.8394980546879282, |
|
"grad_norm": 1.5729244947433472, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6721, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 0.8405940051509672, |
|
"grad_norm": 1.8156899213790894, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6777, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 0.8416899556140063, |
|
"grad_norm": 1.7255985736846924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6653, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.8427859060770453, |
|
"grad_norm": 1.8051388263702393, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6429, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 1.4799489974975586, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6219, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.8449778070031234, |
|
"grad_norm": 1.5661497116088867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7042, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 0.8460737574661625, |
|
"grad_norm": 1.7842859029769897, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5657, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 0.8471697079292017, |
|
"grad_norm": 2.036591053009033, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7448, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 0.8482656583922407, |
|
"grad_norm": 1.5923106670379639, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6937, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 0.8493616088552798, |
|
"grad_norm": 1.7609819173812866, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6282, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.8504575593183188, |
|
"grad_norm": 1.627193570137024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6593, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 0.8515535097813579, |
|
"grad_norm": 1.5199600458145142, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6493, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 0.8526494602443969, |
|
"grad_norm": 1.8375046253204346, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7139, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 0.853745410707436, |
|
"grad_norm": 1.7061831951141357, |
|
"learning_rate": 0.0001, |
|
"loss": 0.645, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 0.854841361170475, |
|
"grad_norm": 1.5046154260635376, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7154, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.8559373116335142, |
|
"grad_norm": 2.0937325954437256, |
|
"learning_rate": 0.0001, |
|
"loss": 0.716, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 0.8570332620965533, |
|
"grad_norm": 1.502930760383606, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6476, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 0.8581292125595923, |
|
"grad_norm": 1.832287073135376, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6279, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 0.8592251630226314, |
|
"grad_norm": 1.9679219722747803, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7048, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.8603211134856704, |
|
"grad_norm": 1.4660624265670776, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6217, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.8614170639487095, |
|
"grad_norm": 1.6641209125518799, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5692, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 0.8625130144117485, |
|
"grad_norm": 1.6354645490646362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7019, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 0.8636089648747877, |
|
"grad_norm": 1.5404868125915527, |
|
"learning_rate": 0.0001, |
|
"loss": 0.667, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 0.8647049153378268, |
|
"grad_norm": 1.759466528892517, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7413, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 0.8658008658008658, |
|
"grad_norm": 1.289501667022705, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6696, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.8668968162639049, |
|
"grad_norm": 1.516506552696228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6385, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 0.8679927667269439, |
|
"grad_norm": 1.602023959159851, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6034, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 0.869088717189983, |
|
"grad_norm": 1.6681197881698608, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6524, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 0.870184667653022, |
|
"grad_norm": 1.7448092699050903, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5845, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 0.8712806181160612, |
|
"grad_norm": 1.763609766960144, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6078, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.8723765685791002, |
|
"grad_norm": 1.8752708435058594, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6415, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 0.8734725190421393, |
|
"grad_norm": 1.4633687734603882, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6509, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 0.8745684695051784, |
|
"grad_norm": 1.630188226699829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.678, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 0.8756644199682174, |
|
"grad_norm": 1.746390461921692, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6883, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 0.8767603704312565, |
|
"grad_norm": 1.8357354402542114, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5674, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8767603704312565, |
|
"eval_loss": 0.6495629549026489, |
|
"eval_runtime": 30578.7623, |
|
"eval_samples_per_second": 2.122, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 46.744478263995646, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8778563208942955, |
|
"grad_norm": 1.3959294557571411, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7334, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 0.8789522713573347, |
|
"grad_norm": 1.7587610483169556, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6916, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 0.8800482218203737, |
|
"grad_norm": 2.157567024230957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6288, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 0.8811441722834128, |
|
"grad_norm": 1.927071452140808, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6596, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 0.8822401227464519, |
|
"grad_norm": 1.7229890823364258, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6351, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.8833360732094909, |
|
"grad_norm": 1.4584635496139526, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6347, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.88443202367253, |
|
"grad_norm": 1.4768098592758179, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6326, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 0.885527974135569, |
|
"grad_norm": 1.6411234140396118, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6935, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 0.8866239245986082, |
|
"grad_norm": 1.4742987155914307, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6188, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 0.8877198750616472, |
|
"grad_norm": 2.1708977222442627, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6837, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.8888158255246863, |
|
"grad_norm": 1.5142560005187988, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6218, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 0.8899117759877253, |
|
"grad_norm": 1.5650640726089478, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5991, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 0.8910077264507644, |
|
"grad_norm": 1.5553919076919556, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6081, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 0.8921036769138035, |
|
"grad_norm": 1.813482642173767, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6599, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 0.8931996273768426, |
|
"grad_norm": 1.6864385604858398, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6337, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.8942955778398817, |
|
"grad_norm": 1.5707799196243286, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7029, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.8953915283029207, |
|
"grad_norm": 1.3465133905410767, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6519, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 0.8964874787659598, |
|
"grad_norm": 1.5546880960464478, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6111, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 0.8975834292289988, |
|
"grad_norm": 1.6297564506530762, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6825, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 0.8986793796920379, |
|
"grad_norm": 1.5396370887756348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6454, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.8997753301550769, |
|
"grad_norm": 1.3082808256149292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6489, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 0.900871280618116, |
|
"grad_norm": 1.68564772605896, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6688, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 0.9019672310811552, |
|
"grad_norm": 1.6919423341751099, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6353, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 0.9030631815441942, |
|
"grad_norm": 1.4040336608886719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6286, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 0.9041591320072333, |
|
"grad_norm": 1.5394583940505981, |
|
"learning_rate": 0.0001, |
|
"loss": 0.648, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.9052550824702723, |
|
"grad_norm": 1.8135911226272583, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6315, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 0.9063510329333114, |
|
"grad_norm": 1.6827434301376343, |
|
"learning_rate": 0.0001, |
|
"loss": 0.637, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 0.9074469833963504, |
|
"grad_norm": 1.3692152500152588, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7015, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 0.9085429338593896, |
|
"grad_norm": 1.6391196250915527, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6479, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 0.9096388843224287, |
|
"grad_norm": 2.5071117877960205, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6746, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.9107348347854677, |
|
"grad_norm": 1.7680779695510864, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6543, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 0.9118307852485068, |
|
"grad_norm": 1.487269639968872, |
|
"learning_rate": 0.0001, |
|
"loss": 0.618, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.9129267357115458, |
|
"grad_norm": 1.322325348854065, |
|
"learning_rate": 0.0001, |
|
"loss": 0.635, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 0.9140226861745849, |
|
"grad_norm": 2.054997682571411, |
|
"learning_rate": 0.0001, |
|
"loss": 0.645, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 0.9151186366376239, |
|
"grad_norm": 1.7619165182113647, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6405, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.9162145871006631, |
|
"grad_norm": 1.3276571035385132, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5797, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 0.9173105375637021, |
|
"grad_norm": 2.2796542644500732, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7342, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.9184064880267412, |
|
"grad_norm": 1.637654423713684, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6149, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 0.9195024384897803, |
|
"grad_norm": 1.4013864994049072, |
|
"learning_rate": 0.0001, |
|
"loss": 0.627, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 0.9205983889528193, |
|
"grad_norm": 1.5173211097717285, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5449, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.9216943394158584, |
|
"grad_norm": 1.5530805587768555, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5416, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 0.9227902898788974, |
|
"grad_norm": 1.5294363498687744, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6146, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 0.9238862403419366, |
|
"grad_norm": 1.7312266826629639, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6952, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 0.9249821908049756, |
|
"grad_norm": 1.301459789276123, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6258, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 0.9260781412680147, |
|
"grad_norm": 1.915128469467163, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6955, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.9271740917310538, |
|
"grad_norm": 1.3437505960464478, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7044, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 0.9282700421940928, |
|
"grad_norm": 1.5920603275299072, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5871, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 0.9293659926571319, |
|
"grad_norm": 1.2615900039672852, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6311, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.9304619431201709, |
|
"grad_norm": 1.6863378286361694, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5746, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 0.9315578935832101, |
|
"grad_norm": 1.3633450269699097, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6354, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.9315578935832101, |
|
"eval_loss": 0.6385661959648132, |
|
"eval_runtime": 30462.2265, |
|
"eval_samples_per_second": 2.13, |
|
"eval_steps_per_second": 0.067, |
|
"eval_wer": 36.07830918982583, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.9326538440462491, |
|
"grad_norm": 1.7412103414535522, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6635, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 0.9337497945092882, |
|
"grad_norm": 2.0697691440582275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.678, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 0.9348457449723272, |
|
"grad_norm": 1.6238869428634644, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6427, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 0.9359416954353663, |
|
"grad_norm": 1.498334288597107, |
|
"learning_rate": 0.0001, |
|
"loss": 0.614, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 0.9370376458984054, |
|
"grad_norm": 1.4905815124511719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5759, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.9381335963614444, |
|
"grad_norm": 1.433747410774231, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6754, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 0.9392295468244836, |
|
"grad_norm": 1.8419586420059204, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6392, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 0.9403254972875226, |
|
"grad_norm": 1.5990883111953735, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6204, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 0.9414214477505617, |
|
"grad_norm": 1.482010841369629, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6887, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 0.9425173982136007, |
|
"grad_norm": 1.6629010438919067, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6457, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.9436133486766398, |
|
"grad_norm": 1.4538336992263794, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6401, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 0.9447092991396789, |
|
"grad_norm": 1.5684305429458618, |
|
"learning_rate": 0.0001, |
|
"loss": 0.676, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 0.945805249602718, |
|
"grad_norm": 1.4637812376022339, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5963, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 0.9469012000657571, |
|
"grad_norm": 2.155348300933838, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6624, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.9479971505287961, |
|
"grad_norm": 1.6532953977584839, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6784, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.9490931009918352, |
|
"grad_norm": 1.934787392616272, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6876, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 0.9501890514548742, |
|
"grad_norm": 2.319920063018799, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7676, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 0.9512850019179133, |
|
"grad_norm": 1.5026947259902954, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5634, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 1.9578672647476196, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5959, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 0.9534769028439914, |
|
"grad_norm": 1.9930877685546875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5947, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.9545728533070306, |
|
"grad_norm": 1.6241062879562378, |
|
"learning_rate": 0.0001, |
|
"loss": 0.587, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 0.9556688037700696, |
|
"grad_norm": 1.7155011892318726, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6352, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 0.9567647542331087, |
|
"grad_norm": 1.7239856719970703, |
|
"learning_rate": 0.0001, |
|
"loss": 0.647, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 0.9578607046961477, |
|
"grad_norm": 1.6342066526412964, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6017, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 0.9589566551591868, |
|
"grad_norm": 1.4042915105819702, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6479, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.9600526056222258, |
|
"grad_norm": 1.5023634433746338, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6008, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 0.961148556085265, |
|
"grad_norm": 1.5713409185409546, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6643, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 0.9622445065483041, |
|
"grad_norm": 1.8917444944381714, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6992, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 0.9633404570113431, |
|
"grad_norm": 1.918900728225708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6004, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 0.9644364074743822, |
|
"grad_norm": 1.7599738836288452, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6479, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.9655323579374212, |
|
"grad_norm": 1.4554500579833984, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6367, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 0.9666283084004603, |
|
"grad_norm": 1.6860467195510864, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5769, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 0.9677242588634993, |
|
"grad_norm": 1.6800360679626465, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6402, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 0.9688202093265385, |
|
"grad_norm": 1.3988690376281738, |
|
"learning_rate": 0.0001, |
|
"loss": 0.589, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 0.9699161597895775, |
|
"grad_norm": 1.6789034605026245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6469, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.9710121102526166, |
|
"grad_norm": 1.4465025663375854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6362, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 0.9721080607156557, |
|
"grad_norm": 1.6466797590255737, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5882, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 0.9732040111786947, |
|
"grad_norm": 1.4487119913101196, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6481, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 0.9742999616417338, |
|
"grad_norm": 1.769286870956421, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7198, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 0.9753959121047728, |
|
"grad_norm": 1.8515903949737549, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6257, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.976491862567812, |
|
"grad_norm": 1.9811028242111206, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6154, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 0.977587813030851, |
|
"grad_norm": 1.6197538375854492, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6196, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 0.9786837634938901, |
|
"grad_norm": 1.605971336364746, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5549, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 0.9797797139569292, |
|
"grad_norm": 1.5246946811676025, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5979, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 0.9808756644199682, |
|
"grad_norm": 1.8534538745880127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.663, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.9819716148830073, |
|
"grad_norm": 1.6625508069992065, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5863, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.9830675653460463, |
|
"grad_norm": 1.704788088798523, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6529, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 0.9841635158090855, |
|
"grad_norm": 1.3650102615356445, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6317, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 0.9852594662721245, |
|
"grad_norm": 1.7923402786254883, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6023, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.9863554167351636, |
|
"grad_norm": 2.0028254985809326, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6569, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9863554167351636, |
|
"eval_loss": 0.6309429407119751, |
|
"eval_runtime": 30654.7333, |
|
"eval_samples_per_second": 2.117, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 36.64375002139876, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9874513671982026, |
|
"grad_norm": 1.66196870803833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6687, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 0.9885473176612417, |
|
"grad_norm": 2.031445264816284, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6032, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 0.9896432681242808, |
|
"grad_norm": 1.3204675912857056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5913, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 0.9907392185873198, |
|
"grad_norm": 1.7389861345291138, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6678, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 0.991835169050359, |
|
"grad_norm": 1.9815995693206787, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6566, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.992931119513398, |
|
"grad_norm": 1.5902196168899536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.617, |
|
"step": 18120 |
|
}, |
|
{ |
|
"epoch": 0.9940270699764371, |
|
"grad_norm": 1.4741644859313965, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5921, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 0.9951230204394761, |
|
"grad_norm": 1.419965147972107, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6031, |
|
"step": 18160 |
|
}, |
|
{ |
|
"epoch": 0.9962189709025152, |
|
"grad_norm": 1.5964018106460571, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6015, |
|
"step": 18180 |
|
}, |
|
{ |
|
"epoch": 0.9973149213655543, |
|
"grad_norm": 1.4470981359481812, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6898, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.9984108718285933, |
|
"grad_norm": 1.54426908493042, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6328, |
|
"step": 18220 |
|
}, |
|
{ |
|
"epoch": 0.9995068222916325, |
|
"grad_norm": 1.778437614440918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6381, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 1.0006027727546716, |
|
"grad_norm": 1.5332226753234863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5782, |
|
"step": 18260 |
|
}, |
|
{ |
|
"epoch": 1.0016987232177106, |
|
"grad_norm": 1.785762906074524, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6779, |
|
"step": 18280 |
|
}, |
|
{ |
|
"epoch": 1.0027946736807496, |
|
"grad_norm": 1.684708595275879, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5786, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.0038906241437886, |
|
"grad_norm": 1.5112040042877197, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5989, |
|
"step": 18320 |
|
}, |
|
{ |
|
"epoch": 1.0049865746068278, |
|
"grad_norm": 1.5711543560028076, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6525, |
|
"step": 18340 |
|
}, |
|
{ |
|
"epoch": 1.0060825250698668, |
|
"grad_norm": 1.4053367376327515, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5913, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 1.0071784755329058, |
|
"grad_norm": 1.7791050672531128, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6212, |
|
"step": 18380 |
|
}, |
|
{ |
|
"epoch": 1.008274425995945, |
|
"grad_norm": 1.4202812910079956, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6168, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.009370376458984, |
|
"grad_norm": 1.7469732761383057, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6425, |
|
"step": 18420 |
|
}, |
|
{ |
|
"epoch": 1.010466326922023, |
|
"grad_norm": 2.1195449829101562, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6564, |
|
"step": 18440 |
|
}, |
|
{ |
|
"epoch": 1.011562277385062, |
|
"grad_norm": 1.4056214094161987, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6809, |
|
"step": 18460 |
|
}, |
|
{ |
|
"epoch": 1.0126582278481013, |
|
"grad_norm": 2.07029128074646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6119, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 1.0137541783111403, |
|
"grad_norm": 1.6518419981002808, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5954, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.0148501287741793, |
|
"grad_norm": 1.7785189151763916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5959, |
|
"step": 18520 |
|
}, |
|
{ |
|
"epoch": 1.0159460792372186, |
|
"grad_norm": 1.4902641773223877, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6066, |
|
"step": 18540 |
|
}, |
|
{ |
|
"epoch": 1.0170420297002576, |
|
"grad_norm": 1.6291300058364868, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5781, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 1.0181379801632966, |
|
"grad_norm": 1.5571300983428955, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5622, |
|
"step": 18580 |
|
}, |
|
{ |
|
"epoch": 1.0192339306263356, |
|
"grad_norm": 1.5963464975357056, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6797, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.0203298810893748, |
|
"grad_norm": 1.4604226350784302, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6712, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 1.0214258315524138, |
|
"grad_norm": 1.6052221059799194, |
|
"learning_rate": 0.0001, |
|
"loss": 0.595, |
|
"step": 18640 |
|
}, |
|
{ |
|
"epoch": 1.0225217820154529, |
|
"grad_norm": 1.4075971841812134, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6123, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 1.0236177324784919, |
|
"grad_norm": 1.910475730895996, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7016, |
|
"step": 18680 |
|
}, |
|
{ |
|
"epoch": 1.024713682941531, |
|
"grad_norm": 1.946268081665039, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6178, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.02580963340457, |
|
"grad_norm": 1.5547478199005127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6068, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 1.026905583867609, |
|
"grad_norm": 1.5006910562515259, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5818, |
|
"step": 18740 |
|
}, |
|
{ |
|
"epoch": 1.0280015343306483, |
|
"grad_norm": 1.5395736694335938, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6614, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 1.0290974847936873, |
|
"grad_norm": 1.5935709476470947, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5593, |
|
"step": 18780 |
|
}, |
|
{ |
|
"epoch": 1.0301934352567264, |
|
"grad_norm": 1.6643317937850952, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6441, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.0312893857197654, |
|
"grad_norm": 1.6811660528182983, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5663, |
|
"step": 18820 |
|
}, |
|
{ |
|
"epoch": 1.0323853361828046, |
|
"grad_norm": 1.4203201532363892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6622, |
|
"step": 18840 |
|
}, |
|
{ |
|
"epoch": 1.0334812866458436, |
|
"grad_norm": 1.9712319374084473, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5818, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 1.0345772371088826, |
|
"grad_norm": 2.0921614170074463, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6489, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 1.0356731875719218, |
|
"grad_norm": 1.3215636014938354, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5784, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.0367691380349608, |
|
"grad_norm": 1.6520031690597534, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6069, |
|
"step": 18920 |
|
}, |
|
{ |
|
"epoch": 1.0378650884979999, |
|
"grad_norm": 1.8051795959472656, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6596, |
|
"step": 18940 |
|
}, |
|
{ |
|
"epoch": 1.0389610389610389, |
|
"grad_norm": 1.7375438213348389, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5161, |
|
"step": 18960 |
|
}, |
|
{ |
|
"epoch": 1.040056989424078, |
|
"grad_norm": 1.4865177869796753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5436, |
|
"step": 18980 |
|
}, |
|
{ |
|
"epoch": 1.041152939887117, |
|
"grad_norm": 1.4444303512573242, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5906, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.041152939887117, |
|
"eval_loss": 0.622921347618103, |
|
"eval_runtime": 30997.4799, |
|
"eval_samples_per_second": 2.093, |
|
"eval_steps_per_second": 0.065, |
|
"eval_wer": 37.84105343527132, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.042248890350156, |
|
"grad_norm": 1.4964603185653687, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6014, |
|
"step": 19020 |
|
}, |
|
{ |
|
"epoch": 1.0433448408131953, |
|
"grad_norm": 2.2886295318603516, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6472, |
|
"step": 19040 |
|
}, |
|
{ |
|
"epoch": 1.0444407912762343, |
|
"grad_norm": 1.7065175771713257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5561, |
|
"step": 19060 |
|
}, |
|
{ |
|
"epoch": 1.0455367417392734, |
|
"grad_norm": 1.6928189992904663, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5224, |
|
"step": 19080 |
|
}, |
|
{ |
|
"epoch": 1.0466326922023124, |
|
"grad_norm": 1.4461798667907715, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5549, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.0477286426653516, |
|
"grad_norm": 3.3619306087493896, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6065, |
|
"step": 19120 |
|
}, |
|
{ |
|
"epoch": 1.0488245931283906, |
|
"grad_norm": 1.8468629121780396, |
|
"learning_rate": 0.0001, |
|
"loss": 0.716, |
|
"step": 19140 |
|
}, |
|
{ |
|
"epoch": 1.0499205435914296, |
|
"grad_norm": 1.7207484245300293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6078, |
|
"step": 19160 |
|
}, |
|
{ |
|
"epoch": 1.0510164940544688, |
|
"grad_norm": 1.6650727987289429, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6347, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 1.0521124445175078, |
|
"grad_norm": 1.3957616090774536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6265, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.0532083949805469, |
|
"grad_norm": 1.4066413640975952, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6118, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 1.0543043454435859, |
|
"grad_norm": 1.8007709980010986, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6989, |
|
"step": 19240 |
|
}, |
|
{ |
|
"epoch": 1.055400295906625, |
|
"grad_norm": 1.4759665727615356, |
|
"learning_rate": 0.0001, |
|
"loss": 0.621, |
|
"step": 19260 |
|
}, |
|
{ |
|
"epoch": 1.056496246369664, |
|
"grad_norm": 1.7062383890151978, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6413, |
|
"step": 19280 |
|
}, |
|
{ |
|
"epoch": 1.0575921968327031, |
|
"grad_norm": 1.5822961330413818, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5877, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.0586881472957423, |
|
"grad_norm": 1.7339930534362793, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6121, |
|
"step": 19320 |
|
}, |
|
{ |
|
"epoch": 1.0597840977587814, |
|
"grad_norm": 1.3066824674606323, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6278, |
|
"step": 19340 |
|
}, |
|
{ |
|
"epoch": 1.0608800482218204, |
|
"grad_norm": 1.6953777074813843, |
|
"learning_rate": 0.0001, |
|
"loss": 0.624, |
|
"step": 19360 |
|
}, |
|
{ |
|
"epoch": 1.0619759986848594, |
|
"grad_norm": 1.5192081928253174, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6078, |
|
"step": 19380 |
|
}, |
|
{ |
|
"epoch": 1.0630719491478986, |
|
"grad_norm": 1.5474629402160645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7147, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.0641678996109376, |
|
"grad_norm": 1.5060781240463257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6342, |
|
"step": 19420 |
|
}, |
|
{ |
|
"epoch": 1.0652638500739766, |
|
"grad_norm": 1.4227863550186157, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5967, |
|
"step": 19440 |
|
}, |
|
{ |
|
"epoch": 1.0663598005370156, |
|
"grad_norm": 1.4965442419052124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6344, |
|
"step": 19460 |
|
}, |
|
{ |
|
"epoch": 1.0674557510000549, |
|
"grad_norm": 1.4466912746429443, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6276, |
|
"step": 19480 |
|
}, |
|
{ |
|
"epoch": 1.0685517014630939, |
|
"grad_norm": 1.7357358932495117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5664, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.0696476519261329, |
|
"grad_norm": 1.6092090606689453, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6207, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 1.070743602389172, |
|
"grad_norm": 1.5991522073745728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6113, |
|
"step": 19540 |
|
}, |
|
{ |
|
"epoch": 1.071839552852211, |
|
"grad_norm": 1.320917010307312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.577, |
|
"step": 19560 |
|
}, |
|
{ |
|
"epoch": 1.0729355033152501, |
|
"grad_norm": 1.4419164657592773, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6622, |
|
"step": 19580 |
|
}, |
|
{ |
|
"epoch": 1.0740314537782891, |
|
"grad_norm": 1.7298556566238403, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5721, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.0751274042413284, |
|
"grad_norm": 1.8013224601745605, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6079, |
|
"step": 19620 |
|
}, |
|
{ |
|
"epoch": 1.0762233547043674, |
|
"grad_norm": 1.5226448774337769, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6681, |
|
"step": 19640 |
|
}, |
|
{ |
|
"epoch": 1.0773193051674064, |
|
"grad_norm": 1.894225835800171, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6599, |
|
"step": 19660 |
|
}, |
|
{ |
|
"epoch": 1.0784152556304456, |
|
"grad_norm": 1.58738112449646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5796, |
|
"step": 19680 |
|
}, |
|
{ |
|
"epoch": 1.0795112060934846, |
|
"grad_norm": 1.880391240119934, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6305, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.0806071565565236, |
|
"grad_norm": 1.4821720123291016, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5675, |
|
"step": 19720 |
|
}, |
|
{ |
|
"epoch": 1.0817031070195626, |
|
"grad_norm": 1.504714846611023, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5888, |
|
"step": 19740 |
|
}, |
|
{ |
|
"epoch": 1.0827990574826019, |
|
"grad_norm": 1.6745513677597046, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6199, |
|
"step": 19760 |
|
}, |
|
{ |
|
"epoch": 1.0838950079456409, |
|
"grad_norm": 1.827014684677124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5889, |
|
"step": 19780 |
|
}, |
|
{ |
|
"epoch": 1.0849909584086799, |
|
"grad_norm": 1.6627857685089111, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5649, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.086086908871719, |
|
"grad_norm": 1.5660628080368042, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6814, |
|
"step": 19820 |
|
}, |
|
{ |
|
"epoch": 1.0871828593347581, |
|
"grad_norm": 1.4713698625564575, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5789, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 1.0882788097977971, |
|
"grad_norm": 1.5290462970733643, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5894, |
|
"step": 19860 |
|
}, |
|
{ |
|
"epoch": 1.0893747602608361, |
|
"grad_norm": 1.7101823091506958, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5732, |
|
"step": 19880 |
|
}, |
|
{ |
|
"epoch": 1.0904707107238754, |
|
"grad_norm": 1.8752964735031128, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6358, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.0915666611869144, |
|
"grad_norm": 1.7808418273925781, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5942, |
|
"step": 19920 |
|
}, |
|
{ |
|
"epoch": 1.0926626116499534, |
|
"grad_norm": 1.4797093868255615, |
|
"learning_rate": 0.0001, |
|
"loss": 0.605, |
|
"step": 19940 |
|
}, |
|
{ |
|
"epoch": 1.0937585621129924, |
|
"grad_norm": 2.443544387817383, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7019, |
|
"step": 19960 |
|
}, |
|
{ |
|
"epoch": 1.0948545125760316, |
|
"grad_norm": 1.4167999029159546, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5969, |
|
"step": 19980 |
|
}, |
|
{ |
|
"epoch": 1.0959504630390706, |
|
"grad_norm": 1.3823322057724, |
|
"learning_rate": 0.0001, |
|
"loss": 0.634, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.0959504630390706, |
|
"eval_loss": 0.6163960099220276, |
|
"eval_runtime": 30555.9795, |
|
"eval_samples_per_second": 2.124, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 36.777449473248126, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.0970464135021096, |
|
"grad_norm": 1.5388779640197754, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5836, |
|
"step": 20020 |
|
}, |
|
{ |
|
"epoch": 1.0981423639651489, |
|
"grad_norm": 1.9624749422073364, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5574, |
|
"step": 20040 |
|
}, |
|
{ |
|
"epoch": 1.0992383144281879, |
|
"grad_norm": 1.4712483882904053, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6284, |
|
"step": 20060 |
|
}, |
|
{ |
|
"epoch": 1.1003342648912269, |
|
"grad_norm": 1.4846380949020386, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5815, |
|
"step": 20080 |
|
}, |
|
{ |
|
"epoch": 1.1014302153542659, |
|
"grad_norm": 1.4635918140411377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6003, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 1.1025261658173051, |
|
"grad_norm": 1.775586485862732, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5997, |
|
"step": 20120 |
|
}, |
|
{ |
|
"epoch": 1.1036221162803441, |
|
"grad_norm": 1.5434575080871582, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5924, |
|
"step": 20140 |
|
}, |
|
{ |
|
"epoch": 1.1047180667433831, |
|
"grad_norm": 1.661482572555542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6288, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 1.1058140172064224, |
|
"grad_norm": 1.706123948097229, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5624, |
|
"step": 20180 |
|
}, |
|
{ |
|
"epoch": 1.1069099676694614, |
|
"grad_norm": 1.9980905055999756, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5577, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.1080059181325004, |
|
"grad_norm": 1.60072922706604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5947, |
|
"step": 20220 |
|
}, |
|
{ |
|
"epoch": 1.1091018685955394, |
|
"grad_norm": 1.6381016969680786, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5183, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 1.1101978190585786, |
|
"grad_norm": 1.7384517192840576, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5752, |
|
"step": 20260 |
|
}, |
|
{ |
|
"epoch": 1.1112937695216176, |
|
"grad_norm": 1.9785683155059814, |
|
"learning_rate": 0.0001, |
|
"loss": 0.576, |
|
"step": 20280 |
|
}, |
|
{ |
|
"epoch": 1.1123897199846566, |
|
"grad_norm": 1.2318958044052124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5853, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 1.1134856704476959, |
|
"grad_norm": 1.5406831502914429, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6059, |
|
"step": 20320 |
|
}, |
|
{ |
|
"epoch": 1.1145816209107349, |
|
"grad_norm": 1.6655981540679932, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6834, |
|
"step": 20340 |
|
}, |
|
{ |
|
"epoch": 1.1156775713737739, |
|
"grad_norm": 1.513757586479187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6158, |
|
"step": 20360 |
|
}, |
|
{ |
|
"epoch": 1.1167735218368129, |
|
"grad_norm": 2.1243133544921875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5769, |
|
"step": 20380 |
|
}, |
|
{ |
|
"epoch": 1.1178694722998521, |
|
"grad_norm": 1.6118996143341064, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5197, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.1189654227628911, |
|
"grad_norm": 1.8882734775543213, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5459, |
|
"step": 20420 |
|
}, |
|
{ |
|
"epoch": 1.1200613732259301, |
|
"grad_norm": 1.8385454416275024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5589, |
|
"step": 20440 |
|
}, |
|
{ |
|
"epoch": 1.1211573236889691, |
|
"grad_norm": 1.7789567708969116, |
|
"learning_rate": 0.0001, |
|
"loss": 0.589, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 1.1222532741520084, |
|
"grad_norm": 1.839416265487671, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5979, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 1.1233492246150474, |
|
"grad_norm": 1.8148174285888672, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6179, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.1244451750780864, |
|
"grad_norm": 1.594193696975708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6427, |
|
"step": 20520 |
|
}, |
|
{ |
|
"epoch": 1.1255411255411256, |
|
"grad_norm": 1.5754518508911133, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5933, |
|
"step": 20540 |
|
}, |
|
{ |
|
"epoch": 1.1266370760041646, |
|
"grad_norm": 1.7265543937683105, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6021, |
|
"step": 20560 |
|
}, |
|
{ |
|
"epoch": 1.1277330264672036, |
|
"grad_norm": 1.4708410501480103, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5824, |
|
"step": 20580 |
|
}, |
|
{ |
|
"epoch": 1.1288289769302429, |
|
"grad_norm": 1.7831743955612183, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5806, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.1299249273932819, |
|
"grad_norm": 1.530446171760559, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6523, |
|
"step": 20620 |
|
}, |
|
{ |
|
"epoch": 1.1310208778563209, |
|
"grad_norm": 2.154409408569336, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5898, |
|
"step": 20640 |
|
}, |
|
{ |
|
"epoch": 1.13211682831936, |
|
"grad_norm": 1.3791108131408691, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6321, |
|
"step": 20660 |
|
}, |
|
{ |
|
"epoch": 1.1332127787823991, |
|
"grad_norm": 2.259727954864502, |
|
"learning_rate": 0.0001, |
|
"loss": 0.67, |
|
"step": 20680 |
|
}, |
|
{ |
|
"epoch": 1.1343087292454381, |
|
"grad_norm": 1.7098194360733032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5554, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 1.1354046797084771, |
|
"grad_norm": 1.5874308347702026, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5596, |
|
"step": 20720 |
|
}, |
|
{ |
|
"epoch": 1.1365006301715161, |
|
"grad_norm": 1.9818806648254395, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6618, |
|
"step": 20740 |
|
}, |
|
{ |
|
"epoch": 1.1375965806345554, |
|
"grad_norm": 1.4672831296920776, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5771, |
|
"step": 20760 |
|
}, |
|
{ |
|
"epoch": 1.1386925310975944, |
|
"grad_norm": 1.746772050857544, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6085, |
|
"step": 20780 |
|
}, |
|
{ |
|
"epoch": 1.1397884815606334, |
|
"grad_norm": 1.7367818355560303, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6401, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.1408844320236726, |
|
"grad_norm": 1.5269123315811157, |
|
"learning_rate": 0.0001, |
|
"loss": 0.678, |
|
"step": 20820 |
|
}, |
|
{ |
|
"epoch": 1.1419803824867116, |
|
"grad_norm": 1.6921030282974243, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6292, |
|
"step": 20840 |
|
}, |
|
{ |
|
"epoch": 1.1430763329497506, |
|
"grad_norm": 1.4888218641281128, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6034, |
|
"step": 20860 |
|
}, |
|
{ |
|
"epoch": 1.1441722834127896, |
|
"grad_norm": 1.9028393030166626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.604, |
|
"step": 20880 |
|
}, |
|
{ |
|
"epoch": 1.1452682338758289, |
|
"grad_norm": 1.4702014923095703, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6375, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 1.1463641843388679, |
|
"grad_norm": 1.9169687032699585, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6173, |
|
"step": 20920 |
|
}, |
|
{ |
|
"epoch": 1.147460134801907, |
|
"grad_norm": 1.6540066003799438, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5366, |
|
"step": 20940 |
|
}, |
|
{ |
|
"epoch": 1.148556085264946, |
|
"grad_norm": 1.564468502998352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5614, |
|
"step": 20960 |
|
}, |
|
{ |
|
"epoch": 1.1496520357279851, |
|
"grad_norm": 1.75001060962677, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6408, |
|
"step": 20980 |
|
}, |
|
{ |
|
"epoch": 1.1507479861910241, |
|
"grad_norm": 1.759342074394226, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6267, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.1507479861910241, |
|
"eval_loss": 0.6101906895637512, |
|
"eval_runtime": 30794.7182, |
|
"eval_samples_per_second": 2.107, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 46.360156536208414, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.1518439366540631, |
|
"grad_norm": 1.5255391597747803, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6105, |
|
"step": 21020 |
|
}, |
|
{ |
|
"epoch": 1.1529398871171024, |
|
"grad_norm": 1.8833086490631104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.628, |
|
"step": 21040 |
|
}, |
|
{ |
|
"epoch": 1.1540358375801414, |
|
"grad_norm": 1.6667803525924683, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5758, |
|
"step": 21060 |
|
}, |
|
{ |
|
"epoch": 1.1551317880431804, |
|
"grad_norm": 1.6798675060272217, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5869, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 1.1562277385062196, |
|
"grad_norm": 1.3532921075820923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5978, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.1573236889692586, |
|
"grad_norm": 1.9075069427490234, |
|
"learning_rate": 0.0001, |
|
"loss": 0.636, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 1.1584196394322976, |
|
"grad_norm": 1.6051623821258545, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6624, |
|
"step": 21140 |
|
}, |
|
{ |
|
"epoch": 1.1595155898953367, |
|
"grad_norm": 1.6509668827056885, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6098, |
|
"step": 21160 |
|
}, |
|
{ |
|
"epoch": 1.1606115403583759, |
|
"grad_norm": 1.8513017892837524, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5753, |
|
"step": 21180 |
|
}, |
|
{ |
|
"epoch": 1.161707490821415, |
|
"grad_norm": 1.7680573463439941, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5971, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.162803441284454, |
|
"grad_norm": 1.7778613567352295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6127, |
|
"step": 21220 |
|
}, |
|
{ |
|
"epoch": 1.163899391747493, |
|
"grad_norm": 1.524961233139038, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6181, |
|
"step": 21240 |
|
}, |
|
{ |
|
"epoch": 1.1649953422105321, |
|
"grad_norm": 1.7401758432388306, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6575, |
|
"step": 21260 |
|
}, |
|
{ |
|
"epoch": 1.1660912926735711, |
|
"grad_norm": 1.7773276567459106, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6264, |
|
"step": 21280 |
|
}, |
|
{ |
|
"epoch": 1.1671872431366102, |
|
"grad_norm": 1.987033486366272, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5309, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.1682831935996494, |
|
"grad_norm": 1.7425816059112549, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6644, |
|
"step": 21320 |
|
}, |
|
{ |
|
"epoch": 1.1693791440626884, |
|
"grad_norm": 1.6333574056625366, |
|
"learning_rate": 0.0001, |
|
"loss": 0.586, |
|
"step": 21340 |
|
}, |
|
{ |
|
"epoch": 1.1704750945257274, |
|
"grad_norm": 1.6959367990493774, |
|
"learning_rate": 0.0001, |
|
"loss": 0.566, |
|
"step": 21360 |
|
}, |
|
{ |
|
"epoch": 1.1715710449887666, |
|
"grad_norm": 1.6892461776733398, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5788, |
|
"step": 21380 |
|
}, |
|
{ |
|
"epoch": 1.1726669954518056, |
|
"grad_norm": 1.805227279663086, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5984, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.1737629459148446, |
|
"grad_norm": 1.5054594278335571, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5851, |
|
"step": 21420 |
|
}, |
|
{ |
|
"epoch": 1.1748588963778837, |
|
"grad_norm": 1.7826286554336548, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5872, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 1.1759548468409229, |
|
"grad_norm": 1.1451081037521362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5533, |
|
"step": 21460 |
|
}, |
|
{ |
|
"epoch": 1.177050797303962, |
|
"grad_norm": 1.425122618675232, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5934, |
|
"step": 21480 |
|
}, |
|
{ |
|
"epoch": 1.178146747767001, |
|
"grad_norm": 1.6452502012252808, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6008, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.17924269823004, |
|
"grad_norm": 1.6565967798233032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5945, |
|
"step": 21520 |
|
}, |
|
{ |
|
"epoch": 1.1803386486930791, |
|
"grad_norm": 1.7541433572769165, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6243, |
|
"step": 21540 |
|
}, |
|
{ |
|
"epoch": 1.1814345991561181, |
|
"grad_norm": 1.5369079113006592, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5968, |
|
"step": 21560 |
|
}, |
|
{ |
|
"epoch": 1.1825305496191572, |
|
"grad_norm": 2.0941789150238037, |
|
"learning_rate": 0.0001, |
|
"loss": 0.599, |
|
"step": 21580 |
|
}, |
|
{ |
|
"epoch": 1.1836265000821964, |
|
"grad_norm": 1.6641647815704346, |
|
"learning_rate": 0.0001, |
|
"loss": 0.634, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.1847224505452354, |
|
"grad_norm": 1.5256268978118896, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5939, |
|
"step": 21620 |
|
}, |
|
{ |
|
"epoch": 1.1858184010082744, |
|
"grad_norm": 1.5720540285110474, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6005, |
|
"step": 21640 |
|
}, |
|
{ |
|
"epoch": 1.1869143514713134, |
|
"grad_norm": 1.773973822593689, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5607, |
|
"step": 21660 |
|
}, |
|
{ |
|
"epoch": 1.1880103019343526, |
|
"grad_norm": 1.6868877410888672, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6013, |
|
"step": 21680 |
|
}, |
|
{ |
|
"epoch": 1.1891062523973916, |
|
"grad_norm": 1.518250584602356, |
|
"learning_rate": 0.0001, |
|
"loss": 0.612, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.1902022028604307, |
|
"grad_norm": 1.4469574689865112, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5684, |
|
"step": 21720 |
|
}, |
|
{ |
|
"epoch": 1.1912981533234697, |
|
"grad_norm": 1.3651134967803955, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6275, |
|
"step": 21740 |
|
}, |
|
{ |
|
"epoch": 1.192394103786509, |
|
"grad_norm": 1.1910673379898071, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6281, |
|
"step": 21760 |
|
}, |
|
{ |
|
"epoch": 1.193490054249548, |
|
"grad_norm": 1.5071038007736206, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5909, |
|
"step": 21780 |
|
}, |
|
{ |
|
"epoch": 1.194586004712587, |
|
"grad_norm": 1.3401362895965576, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6273, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.1956819551756261, |
|
"grad_norm": 1.3563752174377441, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5634, |
|
"step": 21820 |
|
}, |
|
{ |
|
"epoch": 1.1967779056386652, |
|
"grad_norm": 1.5860759019851685, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5996, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 1.1978738561017042, |
|
"grad_norm": 1.6106479167938232, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6038, |
|
"step": 21860 |
|
}, |
|
{ |
|
"epoch": 1.1989698065647434, |
|
"grad_norm": 1.2792236804962158, |
|
"learning_rate": 0.0001, |
|
"loss": 0.569, |
|
"step": 21880 |
|
}, |
|
{ |
|
"epoch": 1.2000657570277824, |
|
"grad_norm": 1.7770174741744995, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6159, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.2011617074908214, |
|
"grad_norm": 1.522647738456726, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5756, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 1.2022576579538604, |
|
"grad_norm": 1.4393162727355957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.598, |
|
"step": 21940 |
|
}, |
|
{ |
|
"epoch": 1.2033536084168996, |
|
"grad_norm": 1.9221006631851196, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5736, |
|
"step": 21960 |
|
}, |
|
{ |
|
"epoch": 1.2044495588799387, |
|
"grad_norm": 1.4930051565170288, |
|
"learning_rate": 0.0001, |
|
"loss": 0.591, |
|
"step": 21980 |
|
}, |
|
{ |
|
"epoch": 1.2055455093429777, |
|
"grad_norm": 1.1012686491012573, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6139, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.2055455093429777, |
|
"eval_loss": 0.6032226085662842, |
|
"eval_runtime": 30533.3597, |
|
"eval_samples_per_second": 2.125, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 36.777449473248126, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.2066414598060167, |
|
"grad_norm": 1.501633644104004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.55, |
|
"step": 22020 |
|
}, |
|
{ |
|
"epoch": 1.207737410269056, |
|
"grad_norm": 1.7444618940353394, |
|
"learning_rate": 0.0001, |
|
"loss": 0.62, |
|
"step": 22040 |
|
}, |
|
{ |
|
"epoch": 1.208833360732095, |
|
"grad_norm": 1.3666551113128662, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6087, |
|
"step": 22060 |
|
}, |
|
{ |
|
"epoch": 1.209929311195134, |
|
"grad_norm": 1.5576726198196411, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5958, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 1.2110252616581731, |
|
"grad_norm": 1.457824945449829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5588, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"grad_norm": 1.621120810508728, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5574, |
|
"step": 22120 |
|
}, |
|
{ |
|
"epoch": 1.2132171625842512, |
|
"grad_norm": 1.1742050647735596, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5498, |
|
"step": 22140 |
|
}, |
|
{ |
|
"epoch": 1.2143131130472902, |
|
"grad_norm": 1.3734312057495117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5899, |
|
"step": 22160 |
|
}, |
|
{ |
|
"epoch": 1.2154090635103294, |
|
"grad_norm": 2.046262741088867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6574, |
|
"step": 22180 |
|
}, |
|
{ |
|
"epoch": 1.2165050139733684, |
|
"grad_norm": 1.3114126920700073, |
|
"learning_rate": 0.0001, |
|
"loss": 0.607, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.2176009644364074, |
|
"grad_norm": 1.5335580110549927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5828, |
|
"step": 22220 |
|
}, |
|
{ |
|
"epoch": 1.2186969148994464, |
|
"grad_norm": 1.7492777109146118, |
|
"learning_rate": 0.0001, |
|
"loss": 0.557, |
|
"step": 22240 |
|
}, |
|
{ |
|
"epoch": 1.2197928653624857, |
|
"grad_norm": 1.3159027099609375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6923, |
|
"step": 22260 |
|
}, |
|
{ |
|
"epoch": 1.2208888158255247, |
|
"grad_norm": 1.5170766115188599, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7194, |
|
"step": 22280 |
|
}, |
|
{ |
|
"epoch": 1.2219847662885637, |
|
"grad_norm": 1.336846113204956, |
|
"learning_rate": 0.0001, |
|
"loss": 0.583, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.223080716751603, |
|
"grad_norm": 1.768999457359314, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7009, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 1.224176667214642, |
|
"grad_norm": 1.6113883256912231, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5747, |
|
"step": 22340 |
|
}, |
|
{ |
|
"epoch": 1.225272617677681, |
|
"grad_norm": 1.4983850717544556, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5477, |
|
"step": 22360 |
|
}, |
|
{ |
|
"epoch": 1.2263685681407201, |
|
"grad_norm": 1.380181908607483, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6003, |
|
"step": 22380 |
|
}, |
|
{ |
|
"epoch": 1.2274645186037592, |
|
"grad_norm": 1.4921499490737915, |
|
"learning_rate": 0.0001, |
|
"loss": 0.586, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.2285604690667982, |
|
"grad_norm": 1.3056907653808594, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5393, |
|
"step": 22420 |
|
}, |
|
{ |
|
"epoch": 1.2296564195298372, |
|
"grad_norm": 1.702541470527649, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6474, |
|
"step": 22440 |
|
}, |
|
{ |
|
"epoch": 1.2307523699928764, |
|
"grad_norm": 1.703065276145935, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6102, |
|
"step": 22460 |
|
}, |
|
{ |
|
"epoch": 1.2318483204559154, |
|
"grad_norm": 1.7823582887649536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.529, |
|
"step": 22480 |
|
}, |
|
{ |
|
"epoch": 1.2329442709189544, |
|
"grad_norm": 1.5001643896102905, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5936, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.2340402213819934, |
|
"grad_norm": 1.4515180587768555, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5817, |
|
"step": 22520 |
|
}, |
|
{ |
|
"epoch": 1.2351361718450327, |
|
"grad_norm": 1.6166529655456543, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5901, |
|
"step": 22540 |
|
}, |
|
{ |
|
"epoch": 1.2362321223080717, |
|
"grad_norm": 1.6117253303527832, |
|
"learning_rate": 0.0001, |
|
"loss": 0.668, |
|
"step": 22560 |
|
}, |
|
{ |
|
"epoch": 1.2373280727711107, |
|
"grad_norm": 1.4674168825149536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5783, |
|
"step": 22580 |
|
}, |
|
{ |
|
"epoch": 1.23842402323415, |
|
"grad_norm": 1.5282671451568604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6069, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.239519973697189, |
|
"grad_norm": 1.446772575378418, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5645, |
|
"step": 22620 |
|
}, |
|
{ |
|
"epoch": 1.240615924160228, |
|
"grad_norm": 1.7833497524261475, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5555, |
|
"step": 22640 |
|
}, |
|
{ |
|
"epoch": 1.241711874623267, |
|
"grad_norm": 1.8573659658432007, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6258, |
|
"step": 22660 |
|
}, |
|
{ |
|
"epoch": 1.2428078250863062, |
|
"grad_norm": 1.375735878944397, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5316, |
|
"step": 22680 |
|
}, |
|
{ |
|
"epoch": 1.2439037755493452, |
|
"grad_norm": 1.4545280933380127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.609, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.2449997260123842, |
|
"grad_norm": 1.499182105064392, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6205, |
|
"step": 22720 |
|
}, |
|
{ |
|
"epoch": 1.2460956764754232, |
|
"grad_norm": 1.418739914894104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5038, |
|
"step": 22740 |
|
}, |
|
{ |
|
"epoch": 1.2471916269384624, |
|
"grad_norm": 1.4958001375198364, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5607, |
|
"step": 22760 |
|
}, |
|
{ |
|
"epoch": 1.2482875774015014, |
|
"grad_norm": 1.7422837018966675, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5593, |
|
"step": 22780 |
|
}, |
|
{ |
|
"epoch": 1.2493835278645404, |
|
"grad_norm": 1.536526083946228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5518, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.2504794783275797, |
|
"grad_norm": 1.2415670156478882, |
|
"learning_rate": 0.0001, |
|
"loss": 0.625, |
|
"step": 22820 |
|
}, |
|
{ |
|
"epoch": 1.2515754287906187, |
|
"grad_norm": 1.2609211206436157, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5213, |
|
"step": 22840 |
|
}, |
|
{ |
|
"epoch": 1.2526713792536577, |
|
"grad_norm": 1.4843876361846924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5618, |
|
"step": 22860 |
|
}, |
|
{ |
|
"epoch": 1.253767329716697, |
|
"grad_norm": 1.7089099884033203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.587, |
|
"step": 22880 |
|
}, |
|
{ |
|
"epoch": 1.254863280179736, |
|
"grad_norm": 1.8894917964935303, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5952, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.255959230642775, |
|
"grad_norm": 1.3892401456832886, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5949, |
|
"step": 22920 |
|
}, |
|
{ |
|
"epoch": 1.257055181105814, |
|
"grad_norm": 1.8688722848892212, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6524, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 1.2581511315688532, |
|
"grad_norm": 1.8726931810379028, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5547, |
|
"step": 22960 |
|
}, |
|
{ |
|
"epoch": 1.2592470820318922, |
|
"grad_norm": 1.9214690923690796, |
|
"learning_rate": 0.0001, |
|
"loss": 0.618, |
|
"step": 22980 |
|
}, |
|
{ |
|
"epoch": 1.2603430324949312, |
|
"grad_norm": 1.6148467063903809, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4822, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.2603430324949312, |
|
"eval_loss": 0.599087655544281, |
|
"eval_runtime": 30610.1389, |
|
"eval_samples_per_second": 2.12, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 40.07337206794192, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.2614389829579702, |
|
"grad_norm": 1.536855936050415, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5852, |
|
"step": 23020 |
|
}, |
|
{ |
|
"epoch": 1.2625349334210094, |
|
"grad_norm": 1.884334683418274, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6474, |
|
"step": 23040 |
|
}, |
|
{ |
|
"epoch": 1.2636308838840484, |
|
"grad_norm": 1.359174132347107, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6204, |
|
"step": 23060 |
|
}, |
|
{ |
|
"epoch": 1.2647268343470874, |
|
"grad_norm": 1.7376055717468262, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5524, |
|
"step": 23080 |
|
}, |
|
{ |
|
"epoch": 1.2658227848101267, |
|
"grad_norm": 1.6594703197479248, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5396, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.2669187352731657, |
|
"grad_norm": 1.4215294122695923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5842, |
|
"step": 23120 |
|
}, |
|
{ |
|
"epoch": 1.2680146857362047, |
|
"grad_norm": 1.7680842876434326, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5625, |
|
"step": 23140 |
|
}, |
|
{ |
|
"epoch": 1.269110636199244, |
|
"grad_norm": 1.3353180885314941, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5775, |
|
"step": 23160 |
|
}, |
|
{ |
|
"epoch": 1.270206586662283, |
|
"grad_norm": 1.450649619102478, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5319, |
|
"step": 23180 |
|
}, |
|
{ |
|
"epoch": 1.271302537125322, |
|
"grad_norm": 1.7398178577423096, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5615, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.272398487588361, |
|
"grad_norm": 1.6940994262695312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6069, |
|
"step": 23220 |
|
}, |
|
{ |
|
"epoch": 1.2734944380514, |
|
"grad_norm": 1.899994969367981, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6491, |
|
"step": 23240 |
|
}, |
|
{ |
|
"epoch": 1.2745903885144392, |
|
"grad_norm": 1.457036018371582, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5762, |
|
"step": 23260 |
|
}, |
|
{ |
|
"epoch": 1.2756863389774782, |
|
"grad_norm": 1.4215611219406128, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5966, |
|
"step": 23280 |
|
}, |
|
{ |
|
"epoch": 1.2767822894405172, |
|
"grad_norm": 1.7165329456329346, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5897, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.2778782399035564, |
|
"grad_norm": 1.524688720703125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6541, |
|
"step": 23320 |
|
}, |
|
{ |
|
"epoch": 1.2789741903665954, |
|
"grad_norm": 1.3125251531600952, |
|
"learning_rate": 0.0001, |
|
"loss": 0.521, |
|
"step": 23340 |
|
}, |
|
{ |
|
"epoch": 1.2800701408296344, |
|
"grad_norm": 1.5787118673324585, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5453, |
|
"step": 23360 |
|
}, |
|
{ |
|
"epoch": 1.2811660912926737, |
|
"grad_norm": 1.636098027229309, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5606, |
|
"step": 23380 |
|
}, |
|
{ |
|
"epoch": 1.2822620417557127, |
|
"grad_norm": 1.4963462352752686, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6251, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.2833579922187517, |
|
"grad_norm": 1.3213664293289185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6293, |
|
"step": 23420 |
|
}, |
|
{ |
|
"epoch": 1.284453942681791, |
|
"grad_norm": 1.6737782955169678, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6499, |
|
"step": 23440 |
|
}, |
|
{ |
|
"epoch": 1.28554989314483, |
|
"grad_norm": 1.525976300239563, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6073, |
|
"step": 23460 |
|
}, |
|
{ |
|
"epoch": 1.286645843607869, |
|
"grad_norm": 1.3534733057022095, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5434, |
|
"step": 23480 |
|
}, |
|
{ |
|
"epoch": 1.287741794070908, |
|
"grad_norm": 1.8090375661849976, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6806, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.288837744533947, |
|
"grad_norm": 1.7110000848770142, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5678, |
|
"step": 23520 |
|
}, |
|
{ |
|
"epoch": 1.2899336949969862, |
|
"grad_norm": 1.6300121545791626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6674, |
|
"step": 23540 |
|
}, |
|
{ |
|
"epoch": 1.2910296454600252, |
|
"grad_norm": 1.4068278074264526, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5294, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 1.2921255959230642, |
|
"grad_norm": 1.6015020608901978, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5791, |
|
"step": 23580 |
|
}, |
|
{ |
|
"epoch": 1.2932215463861034, |
|
"grad_norm": 1.7282171249389648, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6358, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.2943174968491424, |
|
"grad_norm": 1.3395479917526245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5972, |
|
"step": 23620 |
|
}, |
|
{ |
|
"epoch": 1.2954134473121814, |
|
"grad_norm": 1.5393882989883423, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6243, |
|
"step": 23640 |
|
}, |
|
{ |
|
"epoch": 1.2965093977752207, |
|
"grad_norm": 2.0010182857513428, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6047, |
|
"step": 23660 |
|
}, |
|
{ |
|
"epoch": 1.2976053482382597, |
|
"grad_norm": 1.742031455039978, |
|
"learning_rate": 0.0001, |
|
"loss": 0.586, |
|
"step": 23680 |
|
}, |
|
{ |
|
"epoch": 1.2987012987012987, |
|
"grad_norm": 1.5596591234207153, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5545, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.2997972491643377, |
|
"grad_norm": 1.2098394632339478, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6264, |
|
"step": 23720 |
|
}, |
|
{ |
|
"epoch": 1.3008931996273767, |
|
"grad_norm": 1.5962443351745605, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5827, |
|
"step": 23740 |
|
}, |
|
{ |
|
"epoch": 1.301989150090416, |
|
"grad_norm": 1.7482990026474, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6113, |
|
"step": 23760 |
|
}, |
|
{ |
|
"epoch": 1.303085100553455, |
|
"grad_norm": 1.6832690238952637, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5231, |
|
"step": 23780 |
|
}, |
|
{ |
|
"epoch": 1.304181051016494, |
|
"grad_norm": 1.2904006242752075, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5472, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.3052770014795332, |
|
"grad_norm": 1.260377287864685, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5555, |
|
"step": 23820 |
|
}, |
|
{ |
|
"epoch": 1.3063729519425722, |
|
"grad_norm": 1.6346126794815063, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5247, |
|
"step": 23840 |
|
}, |
|
{ |
|
"epoch": 1.3074689024056112, |
|
"grad_norm": 1.5854252576828003, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5784, |
|
"step": 23860 |
|
}, |
|
{ |
|
"epoch": 1.3085648528686504, |
|
"grad_norm": 1.754293441772461, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6555, |
|
"step": 23880 |
|
}, |
|
{ |
|
"epoch": 1.3096608033316894, |
|
"grad_norm": 1.642504096031189, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5822, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.3107567537947284, |
|
"grad_norm": 1.4986265897750854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6246, |
|
"step": 23920 |
|
}, |
|
{ |
|
"epoch": 1.3118527042577677, |
|
"grad_norm": 1.6175062656402588, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5616, |
|
"step": 23940 |
|
}, |
|
{ |
|
"epoch": 1.3129486547208067, |
|
"grad_norm": 1.6189128160476685, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6108, |
|
"step": 23960 |
|
}, |
|
{ |
|
"epoch": 1.3140446051838457, |
|
"grad_norm": 1.6187801361083984, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5992, |
|
"step": 23980 |
|
}, |
|
{ |
|
"epoch": 1.3151405556468847, |
|
"grad_norm": 1.396136999130249, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5409, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.3151405556468847, |
|
"eval_loss": 0.5925264358520508, |
|
"eval_runtime": 30722.0761, |
|
"eval_samples_per_second": 2.112, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 50.53154519589281, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.3162365061099237, |
|
"grad_norm": 1.4604203701019287, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5795, |
|
"step": 24020 |
|
}, |
|
{ |
|
"epoch": 1.317332456572963, |
|
"grad_norm": 1.667830228805542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5799, |
|
"step": 24040 |
|
}, |
|
{ |
|
"epoch": 1.318428407036002, |
|
"grad_norm": 1.4066877365112305, |
|
"learning_rate": 0.0001, |
|
"loss": 0.603, |
|
"step": 24060 |
|
}, |
|
{ |
|
"epoch": 1.319524357499041, |
|
"grad_norm": 1.6986689567565918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5943, |
|
"step": 24080 |
|
}, |
|
{ |
|
"epoch": 1.3206203079620802, |
|
"grad_norm": 2.00864839553833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.646, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.3217162584251192, |
|
"grad_norm": 1.3557894229888916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6324, |
|
"step": 24120 |
|
}, |
|
{ |
|
"epoch": 1.3228122088881582, |
|
"grad_norm": 1.442983865737915, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5326, |
|
"step": 24140 |
|
}, |
|
{ |
|
"epoch": 1.3239081593511974, |
|
"grad_norm": 1.4924156665802002, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5464, |
|
"step": 24160 |
|
}, |
|
{ |
|
"epoch": 1.3250041098142364, |
|
"grad_norm": 1.408599615097046, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5989, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 1.3261000602772754, |
|
"grad_norm": 1.6432658433914185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6034, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.3271960107403145, |
|
"grad_norm": 1.5687427520751953, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5222, |
|
"step": 24220 |
|
}, |
|
{ |
|
"epoch": 1.3282919612033537, |
|
"grad_norm": 1.3878777027130127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5913, |
|
"step": 24240 |
|
}, |
|
{ |
|
"epoch": 1.3293879116663927, |
|
"grad_norm": 1.276931643486023, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5509, |
|
"step": 24260 |
|
}, |
|
{ |
|
"epoch": 1.3304838621294317, |
|
"grad_norm": 1.5601953268051147, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6077, |
|
"step": 24280 |
|
}, |
|
{ |
|
"epoch": 1.3315798125924707, |
|
"grad_norm": 1.9250099658966064, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5629, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.33267576305551, |
|
"grad_norm": 1.314794659614563, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6085, |
|
"step": 24320 |
|
}, |
|
{ |
|
"epoch": 1.333771713518549, |
|
"grad_norm": 1.4445682764053345, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5725, |
|
"step": 24340 |
|
}, |
|
{ |
|
"epoch": 1.334867663981588, |
|
"grad_norm": 1.6029905080795288, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6448, |
|
"step": 24360 |
|
}, |
|
{ |
|
"epoch": 1.3359636144446272, |
|
"grad_norm": 1.969078540802002, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5397, |
|
"step": 24380 |
|
}, |
|
{ |
|
"epoch": 1.3370595649076662, |
|
"grad_norm": 1.900762677192688, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6243, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.3381555153707052, |
|
"grad_norm": 1.8829255104064941, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5744, |
|
"step": 24420 |
|
}, |
|
{ |
|
"epoch": 1.3392514658337444, |
|
"grad_norm": 1.4927318096160889, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6023, |
|
"step": 24440 |
|
}, |
|
{ |
|
"epoch": 1.3403474162967834, |
|
"grad_norm": 1.7608602046966553, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5654, |
|
"step": 24460 |
|
}, |
|
{ |
|
"epoch": 1.3414433667598225, |
|
"grad_norm": 1.2257969379425049, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5953, |
|
"step": 24480 |
|
}, |
|
{ |
|
"epoch": 1.3425393172228615, |
|
"grad_norm": 1.5768262147903442, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5972, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.3436352676859005, |
|
"grad_norm": 1.5148476362228394, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6313, |
|
"step": 24520 |
|
}, |
|
{ |
|
"epoch": 1.3447312181489397, |
|
"grad_norm": 1.347442865371704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5867, |
|
"step": 24540 |
|
}, |
|
{ |
|
"epoch": 1.3458271686119787, |
|
"grad_norm": 1.3003042936325073, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5709, |
|
"step": 24560 |
|
}, |
|
{ |
|
"epoch": 1.3469231190750177, |
|
"grad_norm": 1.5577054023742676, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5719, |
|
"step": 24580 |
|
}, |
|
{ |
|
"epoch": 1.348019069538057, |
|
"grad_norm": 1.6141449213027954, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5684, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.349115020001096, |
|
"grad_norm": 1.515576958656311, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5421, |
|
"step": 24620 |
|
}, |
|
{ |
|
"epoch": 1.350210970464135, |
|
"grad_norm": 1.62236487865448, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6338, |
|
"step": 24640 |
|
}, |
|
{ |
|
"epoch": 1.3513069209271742, |
|
"grad_norm": 1.5557656288146973, |
|
"learning_rate": 0.0001, |
|
"loss": 0.55, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 1.3524028713902132, |
|
"grad_norm": 1.9079481363296509, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6352, |
|
"step": 24680 |
|
}, |
|
{ |
|
"epoch": 1.3534988218532522, |
|
"grad_norm": 1.3640868663787842, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6484, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.3545947723162914, |
|
"grad_norm": 1.831858515739441, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5577, |
|
"step": 24720 |
|
}, |
|
{ |
|
"epoch": 1.3556907227793304, |
|
"grad_norm": 1.5431773662567139, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5918, |
|
"step": 24740 |
|
}, |
|
{ |
|
"epoch": 1.3567866732423695, |
|
"grad_norm": 1.1655539274215698, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5892, |
|
"step": 24760 |
|
}, |
|
{ |
|
"epoch": 1.3578826237054085, |
|
"grad_norm": 1.7395954132080078, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5416, |
|
"step": 24780 |
|
}, |
|
{ |
|
"epoch": 1.3589785741684475, |
|
"grad_norm": 1.89164400100708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5708, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.3600745246314867, |
|
"grad_norm": 1.5013233423233032, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5234, |
|
"step": 24820 |
|
}, |
|
{ |
|
"epoch": 1.3611704750945257, |
|
"grad_norm": 1.4959980249404907, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7578, |
|
"step": 24840 |
|
}, |
|
{ |
|
"epoch": 1.3622664255575647, |
|
"grad_norm": 1.3962562084197998, |
|
"learning_rate": 0.0001, |
|
"loss": 0.564, |
|
"step": 24860 |
|
}, |
|
{ |
|
"epoch": 1.363362376020604, |
|
"grad_norm": 1.370695948600769, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5533, |
|
"step": 24880 |
|
}, |
|
{ |
|
"epoch": 1.364458326483643, |
|
"grad_norm": 1.629671573638916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5463, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.365554276946682, |
|
"grad_norm": 1.6115111112594604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5837, |
|
"step": 24920 |
|
}, |
|
{ |
|
"epoch": 1.3666502274097212, |
|
"grad_norm": 1.756898283958435, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5784, |
|
"step": 24940 |
|
}, |
|
{ |
|
"epoch": 1.3677461778727602, |
|
"grad_norm": 1.3999930620193481, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6007, |
|
"step": 24960 |
|
}, |
|
{ |
|
"epoch": 1.3688421283357992, |
|
"grad_norm": 1.3553202152252197, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5996, |
|
"step": 24980 |
|
}, |
|
{ |
|
"epoch": 1.3699380787988382, |
|
"grad_norm": 1.3020998239517212, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5006, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.3699380787988382, |
|
"eval_loss": 0.5878660678863525, |
|
"eval_runtime": 30562.5187, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 0.066, |
|
"eval_wer": 44.19117823283905, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.3710340292618772, |
|
"grad_norm": 1.622175931930542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6049, |
|
"step": 25020 |
|
}, |
|
{ |
|
"epoch": 1.3721299797249165, |
|
"grad_norm": 1.6178827285766602, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6272, |
|
"step": 25040 |
|
}, |
|
{ |
|
"epoch": 1.3732259301879555, |
|
"grad_norm": 1.9687530994415283, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6194, |
|
"step": 25060 |
|
}, |
|
{ |
|
"epoch": 1.3743218806509945, |
|
"grad_norm": 1.5638937950134277, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6033, |
|
"step": 25080 |
|
}, |
|
{ |
|
"epoch": 1.3754178311140337, |
|
"grad_norm": 1.5316487550735474, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6847, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.3765137815770727, |
|
"grad_norm": 2.431041717529297, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5975, |
|
"step": 25120 |
|
}, |
|
{ |
|
"epoch": 1.3776097320401117, |
|
"grad_norm": 1.2522825002670288, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6188, |
|
"step": 25140 |
|
}, |
|
{ |
|
"epoch": 1.378705682503151, |
|
"grad_norm": 1.428832769393921, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4743, |
|
"step": 25160 |
|
}, |
|
{ |
|
"epoch": 1.37980163296619, |
|
"grad_norm": 1.6147892475128174, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6463, |
|
"step": 25180 |
|
}, |
|
{ |
|
"epoch": 1.380897583429229, |
|
"grad_norm": 1.6648330688476562, |
|
"learning_rate": 0.0001, |
|
"loss": 0.556, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.3819935338922682, |
|
"grad_norm": 1.4745285511016846, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5997, |
|
"step": 25220 |
|
}, |
|
{ |
|
"epoch": 1.3830894843553072, |
|
"grad_norm": 1.433664321899414, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5288, |
|
"step": 25240 |
|
}, |
|
{ |
|
"epoch": 1.3841854348183462, |
|
"grad_norm": 1.264054775238037, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6075, |
|
"step": 25260 |
|
}, |
|
{ |
|
"epoch": 1.3852813852813852, |
|
"grad_norm": 1.7683794498443604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6011, |
|
"step": 25280 |
|
}, |
|
{ |
|
"epoch": 1.3863773357444242, |
|
"grad_norm": 1.5316460132598877, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5799, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.3874732862074635, |
|
"grad_norm": 3.361269950866699, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6577, |
|
"step": 25320 |
|
}, |
|
{ |
|
"epoch": 1.3885692366705025, |
|
"grad_norm": 1.7918739318847656, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6106, |
|
"step": 25340 |
|
}, |
|
{ |
|
"epoch": 1.3896651871335415, |
|
"grad_norm": 1.5828975439071655, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5683, |
|
"step": 25360 |
|
}, |
|
{ |
|
"epoch": 1.3907611375965807, |
|
"grad_norm": 1.9565653800964355, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5368, |
|
"step": 25380 |
|
}, |
|
{ |
|
"epoch": 1.3918570880596197, |
|
"grad_norm": 1.446603775024414, |
|
"learning_rate": 0.0001, |
|
"loss": 0.551, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.3929530385226587, |
|
"grad_norm": 1.3377403020858765, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5889, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 1.394048988985698, |
|
"grad_norm": 1.5956981182098389, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5962, |
|
"step": 25440 |
|
}, |
|
{ |
|
"epoch": 1.395144939448737, |
|
"grad_norm": 2.122570276260376, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6044, |
|
"step": 25460 |
|
}, |
|
{ |
|
"epoch": 1.396240889911776, |
|
"grad_norm": 1.8314157724380493, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5612, |
|
"step": 25480 |
|
}, |
|
{ |
|
"epoch": 1.397336840374815, |
|
"grad_norm": 1.327453851699829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5214, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.3984327908378542, |
|
"grad_norm": 2.1016182899475098, |
|
"learning_rate": 0.0001, |
|
"loss": 0.594, |
|
"step": 25520 |
|
}, |
|
{ |
|
"epoch": 1.3995287413008932, |
|
"grad_norm": 1.5892709493637085, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5927, |
|
"step": 25540 |
|
}, |
|
{ |
|
"epoch": 1.4006246917639322, |
|
"grad_norm": 1.8178175687789917, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5204, |
|
"step": 25560 |
|
}, |
|
{ |
|
"epoch": 1.4017206422269712, |
|
"grad_norm": 1.3808900117874146, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5434, |
|
"step": 25580 |
|
}, |
|
{ |
|
"epoch": 1.4028165926900105, |
|
"grad_norm": 1.4849821329116821, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6208, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.4039125431530495, |
|
"grad_norm": 1.5404740571975708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5492, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 1.4050084936160885, |
|
"grad_norm": 1.4663268327713013, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5275, |
|
"step": 25640 |
|
}, |
|
{ |
|
"epoch": 1.4061044440791277, |
|
"grad_norm": 1.5518896579742432, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5966, |
|
"step": 25660 |
|
}, |
|
{ |
|
"epoch": 1.4072003945421667, |
|
"grad_norm": 1.5495002269744873, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5451, |
|
"step": 25680 |
|
}, |
|
{ |
|
"epoch": 1.4082963450052057, |
|
"grad_norm": 1.5400805473327637, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6543, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.409392295468245, |
|
"grad_norm": 1.5806297063827515, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5849, |
|
"step": 25720 |
|
}, |
|
{ |
|
"epoch": 1.410488245931284, |
|
"grad_norm": 1.574846863746643, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5653, |
|
"step": 25740 |
|
}, |
|
{ |
|
"epoch": 1.411584196394323, |
|
"grad_norm": 1.4742170572280884, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5173, |
|
"step": 25760 |
|
}, |
|
{ |
|
"epoch": 1.412680146857362, |
|
"grad_norm": 2.009706735610962, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5467, |
|
"step": 25780 |
|
}, |
|
{ |
|
"epoch": 1.413776097320401, |
|
"grad_norm": 1.9192876815795898, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6211, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.4148720477834402, |
|
"grad_norm": 1.2425312995910645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5398, |
|
"step": 25820 |
|
}, |
|
{ |
|
"epoch": 1.4159679982464792, |
|
"grad_norm": 1.8032817840576172, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6143, |
|
"step": 25840 |
|
}, |
|
{ |
|
"epoch": 1.4170639487095182, |
|
"grad_norm": 1.5889620780944824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6361, |
|
"step": 25860 |
|
}, |
|
{ |
|
"epoch": 1.4181598991725575, |
|
"grad_norm": 1.3312866687774658, |
|
"learning_rate": 0.0001, |
|
"loss": 0.637, |
|
"step": 25880 |
|
}, |
|
{ |
|
"epoch": 1.4192558496355965, |
|
"grad_norm": 1.4662201404571533, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6206, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.4203518000986355, |
|
"grad_norm": 1.7590441703796387, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5637, |
|
"step": 25920 |
|
}, |
|
{ |
|
"epoch": 1.4214477505616747, |
|
"grad_norm": 1.5360428094863892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6033, |
|
"step": 25940 |
|
}, |
|
{ |
|
"epoch": 1.4225437010247137, |
|
"grad_norm": 1.6273736953735352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.559, |
|
"step": 25960 |
|
}, |
|
{ |
|
"epoch": 1.4236396514877527, |
|
"grad_norm": 1.3700859546661377, |
|
"learning_rate": 0.0001, |
|
"loss": 0.637, |
|
"step": 25980 |
|
}, |
|
{ |
|
"epoch": 1.4247356019507917, |
|
"grad_norm": 1.419872522354126, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5674, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.4247356019507917, |
|
"eval_loss": 0.5845187306404114, |
|
"eval_runtime": 30268.7098, |
|
"eval_samples_per_second": 2.144, |
|
"eval_steps_per_second": 0.067, |
|
"eval_wer": 33.472796184515516, |
|
"step": 26000 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 54747, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.278115609397166e+21, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|