whisper-tiny-common_voice_17_0-id / trainer_state.json
Commit d994d81 (verified): End of training
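As a reading aid only (this block is not part of the checkpoint file), the minimal Python sketch below shows one way to inspect the log_history entries in this trainer_state.json: it separates the periodic evaluation records (written every eval_steps = 1000 steps, carrying eval_loss and eval_wer) from the per-25-step training-loss logs. The relative path "trainer_state.json" is an assumption.

import json

# Load the raw Trainer state dumped alongside the checkpoint
# (path assumed to be relative to the repository root).
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history mixes two kinds of records: training-loss logs every 25 steps
# and evaluation records every 1000 steps (eval_loss, eval_wer, runtime, ...).
evals = [entry for entry in state["log_history"] if "eval_wer" in entry]
for entry in evals:
    print(f'step {entry["step"]:>6}  epoch {entry["epoch"]:.2f}  '
          f'eval_loss {entry["eval_loss"]:.4f}  WER {entry["eval_wer"]:.4f}')

On the portion of the log shown below, this prints one row per evaluation, with WER falling from roughly 0.332 at step 1000 to about 0.179 at step 10000.
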
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.458447874814972,
"eval_steps": 1000,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010573059843518714,
"grad_norm": 59.40861129760742,
"learning_rate": 4.2000000000000006e-07,
"loss": 3.5073,
"step": 25
},
{
"epoch": 0.02114611968703743,
"grad_norm": 48.962947845458984,
"learning_rate": 9.200000000000001e-07,
"loss": 3.1121,
"step": 50
},
{
"epoch": 0.03171917953055614,
"grad_norm": 31.433500289916992,
"learning_rate": 1.42e-06,
"loss": 2.3354,
"step": 75
},
{
"epoch": 0.04229223937407486,
"grad_norm": 21.955114364624023,
"learning_rate": 1.9200000000000003e-06,
"loss": 1.676,
"step": 100
},
{
"epoch": 0.05286529921759357,
"grad_norm": 16.944923400878906,
"learning_rate": 2.42e-06,
"loss": 1.1735,
"step": 125
},
{
"epoch": 0.06343835906111228,
"grad_norm": 17.72088623046875,
"learning_rate": 2.92e-06,
"loss": 1.0192,
"step": 150
},
{
"epoch": 0.074011418904631,
"grad_norm": 14.417439460754395,
"learning_rate": 3.4200000000000007e-06,
"loss": 0.9191,
"step": 175
},
{
"epoch": 0.08458447874814971,
"grad_norm": 15.664250373840332,
"learning_rate": 3.920000000000001e-06,
"loss": 0.87,
"step": 200
},
{
"epoch": 0.09515753859166842,
"grad_norm": 15.430508613586426,
"learning_rate": 4.42e-06,
"loss": 0.8069,
"step": 225
},
{
"epoch": 0.10573059843518715,
"grad_norm": 16.615825653076172,
"learning_rate": 4.92e-06,
"loss": 0.7783,
"step": 250
},
{
"epoch": 0.11630365827870585,
"grad_norm": 19.77940559387207,
"learning_rate": 5.420000000000001e-06,
"loss": 0.7989,
"step": 275
},
{
"epoch": 0.12687671812222456,
"grad_norm": 14.731008529663086,
"learning_rate": 5.92e-06,
"loss": 0.7918,
"step": 300
},
{
"epoch": 0.13744977796574329,
"grad_norm": 14.75744915008545,
"learning_rate": 6.42e-06,
"loss": 0.7237,
"step": 325
},
{
"epoch": 0.148022837809262,
"grad_norm": 15.654507637023926,
"learning_rate": 6.92e-06,
"loss": 0.6985,
"step": 350
},
{
"epoch": 0.1585958976527807,
"grad_norm": 14.50328540802002,
"learning_rate": 7.420000000000001e-06,
"loss": 0.7516,
"step": 375
},
{
"epoch": 0.16916895749629943,
"grad_norm": 15.2078275680542,
"learning_rate": 7.92e-06,
"loss": 0.636,
"step": 400
},
{
"epoch": 0.17974201733981815,
"grad_norm": 13.78581714630127,
"learning_rate": 8.42e-06,
"loss": 0.7175,
"step": 425
},
{
"epoch": 0.19031507718333684,
"grad_norm": 13.427356719970703,
"learning_rate": 8.920000000000001e-06,
"loss": 0.6515,
"step": 450
},
{
"epoch": 0.20088813702685557,
"grad_norm": 14.424434661865234,
"learning_rate": 9.42e-06,
"loss": 0.626,
"step": 475
},
{
"epoch": 0.2114611968703743,
"grad_norm": 11.046919822692871,
"learning_rate": 9.920000000000002e-06,
"loss": 0.6333,
"step": 500
},
{
"epoch": 0.222034256713893,
"grad_norm": 12.185227394104004,
"learning_rate": 9.98923076923077e-06,
"loss": 0.6356,
"step": 525
},
{
"epoch": 0.2326073165574117,
"grad_norm": 15.45386791229248,
"learning_rate": 9.976410256410257e-06,
"loss": 0.6256,
"step": 550
},
{
"epoch": 0.24318037640093043,
"grad_norm": 15.364215850830078,
"learning_rate": 9.963589743589744e-06,
"loss": 0.6079,
"step": 575
},
{
"epoch": 0.2537534362444491,
"grad_norm": 12.546235084533691,
"learning_rate": 9.950769230769232e-06,
"loss": 0.588,
"step": 600
},
{
"epoch": 0.2643264960879679,
"grad_norm": 17.720355987548828,
"learning_rate": 9.937948717948719e-06,
"loss": 0.6077,
"step": 625
},
{
"epoch": 0.27489955593148657,
"grad_norm": 14.936497688293457,
"learning_rate": 9.925128205128206e-06,
"loss": 0.5884,
"step": 650
},
{
"epoch": 0.28547261577500527,
"grad_norm": 13.690508842468262,
"learning_rate": 9.912307692307693e-06,
"loss": 0.5346,
"step": 675
},
{
"epoch": 0.296045675618524,
"grad_norm": 14.297891616821289,
"learning_rate": 9.899487179487181e-06,
"loss": 0.5544,
"step": 700
},
{
"epoch": 0.3066187354620427,
"grad_norm": 13.275521278381348,
"learning_rate": 9.886666666666668e-06,
"loss": 0.5572,
"step": 725
},
{
"epoch": 0.3171917953055614,
"grad_norm": 12.175935745239258,
"learning_rate": 9.873846153846155e-06,
"loss": 0.5401,
"step": 750
},
{
"epoch": 0.32776485514908016,
"grad_norm": 14.95595932006836,
"learning_rate": 9.861025641025642e-06,
"loss": 0.5794,
"step": 775
},
{
"epoch": 0.33833791499259885,
"grad_norm": 16.529836654663086,
"learning_rate": 9.84820512820513e-06,
"loss": 0.5597,
"step": 800
},
{
"epoch": 0.34891097483611755,
"grad_norm": 18.89246368408203,
"learning_rate": 9.835384615384617e-06,
"loss": 0.5481,
"step": 825
},
{
"epoch": 0.3594840346796363,
"grad_norm": 14.310006141662598,
"learning_rate": 9.822564102564104e-06,
"loss": 0.5452,
"step": 850
},
{
"epoch": 0.370057094523155,
"grad_norm": 12.255072593688965,
"learning_rate": 9.80974358974359e-06,
"loss": 0.4932,
"step": 875
},
{
"epoch": 0.3806301543666737,
"grad_norm": 10.623993873596191,
"learning_rate": 9.796923076923077e-06,
"loss": 0.5534,
"step": 900
},
{
"epoch": 0.39120321421019244,
"grad_norm": 11.355914115905762,
"learning_rate": 9.784102564102564e-06,
"loss": 0.5154,
"step": 925
},
{
"epoch": 0.40177627405371114,
"grad_norm": 10.385716438293457,
"learning_rate": 9.771282051282051e-06,
"loss": 0.5318,
"step": 950
},
{
"epoch": 0.4123493338972299,
"grad_norm": 11.580507278442383,
"learning_rate": 9.75846153846154e-06,
"loss": 0.564,
"step": 975
},
{
"epoch": 0.4229223937407486,
"grad_norm": 18.01216697692871,
"learning_rate": 9.745641025641026e-06,
"loss": 0.4911,
"step": 1000
},
{
"epoch": 0.4229223937407486,
"eval_loss": 0.45455271005630493,
"eval_runtime": 445.4699,
"eval_samples_per_second": 8.173,
"eval_steps_per_second": 1.024,
"eval_wer": 0.3321267808250963,
"step": 1000
},
{
"epoch": 0.4334954535842673,
"grad_norm": 12.367226600646973,
"learning_rate": 9.732820512820513e-06,
"loss": 0.5246,
"step": 1025
},
{
"epoch": 0.444068513427786,
"grad_norm": 11.605467796325684,
"learning_rate": 9.72e-06,
"loss": 0.4813,
"step": 1050
},
{
"epoch": 0.4546415732713047,
"grad_norm": 12.51762580871582,
"learning_rate": 9.707179487179487e-06,
"loss": 0.4954,
"step": 1075
},
{
"epoch": 0.4652146331148234,
"grad_norm": 10.517561912536621,
"learning_rate": 9.694358974358975e-06,
"loss": 0.4649,
"step": 1100
},
{
"epoch": 0.47578769295834217,
"grad_norm": 10.634852409362793,
"learning_rate": 9.681538461538462e-06,
"loss": 0.4426,
"step": 1125
},
{
"epoch": 0.48636075280186086,
"grad_norm": 11.74187183380127,
"learning_rate": 9.668717948717949e-06,
"loss": 0.5364,
"step": 1150
},
{
"epoch": 0.49693381264537956,
"grad_norm": 9.76657772064209,
"learning_rate": 9.655897435897436e-06,
"loss": 0.4363,
"step": 1175
},
{
"epoch": 0.5075068724888983,
"grad_norm": 13.493627548217773,
"learning_rate": 9.643076923076924e-06,
"loss": 0.4867,
"step": 1200
},
{
"epoch": 0.518079932332417,
"grad_norm": 13.604817390441895,
"learning_rate": 9.630256410256411e-06,
"loss": 0.4778,
"step": 1225
},
{
"epoch": 0.5286529921759358,
"grad_norm": 11.087203979492188,
"learning_rate": 9.617435897435898e-06,
"loss": 0.4571,
"step": 1250
},
{
"epoch": 0.5392260520194544,
"grad_norm": 11.273646354675293,
"learning_rate": 9.604615384615385e-06,
"loss": 0.4579,
"step": 1275
},
{
"epoch": 0.5497991118629731,
"grad_norm": 11.16568660736084,
"learning_rate": 9.591794871794873e-06,
"loss": 0.4568,
"step": 1300
},
{
"epoch": 0.5603721717064919,
"grad_norm": 9.37689208984375,
"learning_rate": 9.57897435897436e-06,
"loss": 0.4577,
"step": 1325
},
{
"epoch": 0.5709452315500105,
"grad_norm": 15.166648864746094,
"learning_rate": 9.566153846153847e-06,
"loss": 0.439,
"step": 1350
},
{
"epoch": 0.5815182913935293,
"grad_norm": 9.157109260559082,
"learning_rate": 9.553333333333334e-06,
"loss": 0.4611,
"step": 1375
},
{
"epoch": 0.592091351237048,
"grad_norm": 11.311399459838867,
"learning_rate": 9.540512820512822e-06,
"loss": 0.4449,
"step": 1400
},
{
"epoch": 0.6026644110805667,
"grad_norm": 10.030913352966309,
"learning_rate": 9.52769230769231e-06,
"loss": 0.4634,
"step": 1425
},
{
"epoch": 0.6132374709240854,
"grad_norm": 12.241848945617676,
"learning_rate": 9.514871794871796e-06,
"loss": 0.4374,
"step": 1450
},
{
"epoch": 0.6238105307676042,
"grad_norm": 9.528079986572266,
"learning_rate": 9.502051282051283e-06,
"loss": 0.4202,
"step": 1475
},
{
"epoch": 0.6343835906111228,
"grad_norm": 9.903778076171875,
"learning_rate": 9.48923076923077e-06,
"loss": 0.3996,
"step": 1500
},
{
"epoch": 0.6449566504546416,
"grad_norm": 11.979716300964355,
"learning_rate": 9.476410256410257e-06,
"loss": 0.4275,
"step": 1525
},
{
"epoch": 0.6555297102981603,
"grad_norm": 11.950919151306152,
"learning_rate": 9.463589743589743e-06,
"loss": 0.4537,
"step": 1550
},
{
"epoch": 0.666102770141679,
"grad_norm": 12.06128978729248,
"learning_rate": 9.450769230769232e-06,
"loss": 0.4567,
"step": 1575
},
{
"epoch": 0.6766758299851977,
"grad_norm": 12.286346435546875,
"learning_rate": 9.437948717948719e-06,
"loss": 0.4666,
"step": 1600
},
{
"epoch": 0.6872488898287165,
"grad_norm": 14.898148536682129,
"learning_rate": 9.425128205128206e-06,
"loss": 0.4442,
"step": 1625
},
{
"epoch": 0.6978219496722351,
"grad_norm": 11.827438354492188,
"learning_rate": 9.412307692307692e-06,
"loss": 0.4067,
"step": 1650
},
{
"epoch": 0.7083950095157538,
"grad_norm": 12.726160049438477,
"learning_rate": 9.39948717948718e-06,
"loss": 0.4191,
"step": 1675
},
{
"epoch": 0.7189680693592726,
"grad_norm": 12.630799293518066,
"learning_rate": 9.386666666666668e-06,
"loss": 0.4785,
"step": 1700
},
{
"epoch": 0.7295411292027912,
"grad_norm": 10.818669319152832,
"learning_rate": 9.373846153846155e-06,
"loss": 0.4636,
"step": 1725
},
{
"epoch": 0.74011418904631,
"grad_norm": 11.605988502502441,
"learning_rate": 9.361025641025641e-06,
"loss": 0.398,
"step": 1750
},
{
"epoch": 0.7506872488898287,
"grad_norm": 13.083930969238281,
"learning_rate": 9.348205128205128e-06,
"loss": 0.4364,
"step": 1775
},
{
"epoch": 0.7612603087333474,
"grad_norm": 9.565361022949219,
"learning_rate": 9.335384615384617e-06,
"loss": 0.3984,
"step": 1800
},
{
"epoch": 0.7718333685768661,
"grad_norm": 11.05252742767334,
"learning_rate": 9.322564102564104e-06,
"loss": 0.412,
"step": 1825
},
{
"epoch": 0.7824064284203849,
"grad_norm": 8.760066032409668,
"learning_rate": 9.30974358974359e-06,
"loss": 0.4375,
"step": 1850
},
{
"epoch": 0.7929794882639035,
"grad_norm": 9.936036109924316,
"learning_rate": 9.296923076923077e-06,
"loss": 0.4016,
"step": 1875
},
{
"epoch": 0.8035525481074223,
"grad_norm": 10.414031028747559,
"learning_rate": 9.284102564102566e-06,
"loss": 0.4877,
"step": 1900
},
{
"epoch": 0.814125607950941,
"grad_norm": 13.588311195373535,
"learning_rate": 9.271282051282053e-06,
"loss": 0.4117,
"step": 1925
},
{
"epoch": 0.8246986677944598,
"grad_norm": 11.925647735595703,
"learning_rate": 9.25846153846154e-06,
"loss": 0.4358,
"step": 1950
},
{
"epoch": 0.8352717276379784,
"grad_norm": 10.80334186553955,
"learning_rate": 9.245641025641026e-06,
"loss": 0.3665,
"step": 1975
},
{
"epoch": 0.8458447874814972,
"grad_norm": 8.696456909179688,
"learning_rate": 9.232820512820515e-06,
"loss": 0.4078,
"step": 2000
},
{
"epoch": 0.8458447874814972,
"eval_loss": 0.3520306944847107,
"eval_runtime": 449.1186,
"eval_samples_per_second": 8.107,
"eval_steps_per_second": 1.015,
"eval_wer": 0.28070908162791774,
"step": 2000
},
{
"epoch": 0.8564178473250159,
"grad_norm": 9.429954528808594,
"learning_rate": 9.220000000000002e-06,
"loss": 0.4529,
"step": 2025
},
{
"epoch": 0.8669909071685346,
"grad_norm": 10.896949768066406,
"learning_rate": 9.207179487179488e-06,
"loss": 0.3886,
"step": 2050
},
{
"epoch": 0.8775639670120533,
"grad_norm": 12.577573776245117,
"learning_rate": 9.194358974358975e-06,
"loss": 0.3831,
"step": 2075
},
{
"epoch": 0.888137026855572,
"grad_norm": 7.320788860321045,
"learning_rate": 9.181538461538464e-06,
"loss": 0.3698,
"step": 2100
},
{
"epoch": 0.8987100866990907,
"grad_norm": 9.53736400604248,
"learning_rate": 9.168717948717949e-06,
"loss": 0.3932,
"step": 2125
},
{
"epoch": 0.9092831465426094,
"grad_norm": 11.435847282409668,
"learning_rate": 9.155897435897436e-06,
"loss": 0.4034,
"step": 2150
},
{
"epoch": 0.9198562063861282,
"grad_norm": 13.579808235168457,
"learning_rate": 9.143076923076924e-06,
"loss": 0.3795,
"step": 2175
},
{
"epoch": 0.9304292662296468,
"grad_norm": 10.266934394836426,
"learning_rate": 9.130256410256411e-06,
"loss": 0.3833,
"step": 2200
},
{
"epoch": 0.9410023260731656,
"grad_norm": 11.516539573669434,
"learning_rate": 9.117435897435898e-06,
"loss": 0.3711,
"step": 2225
},
{
"epoch": 0.9515753859166843,
"grad_norm": 8.717472076416016,
"learning_rate": 9.104615384615385e-06,
"loss": 0.3711,
"step": 2250
},
{
"epoch": 0.962148445760203,
"grad_norm": 10.881747245788574,
"learning_rate": 9.091794871794873e-06,
"loss": 0.4005,
"step": 2275
},
{
"epoch": 0.9727215056037217,
"grad_norm": 11.219775199890137,
"learning_rate": 9.07897435897436e-06,
"loss": 0.3693,
"step": 2300
},
{
"epoch": 0.9832945654472405,
"grad_norm": 11.497136116027832,
"learning_rate": 9.066153846153847e-06,
"loss": 0.3515,
"step": 2325
},
{
"epoch": 0.9938676252907591,
"grad_norm": 14.518182754516602,
"learning_rate": 9.053333333333334e-06,
"loss": 0.3744,
"step": 2350
},
{
"epoch": 1.0044406851342778,
"grad_norm": 8.55494213104248,
"learning_rate": 9.04051282051282e-06,
"loss": 0.342,
"step": 2375
},
{
"epoch": 1.0150137449777965,
"grad_norm": 9.091997146606445,
"learning_rate": 9.027692307692309e-06,
"loss": 0.3083,
"step": 2400
},
{
"epoch": 1.0255868048213153,
"grad_norm": 9.034937858581543,
"learning_rate": 9.014871794871796e-06,
"loss": 0.3031,
"step": 2425
},
{
"epoch": 1.036159864664834,
"grad_norm": 13.605484008789062,
"learning_rate": 9.002051282051283e-06,
"loss": 0.3051,
"step": 2450
},
{
"epoch": 1.0467329245083528,
"grad_norm": 10.612420082092285,
"learning_rate": 8.98923076923077e-06,
"loss": 0.2721,
"step": 2475
},
{
"epoch": 1.0573059843518715,
"grad_norm": 8.616438865661621,
"learning_rate": 8.976410256410258e-06,
"loss": 0.2989,
"step": 2500
},
{
"epoch": 1.06787904419539,
"grad_norm": 10.424883842468262,
"learning_rate": 8.963589743589745e-06,
"loss": 0.2742,
"step": 2525
},
{
"epoch": 1.0784521040389088,
"grad_norm": 9.381563186645508,
"learning_rate": 8.950769230769232e-06,
"loss": 0.331,
"step": 2550
},
{
"epoch": 1.0890251638824275,
"grad_norm": 7.882634162902832,
"learning_rate": 8.937948717948718e-06,
"loss": 0.2943,
"step": 2575
},
{
"epoch": 1.0995982237259463,
"grad_norm": 7.2311601638793945,
"learning_rate": 8.925128205128207e-06,
"loss": 0.2694,
"step": 2600
},
{
"epoch": 1.110171283569465,
"grad_norm": 8.663016319274902,
"learning_rate": 8.912307692307694e-06,
"loss": 0.2807,
"step": 2625
},
{
"epoch": 1.1207443434129838,
"grad_norm": 9.223522186279297,
"learning_rate": 8.89948717948718e-06,
"loss": 0.2863,
"step": 2650
},
{
"epoch": 1.1313174032565025,
"grad_norm": 8.749750137329102,
"learning_rate": 8.886666666666667e-06,
"loss": 0.2888,
"step": 2675
},
{
"epoch": 1.141890463100021,
"grad_norm": 9.135551452636719,
"learning_rate": 8.873846153846156e-06,
"loss": 0.3135,
"step": 2700
},
{
"epoch": 1.1524635229435398,
"grad_norm": 9.296309471130371,
"learning_rate": 8.861025641025641e-06,
"loss": 0.2728,
"step": 2725
},
{
"epoch": 1.1630365827870586,
"grad_norm": 9.713606834411621,
"learning_rate": 8.848205128205128e-06,
"loss": 0.2781,
"step": 2750
},
{
"epoch": 1.1736096426305773,
"grad_norm": 9.5156888961792,
"learning_rate": 8.835384615384616e-06,
"loss": 0.2863,
"step": 2775
},
{
"epoch": 1.184182702474096,
"grad_norm": 7.512620449066162,
"learning_rate": 8.822564102564103e-06,
"loss": 0.2719,
"step": 2800
},
{
"epoch": 1.1947557623176146,
"grad_norm": 8.517248153686523,
"learning_rate": 8.80974358974359e-06,
"loss": 0.2609,
"step": 2825
},
{
"epoch": 1.2053288221611334,
"grad_norm": 9.989941596984863,
"learning_rate": 8.796923076923077e-06,
"loss": 0.2476,
"step": 2850
},
{
"epoch": 1.215901882004652,
"grad_norm": 10.013803482055664,
"learning_rate": 8.784102564102565e-06,
"loss": 0.2683,
"step": 2875
},
{
"epoch": 1.2264749418481709,
"grad_norm": 8.619990348815918,
"learning_rate": 8.771282051282052e-06,
"loss": 0.2867,
"step": 2900
},
{
"epoch": 1.2370480016916896,
"grad_norm": 8.64876937866211,
"learning_rate": 8.758461538461539e-06,
"loss": 0.2843,
"step": 2925
},
{
"epoch": 1.2476210615352084,
"grad_norm": 6.9110541343688965,
"learning_rate": 8.745641025641026e-06,
"loss": 0.2731,
"step": 2950
},
{
"epoch": 1.258194121378727,
"grad_norm": 9.259427070617676,
"learning_rate": 8.732820512820513e-06,
"loss": 0.2849,
"step": 2975
},
{
"epoch": 1.2687671812222456,
"grad_norm": 8.702181816101074,
"learning_rate": 8.720000000000001e-06,
"loss": 0.2679,
"step": 3000
},
{
"epoch": 1.2687671812222456,
"eval_loss": 0.3050294816493988,
"eval_runtime": 439.9928,
"eval_samples_per_second": 8.275,
"eval_steps_per_second": 1.036,
"eval_wer": 0.24209940136433244,
"step": 3000
},
{
"epoch": 1.2793402410657644,
"grad_norm": 7.549104690551758,
"learning_rate": 8.707179487179488e-06,
"loss": 0.2563,
"step": 3025
},
{
"epoch": 1.2899133009092831,
"grad_norm": 9.588334083557129,
"learning_rate": 8.694358974358975e-06,
"loss": 0.3047,
"step": 3050
},
{
"epoch": 1.3004863607528019,
"grad_norm": 9.205412864685059,
"learning_rate": 8.681538461538462e-06,
"loss": 0.2512,
"step": 3075
},
{
"epoch": 1.3110594205963206,
"grad_norm": 9.721763610839844,
"learning_rate": 8.66871794871795e-06,
"loss": 0.2827,
"step": 3100
},
{
"epoch": 1.3216324804398392,
"grad_norm": 9.485997200012207,
"learning_rate": 8.655897435897437e-06,
"loss": 0.3243,
"step": 3125
},
{
"epoch": 1.3322055402833581,
"grad_norm": 9.156095504760742,
"learning_rate": 8.643076923076924e-06,
"loss": 0.2937,
"step": 3150
},
{
"epoch": 1.3427786001268767,
"grad_norm": 6.857023239135742,
"learning_rate": 8.63025641025641e-06,
"loss": 0.3141,
"step": 3175
},
{
"epoch": 1.3533516599703954,
"grad_norm": 9.577827453613281,
"learning_rate": 8.6174358974359e-06,
"loss": 0.2856,
"step": 3200
},
{
"epoch": 1.3639247198139142,
"grad_norm": 10.062349319458008,
"learning_rate": 8.604615384615386e-06,
"loss": 0.2712,
"step": 3225
},
{
"epoch": 1.374497779657433,
"grad_norm": 10.257223129272461,
"learning_rate": 8.591794871794873e-06,
"loss": 0.2936,
"step": 3250
},
{
"epoch": 1.3850708395009517,
"grad_norm": 8.2437105178833,
"learning_rate": 8.57897435897436e-06,
"loss": 0.239,
"step": 3275
},
{
"epoch": 1.3956438993444702,
"grad_norm": 8.366438865661621,
"learning_rate": 8.566153846153848e-06,
"loss": 0.2726,
"step": 3300
},
{
"epoch": 1.406216959187989,
"grad_norm": 10.779414176940918,
"learning_rate": 8.553333333333333e-06,
"loss": 0.2646,
"step": 3325
},
{
"epoch": 1.4167900190315077,
"grad_norm": 11.594691276550293,
"learning_rate": 8.54051282051282e-06,
"loss": 0.2835,
"step": 3350
},
{
"epoch": 1.4273630788750264,
"grad_norm": 8.851668357849121,
"learning_rate": 8.527692307692309e-06,
"loss": 0.256,
"step": 3375
},
{
"epoch": 1.4379361387185452,
"grad_norm": 7.315084457397461,
"learning_rate": 8.514871794871795e-06,
"loss": 0.2359,
"step": 3400
},
{
"epoch": 1.4485091985620637,
"grad_norm": 9.037189483642578,
"learning_rate": 8.502051282051282e-06,
"loss": 0.2955,
"step": 3425
},
{
"epoch": 1.4590822584055827,
"grad_norm": 10.756566047668457,
"learning_rate": 8.489230769230769e-06,
"loss": 0.2964,
"step": 3450
},
{
"epoch": 1.4696553182491012,
"grad_norm": 9.553821563720703,
"learning_rate": 8.476410256410258e-06,
"loss": 0.2674,
"step": 3475
},
{
"epoch": 1.48022837809262,
"grad_norm": 8.768060684204102,
"learning_rate": 8.463589743589744e-06,
"loss": 0.2678,
"step": 3500
},
{
"epoch": 1.4908014379361387,
"grad_norm": 8.738430976867676,
"learning_rate": 8.450769230769231e-06,
"loss": 0.2391,
"step": 3525
},
{
"epoch": 1.5013744977796575,
"grad_norm": 7.157522201538086,
"learning_rate": 8.437948717948718e-06,
"loss": 0.2845,
"step": 3550
},
{
"epoch": 1.5119475576231762,
"grad_norm": 9.683340072631836,
"learning_rate": 8.425128205128205e-06,
"loss": 0.2961,
"step": 3575
},
{
"epoch": 1.5225206174666948,
"grad_norm": 13.411097526550293,
"learning_rate": 8.412307692307693e-06,
"loss": 0.2654,
"step": 3600
},
{
"epoch": 1.5330936773102137,
"grad_norm": 8.458112716674805,
"learning_rate": 8.39948717948718e-06,
"loss": 0.2771,
"step": 3625
},
{
"epoch": 1.5436667371537323,
"grad_norm": 5.15408182144165,
"learning_rate": 8.386666666666667e-06,
"loss": 0.2654,
"step": 3650
},
{
"epoch": 1.554239796997251,
"grad_norm": 7.64996862411499,
"learning_rate": 8.373846153846154e-06,
"loss": 0.2631,
"step": 3675
},
{
"epoch": 1.5648128568407698,
"grad_norm": 10.501543998718262,
"learning_rate": 8.361025641025642e-06,
"loss": 0.2666,
"step": 3700
},
{
"epoch": 1.5753859166842883,
"grad_norm": 7.34133768081665,
"learning_rate": 8.34820512820513e-06,
"loss": 0.2186,
"step": 3725
},
{
"epoch": 1.5859589765278073,
"grad_norm": 9.846173286437988,
"learning_rate": 8.335384615384616e-06,
"loss": 0.264,
"step": 3750
},
{
"epoch": 1.5965320363713258,
"grad_norm": 7.888981819152832,
"learning_rate": 8.322564102564103e-06,
"loss": 0.2304,
"step": 3775
},
{
"epoch": 1.6071050962148445,
"grad_norm": 14.889488220214844,
"learning_rate": 8.309743589743591e-06,
"loss": 0.2509,
"step": 3800
},
{
"epoch": 1.6176781560583633,
"grad_norm": 7.089621067047119,
"learning_rate": 8.296923076923078e-06,
"loss": 0.2638,
"step": 3825
},
{
"epoch": 1.628251215901882,
"grad_norm": 8.458942413330078,
"learning_rate": 8.284102564102565e-06,
"loss": 0.2433,
"step": 3850
},
{
"epoch": 1.6388242757454008,
"grad_norm": 9.503662109375,
"learning_rate": 8.271282051282052e-06,
"loss": 0.2392,
"step": 3875
},
{
"epoch": 1.6493973355889193,
"grad_norm": 8.198525428771973,
"learning_rate": 8.25846153846154e-06,
"loss": 0.2561,
"step": 3900
},
{
"epoch": 1.6599703954324383,
"grad_norm": 8.04946517944336,
"learning_rate": 8.245641025641027e-06,
"loss": 0.262,
"step": 3925
},
{
"epoch": 1.6705434552759568,
"grad_norm": 11.769758224487305,
"learning_rate": 8.232820512820512e-06,
"loss": 0.2351,
"step": 3950
},
{
"epoch": 1.6811165151194756,
"grad_norm": 10.128782272338867,
"learning_rate": 8.220000000000001e-06,
"loss": 0.2444,
"step": 3975
},
{
"epoch": 1.6916895749629943,
"grad_norm": 9.632699966430664,
"learning_rate": 8.207179487179488e-06,
"loss": 0.2423,
"step": 4000
},
{
"epoch": 1.6916895749629943,
"eval_loss": 0.27250364422798157,
"eval_runtime": 441.2867,
"eval_samples_per_second": 8.251,
"eval_steps_per_second": 1.033,
"eval_wer": 0.22172722632140704,
"step": 4000
},
{
"epoch": 1.7022626348065129,
"grad_norm": 5.695612907409668,
"learning_rate": 8.194358974358975e-06,
"loss": 0.274,
"step": 4025
},
{
"epoch": 1.7128356946500318,
"grad_norm": 10.697525024414062,
"learning_rate": 8.181538461538461e-06,
"loss": 0.2634,
"step": 4050
},
{
"epoch": 1.7234087544935504,
"grad_norm": 12.469213485717773,
"learning_rate": 8.16871794871795e-06,
"loss": 0.2551,
"step": 4075
},
{
"epoch": 1.733981814337069,
"grad_norm": 7.183727264404297,
"learning_rate": 8.155897435897437e-06,
"loss": 0.2787,
"step": 4100
},
{
"epoch": 1.7445548741805879,
"grad_norm": 8.577070236206055,
"learning_rate": 8.143076923076924e-06,
"loss": 0.24,
"step": 4125
},
{
"epoch": 1.7551279340241066,
"grad_norm": 8.086338996887207,
"learning_rate": 8.13025641025641e-06,
"loss": 0.2337,
"step": 4150
},
{
"epoch": 1.7657009938676254,
"grad_norm": 10.768961906433105,
"learning_rate": 8.117435897435897e-06,
"loss": 0.2335,
"step": 4175
},
{
"epoch": 1.7762740537111439,
"grad_norm": 8.794693946838379,
"learning_rate": 8.104615384615386e-06,
"loss": 0.275,
"step": 4200
},
{
"epoch": 1.7868471135546629,
"grad_norm": 9.108808517456055,
"learning_rate": 8.091794871794873e-06,
"loss": 0.2573,
"step": 4225
},
{
"epoch": 1.7974201733981814,
"grad_norm": 8.723715782165527,
"learning_rate": 8.07897435897436e-06,
"loss": 0.2637,
"step": 4250
},
{
"epoch": 1.8079932332417001,
"grad_norm": 9.015399932861328,
"learning_rate": 8.066153846153846e-06,
"loss": 0.2372,
"step": 4275
},
{
"epoch": 1.8185662930852189,
"grad_norm": 8.118802070617676,
"learning_rate": 8.053333333333335e-06,
"loss": 0.2461,
"step": 4300
},
{
"epoch": 1.8291393529287374,
"grad_norm": 7.922321796417236,
"learning_rate": 8.040512820512822e-06,
"loss": 0.2389,
"step": 4325
},
{
"epoch": 1.8397124127722564,
"grad_norm": 13.12256145477295,
"learning_rate": 8.027692307692308e-06,
"loss": 0.2526,
"step": 4350
},
{
"epoch": 1.850285472615775,
"grad_norm": 7.943728923797607,
"learning_rate": 8.014871794871795e-06,
"loss": 0.2399,
"step": 4375
},
{
"epoch": 1.8608585324592937,
"grad_norm": 8.962715148925781,
"learning_rate": 8.002051282051284e-06,
"loss": 0.23,
"step": 4400
},
{
"epoch": 1.8714315923028124,
"grad_norm": 9.588024139404297,
"learning_rate": 7.98923076923077e-06,
"loss": 0.2147,
"step": 4425
},
{
"epoch": 1.8820046521463312,
"grad_norm": 10.069765090942383,
"learning_rate": 7.976410256410257e-06,
"loss": 0.2641,
"step": 4450
},
{
"epoch": 1.89257771198985,
"grad_norm": 9.397997856140137,
"learning_rate": 7.963589743589744e-06,
"loss": 0.2138,
"step": 4475
},
{
"epoch": 1.9031507718333684,
"grad_norm": 8.495611190795898,
"learning_rate": 7.950769230769233e-06,
"loss": 0.234,
"step": 4500
},
{
"epoch": 1.9137238316768874,
"grad_norm": 9.057598114013672,
"learning_rate": 7.93794871794872e-06,
"loss": 0.2415,
"step": 4525
},
{
"epoch": 1.924296891520406,
"grad_norm": 11.452310562133789,
"learning_rate": 7.925128205128205e-06,
"loss": 0.2428,
"step": 4550
},
{
"epoch": 1.9348699513639247,
"grad_norm": 8.656147003173828,
"learning_rate": 7.912307692307693e-06,
"loss": 0.2148,
"step": 4575
},
{
"epoch": 1.9454430112074435,
"grad_norm": 7.229951858520508,
"learning_rate": 7.89948717948718e-06,
"loss": 0.2303,
"step": 4600
},
{
"epoch": 1.9560160710509622,
"grad_norm": 9.499919891357422,
"learning_rate": 7.886666666666667e-06,
"loss": 0.2458,
"step": 4625
},
{
"epoch": 1.966589130894481,
"grad_norm": 8.735013008117676,
"learning_rate": 7.873846153846154e-06,
"loss": 0.2474,
"step": 4650
},
{
"epoch": 1.9771621907379995,
"grad_norm": 7.496155261993408,
"learning_rate": 7.861025641025642e-06,
"loss": 0.2132,
"step": 4675
},
{
"epoch": 1.9877352505815185,
"grad_norm": 10.107941627502441,
"learning_rate": 7.848205128205129e-06,
"loss": 0.2357,
"step": 4700
},
{
"epoch": 1.998308310425037,
"grad_norm": 9.089138984680176,
"learning_rate": 7.835384615384616e-06,
"loss": 0.2426,
"step": 4725
},
{
"epoch": 2.0088813702685555,
"grad_norm": 6.171664714813232,
"learning_rate": 7.822564102564103e-06,
"loss": 0.1577,
"step": 4750
},
{
"epoch": 2.0194544301120745,
"grad_norm": 8.21010684967041,
"learning_rate": 7.80974358974359e-06,
"loss": 0.1737,
"step": 4775
},
{
"epoch": 2.030027489955593,
"grad_norm": 7.623335838317871,
"learning_rate": 7.796923076923078e-06,
"loss": 0.1755,
"step": 4800
},
{
"epoch": 2.040600549799112,
"grad_norm": 6.5446391105651855,
"learning_rate": 7.784102564102565e-06,
"loss": 0.173,
"step": 4825
},
{
"epoch": 2.0511736096426305,
"grad_norm": 7.576420307159424,
"learning_rate": 7.771282051282052e-06,
"loss": 0.1673,
"step": 4850
},
{
"epoch": 2.0617466694861495,
"grad_norm": 6.1533074378967285,
"learning_rate": 7.758461538461538e-06,
"loss": 0.1584,
"step": 4875
},
{
"epoch": 2.072319729329668,
"grad_norm": 8.039182662963867,
"learning_rate": 7.745641025641027e-06,
"loss": 0.1761,
"step": 4900
},
{
"epoch": 2.0828927891731865,
"grad_norm": 6.898794174194336,
"learning_rate": 7.732820512820514e-06,
"loss": 0.1626,
"step": 4925
},
{
"epoch": 2.0934658490167055,
"grad_norm": 8.714920997619629,
"learning_rate": 7.72e-06,
"loss": 0.1822,
"step": 4950
},
{
"epoch": 2.104038908860224,
"grad_norm": 7.137439727783203,
"learning_rate": 7.707179487179487e-06,
"loss": 0.1531,
"step": 4975
},
{
"epoch": 2.114611968703743,
"grad_norm": 7.202429294586182,
"learning_rate": 7.694358974358976e-06,
"loss": 0.169,
"step": 5000
},
{
"epoch": 2.114611968703743,
"eval_loss": 0.25147655606269836,
"eval_runtime": 441.5298,
"eval_samples_per_second": 8.246,
"eval_steps_per_second": 1.033,
"eval_wer": 0.21838600399090444,
"step": 5000
},
{
"epoch": 2.1251850285472615,
"grad_norm": 8.99411678314209,
"learning_rate": 7.681538461538463e-06,
"loss": 0.1642,
"step": 5025
},
{
"epoch": 2.13575808839078,
"grad_norm": 7.8761420249938965,
"learning_rate": 7.66871794871795e-06,
"loss": 0.1877,
"step": 5050
},
{
"epoch": 2.146331148234299,
"grad_norm": 8.506233215332031,
"learning_rate": 7.655897435897436e-06,
"loss": 0.1671,
"step": 5075
},
{
"epoch": 2.1569042080778176,
"grad_norm": 11.958497047424316,
"learning_rate": 7.643076923076925e-06,
"loss": 0.181,
"step": 5100
},
{
"epoch": 2.1674772679213365,
"grad_norm": 6.008707523345947,
"learning_rate": 7.630256410256412e-06,
"loss": 0.1655,
"step": 5125
},
{
"epoch": 2.178050327764855,
"grad_norm": 7.9332451820373535,
"learning_rate": 7.617435897435898e-06,
"loss": 0.1782,
"step": 5150
},
{
"epoch": 2.188623387608374,
"grad_norm": 9.106295585632324,
"learning_rate": 7.604615384615385e-06,
"loss": 0.1521,
"step": 5175
},
{
"epoch": 2.1991964474518926,
"grad_norm": 7.487992286682129,
"learning_rate": 7.591794871794872e-06,
"loss": 0.1735,
"step": 5200
},
{
"epoch": 2.209769507295411,
"grad_norm": 9.595857620239258,
"learning_rate": 7.578974358974359e-06,
"loss": 0.1642,
"step": 5225
},
{
"epoch": 2.22034256713893,
"grad_norm": 6.064367294311523,
"learning_rate": 7.566153846153847e-06,
"loss": 0.1515,
"step": 5250
},
{
"epoch": 2.2309156269824486,
"grad_norm": 6.732094764709473,
"learning_rate": 7.553333333333334e-06,
"loss": 0.1615,
"step": 5275
},
{
"epoch": 2.2414886868259676,
"grad_norm": 6.89235258102417,
"learning_rate": 7.540512820512821e-06,
"loss": 0.1519,
"step": 5300
},
{
"epoch": 2.252061746669486,
"grad_norm": 6.550455570220947,
"learning_rate": 7.527692307692308e-06,
"loss": 0.1606,
"step": 5325
},
{
"epoch": 2.262634806513005,
"grad_norm": 6.678434371948242,
"learning_rate": 7.514871794871795e-06,
"loss": 0.1653,
"step": 5350
},
{
"epoch": 2.2732078663565236,
"grad_norm": 8.418935775756836,
"learning_rate": 7.5020512820512826e-06,
"loss": 0.1574,
"step": 5375
},
{
"epoch": 2.283780926200042,
"grad_norm": 6.893301486968994,
"learning_rate": 7.489230769230769e-06,
"loss": 0.1647,
"step": 5400
},
{
"epoch": 2.294353986043561,
"grad_norm": 7.643205165863037,
"learning_rate": 7.476410256410257e-06,
"loss": 0.187,
"step": 5425
},
{
"epoch": 2.3049270458870796,
"grad_norm": 9.927549362182617,
"learning_rate": 7.463589743589744e-06,
"loss": 0.1677,
"step": 5450
},
{
"epoch": 2.3155001057305986,
"grad_norm": 10.559179306030273,
"learning_rate": 7.4507692307692316e-06,
"loss": 0.1644,
"step": 5475
},
{
"epoch": 2.326073165574117,
"grad_norm": 7.4924187660217285,
"learning_rate": 7.437948717948718e-06,
"loss": 0.1544,
"step": 5500
},
{
"epoch": 2.3366462254176357,
"grad_norm": 7.554123401641846,
"learning_rate": 7.425128205128206e-06,
"loss": 0.1891,
"step": 5525
},
{
"epoch": 2.3472192852611546,
"grad_norm": 7.673270225524902,
"learning_rate": 7.412307692307693e-06,
"loss": 0.1841,
"step": 5550
},
{
"epoch": 2.357792345104673,
"grad_norm": 9.479844093322754,
"learning_rate": 7.3994871794871806e-06,
"loss": 0.1706,
"step": 5575
},
{
"epoch": 2.368365404948192,
"grad_norm": 11.788003921508789,
"learning_rate": 7.386666666666667e-06,
"loss": 0.1573,
"step": 5600
},
{
"epoch": 2.3789384647917107,
"grad_norm": 10.670574188232422,
"learning_rate": 7.373846153846155e-06,
"loss": 0.1762,
"step": 5625
},
{
"epoch": 2.389511524635229,
"grad_norm": 5.672201633453369,
"learning_rate": 7.361025641025642e-06,
"loss": 0.1868,
"step": 5650
},
{
"epoch": 2.400084584478748,
"grad_norm": 6.6854729652404785,
"learning_rate": 7.3482051282051295e-06,
"loss": 0.1377,
"step": 5675
},
{
"epoch": 2.4106576443222667,
"grad_norm": 8.25365924835205,
"learning_rate": 7.335384615384616e-06,
"loss": 0.181,
"step": 5700
},
{
"epoch": 2.4212307041657857,
"grad_norm": 8.488265991210938,
"learning_rate": 7.322564102564104e-06,
"loss": 0.1528,
"step": 5725
},
{
"epoch": 2.431803764009304,
"grad_norm": 11.608367919921875,
"learning_rate": 7.309743589743591e-06,
"loss": 0.1564,
"step": 5750
},
{
"epoch": 2.442376823852823,
"grad_norm": 10.334943771362305,
"learning_rate": 7.296923076923077e-06,
"loss": 0.1676,
"step": 5775
},
{
"epoch": 2.4529498836963417,
"grad_norm": 9.751703262329102,
"learning_rate": 7.2841025641025645e-06,
"loss": 0.157,
"step": 5800
},
{
"epoch": 2.4635229435398607,
"grad_norm": 8.788719177246094,
"learning_rate": 7.271282051282051e-06,
"loss": 0.1664,
"step": 5825
},
{
"epoch": 2.474096003383379,
"grad_norm": 5.966717720031738,
"learning_rate": 7.258461538461539e-06,
"loss": 0.1569,
"step": 5850
},
{
"epoch": 2.4846690632268977,
"grad_norm": 9.089386940002441,
"learning_rate": 7.245641025641026e-06,
"loss": 0.1751,
"step": 5875
},
{
"epoch": 2.4952421230704167,
"grad_norm": 7.7753190994262695,
"learning_rate": 7.2328205128205135e-06,
"loss": 0.174,
"step": 5900
},
{
"epoch": 2.5058151829139352,
"grad_norm": 8.18852424621582,
"learning_rate": 7.22e-06,
"loss": 0.2026,
"step": 5925
},
{
"epoch": 2.516388242757454,
"grad_norm": 7.778114318847656,
"learning_rate": 7.207179487179487e-06,
"loss": 0.181,
"step": 5950
},
{
"epoch": 2.5269613026009727,
"grad_norm": 7.439593315124512,
"learning_rate": 7.194358974358975e-06,
"loss": 0.1679,
"step": 5975
},
{
"epoch": 2.5375343624444913,
"grad_norm": 12.546255111694336,
"learning_rate": 7.181538461538462e-06,
"loss": 0.1646,
"step": 6000
},
{
"epoch": 2.5375343624444913,
"eval_loss": 0.23773407936096191,
"eval_runtime": 439.7036,
"eval_samples_per_second": 8.281,
"eval_steps_per_second": 1.037,
"eval_wer": 0.20822311940229246,
"step": 6000
},
{
"epoch": 2.5481074222880102,
"grad_norm": 5.546166896820068,
"learning_rate": 7.168717948717949e-06,
"loss": 0.1642,
"step": 6025
},
{
"epoch": 2.5586804821315288,
"grad_norm": 6.507399082183838,
"learning_rate": 7.155897435897436e-06,
"loss": 0.1715,
"step": 6050
},
{
"epoch": 2.5692535419750477,
"grad_norm": 8.499435424804688,
"learning_rate": 7.143076923076924e-06,
"loss": 0.1724,
"step": 6075
},
{
"epoch": 2.5798266018185663,
"grad_norm": 6.351449966430664,
"learning_rate": 7.130256410256411e-06,
"loss": 0.1514,
"step": 6100
},
{
"epoch": 2.590399661662085,
"grad_norm": 5.271184921264648,
"learning_rate": 7.117435897435898e-06,
"loss": 0.1625,
"step": 6125
},
{
"epoch": 2.6009727215056038,
"grad_norm": 6.257565021514893,
"learning_rate": 7.104615384615385e-06,
"loss": 0.1386,
"step": 6150
},
{
"epoch": 2.6115457813491223,
"grad_norm": 8.022757530212402,
"learning_rate": 7.0923076923076926e-06,
"loss": 0.1545,
"step": 6175
},
{
"epoch": 2.6221188411926413,
"grad_norm": 9.464363098144531,
"learning_rate": 7.07948717948718e-06,
"loss": 0.1729,
"step": 6200
},
{
"epoch": 2.63269190103616,
"grad_norm": 9.03287124633789,
"learning_rate": 7.066666666666667e-06,
"loss": 0.1885,
"step": 6225
},
{
"epoch": 2.6432649608796783,
"grad_norm": 10.790355682373047,
"learning_rate": 7.053846153846155e-06,
"loss": 0.1629,
"step": 6250
},
{
"epoch": 2.6538380207231973,
"grad_norm": 6.963956832885742,
"learning_rate": 7.0410256410256415e-06,
"loss": 0.1771,
"step": 6275
},
{
"epoch": 2.6644110805667163,
"grad_norm": 8.90701675415039,
"learning_rate": 7.028205128205129e-06,
"loss": 0.1514,
"step": 6300
},
{
"epoch": 2.674984140410235,
"grad_norm": 6.524221420288086,
"learning_rate": 7.015384615384616e-06,
"loss": 0.1486,
"step": 6325
},
{
"epoch": 2.6855572002537533,
"grad_norm": 6.47484827041626,
"learning_rate": 7.002564102564104e-06,
"loss": 0.1344,
"step": 6350
},
{
"epoch": 2.6961302600972723,
"grad_norm": 7.201345443725586,
"learning_rate": 6.9897435897435905e-06,
"loss": 0.1458,
"step": 6375
},
{
"epoch": 2.706703319940791,
"grad_norm": 6.066169261932373,
"learning_rate": 6.976923076923078e-06,
"loss": 0.1546,
"step": 6400
},
{
"epoch": 2.71727637978431,
"grad_norm": 7.907600402832031,
"learning_rate": 6.964102564102565e-06,
"loss": 0.1713,
"step": 6425
},
{
"epoch": 2.7278494396278283,
"grad_norm": 7.963566303253174,
"learning_rate": 6.951282051282052e-06,
"loss": 0.1704,
"step": 6450
},
{
"epoch": 2.738422499471347,
"grad_norm": 7.709939956665039,
"learning_rate": 6.9384615384615395e-06,
"loss": 0.1685,
"step": 6475
},
{
"epoch": 2.748995559314866,
"grad_norm": 6.702678680419922,
"learning_rate": 6.925641025641026e-06,
"loss": 0.1777,
"step": 6500
},
{
"epoch": 2.7595686191583844,
"grad_norm": 5.540548324584961,
"learning_rate": 6.912820512820514e-06,
"loss": 0.1513,
"step": 6525
},
{
"epoch": 2.7701416790019033,
"grad_norm": 5.520662307739258,
"learning_rate": 6.9e-06,
"loss": 0.1692,
"step": 6550
},
{
"epoch": 2.780714738845422,
"grad_norm": 9.791701316833496,
"learning_rate": 6.887179487179488e-06,
"loss": 0.1518,
"step": 6575
},
{
"epoch": 2.7912877986889404,
"grad_norm": 6.41890811920166,
"learning_rate": 6.8743589743589745e-06,
"loss": 0.1525,
"step": 6600
},
{
"epoch": 2.8018608585324594,
"grad_norm": 6.0221943855285645,
"learning_rate": 6.861538461538461e-06,
"loss": 0.1637,
"step": 6625
},
{
"epoch": 2.812433918375978,
"grad_norm": 8.487308502197266,
"learning_rate": 6.848717948717949e-06,
"loss": 0.1431,
"step": 6650
},
{
"epoch": 2.823006978219497,
"grad_norm": 15.519043922424316,
"learning_rate": 6.835897435897436e-06,
"loss": 0.1604,
"step": 6675
},
{
"epoch": 2.8335800380630154,
"grad_norm": 6.480257987976074,
"learning_rate": 6.8230769230769235e-06,
"loss": 0.1465,
"step": 6700
},
{
"epoch": 2.844153097906534,
"grad_norm": 6.450438976287842,
"learning_rate": 6.81025641025641e-06,
"loss": 0.1476,
"step": 6725
},
{
"epoch": 2.854726157750053,
"grad_norm": 5.721517562866211,
"learning_rate": 6.797435897435898e-06,
"loss": 0.1825,
"step": 6750
},
{
"epoch": 2.8652992175935714,
"grad_norm": 9.688868522644043,
"learning_rate": 6.784615384615385e-06,
"loss": 0.1669,
"step": 6775
},
{
"epoch": 2.8758722774370904,
"grad_norm": 8.336874961853027,
"learning_rate": 6.7717948717948725e-06,
"loss": 0.1434,
"step": 6800
},
{
"epoch": 2.886445337280609,
"grad_norm": 8.098302841186523,
"learning_rate": 6.758974358974359e-06,
"loss": 0.1506,
"step": 6825
},
{
"epoch": 2.8970183971241275,
"grad_norm": 5.927369117736816,
"learning_rate": 6.746153846153847e-06,
"loss": 0.1607,
"step": 6850
},
{
"epoch": 2.9075914569676464,
"grad_norm": 20.486467361450195,
"learning_rate": 6.733333333333334e-06,
"loss": 0.1616,
"step": 6875
},
{
"epoch": 2.9181645168111654,
"grad_norm": 9.060097694396973,
"learning_rate": 6.7205128205128215e-06,
"loss": 0.1671,
"step": 6900
},
{
"epoch": 2.928737576654684,
"grad_norm": 6.587460517883301,
"learning_rate": 6.707692307692308e-06,
"loss": 0.1645,
"step": 6925
},
{
"epoch": 2.9393106364982025,
"grad_norm": 14.763110160827637,
"learning_rate": 6.694871794871796e-06,
"loss": 0.1485,
"step": 6950
},
{
"epoch": 2.9498836963417214,
"grad_norm": 7.697717666625977,
"learning_rate": 6.682051282051283e-06,
"loss": 0.1664,
"step": 6975
},
{
"epoch": 2.96045675618524,
"grad_norm": 8.12340259552002,
"learning_rate": 6.6692307692307705e-06,
"loss": 0.1731,
"step": 7000
},
{
"epoch": 2.96045675618524,
"eval_loss": 0.2189464271068573,
"eval_runtime": 440.062,
"eval_samples_per_second": 8.274,
"eval_steps_per_second": 1.036,
"eval_wer": 0.19109935495846675,
"step": 7000
},
{
"epoch": 2.971029816028759,
"grad_norm": 6.9427809715271,
"learning_rate": 6.656410256410257e-06,
"loss": 0.1594,
"step": 7025
},
{
"epoch": 2.9816028758722775,
"grad_norm": 7.702503204345703,
"learning_rate": 6.643589743589744e-06,
"loss": 0.1508,
"step": 7050
},
{
"epoch": 2.992175935715796,
"grad_norm": 8.890095710754395,
"learning_rate": 6.630769230769232e-06,
"loss": 0.1757,
"step": 7075
},
{
"epoch": 3.002748995559315,
"grad_norm": 5.879498481750488,
"learning_rate": 6.617948717948719e-06,
"loss": 0.1206,
"step": 7100
},
{
"epoch": 3.0133220554028335,
"grad_norm": 4.067333221435547,
"learning_rate": 6.605128205128206e-06,
"loss": 0.1087,
"step": 7125
},
{
"epoch": 3.0238951152463525,
"grad_norm": 4.698537826538086,
"learning_rate": 6.592307692307692e-06,
"loss": 0.1071,
"step": 7150
},
{
"epoch": 3.034468175089871,
"grad_norm": 7.69912576675415,
"learning_rate": 6.57948717948718e-06,
"loss": 0.1102,
"step": 7175
},
{
"epoch": 3.0450412349333895,
"grad_norm": 6.612318992614746,
"learning_rate": 6.566666666666667e-06,
"loss": 0.1069,
"step": 7200
},
{
"epoch": 3.0556142947769085,
"grad_norm": 6.134123802185059,
"learning_rate": 6.553846153846154e-06,
"loss": 0.1154,
"step": 7225
},
{
"epoch": 3.066187354620427,
"grad_norm": 6.283836841583252,
"learning_rate": 6.541025641025641e-06,
"loss": 0.1334,
"step": 7250
},
{
"epoch": 3.076760414463946,
"grad_norm": 5.295098781585693,
"learning_rate": 6.528205128205128e-06,
"loss": 0.1064,
"step": 7275
},
{
"epoch": 3.0873334743074645,
"grad_norm": 4.781207084655762,
"learning_rate": 6.515384615384616e-06,
"loss": 0.0878,
"step": 7300
},
{
"epoch": 3.0979065341509835,
"grad_norm": 8.744128227233887,
"learning_rate": 6.5025641025641026e-06,
"loss": 0.117,
"step": 7325
},
{
"epoch": 3.108479593994502,
"grad_norm": 6.0972900390625,
"learning_rate": 6.48974358974359e-06,
"loss": 0.1024,
"step": 7350
},
{
"epoch": 3.1190526538380206,
"grad_norm": 5.786510944366455,
"learning_rate": 6.476923076923077e-06,
"loss": 0.1036,
"step": 7375
},
{
"epoch": 3.1296257136815395,
"grad_norm": 5.329776763916016,
"learning_rate": 6.464102564102565e-06,
"loss": 0.1122,
"step": 7400
},
{
"epoch": 3.140198773525058,
"grad_norm": 6.620870590209961,
"learning_rate": 6.4512820512820516e-06,
"loss": 0.094,
"step": 7425
},
{
"epoch": 3.150771833368577,
"grad_norm": 5.6966962814331055,
"learning_rate": 6.438461538461539e-06,
"loss": 0.1034,
"step": 7450
},
{
"epoch": 3.1613448932120956,
"grad_norm": 5.702958583831787,
"learning_rate": 6.425641025641026e-06,
"loss": 0.1002,
"step": 7475
},
{
"epoch": 3.1719179530556145,
"grad_norm": 6.589195251464844,
"learning_rate": 6.412820512820514e-06,
"loss": 0.0962,
"step": 7500
},
{
"epoch": 3.182491012899133,
"grad_norm": 6.6714558601379395,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.1006,
"step": 7525
},
{
"epoch": 3.1930640727426516,
"grad_norm": 6.5564165115356445,
"learning_rate": 6.387179487179488e-06,
"loss": 0.1045,
"step": 7550
},
{
"epoch": 3.2036371325861706,
"grad_norm": 5.653778076171875,
"learning_rate": 6.374358974358975e-06,
"loss": 0.1251,
"step": 7575
},
{
"epoch": 3.214210192429689,
"grad_norm": 5.369845867156982,
"learning_rate": 6.361538461538463e-06,
"loss": 0.1021,
"step": 7600
},
{
"epoch": 3.224783252273208,
"grad_norm": 10.076266288757324,
"learning_rate": 6.3487179487179495e-06,
"loss": 0.0952,
"step": 7625
},
{
"epoch": 3.2353563121167266,
"grad_norm": 7.338042259216309,
"learning_rate": 6.335897435897436e-06,
"loss": 0.0995,
"step": 7650
},
{
"epoch": 3.245929371960245,
"grad_norm": 7.094664573669434,
"learning_rate": 6.323076923076924e-06,
"loss": 0.1049,
"step": 7675
},
{
"epoch": 3.256502431803764,
"grad_norm": 3.372304916381836,
"learning_rate": 6.310256410256411e-06,
"loss": 0.1015,
"step": 7700
},
{
"epoch": 3.2670754916472826,
"grad_norm": 6.649098873138428,
"learning_rate": 6.2974358974358985e-06,
"loss": 0.0854,
"step": 7725
},
{
"epoch": 3.2776485514908016,
"grad_norm": 6.159810543060303,
"learning_rate": 6.284615384615385e-06,
"loss": 0.103,
"step": 7750
},
{
"epoch": 3.28822161133432,
"grad_norm": 7.026951313018799,
"learning_rate": 6.271794871794872e-06,
"loss": 0.0999,
"step": 7775
},
{
"epoch": 3.2987946711778386,
"grad_norm": 6.820842266082764,
"learning_rate": 6.258974358974359e-06,
"loss": 0.1172,
"step": 7800
},
{
"epoch": 3.3093677310213576,
"grad_norm": 4.771303176879883,
"learning_rate": 6.246153846153846e-06,
"loss": 0.1003,
"step": 7825
},
{
"epoch": 3.319940790864876,
"grad_norm": 8.24927043914795,
"learning_rate": 6.2333333333333335e-06,
"loss": 0.0958,
"step": 7850
},
{
"epoch": 3.330513850708395,
"grad_norm": 5.464736461639404,
"learning_rate": 6.22051282051282e-06,
"loss": 0.1236,
"step": 7875
},
{
"epoch": 3.3410869105519136,
"grad_norm": 9.046813011169434,
"learning_rate": 6.207692307692308e-06,
"loss": 0.1195,
"step": 7900
},
{
"epoch": 3.3516599703954326,
"grad_norm": 9.43157958984375,
"learning_rate": 6.194871794871795e-06,
"loss": 0.1182,
"step": 7925
},
{
"epoch": 3.362233030238951,
"grad_norm": 5.381494045257568,
"learning_rate": 6.1820512820512825e-06,
"loss": 0.1256,
"step": 7950
},
{
"epoch": 3.3728060900824697,
"grad_norm": 5.340794086456299,
"learning_rate": 6.169230769230769e-06,
"loss": 0.1161,
"step": 7975
},
{
"epoch": 3.3833791499259886,
"grad_norm": 5.442756652832031,
"learning_rate": 6.156410256410257e-06,
"loss": 0.1017,
"step": 8000
},
{
"epoch": 3.3833791499259886,
"eval_loss": 0.21353289484977722,
"eval_runtime": 440.3677,
"eval_samples_per_second": 8.268,
"eval_steps_per_second": 1.035,
"eval_wer": 0.1970393057682491,
"step": 8000
},
{
"epoch": 3.393952209769507,
"grad_norm": 7.739430904388428,
"learning_rate": 6.143589743589744e-06,
"loss": 0.0922,
"step": 8025
},
{
"epoch": 3.404525269613026,
"grad_norm": 6.592048168182373,
"learning_rate": 6.1307692307692315e-06,
"loss": 0.116,
"step": 8050
},
{
"epoch": 3.4150983294565447,
"grad_norm": 6.631498336791992,
"learning_rate": 6.117948717948718e-06,
"loss": 0.0926,
"step": 8075
},
{
"epoch": 3.4256713893000637,
"grad_norm": 4.4608049392700195,
"learning_rate": 6.105128205128206e-06,
"loss": 0.0946,
"step": 8100
},
{
"epoch": 3.436244449143582,
"grad_norm": 6.894134044647217,
"learning_rate": 6.092307692307693e-06,
"loss": 0.1056,
"step": 8125
},
{
"epoch": 3.4468175089871007,
"grad_norm": 7.685297966003418,
"learning_rate": 6.0794871794871805e-06,
"loss": 0.1031,
"step": 8150
},
{
"epoch": 3.4573905688306197,
"grad_norm": 5.096285820007324,
"learning_rate": 6.066666666666667e-06,
"loss": 0.1157,
"step": 8175
},
{
"epoch": 3.467963628674138,
"grad_norm": 4.975133419036865,
"learning_rate": 6.053846153846155e-06,
"loss": 0.1033,
"step": 8200
},
{
"epoch": 3.478536688517657,
"grad_norm": 4.345186233520508,
"learning_rate": 6.041025641025642e-06,
"loss": 0.0987,
"step": 8225
},
{
"epoch": 3.4891097483611757,
"grad_norm": 5.9824299812316895,
"learning_rate": 6.028205128205129e-06,
"loss": 0.0985,
"step": 8250
},
{
"epoch": 3.4996828082046942,
"grad_norm": 7.680878162384033,
"learning_rate": 6.015384615384616e-06,
"loss": 0.1005,
"step": 8275
},
{
"epoch": 3.510255868048213,
"grad_norm": 6.961033344268799,
"learning_rate": 6.002564102564103e-06,
"loss": 0.1046,
"step": 8300
},
{
"epoch": 3.5208289278917317,
"grad_norm": 6.000370025634766,
"learning_rate": 5.989743589743591e-06,
"loss": 0.1193,
"step": 8325
},
{
"epoch": 3.5314019877352507,
"grad_norm": 5.969180583953857,
"learning_rate": 5.976923076923078e-06,
"loss": 0.0992,
"step": 8350
},
{
"epoch": 3.5419750475787692,
"grad_norm": 7.239658355712891,
"learning_rate": 5.9641025641025644e-06,
"loss": 0.1076,
"step": 8375
},
{
"epoch": 3.5525481074222878,
"grad_norm": 6.9521708488464355,
"learning_rate": 5.951282051282051e-06,
"loss": 0.088,
"step": 8400
},
{
"epoch": 3.5631211672658067,
"grad_norm": 5.2356109619140625,
"learning_rate": 5.938461538461538e-06,
"loss": 0.111,
"step": 8425
},
{
"epoch": 3.5736942271093257,
"grad_norm": 7.44065523147583,
"learning_rate": 5.925641025641026e-06,
"loss": 0.1006,
"step": 8450
},
{
"epoch": 3.5842672869528442,
"grad_norm": 6.604626178741455,
"learning_rate": 5.912820512820513e-06,
"loss": 0.1187,
"step": 8475
},
{
"epoch": 3.5948403467963628,
"grad_norm": 5.469221591949463,
"learning_rate": 5.9e-06,
"loss": 0.1256,
"step": 8500
},
{
"epoch": 3.6054134066398817,
"grad_norm": 3.496335983276367,
"learning_rate": 5.887179487179487e-06,
"loss": 0.0875,
"step": 8525
},
{
"epoch": 3.6159864664834003,
"grad_norm": 4.853099346160889,
"learning_rate": 5.874358974358975e-06,
"loss": 0.1401,
"step": 8550
},
{
"epoch": 3.6265595263269192,
"grad_norm": 6.409478187561035,
"learning_rate": 5.861538461538462e-06,
"loss": 0.1222,
"step": 8575
},
{
"epoch": 3.6371325861704378,
"grad_norm": 4.797995567321777,
"learning_rate": 5.848717948717949e-06,
"loss": 0.113,
"step": 8600
},
{
"epoch": 3.6477056460139563,
"grad_norm": 6.740935802459717,
"learning_rate": 5.835897435897436e-06,
"loss": 0.1081,
"step": 8625
},
{
"epoch": 3.6582787058574753,
"grad_norm": 7.878625392913818,
"learning_rate": 5.823076923076924e-06,
"loss": 0.0906,
"step": 8650
},
{
"epoch": 3.668851765700994,
"grad_norm": 8.157573699951172,
"learning_rate": 5.8102564102564106e-06,
"loss": 0.1016,
"step": 8675
},
{
"epoch": 3.679424825544513,
"grad_norm": 5.366656303405762,
"learning_rate": 5.797435897435898e-06,
"loss": 0.1038,
"step": 8700
},
{
"epoch": 3.6899978853880313,
"grad_norm": 7.368963241577148,
"learning_rate": 5.784615384615385e-06,
"loss": 0.1131,
"step": 8725
},
{
"epoch": 3.70057094523155,
"grad_norm": 3.6952099800109863,
"learning_rate": 5.771794871794873e-06,
"loss": 0.0917,
"step": 8750
},
{
"epoch": 3.711144005075069,
"grad_norm": 6.393226623535156,
"learning_rate": 5.7589743589743596e-06,
"loss": 0.117,
"step": 8775
},
{
"epoch": 3.7217170649185873,
"grad_norm": 6.510304927825928,
"learning_rate": 5.746153846153847e-06,
"loss": 0.1187,
"step": 8800
},
{
"epoch": 3.7322901247621063,
"grad_norm": 7.048624038696289,
"learning_rate": 5.733333333333334e-06,
"loss": 0.0916,
"step": 8825
},
{
"epoch": 3.742863184605625,
"grad_norm": 4.117500305175781,
"learning_rate": 5.720512820512821e-06,
"loss": 0.1035,
"step": 8850
},
{
"epoch": 3.7534362444491434,
"grad_norm": 6.197905540466309,
"learning_rate": 5.7076923076923086e-06,
"loss": 0.1128,
"step": 8875
},
{
"epoch": 3.7640093042926623,
"grad_norm": 7.742534637451172,
"learning_rate": 5.694871794871795e-06,
"loss": 0.1026,
"step": 8900
},
{
"epoch": 3.774582364136181,
"grad_norm": 6.087040424346924,
"learning_rate": 5.682051282051283e-06,
"loss": 0.1175,
"step": 8925
},
{
"epoch": 3.7851554239797,
"grad_norm": 8.049386024475098,
"learning_rate": 5.66923076923077e-06,
"loss": 0.1121,
"step": 8950
},
{
"epoch": 3.7957284838232184,
"grad_norm": 3.5438435077667236,
"learning_rate": 5.6564102564102575e-06,
"loss": 0.1293,
"step": 8975
},
{
"epoch": 3.806301543666737,
"grad_norm": 8.00123119354248,
"learning_rate": 5.6435897435897435e-06,
"loss": 0.0985,
"step": 9000
},
{
"epoch": 3.806301543666737,
"eval_loss": 0.2076605260372162,
"eval_runtime": 438.019,
"eval_samples_per_second": 8.312,
"eval_steps_per_second": 1.041,
"eval_wer": 0.18186458768388325,
"step": 9000
},
{
"epoch": 3.816874603510256,
"grad_norm": 4.4232869148254395,
"learning_rate": 5.63076923076923e-06,
"loss": 0.1045,
"step": 9025
},
{
"epoch": 3.827447663353775,
"grad_norm": 6.264804363250732,
"learning_rate": 5.617948717948718e-06,
"loss": 0.0949,
"step": 9050
},
{
"epoch": 3.8380207231972934,
"grad_norm": 7.897494792938232,
"learning_rate": 5.605128205128205e-06,
"loss": 0.1008,
"step": 9075
},
{
"epoch": 3.848593783040812,
"grad_norm": 5.078146457672119,
"learning_rate": 5.5923076923076925e-06,
"loss": 0.0932,
"step": 9100
},
{
"epoch": 3.859166842884331,
"grad_norm": 5.596771240234375,
"learning_rate": 5.579487179487179e-06,
"loss": 0.0994,
"step": 9125
},
{
"epoch": 3.8697399027278494,
"grad_norm": 5.29062032699585,
"learning_rate": 5.566666666666667e-06,
"loss": 0.1101,
"step": 9150
},
{
"epoch": 3.8803129625713684,
"grad_norm": 6.083955764770508,
"learning_rate": 5.553846153846154e-06,
"loss": 0.1173,
"step": 9175
},
{
"epoch": 3.890886022414887,
"grad_norm": 5.622173309326172,
"learning_rate": 5.5410256410256415e-06,
"loss": 0.099,
"step": 9200
},
{
"epoch": 3.9014590822584054,
"grad_norm": 6.262368202209473,
"learning_rate": 5.528205128205128e-06,
"loss": 0.1033,
"step": 9225
},
{
"epoch": 3.9120321421019244,
"grad_norm": 9.63398265838623,
"learning_rate": 5.515384615384616e-06,
"loss": 0.1171,
"step": 9250
},
{
"epoch": 3.922605201945443,
"grad_norm": 8.468087196350098,
"learning_rate": 5.502564102564103e-06,
"loss": 0.1062,
"step": 9275
},
{
"epoch": 3.933178261788962,
"grad_norm": 5.411839485168457,
"learning_rate": 5.4897435897435905e-06,
"loss": 0.0946,
"step": 9300
},
{
"epoch": 3.9437513216324804,
"grad_norm": 4.889887809753418,
"learning_rate": 5.476923076923077e-06,
"loss": 0.1097,
"step": 9325
},
{
"epoch": 3.954324381475999,
"grad_norm": 6.154474258422852,
"learning_rate": 5.464102564102565e-06,
"loss": 0.0974,
"step": 9350
},
{
"epoch": 3.964897441319518,
"grad_norm": 7.760702133178711,
"learning_rate": 5.451282051282052e-06,
"loss": 0.1074,
"step": 9375
},
{
"epoch": 3.9754705011630365,
"grad_norm": 4.108585834503174,
"learning_rate": 5.4384615384615395e-06,
"loss": 0.0997,
"step": 9400
},
{
"epoch": 3.9860435610065554,
"grad_norm": 6.994529724121094,
"learning_rate": 5.425641025641026e-06,
"loss": 0.1078,
"step": 9425
},
{
"epoch": 3.996616620850074,
"grad_norm": 2.9072048664093018,
"learning_rate": 5.412820512820514e-06,
"loss": 0.1128,
"step": 9450
},
{
"epoch": 4.0071896806935925,
"grad_norm": 5.968387603759766,
"learning_rate": 5.400000000000001e-06,
"loss": 0.0853,
"step": 9475
},
{
"epoch": 4.017762740537111,
"grad_norm": 2.4111251831054688,
"learning_rate": 5.387179487179488e-06,
"loss": 0.0553,
"step": 9500
},
{
"epoch": 4.02833580038063,
"grad_norm": 3.6558897495269775,
"learning_rate": 5.374358974358975e-06,
"loss": 0.0832,
"step": 9525
},
{
"epoch": 4.038908860224149,
"grad_norm": 5.5717973709106445,
"learning_rate": 5.361538461538462e-06,
"loss": 0.0784,
"step": 9550
},
{
"epoch": 4.0494819200676675,
"grad_norm": 5.605922698974609,
"learning_rate": 5.34871794871795e-06,
"loss": 0.0743,
"step": 9575
},
{
"epoch": 4.060054979911186,
"grad_norm": 5.932694911956787,
"learning_rate": 5.335897435897436e-06,
"loss": 0.0634,
"step": 9600
},
{
"epoch": 4.070628039754705,
"grad_norm": 4.883663177490234,
"learning_rate": 5.323076923076923e-06,
"loss": 0.0627,
"step": 9625
},
{
"epoch": 4.081201099598224,
"grad_norm": 4.509759902954102,
"learning_rate": 5.31025641025641e-06,
"loss": 0.0716,
"step": 9650
},
{
"epoch": 4.0917741594417425,
"grad_norm": 8.665048599243164,
"learning_rate": 5.297435897435897e-06,
"loss": 0.0678,
"step": 9675
},
{
"epoch": 4.102347219285261,
"grad_norm": 4.062932968139648,
"learning_rate": 5.284615384615385e-06,
"loss": 0.0573,
"step": 9700
},
{
"epoch": 4.1129202791287796,
"grad_norm": 5.353725433349609,
"learning_rate": 5.271794871794872e-06,
"loss": 0.0823,
"step": 9725
},
{
"epoch": 4.123493338972299,
"grad_norm": 3.5691452026367188,
"learning_rate": 5.258974358974359e-06,
"loss": 0.0722,
"step": 9750
},
{
"epoch": 4.1340663988158175,
"grad_norm": 4.866578578948975,
"learning_rate": 5.246153846153846e-06,
"loss": 0.0763,
"step": 9775
},
{
"epoch": 4.144639458659336,
"grad_norm": 3.342728614807129,
"learning_rate": 5.233333333333334e-06,
"loss": 0.0621,
"step": 9800
},
{
"epoch": 4.155212518502855,
"grad_norm": 3.556044816970825,
"learning_rate": 5.220512820512821e-06,
"loss": 0.0725,
"step": 9825
},
{
"epoch": 4.165785578346373,
"grad_norm": 5.201412677764893,
"learning_rate": 5.207692307692308e-06,
"loss": 0.0671,
"step": 9850
},
{
"epoch": 4.1763586381898925,
"grad_norm": 5.240591526031494,
"learning_rate": 5.194871794871795e-06,
"loss": 0.0728,
"step": 9875
},
{
"epoch": 4.186931698033411,
"grad_norm": 3.6806883811950684,
"learning_rate": 5.182051282051283e-06,
"loss": 0.0653,
"step": 9900
},
{
"epoch": 4.19750475787693,
"grad_norm": 3.5019476413726807,
"learning_rate": 5.16923076923077e-06,
"loss": 0.0566,
"step": 9925
},
{
"epoch": 4.208077817720448,
"grad_norm": 4.452245712280273,
"learning_rate": 5.156410256410257e-06,
"loss": 0.0582,
"step": 9950
},
{
"epoch": 4.218650877563967,
"grad_norm": 5.536275863647461,
"learning_rate": 5.143589743589744e-06,
"loss": 0.0737,
"step": 9975
},
{
"epoch": 4.229223937407486,
"grad_norm": 5.515459060668945,
"learning_rate": 5.130769230769232e-06,
"loss": 0.0828,
"step": 10000
},
{
"epoch": 4.229223937407486,
"eval_loss": 0.20704784989356995,
"eval_runtime": 438.0885,
"eval_samples_per_second": 8.311,
"eval_steps_per_second": 1.041,
"eval_wer": 0.17917304747320062,
"step": 10000
},
{
"epoch": 4.239796997251005,
"grad_norm": 3.628927230834961,
"learning_rate": 5.1179487179487186e-06,
"loss": 0.0657,
"step": 10025
},
{
"epoch": 4.250370057094523,
"grad_norm": 4.095386028289795,
"learning_rate": 5.105128205128206e-06,
"loss": 0.0644,
"step": 10050
},
{
"epoch": 4.260943116938042,
"grad_norm": 4.334606170654297,
"learning_rate": 5.092307692307693e-06,
"loss": 0.0707,
"step": 10075
},
{
"epoch": 4.27151617678156,
"grad_norm": 4.358844757080078,
"learning_rate": 5.07948717948718e-06,
"loss": 0.0712,
"step": 10100
},
{
"epoch": 4.28208923662508,
"grad_norm": 3.893803358078003,
"learning_rate": 5.0666666666666676e-06,
"loss": 0.0742,
"step": 10125
},
{
"epoch": 4.292662296468598,
"grad_norm": 4.782370090484619,
"learning_rate": 5.053846153846154e-06,
"loss": 0.0654,
"step": 10150
},
{
"epoch": 4.303235356312117,
"grad_norm": 4.6114821434021,
"learning_rate": 5.041025641025642e-06,
"loss": 0.0678,
"step": 10175
},
{
"epoch": 4.313808416155635,
"grad_norm": 2.206007480621338,
"learning_rate": 5.0287179487179495e-06,
"loss": 0.0778,
"step": 10200
},
{
"epoch": 4.324381475999155,
"grad_norm": 3.889173984527588,
"learning_rate": 5.015897435897436e-06,
"loss": 0.0754,
"step": 10225
},
{
"epoch": 4.334954535842673,
"grad_norm": 3.887746810913086,
"learning_rate": 5.003076923076924e-06,
"loss": 0.0777,
"step": 10250
},
{
"epoch": 4.345527595686192,
"grad_norm": 4.674836158752441,
"learning_rate": 4.990256410256411e-06,
"loss": 0.0746,
"step": 10275
},
{
"epoch": 4.35610065552971,
"grad_norm": 4.411125183105469,
"learning_rate": 4.977435897435898e-06,
"loss": 0.0703,
"step": 10300
},
{
"epoch": 4.366673715373229,
"grad_norm": 9.945749282836914,
"learning_rate": 4.964615384615385e-06,
"loss": 0.0666,
"step": 10325
},
{
"epoch": 4.377246775216748,
"grad_norm": 5.479196071624756,
"learning_rate": 4.951794871794872e-06,
"loss": 0.0672,
"step": 10350
},
{
"epoch": 4.387819835060267,
"grad_norm": 5.061864376068115,
"learning_rate": 4.93897435897436e-06,
"loss": 0.0649,
"step": 10375
},
{
"epoch": 4.398392894903785,
"grad_norm": 5.103855133056641,
"learning_rate": 4.926153846153847e-06,
"loss": 0.0761,
"step": 10400
},
{
"epoch": 4.408965954747304,
"grad_norm": 3.8901400566101074,
"learning_rate": 4.9133333333333334e-06,
"loss": 0.0696,
"step": 10425
},
{
"epoch": 4.419539014590822,
"grad_norm": 8.482890129089355,
"learning_rate": 4.900512820512821e-06,
"loss": 0.0725,
"step": 10450
},
{
"epoch": 4.430112074434342,
"grad_norm": 3.7482855319976807,
"learning_rate": 4.887692307692308e-06,
"loss": 0.0586,
"step": 10475
},
{
"epoch": 4.44068513427786,
"grad_norm": 19.449848175048828,
"learning_rate": 4.874871794871796e-06,
"loss": 0.067,
"step": 10500
},
{
"epoch": 4.451258194121379,
"grad_norm": 3.536351203918457,
"learning_rate": 4.8620512820512824e-06,
"loss": 0.0659,
"step": 10525
},
{
"epoch": 4.461831253964897,
"grad_norm": 3.9151482582092285,
"learning_rate": 4.849230769230769e-06,
"loss": 0.0623,
"step": 10550
},
{
"epoch": 4.472404313808416,
"grad_norm": 3.5885860919952393,
"learning_rate": 4.836410256410257e-06,
"loss": 0.0683,
"step": 10575
},
{
"epoch": 4.482977373651935,
"grad_norm": 4.230422019958496,
"learning_rate": 4.823589743589744e-06,
"loss": 0.0776,
"step": 10600
},
{
"epoch": 4.493550433495454,
"grad_norm": 3.4841506481170654,
"learning_rate": 4.8107692307692314e-06,
"loss": 0.0719,
"step": 10625
},
{
"epoch": 4.504123493338972,
"grad_norm": 5.864824295043945,
"learning_rate": 4.797948717948718e-06,
"loss": 0.068,
"step": 10650
},
{
"epoch": 4.514696553182491,
"grad_norm": 5.13723087310791,
"learning_rate": 4.785128205128206e-06,
"loss": 0.0796,
"step": 10675
},
{
"epoch": 4.52526961302601,
"grad_norm": 4.680889129638672,
"learning_rate": 4.772307692307693e-06,
"loss": 0.0835,
"step": 10700
},
{
"epoch": 4.535842672869529,
"grad_norm": 4.532434463500977,
"learning_rate": 4.7594871794871796e-06,
"loss": 0.0737,
"step": 10725
},
{
"epoch": 4.546415732713047,
"grad_norm": 6.627336502075195,
"learning_rate": 4.746666666666667e-06,
"loss": 0.0905,
"step": 10750
},
{
"epoch": 4.556988792556566,
"grad_norm": 3.9481518268585205,
"learning_rate": 4.733846153846154e-06,
"loss": 0.0666,
"step": 10775
},
{
"epoch": 4.567561852400084,
"grad_norm": 4.709788799285889,
"learning_rate": 4.721025641025642e-06,
"loss": 0.074,
"step": 10800
},
{
"epoch": 4.578134912243604,
"grad_norm": 4.298018932342529,
"learning_rate": 4.7082051282051286e-06,
"loss": 0.0721,
"step": 10825
},
{
"epoch": 4.588707972087122,
"grad_norm": 2.7479617595672607,
"learning_rate": 4.695384615384615e-06,
"loss": 0.0653,
"step": 10850
},
{
"epoch": 4.599281031930641,
"grad_norm": 3.157724380493164,
"learning_rate": 4.682564102564103e-06,
"loss": 0.0923,
"step": 10875
},
{
"epoch": 4.609854091774159,
"grad_norm": 5.712294101715088,
"learning_rate": 4.66974358974359e-06,
"loss": 0.0741,
"step": 10900
},
{
"epoch": 4.620427151617678,
"grad_norm": 4.904754638671875,
"learning_rate": 4.6569230769230776e-06,
"loss": 0.0649,
"step": 10925
},
{
"epoch": 4.631000211461197,
"grad_norm": 13.393159866333008,
"learning_rate": 4.644102564102564e-06,
"loss": 0.0709,
"step": 10950
},
{
"epoch": 4.641573271304716,
"grad_norm": 3.0155038833618164,
"learning_rate": 4.631282051282052e-06,
"loss": 0.0744,
"step": 10975
},
{
"epoch": 4.652146331148234,
"grad_norm": 3.272163152694702,
"learning_rate": 4.618461538461539e-06,
"loss": 0.06,
"step": 11000
},
{
"epoch": 4.652146331148234,
"eval_loss": 0.19907286763191223,
"eval_runtime": 440.5684,
"eval_samples_per_second": 8.264,
"eval_steps_per_second": 1.035,
"eval_wer": 0.18256067566940462,
"step": 11000
},
{
"epoch": 4.662719390991753,
"grad_norm": 4.0782060623168945,
"learning_rate": 4.605641025641026e-06,
"loss": 0.0631,
"step": 11025
},
{
"epoch": 4.673292450835271,
"grad_norm": 8.949256896972656,
"learning_rate": 4.592820512820513e-06,
"loss": 0.0737,
"step": 11050
},
{
"epoch": 4.683865510678791,
"grad_norm": 6.3690505027771,
"learning_rate": 4.58e-06,
"loss": 0.0755,
"step": 11075
},
{
"epoch": 4.694438570522309,
"grad_norm": 5.039371490478516,
"learning_rate": 4.567179487179488e-06,
"loss": 0.079,
"step": 11100
},
{
"epoch": 4.705011630365828,
"grad_norm": 3.4718008041381836,
"learning_rate": 4.554358974358975e-06,
"loss": 0.0552,
"step": 11125
},
{
"epoch": 4.715584690209346,
"grad_norm": 7.306105613708496,
"learning_rate": 4.5415384615384615e-06,
"loss": 0.0607,
"step": 11150
},
{
"epoch": 4.726157750052865,
"grad_norm": 4.172931671142578,
"learning_rate": 4.528717948717949e-06,
"loss": 0.0828,
"step": 11175
},
{
"epoch": 4.736730809896384,
"grad_norm": 4.136832237243652,
"learning_rate": 4.515897435897436e-06,
"loss": 0.0993,
"step": 11200
},
{
"epoch": 4.747303869739903,
"grad_norm": 2.4323673248291016,
"learning_rate": 4.503076923076924e-06,
"loss": 0.0603,
"step": 11225
},
{
"epoch": 4.757876929583421,
"grad_norm": 6.04355525970459,
"learning_rate": 4.4902564102564105e-06,
"loss": 0.06,
"step": 11250
},
{
"epoch": 4.76844998942694,
"grad_norm": 5.033048629760742,
"learning_rate": 4.477435897435898e-06,
"loss": 0.0849,
"step": 11275
},
{
"epoch": 4.779023049270458,
"grad_norm": 6.350639820098877,
"learning_rate": 4.464615384615385e-06,
"loss": 0.0576,
"step": 11300
},
{
"epoch": 4.789596109113978,
"grad_norm": 4.880070209503174,
"learning_rate": 4.451794871794872e-06,
"loss": 0.0712,
"step": 11325
},
{
"epoch": 4.800169168957496,
"grad_norm": 5.648702621459961,
"learning_rate": 4.4389743589743595e-06,
"loss": 0.0564,
"step": 11350
},
{
"epoch": 4.810742228801015,
"grad_norm": 8.151969909667969,
"learning_rate": 4.426153846153846e-06,
"loss": 0.0616,
"step": 11375
},
{
"epoch": 4.821315288644533,
"grad_norm": 4.684852123260498,
"learning_rate": 4.413333333333334e-06,
"loss": 0.0724,
"step": 11400
},
{
"epoch": 4.831888348488053,
"grad_norm": 3.9812090396881104,
"learning_rate": 4.400512820512821e-06,
"loss": 0.0774,
"step": 11425
},
{
"epoch": 4.842461408331571,
"grad_norm": 5.788280963897705,
"learning_rate": 4.387692307692308e-06,
"loss": 0.0776,
"step": 11450
},
{
"epoch": 4.85303446817509,
"grad_norm": 4.4890522956848145,
"learning_rate": 4.374871794871795e-06,
"loss": 0.0636,
"step": 11475
},
{
"epoch": 4.863607528018608,
"grad_norm": 3.6684515476226807,
"learning_rate": 4.362051282051282e-06,
"loss": 0.0707,
"step": 11500
},
{
"epoch": 4.874180587862127,
"grad_norm": 4.590714454650879,
"learning_rate": 4.34923076923077e-06,
"loss": 0.0761,
"step": 11525
},
{
"epoch": 4.884753647705646,
"grad_norm": 5.7994256019592285,
"learning_rate": 4.336410256410257e-06,
"loss": 0.063,
"step": 11550
},
{
"epoch": 4.895326707549165,
"grad_norm": 4.685302734375,
"learning_rate": 4.323589743589744e-06,
"loss": 0.0595,
"step": 11575
},
{
"epoch": 4.905899767392683,
"grad_norm": 6.557898044586182,
"learning_rate": 4.310769230769231e-06,
"loss": 0.0727,
"step": 11600
},
{
"epoch": 4.916472827236202,
"grad_norm": 3.237697124481201,
"learning_rate": 4.297948717948718e-06,
"loss": 0.0758,
"step": 11625
},
{
"epoch": 4.927045887079721,
"grad_norm": 6.924361705780029,
"learning_rate": 4.285128205128206e-06,
"loss": 0.0556,
"step": 11650
},
{
"epoch": 4.93761894692324,
"grad_norm": 6.172597408294678,
"learning_rate": 4.2723076923076925e-06,
"loss": 0.074,
"step": 11675
},
{
"epoch": 4.948192006766758,
"grad_norm": 4.857706546783447,
"learning_rate": 4.25948717948718e-06,
"loss": 0.0656,
"step": 11700
},
{
"epoch": 4.958765066610277,
"grad_norm": 11.788376808166504,
"learning_rate": 4.246666666666667e-06,
"loss": 0.0724,
"step": 11725
},
{
"epoch": 4.9693381264537955,
"grad_norm": 2.799781322479248,
"learning_rate": 4.233846153846154e-06,
"loss": 0.067,
"step": 11750
},
{
"epoch": 4.979911186297315,
"grad_norm": 3.4480295181274414,
"learning_rate": 4.2210256410256414e-06,
"loss": 0.0667,
"step": 11775
},
{
"epoch": 4.990484246140833,
"grad_norm": 5.533037185668945,
"learning_rate": 4.208205128205128e-06,
"loss": 0.0598,
"step": 11800
},
{
"epoch": 5.001057305984352,
"grad_norm": 3.5116047859191895,
"learning_rate": 4.195384615384616e-06,
"loss": 0.0669,
"step": 11825
},
{
"epoch": 5.0116303658278705,
"grad_norm": 2.5963640213012695,
"learning_rate": 4.182564102564103e-06,
"loss": 0.043,
"step": 11850
},
{
"epoch": 5.022203425671389,
"grad_norm": 5.519112586975098,
"learning_rate": 4.1697435897435904e-06,
"loss": 0.044,
"step": 11875
},
{
"epoch": 5.032776485514908,
"grad_norm": 3.7173142433166504,
"learning_rate": 4.156923076923077e-06,
"loss": 0.0473,
"step": 11900
},
{
"epoch": 5.043349545358427,
"grad_norm": 2.8905227184295654,
"learning_rate": 4.144102564102564e-06,
"loss": 0.0389,
"step": 11925
},
{
"epoch": 5.0539226052019455,
"grad_norm": 2.328718423843384,
"learning_rate": 4.131282051282052e-06,
"loss": 0.0502,
"step": 11950
},
{
"epoch": 5.064495665045464,
"grad_norm": 4.866549015045166,
"learning_rate": 4.118461538461539e-06,
"loss": 0.043,
"step": 11975
},
{
"epoch": 5.0750687248889825,
"grad_norm": 1.9600281715393066,
"learning_rate": 4.105641025641026e-06,
"loss": 0.0629,
"step": 12000
},
{
"epoch": 5.0750687248889825,
"eval_loss": 0.20117300748825073,
"eval_runtime": 442.5655,
"eval_samples_per_second": 8.227,
"eval_steps_per_second": 1.03,
"eval_wer": 0.19184184880968955,
"step": 12000
},
{
"epoch": 5.085641784732502,
"grad_norm": 4.797824382781982,
"learning_rate": 4.092820512820513e-06,
"loss": 0.0554,
"step": 12025
},
{
"epoch": 5.0962148445760205,
"grad_norm": 2.1049580574035645,
"learning_rate": 4.08e-06,
"loss": 0.0553,
"step": 12050
},
{
"epoch": 5.106787904419539,
"grad_norm": 2.2943015098571777,
"learning_rate": 4.0671794871794876e-06,
"loss": 0.0413,
"step": 12075
},
{
"epoch": 5.1173609642630575,
"grad_norm": 3.040888547897339,
"learning_rate": 4.054358974358974e-06,
"loss": 0.0464,
"step": 12100
},
{
"epoch": 5.127934024106576,
"grad_norm": 2.409097909927368,
"learning_rate": 4.041538461538462e-06,
"loss": 0.0419,
"step": 12125
},
{
"epoch": 5.1385070839500955,
"grad_norm": 4.554444789886475,
"learning_rate": 4.028717948717949e-06,
"loss": 0.0415,
"step": 12150
},
{
"epoch": 5.149080143793614,
"grad_norm": 4.427338600158691,
"learning_rate": 4.0158974358974366e-06,
"loss": 0.0477,
"step": 12175
},
{
"epoch": 5.1596532036371325,
"grad_norm": 4.4627580642700195,
"learning_rate": 4.003076923076923e-06,
"loss": 0.0485,
"step": 12200
},
{
"epoch": 5.170226263480651,
"grad_norm": 5.8571882247924805,
"learning_rate": 3.990256410256411e-06,
"loss": 0.0443,
"step": 12225
},
{
"epoch": 5.18079932332417,
"grad_norm": 2.8459227085113525,
"learning_rate": 3.977435897435898e-06,
"loss": 0.0446,
"step": 12250
},
{
"epoch": 5.191372383167689,
"grad_norm": 4.811408996582031,
"learning_rate": 3.964615384615385e-06,
"loss": 0.045,
"step": 12275
},
{
"epoch": 5.2019454430112075,
"grad_norm": 3.7404351234436035,
"learning_rate": 3.951794871794872e-06,
"loss": 0.0358,
"step": 12300
},
{
"epoch": 5.212518502854726,
"grad_norm": 4.323686122894287,
"learning_rate": 3.938974358974359e-06,
"loss": 0.0462,
"step": 12325
},
{
"epoch": 5.223091562698245,
"grad_norm": 4.527756690979004,
"learning_rate": 3.926153846153846e-06,
"loss": 0.0429,
"step": 12350
},
{
"epoch": 5.233664622541764,
"grad_norm": 2.838021755218506,
"learning_rate": 3.913333333333334e-06,
"loss": 0.0367,
"step": 12375
},
{
"epoch": 5.2442376823852825,
"grad_norm": 4.497096061706543,
"learning_rate": 3.9005128205128205e-06,
"loss": 0.0435,
"step": 12400
},
{
"epoch": 5.254810742228801,
"grad_norm": 4.4126410484313965,
"learning_rate": 3.887692307692308e-06,
"loss": 0.0465,
"step": 12425
},
{
"epoch": 5.26538380207232,
"grad_norm": 4.237513065338135,
"learning_rate": 3.874871794871795e-06,
"loss": 0.0466,
"step": 12450
},
{
"epoch": 5.275956861915838,
"grad_norm": 3.8605165481567383,
"learning_rate": 3.862051282051283e-06,
"loss": 0.0631,
"step": 12475
},
{
"epoch": 5.2865299217593575,
"grad_norm": 5.698480606079102,
"learning_rate": 3.8492307692307695e-06,
"loss": 0.0435,
"step": 12500
},
{
"epoch": 5.297102981602876,
"grad_norm": 3.7574024200439453,
"learning_rate": 3.836410256410257e-06,
"loss": 0.049,
"step": 12525
},
{
"epoch": 5.307676041446395,
"grad_norm": 2.125375509262085,
"learning_rate": 3.824102564102565e-06,
"loss": 0.05,
"step": 12550
},
{
"epoch": 5.318249101289913,
"grad_norm": 4.459465026855469,
"learning_rate": 3.8112820512820514e-06,
"loss": 0.0453,
"step": 12575
},
{
"epoch": 5.328822161133432,
"grad_norm": 3.0165951251983643,
"learning_rate": 3.7984615384615387e-06,
"loss": 0.0436,
"step": 12600
},
{
"epoch": 5.339395220976951,
"grad_norm": 8.148979187011719,
"learning_rate": 3.785641025641026e-06,
"loss": 0.0596,
"step": 12625
},
{
"epoch": 5.34996828082047,
"grad_norm": 4.275932788848877,
"learning_rate": 3.772820512820513e-06,
"loss": 0.0554,
"step": 12650
},
{
"epoch": 5.360541340663988,
"grad_norm": 4.193767070770264,
"learning_rate": 3.7600000000000004e-06,
"loss": 0.0542,
"step": 12675
},
{
"epoch": 5.371114400507507,
"grad_norm": 3.2880380153656006,
"learning_rate": 3.7471794871794877e-06,
"loss": 0.0418,
"step": 12700
},
{
"epoch": 5.381687460351025,
"grad_norm": 4.61824369430542,
"learning_rate": 3.734358974358975e-06,
"loss": 0.0476,
"step": 12725
},
{
"epoch": 5.392260520194545,
"grad_norm": 4.10459566116333,
"learning_rate": 3.721538461538462e-06,
"loss": 0.0551,
"step": 12750
},
{
"epoch": 5.402833580038063,
"grad_norm": 3.0435068607330322,
"learning_rate": 3.7087179487179494e-06,
"loss": 0.0501,
"step": 12775
},
{
"epoch": 5.413406639881582,
"grad_norm": 5.007273197174072,
"learning_rate": 3.6958974358974362e-06,
"loss": 0.051,
"step": 12800
},
{
"epoch": 5.4239796997251,
"grad_norm": 4.816375255584717,
"learning_rate": 3.683076923076923e-06,
"loss": 0.0444,
"step": 12825
},
{
"epoch": 5.43455275956862,
"grad_norm": 3.2627782821655273,
"learning_rate": 3.6702564102564103e-06,
"loss": 0.048,
"step": 12850
},
{
"epoch": 5.445125819412138,
"grad_norm": 4.271965026855469,
"learning_rate": 3.6574358974358976e-06,
"loss": 0.0476,
"step": 12875
},
{
"epoch": 5.455698879255657,
"grad_norm": 3.3179333209991455,
"learning_rate": 3.644615384615385e-06,
"loss": 0.05,
"step": 12900
},
{
"epoch": 5.466271939099175,
"grad_norm": 1.9072633981704712,
"learning_rate": 3.631794871794872e-06,
"loss": 0.0441,
"step": 12925
},
{
"epoch": 5.476844998942694,
"grad_norm": 3.9856927394866943,
"learning_rate": 3.6189743589743593e-06,
"loss": 0.0382,
"step": 12950
},
{
"epoch": 5.487418058786213,
"grad_norm": 4.388250827789307,
"learning_rate": 3.6061538461538466e-06,
"loss": 0.0487,
"step": 12975
},
{
"epoch": 5.497991118629732,
"grad_norm": 7.110579967498779,
"learning_rate": 3.593333333333334e-06,
"loss": 0.0545,
"step": 13000
},
{
"epoch": 5.497991118629732,
"eval_loss": 0.20169800519943237,
"eval_runtime": 439.2743,
"eval_samples_per_second": 8.289,
"eval_steps_per_second": 1.038,
"eval_wer": 0.18641236252262286,
"step": 13000
},
{
"epoch": 5.50856417847325,
"grad_norm": 3.1531167030334473,
"learning_rate": 3.580512820512821e-06,
"loss": 0.051,
"step": 13025
},
{
"epoch": 5.519137238316769,
"grad_norm": 4.554049491882324,
"learning_rate": 3.5676923076923083e-06,
"loss": 0.0493,
"step": 13050
},
{
"epoch": 5.529710298160287,
"grad_norm": 1.308733582496643,
"learning_rate": 3.5548717948717955e-06,
"loss": 0.0522,
"step": 13075
},
{
"epoch": 5.540283358003807,
"grad_norm": 3.7449610233306885,
"learning_rate": 3.5420512820512824e-06,
"loss": 0.05,
"step": 13100
},
{
"epoch": 5.550856417847325,
"grad_norm": 6.0974273681640625,
"learning_rate": 3.5292307692307696e-06,
"loss": 0.0453,
"step": 13125
},
{
"epoch": 5.561429477690844,
"grad_norm": 13.427715301513672,
"learning_rate": 3.5164102564102564e-06,
"loss": 0.0478,
"step": 13150
},
{
"epoch": 5.572002537534362,
"grad_norm": 6.203893661499023,
"learning_rate": 3.5035897435897437e-06,
"loss": 0.0429,
"step": 13175
},
{
"epoch": 5.582575597377881,
"grad_norm": 6.499892234802246,
"learning_rate": 3.490769230769231e-06,
"loss": 0.0545,
"step": 13200
},
{
"epoch": 5.5931486572214,
"grad_norm": 4.413330078125,
"learning_rate": 3.477948717948718e-06,
"loss": 0.0496,
"step": 13225
},
{
"epoch": 5.603721717064919,
"grad_norm": 1.8597774505615234,
"learning_rate": 3.4651282051282054e-06,
"loss": 0.0509,
"step": 13250
},
{
"epoch": 5.614294776908437,
"grad_norm": 4.8508405685424805,
"learning_rate": 3.4523076923076927e-06,
"loss": 0.0524,
"step": 13275
},
{
"epoch": 5.624867836751956,
"grad_norm": 5.132781505584717,
"learning_rate": 3.43948717948718e-06,
"loss": 0.0539,
"step": 13300
},
{
"epoch": 5.635440896595474,
"grad_norm": 4.2406110763549805,
"learning_rate": 3.426666666666667e-06,
"loss": 0.0387,
"step": 13325
},
{
"epoch": 5.646013956438994,
"grad_norm": 5.790931701660156,
"learning_rate": 3.4138461538461544e-06,
"loss": 0.0476,
"step": 13350
},
{
"epoch": 5.656587016282512,
"grad_norm": 5.764678955078125,
"learning_rate": 3.4010256410256417e-06,
"loss": 0.0484,
"step": 13375
},
{
"epoch": 5.667160076126031,
"grad_norm": 4.839846611022949,
"learning_rate": 3.3882051282051285e-06,
"loss": 0.0494,
"step": 13400
},
{
"epoch": 5.677733135969549,
"grad_norm": 5.120883941650391,
"learning_rate": 3.3753846153846157e-06,
"loss": 0.0364,
"step": 13425
},
{
"epoch": 5.688306195813068,
"grad_norm": 1.9081509113311768,
"learning_rate": 3.3625641025641026e-06,
"loss": 0.0399,
"step": 13450
},
{
"epoch": 5.698879255656587,
"grad_norm": 3.328843593597412,
"learning_rate": 3.34974358974359e-06,
"loss": 0.043,
"step": 13475
},
{
"epoch": 5.709452315500106,
"grad_norm": 3.8817241191864014,
"learning_rate": 3.336923076923077e-06,
"loss": 0.05,
"step": 13500
},
{
"epoch": 5.720025375343624,
"grad_norm": 3.7421860694885254,
"learning_rate": 3.3241025641025643e-06,
"loss": 0.0435,
"step": 13525
},
{
"epoch": 5.730598435187143,
"grad_norm": 4.542396545410156,
"learning_rate": 3.3112820512820516e-06,
"loss": 0.0697,
"step": 13550
},
{
"epoch": 5.741171495030661,
"grad_norm": 2.264399528503418,
"learning_rate": 3.298461538461539e-06,
"loss": 0.0477,
"step": 13575
},
{
"epoch": 5.751744554874181,
"grad_norm": 4.958556652069092,
"learning_rate": 3.285641025641026e-06,
"loss": 0.0636,
"step": 13600
},
{
"epoch": 5.762317614717699,
"grad_norm": 5.233386993408203,
"learning_rate": 3.2728205128205133e-06,
"loss": 0.0509,
"step": 13625
},
{
"epoch": 5.772890674561218,
"grad_norm": 2.960580587387085,
"learning_rate": 3.2600000000000006e-06,
"loss": 0.0455,
"step": 13650
},
{
"epoch": 5.783463734404736,
"grad_norm": 5.023665904998779,
"learning_rate": 3.247179487179488e-06,
"loss": 0.0552,
"step": 13675
},
{
"epoch": 5.794036794248256,
"grad_norm": 2.2450520992279053,
"learning_rate": 3.2343589743589746e-06,
"loss": 0.0395,
"step": 13700
},
{
"epoch": 5.804609854091774,
"grad_norm": 6.890443801879883,
"learning_rate": 3.221538461538462e-06,
"loss": 0.0525,
"step": 13725
},
{
"epoch": 5.815182913935293,
"grad_norm": 3.799301862716675,
"learning_rate": 3.2087179487179487e-06,
"loss": 0.0486,
"step": 13750
},
{
"epoch": 5.825755973778811,
"grad_norm": 1.8951350450515747,
"learning_rate": 3.195897435897436e-06,
"loss": 0.0573,
"step": 13775
},
{
"epoch": 5.836329033622331,
"grad_norm": 2.119630813598633,
"learning_rate": 3.183076923076923e-06,
"loss": 0.0377,
"step": 13800
},
{
"epoch": 5.846902093465849,
"grad_norm": 5.626587867736816,
"learning_rate": 3.1702564102564104e-06,
"loss": 0.0486,
"step": 13825
},
{
"epoch": 5.857475153309368,
"grad_norm": 4.05784797668457,
"learning_rate": 3.1574358974358977e-06,
"loss": 0.0439,
"step": 13850
},
{
"epoch": 5.868048213152886,
"grad_norm": 3.4822025299072266,
"learning_rate": 3.144615384615385e-06,
"loss": 0.0502,
"step": 13875
},
{
"epoch": 5.878621272996405,
"grad_norm": 6.456898212432861,
"learning_rate": 3.131794871794872e-06,
"loss": 0.0429,
"step": 13900
},
{
"epoch": 5.889194332839924,
"grad_norm": 5.004892826080322,
"learning_rate": 3.1189743589743594e-06,
"loss": 0.0431,
"step": 13925
},
{
"epoch": 5.899767392683443,
"grad_norm": 5.014365196228027,
"learning_rate": 3.1061538461538467e-06,
"loss": 0.0484,
"step": 13950
},
{
"epoch": 5.910340452526961,
"grad_norm": 2.9999139308929443,
"learning_rate": 3.093333333333334e-06,
"loss": 0.0366,
"step": 13975
},
{
"epoch": 5.92091351237048,
"grad_norm": 3.330026388168335,
"learning_rate": 3.080512820512821e-06,
"loss": 0.0392,
"step": 14000
},
{
"epoch": 5.92091351237048,
"eval_loss": 0.1985091120004654,
"eval_runtime": 442.1262,
"eval_samples_per_second": 8.235,
"eval_steps_per_second": 1.031,
"eval_wer": 0.19096013736136247,
"step": 14000
},
{
"epoch": 5.931486572213998,
"grad_norm": 3.9660110473632812,
"learning_rate": 3.067692307692308e-06,
"loss": 0.0507,
"step": 14025
},
{
"epoch": 5.942059632057518,
"grad_norm": 4.192524433135986,
"learning_rate": 3.054871794871795e-06,
"loss": 0.0705,
"step": 14050
},
{
"epoch": 5.952632691901036,
"grad_norm": 6.012485980987549,
"learning_rate": 3.042051282051282e-06,
"loss": 0.0484,
"step": 14075
},
{
"epoch": 5.963205751744555,
"grad_norm": 4.29821252822876,
"learning_rate": 3.0292307692307693e-06,
"loss": 0.0407,
"step": 14100
},
{
"epoch": 5.9737788115880734,
"grad_norm": 2.2624855041503906,
"learning_rate": 3.0164102564102566e-06,
"loss": 0.0376,
"step": 14125
},
{
"epoch": 5.984351871431592,
"grad_norm": 3.6296563148498535,
"learning_rate": 3.003589743589744e-06,
"loss": 0.0462,
"step": 14150
},
{
"epoch": 5.994924931275111,
"grad_norm": 5.471303939819336,
"learning_rate": 2.990769230769231e-06,
"loss": 0.0445,
"step": 14175
},
{
"epoch": 6.00549799111863,
"grad_norm": 1.4642462730407715,
"learning_rate": 2.9779487179487183e-06,
"loss": 0.041,
"step": 14200
},
{
"epoch": 6.0160710509621484,
"grad_norm": 1.2915719747543335,
"learning_rate": 2.9651282051282056e-06,
"loss": 0.0352,
"step": 14225
},
{
"epoch": 6.026644110805667,
"grad_norm": 1.6247045993804932,
"learning_rate": 2.952307692307693e-06,
"loss": 0.0277,
"step": 14250
},
{
"epoch": 6.0372171706491855,
"grad_norm": 4.9181671142578125,
"learning_rate": 2.93948717948718e-06,
"loss": 0.0396,
"step": 14275
},
{
"epoch": 6.047790230492705,
"grad_norm": 9.853203773498535,
"learning_rate": 2.9266666666666673e-06,
"loss": 0.029,
"step": 14300
},
{
"epoch": 6.0583632903362235,
"grad_norm": 2.4494051933288574,
"learning_rate": 2.913846153846154e-06,
"loss": 0.0334,
"step": 14325
},
{
"epoch": 6.068936350179742,
"grad_norm": 2.7287230491638184,
"learning_rate": 2.901025641025641e-06,
"loss": 0.0391,
"step": 14350
},
{
"epoch": 6.0795094100232605,
"grad_norm": 3.225248098373413,
"learning_rate": 2.888205128205128e-06,
"loss": 0.042,
"step": 14375
},
{
"epoch": 6.090082469866779,
"grad_norm": 2.684298276901245,
"learning_rate": 2.8753846153846154e-06,
"loss": 0.032,
"step": 14400
},
{
"epoch": 6.1006555297102985,
"grad_norm": 2.294804334640503,
"learning_rate": 2.8625641025641027e-06,
"loss": 0.0295,
"step": 14425
},
{
"epoch": 6.111228589553817,
"grad_norm": 3.1335229873657227,
"learning_rate": 2.84974358974359e-06,
"loss": 0.0366,
"step": 14450
},
{
"epoch": 6.1218016493973355,
"grad_norm": 1.516295313835144,
"learning_rate": 2.836923076923077e-06,
"loss": 0.0382,
"step": 14475
},
{
"epoch": 6.132374709240854,
"grad_norm": 1.7349085807800293,
"learning_rate": 2.8241025641025644e-06,
"loss": 0.0443,
"step": 14500
},
{
"epoch": 6.142947769084373,
"grad_norm": 3.1473402976989746,
"learning_rate": 2.8112820512820517e-06,
"loss": 0.0223,
"step": 14525
},
{
"epoch": 6.153520828927892,
"grad_norm": 4.05681848526001,
"learning_rate": 2.798461538461539e-06,
"loss": 0.0365,
"step": 14550
},
{
"epoch": 6.1640938887714105,
"grad_norm": 2.0210795402526855,
"learning_rate": 2.785641025641026e-06,
"loss": 0.0319,
"step": 14575
},
{
"epoch": 6.174666948614929,
"grad_norm": 2.4962692260742188,
"learning_rate": 2.7728205128205134e-06,
"loss": 0.0253,
"step": 14600
},
{
"epoch": 6.185240008458448,
"grad_norm": 1.9406040906906128,
"learning_rate": 2.7600000000000003e-06,
"loss": 0.0322,
"step": 14625
},
{
"epoch": 6.195813068301967,
"grad_norm": 2.434849739074707,
"learning_rate": 2.7471794871794875e-06,
"loss": 0.03,
"step": 14650
},
{
"epoch": 6.2063861281454855,
"grad_norm": 2.942782163619995,
"learning_rate": 2.7343589743589743e-06,
"loss": 0.0424,
"step": 14675
},
{
"epoch": 6.216959187989004,
"grad_norm": 2.0401015281677246,
"learning_rate": 2.7215384615384616e-06,
"loss": 0.0359,
"step": 14700
},
{
"epoch": 6.227532247832523,
"grad_norm": 1.1493183374404907,
"learning_rate": 2.708717948717949e-06,
"loss": 0.0279,
"step": 14725
},
{
"epoch": 6.238105307676041,
"grad_norm": 1.1555042266845703,
"learning_rate": 2.695897435897436e-06,
"loss": 0.0298,
"step": 14750
},
{
"epoch": 6.2486783675195605,
"grad_norm": 4.421550273895264,
"learning_rate": 2.683589743589744e-06,
"loss": 0.0325,
"step": 14775
},
{
"epoch": 6.259251427363079,
"grad_norm": 4.805689334869385,
"learning_rate": 2.670769230769231e-06,
"loss": 0.0247,
"step": 14800
},
{
"epoch": 6.269824487206598,
"grad_norm": 0.8942240476608276,
"learning_rate": 2.657948717948718e-06,
"loss": 0.0305,
"step": 14825
},
{
"epoch": 6.280397547050116,
"grad_norm": 2.235232353210449,
"learning_rate": 2.6451282051282052e-06,
"loss": 0.0302,
"step": 14850
},
{
"epoch": 6.290970606893635,
"grad_norm": 1.470533013343811,
"learning_rate": 2.6323076923076925e-06,
"loss": 0.0359,
"step": 14875
},
{
"epoch": 6.301543666737154,
"grad_norm": 4.327214241027832,
"learning_rate": 2.6194871794871797e-06,
"loss": 0.0394,
"step": 14900
},
{
"epoch": 6.312116726580673,
"grad_norm": 1.062364101409912,
"learning_rate": 2.606666666666667e-06,
"loss": 0.0341,
"step": 14925
},
{
"epoch": 6.322689786424191,
"grad_norm": 3.674901247024536,
"learning_rate": 2.593846153846154e-06,
"loss": 0.025,
"step": 14950
},
{
"epoch": 6.33326284626771,
"grad_norm": 2.2653369903564453,
"learning_rate": 2.581025641025641e-06,
"loss": 0.0264,
"step": 14975
},
{
"epoch": 6.343835906111229,
"grad_norm": 1.9021022319793701,
"learning_rate": 2.5682051282051283e-06,
"loss": 0.0338,
"step": 15000
},
{
"epoch": 6.343835906111229,
"eval_loss": 0.19885598123073578,
"eval_runtime": 437.3615,
"eval_samples_per_second": 8.325,
"eval_steps_per_second": 1.043,
"eval_wer": 0.1807044410413476,
"step": 15000
},
{
"epoch": 6.354408965954748,
"grad_norm": 2.683213472366333,
"learning_rate": 2.5553846153846155e-06,
"loss": 0.0293,
"step": 15025
},
{
"epoch": 6.364982025798266,
"grad_norm": 4.173922538757324,
"learning_rate": 2.542564102564103e-06,
"loss": 0.0299,
"step": 15050
},
{
"epoch": 6.375555085641785,
"grad_norm": 2.688981294631958,
"learning_rate": 2.52974358974359e-06,
"loss": 0.0349,
"step": 15075
},
{
"epoch": 6.386128145485303,
"grad_norm": 3.2048428058624268,
"learning_rate": 2.5169230769230773e-06,
"loss": 0.0401,
"step": 15100
},
{
"epoch": 6.396701205328823,
"grad_norm": 2.362328052520752,
"learning_rate": 2.5041025641025645e-06,
"loss": 0.0368,
"step": 15125
},
{
"epoch": 6.407274265172341,
"grad_norm": 2.584799289703369,
"learning_rate": 2.4912820512820514e-06,
"loss": 0.0252,
"step": 15150
},
{
"epoch": 6.41784732501586,
"grad_norm": 3.501321315765381,
"learning_rate": 2.4784615384615386e-06,
"loss": 0.0278,
"step": 15175
},
{
"epoch": 6.428420384859378,
"grad_norm": 2.7278032302856445,
"learning_rate": 2.465641025641026e-06,
"loss": 0.0421,
"step": 15200
},
{
"epoch": 6.438993444702897,
"grad_norm": 2.1528923511505127,
"learning_rate": 2.452820512820513e-06,
"loss": 0.0354,
"step": 15225
},
{
"epoch": 6.449566504546416,
"grad_norm": 2.926116704940796,
"learning_rate": 2.4400000000000004e-06,
"loss": 0.032,
"step": 15250
},
{
"epoch": 6.460139564389935,
"grad_norm": 2.286530017852783,
"learning_rate": 2.427179487179487e-06,
"loss": 0.0416,
"step": 15275
},
{
"epoch": 6.470712624233453,
"grad_norm": 3.249962329864502,
"learning_rate": 2.4143589743589744e-06,
"loss": 0.0327,
"step": 15300
},
{
"epoch": 6.481285684076972,
"grad_norm": 3.736004114151001,
"learning_rate": 2.4015384615384617e-06,
"loss": 0.032,
"step": 15325
},
{
"epoch": 6.49185874392049,
"grad_norm": 1.310922384262085,
"learning_rate": 2.388717948717949e-06,
"loss": 0.0342,
"step": 15350
},
{
"epoch": 6.50243180376401,
"grad_norm": 1.6140261888504028,
"learning_rate": 2.375897435897436e-06,
"loss": 0.0402,
"step": 15375
},
{
"epoch": 6.513004863607528,
"grad_norm": 2.1381657123565674,
"learning_rate": 2.3630769230769234e-06,
"loss": 0.0378,
"step": 15400
},
{
"epoch": 6.523577923451047,
"grad_norm": 1.3571207523345947,
"learning_rate": 2.3502564102564102e-06,
"loss": 0.0318,
"step": 15425
},
{
"epoch": 6.534150983294565,
"grad_norm": 3.115314483642578,
"learning_rate": 2.3374358974358975e-06,
"loss": 0.0422,
"step": 15450
},
{
"epoch": 6.544724043138084,
"grad_norm": 3.6001369953155518,
"learning_rate": 2.3246153846153847e-06,
"loss": 0.0329,
"step": 15475
},
{
"epoch": 6.555297102981603,
"grad_norm": 2.726231098175049,
"learning_rate": 2.311794871794872e-06,
"loss": 0.0417,
"step": 15500
},
{
"epoch": 6.565870162825122,
"grad_norm": 2.179466485977173,
"learning_rate": 2.2989743589743592e-06,
"loss": 0.0326,
"step": 15525
},
{
"epoch": 6.57644322266864,
"grad_norm": 4.61680269241333,
"learning_rate": 2.2861538461538465e-06,
"loss": 0.0376,
"step": 15550
},
{
"epoch": 6.587016282512159,
"grad_norm": 4.001598358154297,
"learning_rate": 2.2733333333333333e-06,
"loss": 0.0341,
"step": 15575
},
{
"epoch": 6.597589342355677,
"grad_norm": 2.2401235103607178,
"learning_rate": 2.2605128205128206e-06,
"loss": 0.0298,
"step": 15600
},
{
"epoch": 6.608162402199197,
"grad_norm": 5.490719795227051,
"learning_rate": 2.247692307692308e-06,
"loss": 0.0405,
"step": 15625
},
{
"epoch": 6.618735462042715,
"grad_norm": 4.875611305236816,
"learning_rate": 2.234871794871795e-06,
"loss": 0.0431,
"step": 15650
},
{
"epoch": 6.629308521886234,
"grad_norm": 2.3372411727905273,
"learning_rate": 2.2220512820512823e-06,
"loss": 0.0309,
"step": 15675
},
{
"epoch": 6.639881581729752,
"grad_norm": 7.326153755187988,
"learning_rate": 2.2092307692307695e-06,
"loss": 0.0377,
"step": 15700
},
{
"epoch": 6.650454641573271,
"grad_norm": 2.733332633972168,
"learning_rate": 2.1964102564102564e-06,
"loss": 0.0341,
"step": 15725
},
{
"epoch": 6.66102770141679,
"grad_norm": 2.7690587043762207,
"learning_rate": 2.1835897435897436e-06,
"loss": 0.0269,
"step": 15750
},
{
"epoch": 6.671600761260309,
"grad_norm": 3.4367501735687256,
"learning_rate": 2.170769230769231e-06,
"loss": 0.0416,
"step": 15775
},
{
"epoch": 6.682173821103827,
"grad_norm": 1.8765815496444702,
"learning_rate": 2.157948717948718e-06,
"loss": 0.0274,
"step": 15800
},
{
"epoch": 6.692746880947346,
"grad_norm": 4.376313209533691,
"learning_rate": 2.1451282051282054e-06,
"loss": 0.0353,
"step": 15825
},
{
"epoch": 6.703319940790865,
"grad_norm": 2.716111660003662,
"learning_rate": 2.1323076923076926e-06,
"loss": 0.0346,
"step": 15850
},
{
"epoch": 6.713893000634384,
"grad_norm": 1.410538911819458,
"learning_rate": 2.1194871794871794e-06,
"loss": 0.0516,
"step": 15875
},
{
"epoch": 6.724466060477902,
"grad_norm": 2.3586137294769287,
"learning_rate": 2.1066666666666667e-06,
"loss": 0.0312,
"step": 15900
},
{
"epoch": 6.735039120321421,
"grad_norm": 2.5256056785583496,
"learning_rate": 2.093846153846154e-06,
"loss": 0.0342,
"step": 15925
},
{
"epoch": 6.745612180164939,
"grad_norm": 1.971276879310608,
"learning_rate": 2.081025641025641e-06,
"loss": 0.0286,
"step": 15950
},
{
"epoch": 6.756185240008459,
"grad_norm": 3.3202672004699707,
"learning_rate": 2.0682051282051284e-06,
"loss": 0.0324,
"step": 15975
},
{
"epoch": 6.766758299851977,
"grad_norm": 4.523472309112549,
"learning_rate": 2.0553846153846157e-06,
"loss": 0.0312,
"step": 16000
},
{
"epoch": 6.766758299851977,
"eval_loss": 0.1982458233833313,
"eval_runtime": 440.4101,
"eval_samples_per_second": 8.267,
"eval_steps_per_second": 1.035,
"eval_wer": 0.19453338902037218,
"step": 16000
},
{
"epoch": 6.777331359695496,
"grad_norm": 1.8507798910140991,
"learning_rate": 2.0425641025641025e-06,
"loss": 0.0326,
"step": 16025
},
{
"epoch": 6.787904419539014,
"grad_norm": 1.808121681213379,
"learning_rate": 2.0297435897435897e-06,
"loss": 0.0317,
"step": 16050
},
{
"epoch": 6.798477479382534,
"grad_norm": 1.5614056587219238,
"learning_rate": 2.016923076923077e-06,
"loss": 0.0404,
"step": 16075
},
{
"epoch": 6.809050539226052,
"grad_norm": 1.277215600013733,
"learning_rate": 2.0041025641025642e-06,
"loss": 0.0257,
"step": 16100
},
{
"epoch": 6.819623599069571,
"grad_norm": 2.63959002494812,
"learning_rate": 1.9912820512820515e-06,
"loss": 0.0232,
"step": 16125
},
{
"epoch": 6.830196658913089,
"grad_norm": 3.124812602996826,
"learning_rate": 1.9784615384615387e-06,
"loss": 0.0277,
"step": 16150
},
{
"epoch": 6.840769718756608,
"grad_norm": 5.799993991851807,
"learning_rate": 1.9656410256410256e-06,
"loss": 0.0354,
"step": 16175
},
{
"epoch": 6.851342778600127,
"grad_norm": 1.860620141029358,
"learning_rate": 1.952820512820513e-06,
"loss": 0.0231,
"step": 16200
},
{
"epoch": 6.861915838443646,
"grad_norm": 3.376303195953369,
"learning_rate": 1.94e-06,
"loss": 0.0357,
"step": 16225
},
{
"epoch": 6.872488898287164,
"grad_norm": 3.7502923011779785,
"learning_rate": 1.9271794871794873e-06,
"loss": 0.0365,
"step": 16250
},
{
"epoch": 6.883061958130683,
"grad_norm": 2.344674587249756,
"learning_rate": 1.9143589743589746e-06,
"loss": 0.0354,
"step": 16275
},
{
"epoch": 6.893635017974201,
"grad_norm": 1.4450386762619019,
"learning_rate": 1.9015384615384616e-06,
"loss": 0.0257,
"step": 16300
},
{
"epoch": 6.904208077817721,
"grad_norm": 2.097320079803467,
"learning_rate": 1.8887179487179488e-06,
"loss": 0.0265,
"step": 16325
},
{
"epoch": 6.914781137661239,
"grad_norm": 4.6603851318359375,
"learning_rate": 1.875897435897436e-06,
"loss": 0.0354,
"step": 16350
},
{
"epoch": 6.925354197504758,
"grad_norm": 1.707977533340454,
"learning_rate": 1.8630769230769233e-06,
"loss": 0.0422,
"step": 16375
},
{
"epoch": 6.935927257348276,
"grad_norm": 3.779792070388794,
"learning_rate": 1.8502564102564106e-06,
"loss": 0.0315,
"step": 16400
},
{
"epoch": 6.946500317191795,
"grad_norm": 4.249421119689941,
"learning_rate": 1.8374358974358974e-06,
"loss": 0.0309,
"step": 16425
},
{
"epoch": 6.957073377035314,
"grad_norm": 2.3399875164031982,
"learning_rate": 1.8246153846153847e-06,
"loss": 0.0276,
"step": 16450
},
{
"epoch": 6.967646436878833,
"grad_norm": 4.361936092376709,
"learning_rate": 1.811794871794872e-06,
"loss": 0.0255,
"step": 16475
},
{
"epoch": 6.978219496722351,
"grad_norm": 2.4711239337921143,
"learning_rate": 1.7989743589743592e-06,
"loss": 0.03,
"step": 16500
},
{
"epoch": 6.98879255656587,
"grad_norm": 1.2361491918563843,
"learning_rate": 1.7861538461538464e-06,
"loss": 0.0311,
"step": 16525
},
{
"epoch": 6.9993656164093885,
"grad_norm": 3.5375123023986816,
"learning_rate": 1.7733333333333336e-06,
"loss": 0.0367,
"step": 16550
},
{
"epoch": 7.009938676252908,
"grad_norm": 1.0913958549499512,
"learning_rate": 1.7605128205128205e-06,
"loss": 0.0279,
"step": 16575
},
{
"epoch": 7.020511736096426,
"grad_norm": 1.6949819326400757,
"learning_rate": 1.7476923076923077e-06,
"loss": 0.0241,
"step": 16600
},
{
"epoch": 7.031084795939945,
"grad_norm": 2.930881977081299,
"learning_rate": 1.734871794871795e-06,
"loss": 0.0216,
"step": 16625
},
{
"epoch": 7.0416578557834635,
"grad_norm": 4.314076900482178,
"learning_rate": 1.7220512820512822e-06,
"loss": 0.0248,
"step": 16650
},
{
"epoch": 7.052230915626982,
"grad_norm": 2.817746639251709,
"learning_rate": 1.7092307692307695e-06,
"loss": 0.0208,
"step": 16675
},
{
"epoch": 7.062803975470501,
"grad_norm": 1.8693445920944214,
"learning_rate": 1.6964102564102567e-06,
"loss": 0.0245,
"step": 16700
},
{
"epoch": 7.07337703531402,
"grad_norm": 2.050861120223999,
"learning_rate": 1.683589743589744e-06,
"loss": 0.0178,
"step": 16725
},
{
"epoch": 7.0839500951575385,
"grad_norm": 1.6270172595977783,
"learning_rate": 1.6707692307692308e-06,
"loss": 0.0211,
"step": 16750
},
{
"epoch": 7.094523155001057,
"grad_norm": 2.275005578994751,
"learning_rate": 1.657948717948718e-06,
"loss": 0.022,
"step": 16775
},
{
"epoch": 7.105096214844576,
"grad_norm": 2.681896924972534,
"learning_rate": 1.6451282051282053e-06,
"loss": 0.0228,
"step": 16800
},
{
"epoch": 7.115669274688095,
"grad_norm": 1.60031259059906,
"learning_rate": 1.632820512820513e-06,
"loss": 0.0213,
"step": 16825
},
{
"epoch": 7.1262423345316135,
"grad_norm": 1.0850863456726074,
"learning_rate": 1.6200000000000002e-06,
"loss": 0.0289,
"step": 16850
},
{
"epoch": 7.136815394375132,
"grad_norm": 1.6733803749084473,
"learning_rate": 1.6071794871794874e-06,
"loss": 0.0374,
"step": 16875
},
{
"epoch": 7.1473884542186505,
"grad_norm": 1.9587990045547485,
"learning_rate": 1.5943589743589744e-06,
"loss": 0.0276,
"step": 16900
},
{
"epoch": 7.15796151406217,
"grad_norm": 1.7133156061172485,
"learning_rate": 1.5815384615384615e-06,
"loss": 0.0246,
"step": 16925
},
{
"epoch": 7.1685345739056885,
"grad_norm": 1.5173008441925049,
"learning_rate": 1.5687179487179487e-06,
"loss": 0.0269,
"step": 16950
},
{
"epoch": 7.179107633749207,
"grad_norm": 2.4916610717773438,
"learning_rate": 1.555897435897436e-06,
"loss": 0.022,
"step": 16975
},
{
"epoch": 7.1896806935927255,
"grad_norm": 0.9858763217926025,
"learning_rate": 1.5430769230769232e-06,
"loss": 0.0237,
"step": 17000
},
{
"epoch": 7.1896806935927255,
"eval_loss": 0.19975194334983826,
"eval_runtime": 439.8052,
"eval_samples_per_second": 8.279,
"eval_steps_per_second": 1.037,
"eval_wer": 0.1842312868346559,
"step": 17000
},
{
"epoch": 7.200253753436244,
"grad_norm": 2.612891912460327,
"learning_rate": 1.5302564102564105e-06,
"loss": 0.0265,
"step": 17025
},
{
"epoch": 7.2108268132797635,
"grad_norm": 2.0947177410125732,
"learning_rate": 1.5174358974358977e-06,
"loss": 0.0256,
"step": 17050
},
{
"epoch": 7.221399873123282,
"grad_norm": 2.62943959236145,
"learning_rate": 1.5046153846153845e-06,
"loss": 0.0177,
"step": 17075
},
{
"epoch": 7.2319729329668005,
"grad_norm": 1.3514835834503174,
"learning_rate": 1.4917948717948718e-06,
"loss": 0.0249,
"step": 17100
},
{
"epoch": 7.242545992810319,
"grad_norm": 1.2198314666748047,
"learning_rate": 1.478974358974359e-06,
"loss": 0.0358,
"step": 17125
},
{
"epoch": 7.253119052653838,
"grad_norm": 1.2630308866500854,
"learning_rate": 1.4661538461538463e-06,
"loss": 0.0283,
"step": 17150
},
{
"epoch": 7.263692112497357,
"grad_norm": 2.1470680236816406,
"learning_rate": 1.4533333333333335e-06,
"loss": 0.0324,
"step": 17175
},
{
"epoch": 7.2742651723408756,
"grad_norm": 1.8368949890136719,
"learning_rate": 1.4405128205128208e-06,
"loss": 0.0346,
"step": 17200
},
{
"epoch": 7.284838232184394,
"grad_norm": 1.5384070873260498,
"learning_rate": 1.4276923076923076e-06,
"loss": 0.0196,
"step": 17225
},
{
"epoch": 7.295411292027913,
"grad_norm": 1.1340972185134888,
"learning_rate": 1.4148717948717949e-06,
"loss": 0.019,
"step": 17250
},
{
"epoch": 7.305984351871432,
"grad_norm": 1.1070088148117065,
"learning_rate": 1.402051282051282e-06,
"loss": 0.022,
"step": 17275
},
{
"epoch": 7.3165574117149506,
"grad_norm": 1.5131502151489258,
"learning_rate": 1.3892307692307694e-06,
"loss": 0.0212,
"step": 17300
},
{
"epoch": 7.327130471558469,
"grad_norm": 1.0822603702545166,
"learning_rate": 1.3764102564102566e-06,
"loss": 0.0167,
"step": 17325
},
{
"epoch": 7.337703531401988,
"grad_norm": 1.711835503578186,
"learning_rate": 1.3635897435897438e-06,
"loss": 0.0184,
"step": 17350
},
{
"epoch": 7.348276591245506,
"grad_norm": 1.619463324546814,
"learning_rate": 1.3507692307692307e-06,
"loss": 0.02,
"step": 17375
},
{
"epoch": 7.358849651089026,
"grad_norm": 0.940372884273529,
"learning_rate": 1.337948717948718e-06,
"loss": 0.0244,
"step": 17400
},
{
"epoch": 7.369422710932544,
"grad_norm": 1.5679852962493896,
"learning_rate": 1.3251282051282052e-06,
"loss": 0.0245,
"step": 17425
},
{
"epoch": 7.379995770776063,
"grad_norm": 1.4729161262512207,
"learning_rate": 1.3123076923076924e-06,
"loss": 0.0225,
"step": 17450
},
{
"epoch": 7.390568830619581,
"grad_norm": 1.3189888000488281,
"learning_rate": 1.2994871794871797e-06,
"loss": 0.0377,
"step": 17475
},
{
"epoch": 7.4011418904631,
"grad_norm": 2.973733425140381,
"learning_rate": 1.286666666666667e-06,
"loss": 0.0241,
"step": 17500
},
{
"epoch": 7.411714950306619,
"grad_norm": 5.635532855987549,
"learning_rate": 1.273846153846154e-06,
"loss": 0.0364,
"step": 17525
},
{
"epoch": 7.422288010150138,
"grad_norm": 1.3458870649337769,
"learning_rate": 1.261025641025641e-06,
"loss": 0.0236,
"step": 17550
},
{
"epoch": 7.432861069993656,
"grad_norm": 3.022913932800293,
"learning_rate": 1.2482051282051282e-06,
"loss": 0.0395,
"step": 17575
},
{
"epoch": 7.443434129837175,
"grad_norm": 1.9709924459457397,
"learning_rate": 1.2353846153846155e-06,
"loss": 0.0253,
"step": 17600
},
{
"epoch": 7.454007189680693,
"grad_norm": 5.252633571624756,
"learning_rate": 1.2225641025641025e-06,
"loss": 0.0305,
"step": 17625
},
{
"epoch": 7.464580249524213,
"grad_norm": 1.3409159183502197,
"learning_rate": 1.2097435897435898e-06,
"loss": 0.0253,
"step": 17650
},
{
"epoch": 7.475153309367731,
"grad_norm": 0.8901488780975342,
"learning_rate": 1.196923076923077e-06,
"loss": 0.0236,
"step": 17675
},
{
"epoch": 7.48572636921125,
"grad_norm": 2.1367154121398926,
"learning_rate": 1.184102564102564e-06,
"loss": 0.0329,
"step": 17700
},
{
"epoch": 7.496299429054768,
"grad_norm": 2.0283923149108887,
"learning_rate": 1.1712820512820513e-06,
"loss": 0.0231,
"step": 17725
},
{
"epoch": 7.506872488898287,
"grad_norm": 4.209811210632324,
"learning_rate": 1.1584615384615385e-06,
"loss": 0.0205,
"step": 17750
},
{
"epoch": 7.517445548741806,
"grad_norm": 1.1471270322799683,
"learning_rate": 1.1456410256410256e-06,
"loss": 0.0262,
"step": 17775
},
{
"epoch": 7.528018608585325,
"grad_norm": 1.345858097076416,
"learning_rate": 1.1328205128205128e-06,
"loss": 0.0419,
"step": 17800
},
{
"epoch": 7.538591668428843,
"grad_norm": 4.0828857421875,
"learning_rate": 1.12e-06,
"loss": 0.0248,
"step": 17825
},
{
"epoch": 7.549164728272362,
"grad_norm": 2.618866443634033,
"learning_rate": 1.1071794871794873e-06,
"loss": 0.0239,
"step": 17850
},
{
"epoch": 7.55973778811588,
"grad_norm": 3.4622833728790283,
"learning_rate": 1.0943589743589744e-06,
"loss": 0.0291,
"step": 17875
},
{
"epoch": 7.5703108479594,
"grad_norm": 4.808816432952881,
"learning_rate": 1.0815384615384616e-06,
"loss": 0.0308,
"step": 17900
},
{
"epoch": 7.580883907802918,
"grad_norm": 1.1136987209320068,
"learning_rate": 1.0687179487179489e-06,
"loss": 0.0185,
"step": 17925
},
{
"epoch": 7.591456967646437,
"grad_norm": 1.1457035541534424,
"learning_rate": 1.0558974358974359e-06,
"loss": 0.0234,
"step": 17950
},
{
"epoch": 7.602030027489955,
"grad_norm": 2.3021388053894043,
"learning_rate": 1.0430769230769231e-06,
"loss": 0.0172,
"step": 17975
},
{
"epoch": 7.612603087333475,
"grad_norm": 2.4270472526550293,
"learning_rate": 1.0302564102564104e-06,
"loss": 0.0223,
"step": 18000
},
{
"epoch": 7.612603087333475,
"eval_loss": 0.19938968122005463,
"eval_runtime": 437.8238,
"eval_samples_per_second": 8.316,
"eval_steps_per_second": 1.042,
"eval_wer": 0.18000835305582627,
"step": 18000
},
{
"epoch": 7.623176147176993,
"grad_norm": 1.8567931652069092,
"learning_rate": 1.0174358974358974e-06,
"loss": 0.0193,
"step": 18025
},
{
"epoch": 7.633749207020512,
"grad_norm": 1.3256702423095703,
"learning_rate": 1.0046153846153847e-06,
"loss": 0.0237,
"step": 18050
},
{
"epoch": 7.64432226686403,
"grad_norm": 2.2503092288970947,
"learning_rate": 9.91794871794872e-07,
"loss": 0.0335,
"step": 18075
},
{
"epoch": 7.654895326707549,
"grad_norm": 2.70359468460083,
"learning_rate": 9.78974358974359e-07,
"loss": 0.0217,
"step": 18100
},
{
"epoch": 7.665468386551068,
"grad_norm": 2.691211700439453,
"learning_rate": 9.661538461538462e-07,
"loss": 0.0198,
"step": 18125
},
{
"epoch": 7.676041446394587,
"grad_norm": 2.4848880767822266,
"learning_rate": 9.533333333333335e-07,
"loss": 0.0209,
"step": 18150
},
{
"epoch": 7.686614506238105,
"grad_norm": 1.926702857017517,
"learning_rate": 9.405128205128206e-07,
"loss": 0.0355,
"step": 18175
},
{
"epoch": 7.697187566081624,
"grad_norm": 1.5971437692642212,
"learning_rate": 9.276923076923077e-07,
"loss": 0.0197,
"step": 18200
},
{
"epoch": 7.707760625925143,
"grad_norm": 2.465510845184326,
"learning_rate": 9.14871794871795e-07,
"loss": 0.0248,
"step": 18225
},
{
"epoch": 7.718333685768662,
"grad_norm": 2.2006723880767822,
"learning_rate": 9.020512820512821e-07,
"loss": 0.0325,
"step": 18250
},
{
"epoch": 7.72890674561218,
"grad_norm": 0.9062207937240601,
"learning_rate": 8.892307692307693e-07,
"loss": 0.025,
"step": 18275
},
{
"epoch": 7.739479805455699,
"grad_norm": 0.9305509328842163,
"learning_rate": 8.764102564102565e-07,
"loss": 0.0268,
"step": 18300
},
{
"epoch": 7.750052865299217,
"grad_norm": 2.8776371479034424,
"learning_rate": 8.635897435897437e-07,
"loss": 0.024,
"step": 18325
},
{
"epoch": 7.760625925142737,
"grad_norm": 1.8713349103927612,
"learning_rate": 8.507692307692308e-07,
"loss": 0.0201,
"step": 18350
},
{
"epoch": 7.771198984986255,
"grad_norm": 1.8037798404693604,
"learning_rate": 8.37948717948718e-07,
"loss": 0.0195,
"step": 18375
},
{
"epoch": 7.781772044829774,
"grad_norm": 2.990373134613037,
"learning_rate": 8.251282051282052e-07,
"loss": 0.019,
"step": 18400
},
{
"epoch": 7.792345104673292,
"grad_norm": 2.369366407394409,
"learning_rate": 8.123076923076923e-07,
"loss": 0.0306,
"step": 18425
},
{
"epoch": 7.802918164516811,
"grad_norm": 2.136784553527832,
"learning_rate": 7.994871794871796e-07,
"loss": 0.0268,
"step": 18450
},
{
"epoch": 7.81349122436033,
"grad_norm": 1.5172721147537231,
"learning_rate": 7.866666666666667e-07,
"loss": 0.0255,
"step": 18475
},
{
"epoch": 7.824064284203849,
"grad_norm": 1.0286389589309692,
"learning_rate": 7.738461538461539e-07,
"loss": 0.0226,
"step": 18500
},
{
"epoch": 7.834637344047367,
"grad_norm": 3.877328634262085,
"learning_rate": 7.610256410256411e-07,
"loss": 0.031,
"step": 18525
},
{
"epoch": 7.845210403890886,
"grad_norm": 1.6196309328079224,
"learning_rate": 7.482051282051283e-07,
"loss": 0.0187,
"step": 18550
},
{
"epoch": 7.855783463734404,
"grad_norm": 3.4636642932891846,
"learning_rate": 7.353846153846154e-07,
"loss": 0.0314,
"step": 18575
},
{
"epoch": 7.866356523577924,
"grad_norm": 1.571252703666687,
"learning_rate": 7.225641025641026e-07,
"loss": 0.0216,
"step": 18600
},
{
"epoch": 7.876929583421442,
"grad_norm": 3.157963752746582,
"learning_rate": 7.097435897435898e-07,
"loss": 0.0219,
"step": 18625
},
{
"epoch": 7.887502643264961,
"grad_norm": 1.0556855201721191,
"learning_rate": 6.969230769230769e-07,
"loss": 0.0271,
"step": 18650
},
{
"epoch": 7.898075703108479,
"grad_norm": 3.6477906703948975,
"learning_rate": 6.841025641025642e-07,
"loss": 0.0297,
"step": 18675
},
{
"epoch": 7.908648762951998,
"grad_norm": 2.9608850479125977,
"learning_rate": 6.712820512820513e-07,
"loss": 0.0289,
"step": 18700
},
{
"epoch": 7.919221822795517,
"grad_norm": 2.4232919216156006,
"learning_rate": 6.584615384615385e-07,
"loss": 0.0254,
"step": 18725
},
{
"epoch": 7.929794882639036,
"grad_norm": 2.4459691047668457,
"learning_rate": 6.456410256410257e-07,
"loss": 0.0212,
"step": 18750
},
{
"epoch": 7.940367942482554,
"grad_norm": 2.3065927028656006,
"learning_rate": 6.328205128205129e-07,
"loss": 0.0312,
"step": 18775
},
{
"epoch": 7.950941002326073,
"grad_norm": 1.5412800312042236,
"learning_rate": 6.200000000000001e-07,
"loss": 0.0196,
"step": 18800
},
{
"epoch": 7.9615140621695915,
"grad_norm": 1.0917569398880005,
"learning_rate": 6.071794871794872e-07,
"loss": 0.0197,
"step": 18825
},
{
"epoch": 7.972087122013111,
"grad_norm": 4.866013526916504,
"learning_rate": 5.943589743589744e-07,
"loss": 0.0253,
"step": 18850
},
{
"epoch": 7.982660181856629,
"grad_norm": 1.2132177352905273,
"learning_rate": 5.815384615384616e-07,
"loss": 0.0235,
"step": 18875
},
{
"epoch": 7.993233241700148,
"grad_norm": 1.900343894958496,
"learning_rate": 5.687179487179488e-07,
"loss": 0.0264,
"step": 18900
},
{
"epoch": 8.003806301543667,
"grad_norm": 2.312898874282837,
"learning_rate": 5.558974358974359e-07,
"loss": 0.0239,
"step": 18925
},
{
"epoch": 8.014379361387185,
"grad_norm": 1.2997639179229736,
"learning_rate": 5.430769230769232e-07,
"loss": 0.0164,
"step": 18950
},
{
"epoch": 8.024952421230704,
"grad_norm": 1.6723324060440063,
"learning_rate": 5.302564102564103e-07,
"loss": 0.0151,
"step": 18975
},
{
"epoch": 8.035525481074222,
"grad_norm": 1.057395339012146,
"learning_rate": 5.174358974358974e-07,
"loss": 0.0192,
"step": 19000
},
{
"epoch": 8.035525481074222,
"eval_loss": 0.19927473366260529,
"eval_runtime": 441.263,
"eval_samples_per_second": 8.251,
"eval_steps_per_second": 1.033,
"eval_wer": 0.1806116293099448,
"step": 19000
},
{
"epoch": 8.046098540917741,
"grad_norm": 1.4631696939468384,
"learning_rate": 5.046153846153847e-07,
"loss": 0.0164,
"step": 19025
},
{
"epoch": 8.05667160076126,
"grad_norm": 1.539763331413269,
"learning_rate": 4.917948717948718e-07,
"loss": 0.0242,
"step": 19050
},
{
"epoch": 8.067244660604779,
"grad_norm": 0.7737773060798645,
"learning_rate": 4.78974358974359e-07,
"loss": 0.0147,
"step": 19075
},
{
"epoch": 8.077817720448298,
"grad_norm": 1.1692899465560913,
"learning_rate": 4.661538461538462e-07,
"loss": 0.0266,
"step": 19100
},
{
"epoch": 8.088390780291816,
"grad_norm": 0.7815419435501099,
"learning_rate": 4.533333333333334e-07,
"loss": 0.0234,
"step": 19125
},
{
"epoch": 8.098963840135335,
"grad_norm": 1.5961151123046875,
"learning_rate": 4.4051282051282056e-07,
"loss": 0.0209,
"step": 19150
},
{
"epoch": 8.109536899978854,
"grad_norm": 2.6668717861175537,
"learning_rate": 4.276923076923077e-07,
"loss": 0.02,
"step": 19175
},
{
"epoch": 8.120109959822372,
"grad_norm": 2.6953537464141846,
"learning_rate": 4.1487179487179495e-07,
"loss": 0.0227,
"step": 19200
},
{
"epoch": 8.130683019665891,
"grad_norm": 1.7729073762893677,
"learning_rate": 4.025641025641026e-07,
"loss": 0.0262,
"step": 19225
},
{
"epoch": 8.14125607950941,
"grad_norm": 1.3672327995300293,
"learning_rate": 3.897435897435898e-07,
"loss": 0.0168,
"step": 19250
},
{
"epoch": 8.151829139352929,
"grad_norm": 3.492060422897339,
"learning_rate": 3.769230769230769e-07,
"loss": 0.0193,
"step": 19275
},
{
"epoch": 8.162402199196448,
"grad_norm": 3.2767393589019775,
"learning_rate": 3.641025641025641e-07,
"loss": 0.0197,
"step": 19300
},
{
"epoch": 8.172975259039966,
"grad_norm": 1.0066640377044678,
"learning_rate": 3.512820512820513e-07,
"loss": 0.0222,
"step": 19325
},
{
"epoch": 8.183548318883485,
"grad_norm": 2.359158515930176,
"learning_rate": 3.3846153846153845e-07,
"loss": 0.0171,
"step": 19350
},
{
"epoch": 8.194121378727004,
"grad_norm": 1.6191521883010864,
"learning_rate": 3.2564102564102565e-07,
"loss": 0.0192,
"step": 19375
},
{
"epoch": 8.204694438570522,
"grad_norm": 3.286207914352417,
"learning_rate": 3.1282051282051284e-07,
"loss": 0.0171,
"step": 19400
},
{
"epoch": 8.215267498414041,
"grad_norm": 4.682559013366699,
"learning_rate": 3.0000000000000004e-07,
"loss": 0.0291,
"step": 19425
},
{
"epoch": 8.225840558257559,
"grad_norm": 5.000258445739746,
"learning_rate": 2.871794871794872e-07,
"loss": 0.0287,
"step": 19450
},
{
"epoch": 8.236413618101079,
"grad_norm": 1.9743106365203857,
"learning_rate": 2.743589743589744e-07,
"loss": 0.0185,
"step": 19475
},
{
"epoch": 8.246986677944598,
"grad_norm": 1.1319037675857544,
"learning_rate": 2.6153846153846157e-07,
"loss": 0.0156,
"step": 19500
},
{
"epoch": 8.257559737788116,
"grad_norm": 4.324277400970459,
"learning_rate": 2.487179487179487e-07,
"loss": 0.021,
"step": 19525
},
{
"epoch": 8.268132797631635,
"grad_norm": 3.4904730319976807,
"learning_rate": 2.3589743589743593e-07,
"loss": 0.0195,
"step": 19550
},
{
"epoch": 8.278705857475153,
"grad_norm": 1.5097222328186035,
"learning_rate": 2.2307692307692308e-07,
"loss": 0.0181,
"step": 19575
},
{
"epoch": 8.289278917318672,
"grad_norm": 2.1649582386016846,
"learning_rate": 2.1025641025641027e-07,
"loss": 0.0198,
"step": 19600
},
{
"epoch": 8.299851977162191,
"grad_norm": 3.739485740661621,
"learning_rate": 1.9743589743589747e-07,
"loss": 0.021,
"step": 19625
},
{
"epoch": 8.31042503700571,
"grad_norm": 4.509456634521484,
"learning_rate": 1.8461538461538464e-07,
"loss": 0.0364,
"step": 19650
},
{
"epoch": 8.320998096849229,
"grad_norm": 1.5747437477111816,
"learning_rate": 1.717948717948718e-07,
"loss": 0.0283,
"step": 19675
},
{
"epoch": 8.331571156692746,
"grad_norm": 2.8335936069488525,
"learning_rate": 1.58974358974359e-07,
"loss": 0.0273,
"step": 19700
},
{
"epoch": 8.342144216536266,
"grad_norm": 1.661657452583313,
"learning_rate": 1.4615384615384617e-07,
"loss": 0.0283,
"step": 19725
},
{
"epoch": 8.352717276379785,
"grad_norm": 2.5407464504241943,
"learning_rate": 1.3333333333333336e-07,
"loss": 0.0225,
"step": 19750
},
{
"epoch": 8.363290336223303,
"grad_norm": 1.7496103048324585,
"learning_rate": 1.2051282051282053e-07,
"loss": 0.02,
"step": 19775
},
{
"epoch": 8.373863396066822,
"grad_norm": 1.1357078552246094,
"learning_rate": 1.076923076923077e-07,
"loss": 0.0159,
"step": 19800
},
{
"epoch": 8.38443645591034,
"grad_norm": 1.138163685798645,
"learning_rate": 9.487179487179488e-08,
"loss": 0.0219,
"step": 19825
},
{
"epoch": 8.39500951575386,
"grad_norm": 1.9725435972213745,
"learning_rate": 8.205128205128206e-08,
"loss": 0.0189,
"step": 19850
},
{
"epoch": 8.405582575597379,
"grad_norm": 1.0541646480560303,
"learning_rate": 6.923076923076924e-08,
"loss": 0.0292,
"step": 19875
},
{
"epoch": 8.416155635440896,
"grad_norm": 3.125678062438965,
"learning_rate": 5.641025641025642e-08,
"loss": 0.0196,
"step": 19900
},
{
"epoch": 8.426728695284416,
"grad_norm": 1.1036850214004517,
"learning_rate": 4.358974358974359e-08,
"loss": 0.0163,
"step": 19925
},
{
"epoch": 8.437301755127933,
"grad_norm": 3.5145390033721924,
"learning_rate": 3.076923076923077e-08,
"loss": 0.0224,
"step": 19950
},
{
"epoch": 8.447874814971453,
"grad_norm": 4.518385410308838,
"learning_rate": 1.794871794871795e-08,
"loss": 0.0223,
"step": 19975
},
{
"epoch": 8.458447874814972,
"grad_norm": 1.1149485111236572,
"learning_rate": 5.128205128205129e-09,
"loss": 0.0158,
"step": 20000
},
{
"epoch": 8.458447874814972,
"eval_loss": 0.20003820955753326,
"eval_runtime": 439.4967,
"eval_samples_per_second": 8.284,
"eval_steps_per_second": 1.038,
"eval_wer": 0.1807044410413476,
"step": 20000
},
{
"epoch": 8.458447874814972,
"step": 20000,
"total_flos": 7.8770584829952e+18,
"train_loss": 0.15760719767808914,
"train_runtime": 46114.6561,
"train_samples_per_second": 6.939,
"train_steps_per_second": 0.434
}
],
"logging_steps": 25,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.8770584829952e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}