{
"best_metric": 0.12174894318263237,
"best_model_checkpoint": "Checkpoints/noisereduce_small_HLBTAugs/checkpoint-10612",
"epoch": 7.0,
"eval_steps": 1000,
"global_step": 10612,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016490765171503958,
"grad_norm": 39.31369400024414,
"learning_rate": 4.2000000000000006e-07,
"loss": 6.8388,
"step": 25
},
{
"epoch": 0.032981530343007916,
"grad_norm": 33.235538482666016,
"learning_rate": 9.200000000000001e-07,
"loss": 6.0977,
"step": 50
},
{
"epoch": 0.04947229551451187,
"grad_norm": 26.6918888092041,
"learning_rate": 1.42e-06,
"loss": 5.1518,
"step": 75
},
{
"epoch": 0.06596306068601583,
"grad_norm": 19.928937911987305,
"learning_rate": 1.9200000000000003e-06,
"loss": 4.1287,
"step": 100
},
{
"epoch": 0.08245382585751979,
"grad_norm": 17.019432067871094,
"learning_rate": 2.42e-06,
"loss": 3.3885,
"step": 125
},
{
"epoch": 0.09894459102902374,
"grad_norm": 14.757500648498535,
"learning_rate": 2.92e-06,
"loss": 2.9706,
"step": 150
},
{
"epoch": 0.11543535620052771,
"grad_norm": 15.232291221618652,
"learning_rate": 3.4200000000000007e-06,
"loss": 2.6453,
"step": 175
},
{
"epoch": 0.13192612137203166,
"grad_norm": 14.23778247833252,
"learning_rate": 3.920000000000001e-06,
"loss": 2.3353,
"step": 200
},
{
"epoch": 0.14841688654353563,
"grad_norm": 15.65247631072998,
"learning_rate": 4.42e-06,
"loss": 2.0593,
"step": 225
},
{
"epoch": 0.16490765171503957,
"grad_norm": 14.948330879211426,
"learning_rate": 4.92e-06,
"loss": 1.9178,
"step": 250
},
{
"epoch": 0.18139841688654354,
"grad_norm": 13.788339614868164,
"learning_rate": 5.420000000000001e-06,
"loss": 1.7265,
"step": 275
},
{
"epoch": 0.19788918205804748,
"grad_norm": 13.352944374084473,
"learning_rate": 5.92e-06,
"loss": 1.6424,
"step": 300
},
{
"epoch": 0.21437994722955145,
"grad_norm": 13.235891342163086,
"learning_rate": 6.42e-06,
"loss": 1.5053,
"step": 325
},
{
"epoch": 0.23087071240105542,
"grad_norm": 14.21623420715332,
"learning_rate": 6.92e-06,
"loss": 1.4925,
"step": 350
},
{
"epoch": 0.24736147757255936,
"grad_norm": 22.086210250854492,
"learning_rate": 7.420000000000001e-06,
"loss": 1.4107,
"step": 375
},
{
"epoch": 0.2638522427440633,
"grad_norm": 12.337775230407715,
"learning_rate": 7.92e-06,
"loss": 1.3594,
"step": 400
},
{
"epoch": 0.28034300791556727,
"grad_norm": 13.980206489562988,
"learning_rate": 8.42e-06,
"loss": 1.3478,
"step": 425
},
{
"epoch": 0.29683377308707126,
"grad_norm": 12.868133544921875,
"learning_rate": 8.920000000000001e-06,
"loss": 1.2952,
"step": 450
},
{
"epoch": 0.3133245382585752,
"grad_norm": 12.454700469970703,
"learning_rate": 9.42e-06,
"loss": 1.3642,
"step": 475
},
{
"epoch": 0.32981530343007914,
"grad_norm": 14.960906982421875,
"learning_rate": 9.920000000000002e-06,
"loss": 1.1298,
"step": 500
},
{
"epoch": 0.34630606860158314,
"grad_norm": 14.04990291595459,
"learning_rate": 9.99533125833704e-06,
"loss": 1.1925,
"step": 525
},
{
"epoch": 0.3627968337730871,
"grad_norm": 12.418570518493652,
"learning_rate": 9.9897732325478e-06,
"loss": 1.1135,
"step": 550
},
{
"epoch": 0.379287598944591,
"grad_norm": 12.186723709106445,
"learning_rate": 9.98421520675856e-06,
"loss": 1.0462,
"step": 575
},
{
"epoch": 0.39577836411609496,
"grad_norm": 13.12454891204834,
"learning_rate": 9.97865718096932e-06,
"loss": 1.0761,
"step": 600
},
{
"epoch": 0.41226912928759896,
"grad_norm": 9.63180923461914,
"learning_rate": 9.97309915518008e-06,
"loss": 1.1031,
"step": 625
},
{
"epoch": 0.4287598944591029,
"grad_norm": 12.495417594909668,
"learning_rate": 9.967541129390842e-06,
"loss": 1.0276,
"step": 650
},
{
"epoch": 0.44525065963060684,
"grad_norm": 12.295565605163574,
"learning_rate": 9.961983103601602e-06,
"loss": 0.9776,
"step": 675
},
{
"epoch": 0.46174142480211083,
"grad_norm": 11.512835502624512,
"learning_rate": 9.956425077812362e-06,
"loss": 0.9664,
"step": 700
},
{
"epoch": 0.4782321899736148,
"grad_norm": 14.376967430114746,
"learning_rate": 9.950867052023122e-06,
"loss": 1.0564,
"step": 725
},
{
"epoch": 0.4947229551451187,
"grad_norm": 11.573795318603516,
"learning_rate": 9.945309026233882e-06,
"loss": 1.0157,
"step": 750
},
{
"epoch": 0.5112137203166227,
"grad_norm": 13.335847854614258,
"learning_rate": 9.939751000444643e-06,
"loss": 0.9132,
"step": 775
},
{
"epoch": 0.5277044854881267,
"grad_norm": 11.932069778442383,
"learning_rate": 9.934192974655403e-06,
"loss": 0.9303,
"step": 800
},
{
"epoch": 0.5441952506596306,
"grad_norm": 15.350811004638672,
"learning_rate": 9.928634948866163e-06,
"loss": 0.9654,
"step": 825
},
{
"epoch": 0.5606860158311345,
"grad_norm": 13.428604125976562,
"learning_rate": 9.923076923076923e-06,
"loss": 0.9238,
"step": 850
},
{
"epoch": 0.5771767810026385,
"grad_norm": 9.998970985412598,
"learning_rate": 9.917518897287685e-06,
"loss": 0.9136,
"step": 875
},
{
"epoch": 0.5936675461741425,
"grad_norm": 12.5803804397583,
"learning_rate": 9.911960871498445e-06,
"loss": 0.8368,
"step": 900
},
{
"epoch": 0.6101583113456465,
"grad_norm": 9.582605361938477,
"learning_rate": 9.906402845709205e-06,
"loss": 0.8865,
"step": 925
},
{
"epoch": 0.6266490765171504,
"grad_norm": 11.229839324951172,
"learning_rate": 9.900844819919965e-06,
"loss": 0.9401,
"step": 950
},
{
"epoch": 0.6431398416886543,
"grad_norm": 9.11830997467041,
"learning_rate": 9.895286794130725e-06,
"loss": 0.9186,
"step": 975
},
{
"epoch": 0.6596306068601583,
"grad_norm": 8.9396333694458,
"learning_rate": 9.889728768341485e-06,
"loss": 0.8388,
"step": 1000
},
{
"epoch": 0.6761213720316622,
"grad_norm": 11.35300064086914,
"learning_rate": 9.884170742552245e-06,
"loss": 0.8979,
"step": 1025
},
{
"epoch": 0.6926121372031663,
"grad_norm": 11.368852615356445,
"learning_rate": 9.878612716763007e-06,
"loss": 0.8806,
"step": 1050
},
{
"epoch": 0.7091029023746702,
"grad_norm": 9.17705249786377,
"learning_rate": 9.873054690973767e-06,
"loss": 0.7954,
"step": 1075
},
{
"epoch": 0.7255936675461742,
"grad_norm": 9.339437484741211,
"learning_rate": 9.867496665184527e-06,
"loss": 0.8565,
"step": 1100
},
{
"epoch": 0.7420844327176781,
"grad_norm": 13.083456039428711,
"learning_rate": 9.861938639395287e-06,
"loss": 0.8049,
"step": 1125
},
{
"epoch": 0.758575197889182,
"grad_norm": 13.02919864654541,
"learning_rate": 9.856380613606049e-06,
"loss": 0.7948,
"step": 1150
},
{
"epoch": 0.775065963060686,
"grad_norm": 8.956382751464844,
"learning_rate": 9.850822587816809e-06,
"loss": 0.802,
"step": 1175
},
{
"epoch": 0.7915567282321899,
"grad_norm": 12.043669700622559,
"learning_rate": 9.845264562027569e-06,
"loss": 0.8199,
"step": 1200
},
{
"epoch": 0.808047493403694,
"grad_norm": 10.119061470031738,
"learning_rate": 9.839706536238329e-06,
"loss": 0.7736,
"step": 1225
},
{
"epoch": 0.8245382585751979,
"grad_norm": 9.62971019744873,
"learning_rate": 9.83414851044909e-06,
"loss": 0.7698,
"step": 1250
},
{
"epoch": 0.8410290237467019,
"grad_norm": 14.902313232421875,
"learning_rate": 9.82859048465985e-06,
"loss": 0.7961,
"step": 1275
},
{
"epoch": 0.8575197889182058,
"grad_norm": 13.862401008605957,
"learning_rate": 9.823032458870609e-06,
"loss": 0.7682,
"step": 1300
},
{
"epoch": 0.8740105540897097,
"grad_norm": 10.040505409240723,
"learning_rate": 9.81747443308137e-06,
"loss": 0.777,
"step": 1325
},
{
"epoch": 0.8905013192612137,
"grad_norm": 8.199767112731934,
"learning_rate": 9.81191640729213e-06,
"loss": 0.7698,
"step": 1350
},
{
"epoch": 0.9069920844327177,
"grad_norm": 9.803958892822266,
"learning_rate": 9.80635838150289e-06,
"loss": 0.7456,
"step": 1375
},
{
"epoch": 0.9234828496042217,
"grad_norm": 10.063946723937988,
"learning_rate": 9.80080035571365e-06,
"loss": 0.7653,
"step": 1400
},
{
"epoch": 0.9399736147757256,
"grad_norm": 11.48767375946045,
"learning_rate": 9.795242329924412e-06,
"loss": 0.7554,
"step": 1425
},
{
"epoch": 0.9564643799472295,
"grad_norm": 10.616227149963379,
"learning_rate": 9.789684304135172e-06,
"loss": 0.7911,
"step": 1450
},
{
"epoch": 0.9729551451187335,
"grad_norm": 8.222291946411133,
"learning_rate": 9.784126278345932e-06,
"loss": 0.7838,
"step": 1475
},
{
"epoch": 0.9894459102902374,
"grad_norm": 9.053133964538574,
"learning_rate": 9.778568252556692e-06,
"loss": 0.7834,
"step": 1500
},
{
"epoch": 1.0,
"eval_loss": 0.37832075357437134,
"eval_runtime": 1456.4978,
"eval_samples_per_second": 3.567,
"eval_steps_per_second": 1.784,
"eval_wer": 0.18685964032385183,
"step": 1516
},
{
"epoch": 1.0059366754617414,
"grad_norm": 7.5070929527282715,
"learning_rate": 9.773010226767452e-06,
"loss": 0.7013,
"step": 1525
},
{
"epoch": 1.0224274406332454,
"grad_norm": 12.309554100036621,
"learning_rate": 9.767452200978214e-06,
"loss": 0.7185,
"step": 1550
},
{
"epoch": 1.0389182058047493,
"grad_norm": 9.577189445495605,
"learning_rate": 9.761894175188974e-06,
"loss": 0.6881,
"step": 1575
},
{
"epoch": 1.0554089709762533,
"grad_norm": 13.550950050354004,
"learning_rate": 9.756336149399734e-06,
"loss": 0.7469,
"step": 1600
},
{
"epoch": 1.0718997361477574,
"grad_norm": 11.185826301574707,
"learning_rate": 9.750778123610494e-06,
"loss": 0.6697,
"step": 1625
},
{
"epoch": 1.0883905013192612,
"grad_norm": 9.437973976135254,
"learning_rate": 9.745220097821256e-06,
"loss": 0.7037,
"step": 1650
},
{
"epoch": 1.1048812664907652,
"grad_norm": 8.722063064575195,
"learning_rate": 9.739662072032014e-06,
"loss": 0.6956,
"step": 1675
},
{
"epoch": 1.121372031662269,
"grad_norm": 8.723018646240234,
"learning_rate": 9.734104046242774e-06,
"loss": 0.736,
"step": 1700
},
{
"epoch": 1.1378627968337731,
"grad_norm": 7.343624114990234,
"learning_rate": 9.728546020453536e-06,
"loss": 0.7095,
"step": 1725
},
{
"epoch": 1.154353562005277,
"grad_norm": 8.930522918701172,
"learning_rate": 9.722987994664296e-06,
"loss": 0.6715,
"step": 1750
},
{
"epoch": 1.170844327176781,
"grad_norm": 8.927308082580566,
"learning_rate": 9.717429968875056e-06,
"loss": 0.6891,
"step": 1775
},
{
"epoch": 1.187335092348285,
"grad_norm": 9.455988883972168,
"learning_rate": 9.711871943085816e-06,
"loss": 0.6908,
"step": 1800
},
{
"epoch": 1.2038258575197889,
"grad_norm": 10.517112731933594,
"learning_rate": 9.706313917296578e-06,
"loss": 0.6583,
"step": 1825
},
{
"epoch": 1.220316622691293,
"grad_norm": 6.787125587463379,
"learning_rate": 9.700755891507338e-06,
"loss": 0.694,
"step": 1850
},
{
"epoch": 1.2368073878627968,
"grad_norm": 9.443794250488281,
"learning_rate": 9.695197865718098e-06,
"loss": 0.6771,
"step": 1875
},
{
"epoch": 1.2532981530343008,
"grad_norm": 8.916643142700195,
"learning_rate": 9.689639839928858e-06,
"loss": 0.7213,
"step": 1900
},
{
"epoch": 1.2697889182058049,
"grad_norm": 9.56280517578125,
"learning_rate": 9.68408181413962e-06,
"loss": 0.6517,
"step": 1925
},
{
"epoch": 1.2862796833773087,
"grad_norm": 10.584586143493652,
"learning_rate": 9.67852378835038e-06,
"loss": 0.6909,
"step": 1950
},
{
"epoch": 1.3027704485488127,
"grad_norm": 7.714522361755371,
"learning_rate": 9.67296576256114e-06,
"loss": 0.6411,
"step": 1975
},
{
"epoch": 1.3192612137203166,
"grad_norm": 8.497810363769531,
"learning_rate": 9.6674077367719e-06,
"loss": 0.6612,
"step": 2000
},
{
"epoch": 1.3357519788918206,
"grad_norm": 7.779177188873291,
"learning_rate": 9.66184971098266e-06,
"loss": 0.6655,
"step": 2025
},
{
"epoch": 1.3522427440633247,
"grad_norm": 8.202792167663574,
"learning_rate": 9.65629168519342e-06,
"loss": 0.6234,
"step": 2050
},
{
"epoch": 1.3687335092348285,
"grad_norm": 6.34199857711792,
"learning_rate": 9.65073365940418e-06,
"loss": 0.6737,
"step": 2075
},
{
"epoch": 1.3852242744063323,
"grad_norm": 9.55582332611084,
"learning_rate": 9.645175633614941e-06,
"loss": 0.6718,
"step": 2100
},
{
"epoch": 1.4017150395778364,
"grad_norm": 9.783499717712402,
"learning_rate": 9.639617607825701e-06,
"loss": 0.6453,
"step": 2125
},
{
"epoch": 1.4182058047493404,
"grad_norm": 7.693665027618408,
"learning_rate": 9.634059582036461e-06,
"loss": 0.631,
"step": 2150
},
{
"epoch": 1.4346965699208443,
"grad_norm": 9.876138687133789,
"learning_rate": 9.628501556247221e-06,
"loss": 0.6951,
"step": 2175
},
{
"epoch": 1.4511873350923483,
"grad_norm": 9.020912170410156,
"learning_rate": 9.622943530457981e-06,
"loss": 0.6479,
"step": 2200
},
{
"epoch": 1.4676781002638521,
"grad_norm": 10.81850528717041,
"learning_rate": 9.617385504668743e-06,
"loss": 0.709,
"step": 2225
},
{
"epoch": 1.4841688654353562,
"grad_norm": 9.466885566711426,
"learning_rate": 9.611827478879503e-06,
"loss": 0.6469,
"step": 2250
},
{
"epoch": 1.5006596306068603,
"grad_norm": 10.120772361755371,
"learning_rate": 9.606269453090263e-06,
"loss": 0.6428,
"step": 2275
},
{
"epoch": 1.517150395778364,
"grad_norm": 9.613449096679688,
"learning_rate": 9.600711427301023e-06,
"loss": 0.6808,
"step": 2300
},
{
"epoch": 1.533641160949868,
"grad_norm": 6.854451656341553,
"learning_rate": 9.595153401511785e-06,
"loss": 0.674,
"step": 2325
},
{
"epoch": 1.550131926121372,
"grad_norm": 7.6446309089660645,
"learning_rate": 9.589595375722545e-06,
"loss": 0.6497,
"step": 2350
},
{
"epoch": 1.566622691292876,
"grad_norm": 9.535722732543945,
"learning_rate": 9.584037349933303e-06,
"loss": 0.6262,
"step": 2375
},
{
"epoch": 1.58311345646438,
"grad_norm": 10.47003173828125,
"learning_rate": 9.578479324144065e-06,
"loss": 0.6381,
"step": 2400
},
{
"epoch": 1.599604221635884,
"grad_norm": 9.593868255615234,
"learning_rate": 9.572921298354825e-06,
"loss": 0.6331,
"step": 2425
},
{
"epoch": 1.6160949868073877,
"grad_norm": 9.041787147521973,
"learning_rate": 9.567363272565585e-06,
"loss": 0.6333,
"step": 2450
},
{
"epoch": 1.6325857519788918,
"grad_norm": 10.436051368713379,
"learning_rate": 9.561805246776345e-06,
"loss": 0.6992,
"step": 2475
},
{
"epoch": 1.6490765171503958,
"grad_norm": 10.629966735839844,
"learning_rate": 9.556247220987107e-06,
"loss": 0.6054,
"step": 2500
},
{
"epoch": 1.6655672823218999,
"grad_norm": 9.051644325256348,
"learning_rate": 9.550689195197867e-06,
"loss": 0.6326,
"step": 2525
},
{
"epoch": 1.6820580474934037,
"grad_norm": 10.509145736694336,
"learning_rate": 9.545131169408627e-06,
"loss": 0.5865,
"step": 2550
},
{
"epoch": 1.6985488126649075,
"grad_norm": 10.23874568939209,
"learning_rate": 9.539573143619387e-06,
"loss": 0.6688,
"step": 2575
},
{
"epoch": 1.7150395778364116,
"grad_norm": 9.89738655090332,
"learning_rate": 9.534015117830148e-06,
"loss": 0.6056,
"step": 2600
},
{
"epoch": 1.7315303430079156,
"grad_norm": 7.859192848205566,
"learning_rate": 9.528457092040908e-06,
"loss": 0.6504,
"step": 2625
},
{
"epoch": 1.7480211081794197,
"grad_norm": 6.974312782287598,
"learning_rate": 9.522899066251668e-06,
"loss": 0.6634,
"step": 2650
},
{
"epoch": 1.7645118733509235,
"grad_norm": 8.40381145477295,
"learning_rate": 9.517341040462428e-06,
"loss": 0.562,
"step": 2675
},
{
"epoch": 1.7810026385224274,
"grad_norm": 9.789514541625977,
"learning_rate": 9.511783014673188e-06,
"loss": 0.6344,
"step": 2700
},
{
"epoch": 1.7974934036939314,
"grad_norm": 10.27668285369873,
"learning_rate": 9.50622498888395e-06,
"loss": 0.6181,
"step": 2725
},
{
"epoch": 1.8139841688654355,
"grad_norm": 8.970308303833008,
"learning_rate": 9.500666963094709e-06,
"loss": 0.6289,
"step": 2750
},
{
"epoch": 1.8304749340369393,
"grad_norm": 18.562963485717773,
"learning_rate": 9.49510893730547e-06,
"loss": 0.5785,
"step": 2775
},
{
"epoch": 1.8469656992084431,
"grad_norm": 9.711603164672852,
"learning_rate": 9.48955091151623e-06,
"loss": 0.6709,
"step": 2800
},
{
"epoch": 1.8634564643799472,
"grad_norm": 6.827445983886719,
"learning_rate": 9.48399288572699e-06,
"loss": 0.5618,
"step": 2825
},
{
"epoch": 1.8799472295514512,
"grad_norm": 7.309730529785156,
"learning_rate": 9.47843485993775e-06,
"loss": 0.6147,
"step": 2850
},
{
"epoch": 1.8964379947229553,
"grad_norm": 8.898188591003418,
"learning_rate": 9.47287683414851e-06,
"loss": 0.6617,
"step": 2875
},
{
"epoch": 1.912928759894459,
"grad_norm": 9.802018165588379,
"learning_rate": 9.467318808359272e-06,
"loss": 0.607,
"step": 2900
},
{
"epoch": 1.929419525065963,
"grad_norm": 11.065816879272461,
"learning_rate": 9.461760782570032e-06,
"loss": 0.5876,
"step": 2925
},
{
"epoch": 1.945910290237467,
"grad_norm": 8.673478126525879,
"learning_rate": 9.456202756780792e-06,
"loss": 0.5708,
"step": 2950
},
{
"epoch": 1.962401055408971,
"grad_norm": 8.835992813110352,
"learning_rate": 9.450644730991552e-06,
"loss": 0.5943,
"step": 2975
},
{
"epoch": 1.978891820580475,
"grad_norm": 9.243196487426758,
"learning_rate": 9.445086705202314e-06,
"loss": 0.6245,
"step": 3000
},
{
"epoch": 1.995382585751979,
"grad_norm": 7.948757171630859,
"learning_rate": 9.439528679413074e-06,
"loss": 0.5756,
"step": 3025
},
{
"epoch": 2.0,
"eval_loss": 0.33147963881492615,
"eval_runtime": 1586.3717,
"eval_samples_per_second": 3.275,
"eval_steps_per_second": 1.638,
"eval_wer": 0.1471842086408254,
"step": 3032
},
{
"epoch": 2.0118733509234827,
"grad_norm": 9.14528751373291,
"learning_rate": 9.433970653623834e-06,
"loss": 0.5881,
"step": 3050
},
{
"epoch": 2.028364116094987,
"grad_norm": 6.768561840057373,
"learning_rate": 9.428412627834594e-06,
"loss": 0.5513,
"step": 3075
},
{
"epoch": 2.044854881266491,
"grad_norm": 6.91849422454834,
"learning_rate": 9.422854602045354e-06,
"loss": 0.5318,
"step": 3100
},
{
"epoch": 2.061345646437995,
"grad_norm": 10.172245025634766,
"learning_rate": 9.417296576256114e-06,
"loss": 0.5481,
"step": 3125
},
{
"epoch": 2.0778364116094985,
"grad_norm": 7.840974807739258,
"learning_rate": 9.411738550466874e-06,
"loss": 0.5453,
"step": 3150
},
{
"epoch": 2.0943271767810026,
"grad_norm": 10.709723472595215,
"learning_rate": 9.406180524677636e-06,
"loss": 0.5526,
"step": 3175
},
{
"epoch": 2.1108179419525066,
"grad_norm": 7.9419169425964355,
"learning_rate": 9.400622498888396e-06,
"loss": 0.5277,
"step": 3200
},
{
"epoch": 2.1273087071240107,
"grad_norm": 9.7705659866333,
"learning_rate": 9.395064473099156e-06,
"loss": 0.5574,
"step": 3225
},
{
"epoch": 2.1437994722955147,
"grad_norm": 6.695239067077637,
"learning_rate": 9.389506447309916e-06,
"loss": 0.5335,
"step": 3250
},
{
"epoch": 2.1602902374670183,
"grad_norm": 6.585846900939941,
"learning_rate": 9.383948421520677e-06,
"loss": 0.5108,
"step": 3275
},
{
"epoch": 2.1767810026385224,
"grad_norm": 7.804145336151123,
"learning_rate": 9.378390395731437e-06,
"loss": 0.5136,
"step": 3300
},
{
"epoch": 2.1932717678100264,
"grad_norm": 10.289584159851074,
"learning_rate": 9.372832369942197e-06,
"loss": 0.5409,
"step": 3325
},
{
"epoch": 2.2097625329815305,
"grad_norm": 10.98071002960205,
"learning_rate": 9.367274344152957e-06,
"loss": 0.5362,
"step": 3350
},
{
"epoch": 2.2262532981530345,
"grad_norm": 9.558561325073242,
"learning_rate": 9.361716318363719e-06,
"loss": 0.5324,
"step": 3375
},
{
"epoch": 2.242744063324538,
"grad_norm": 7.030877590179443,
"learning_rate": 9.356158292574479e-06,
"loss": 0.5889,
"step": 3400
},
{
"epoch": 2.259234828496042,
"grad_norm": 6.8814697265625,
"learning_rate": 9.350600266785239e-06,
"loss": 0.523,
"step": 3425
},
{
"epoch": 2.2757255936675462,
"grad_norm": 6.776719093322754,
"learning_rate": 9.345042240996e-06,
"loss": 0.5468,
"step": 3450
},
{
"epoch": 2.2922163588390503,
"grad_norm": 6.252242565155029,
"learning_rate": 9.33948421520676e-06,
"loss": 0.5775,
"step": 3475
},
{
"epoch": 2.308707124010554,
"grad_norm": 10.693267822265625,
"learning_rate": 9.33392618941752e-06,
"loss": 0.5281,
"step": 3500
},
{
"epoch": 2.325197889182058,
"grad_norm": 8.575288772583008,
"learning_rate": 9.32836816362828e-06,
"loss": 0.5695,
"step": 3525
},
{
"epoch": 2.341688654353562,
"grad_norm": 7.428770065307617,
"learning_rate": 9.322810137839041e-06,
"loss": 0.5466,
"step": 3550
},
{
"epoch": 2.358179419525066,
"grad_norm": 10.00837516784668,
"learning_rate": 9.317252112049801e-06,
"loss": 0.5537,
"step": 3575
},
{
"epoch": 2.37467018469657,
"grad_norm": 8.1272611618042,
"learning_rate": 9.311694086260561e-06,
"loss": 0.5876,
"step": 3600
},
{
"epoch": 2.3911609498680737,
"grad_norm": 6.853724956512451,
"learning_rate": 9.306136060471321e-06,
"loss": 0.5321,
"step": 3625
},
{
"epoch": 2.4076517150395778,
"grad_norm": 8.857322692871094,
"learning_rate": 9.300578034682081e-06,
"loss": 0.549,
"step": 3650
},
{
"epoch": 2.424142480211082,
"grad_norm": 8.530628204345703,
"learning_rate": 9.295020008892843e-06,
"loss": 0.6112,
"step": 3675
},
{
"epoch": 2.440633245382586,
"grad_norm": 7.353663444519043,
"learning_rate": 9.289461983103603e-06,
"loss": 0.5738,
"step": 3700
},
{
"epoch": 2.4571240105540895,
"grad_norm": 7.334917068481445,
"learning_rate": 9.283903957314363e-06,
"loss": 0.5684,
"step": 3725
},
{
"epoch": 2.4736147757255935,
"grad_norm": 9.152280807495117,
"learning_rate": 9.278345931525123e-06,
"loss": 0.5736,
"step": 3750
},
{
"epoch": 2.4901055408970976,
"grad_norm": 6.766674041748047,
"learning_rate": 9.272787905735884e-06,
"loss": 0.5036,
"step": 3775
},
{
"epoch": 2.5065963060686016,
"grad_norm": 9.731369972229004,
"learning_rate": 9.267229879946644e-06,
"loss": 0.5643,
"step": 3800
},
{
"epoch": 2.5230870712401057,
"grad_norm": 9.269509315490723,
"learning_rate": 9.261671854157403e-06,
"loss": 0.5074,
"step": 3825
},
{
"epoch": 2.5395778364116097,
"grad_norm": 10.63927936553955,
"learning_rate": 9.256113828368165e-06,
"loss": 0.5361,
"step": 3850
},
{
"epoch": 2.5560686015831133,
"grad_norm": 7.579345226287842,
"learning_rate": 9.250555802578925e-06,
"loss": 0.5388,
"step": 3875
},
{
"epoch": 2.5725593667546174,
"grad_norm": 8.379159927368164,
"learning_rate": 9.244997776789685e-06,
"loss": 0.5462,
"step": 3900
},
{
"epoch": 2.5890501319261214,
"grad_norm": 6.8499860763549805,
"learning_rate": 9.239439751000445e-06,
"loss": 0.5571,
"step": 3925
},
{
"epoch": 2.6055408970976255,
"grad_norm": 8.696283340454102,
"learning_rate": 9.233881725211206e-06,
"loss": 0.5593,
"step": 3950
},
{
"epoch": 2.622031662269129,
"grad_norm": 8.017210960388184,
"learning_rate": 9.228323699421966e-06,
"loss": 0.5044,
"step": 3975
},
{
"epoch": 2.638522427440633,
"grad_norm": 7.696545124053955,
"learning_rate": 9.222765673632726e-06,
"loss": 0.5696,
"step": 4000
},
{
"epoch": 2.655013192612137,
"grad_norm": 8.843703269958496,
"learning_rate": 9.217207647843486e-06,
"loss": 0.5285,
"step": 4025
},
{
"epoch": 2.6715039577836412,
"grad_norm": 9.353804588317871,
"learning_rate": 9.211649622054248e-06,
"loss": 0.5766,
"step": 4050
},
{
"epoch": 2.6879947229551453,
"grad_norm": 9.517998695373535,
"learning_rate": 9.206091596265008e-06,
"loss": 0.5299,
"step": 4075
},
{
"epoch": 2.7044854881266494,
"grad_norm": 8.601608276367188,
"learning_rate": 9.200533570475768e-06,
"loss": 0.5392,
"step": 4100
},
{
"epoch": 2.720976253298153,
"grad_norm": 8.98355770111084,
"learning_rate": 9.194975544686528e-06,
"loss": 0.5403,
"step": 4125
},
{
"epoch": 2.737467018469657,
"grad_norm": 8.31533145904541,
"learning_rate": 9.189417518897288e-06,
"loss": 0.5444,
"step": 4150
},
{
"epoch": 2.753957783641161,
"grad_norm": 8.77151107788086,
"learning_rate": 9.183859493108048e-06,
"loss": 0.4957,
"step": 4175
},
{
"epoch": 2.7704485488126647,
"grad_norm": 8.633058547973633,
"learning_rate": 9.178301467318808e-06,
"loss": 0.5124,
"step": 4200
},
{
"epoch": 2.7869393139841687,
"grad_norm": 6.91944694519043,
"learning_rate": 9.17274344152957e-06,
"loss": 0.5006,
"step": 4225
},
{
"epoch": 2.8034300791556728,
"grad_norm": 8.961416244506836,
"learning_rate": 9.16718541574033e-06,
"loss": 0.5394,
"step": 4250
},
{
"epoch": 2.819920844327177,
"grad_norm": 9.132115364074707,
"learning_rate": 9.16162738995109e-06,
"loss": 0.4883,
"step": 4275
},
{
"epoch": 2.836411609498681,
"grad_norm": 8.976722717285156,
"learning_rate": 9.15606936416185e-06,
"loss": 0.5483,
"step": 4300
},
{
"epoch": 2.852902374670185,
"grad_norm": 6.239002704620361,
"learning_rate": 9.15051133837261e-06,
"loss": 0.5395,
"step": 4325
},
{
"epoch": 2.8693931398416885,
"grad_norm": 8.597009658813477,
"learning_rate": 9.144953312583372e-06,
"loss": 0.5363,
"step": 4350
},
{
"epoch": 2.8858839050131926,
"grad_norm": 8.338591575622559,
"learning_rate": 9.139395286794132e-06,
"loss": 0.5185,
"step": 4375
},
{
"epoch": 2.9023746701846966,
"grad_norm": 7.570108890533447,
"learning_rate": 9.133837261004892e-06,
"loss": 0.5011,
"step": 4400
},
{
"epoch": 2.9188654353562007,
"grad_norm": 8.193133354187012,
"learning_rate": 9.128279235215652e-06,
"loss": 0.5201,
"step": 4425
},
{
"epoch": 2.9353562005277043,
"grad_norm": 8.535017013549805,
"learning_rate": 9.122721209426413e-06,
"loss": 0.5196,
"step": 4450
},
{
"epoch": 2.9518469656992083,
"grad_norm": 10.10825252532959,
"learning_rate": 9.117163183637173e-06,
"loss": 0.4601,
"step": 4475
},
{
"epoch": 2.9683377308707124,
"grad_norm": 9.440041542053223,
"learning_rate": 9.111605157847933e-06,
"loss": 0.5435,
"step": 4500
},
{
"epoch": 2.9848284960422165,
"grad_norm": 9.113434791564941,
"learning_rate": 9.106047132058693e-06,
"loss": 0.59,
"step": 4525
},
{
"epoch": 3.0,
"eval_loss": 0.31148383021354675,
"eval_runtime": 1520.788,
"eval_samples_per_second": 3.417,
"eval_steps_per_second": 1.708,
"eval_wer": 0.14428243891953857,
"step": 4548
},
{
"epoch": 3.0013192612137205,
"grad_norm": 6.392806529998779,
"learning_rate": 9.100489106269453e-06,
"loss": 0.5845,
"step": 4550
},
{
"epoch": 3.017810026385224,
"grad_norm": 8.058794021606445,
"learning_rate": 9.094931080480214e-06,
"loss": 0.4345,
"step": 4575
},
{
"epoch": 3.034300791556728,
"grad_norm": 7.85006856918335,
"learning_rate": 9.089373054690974e-06,
"loss": 0.4532,
"step": 4600
},
{
"epoch": 3.050791556728232,
"grad_norm": 9.823562622070312,
"learning_rate": 9.083815028901735e-06,
"loss": 0.4992,
"step": 4625
},
{
"epoch": 3.0672823218997363,
"grad_norm": 7.448474407196045,
"learning_rate": 9.078257003112495e-06,
"loss": 0.4648,
"step": 4650
},
{
"epoch": 3.0837730870712403,
"grad_norm": 6.409183502197266,
"learning_rate": 9.072698977323255e-06,
"loss": 0.4332,
"step": 4675
},
{
"epoch": 3.100263852242744,
"grad_norm": 8.675482749938965,
"learning_rate": 9.067140951534015e-06,
"loss": 0.4741,
"step": 4700
},
{
"epoch": 3.116754617414248,
"grad_norm": 7.694202423095703,
"learning_rate": 9.061582925744777e-06,
"loss": 0.5007,
"step": 4725
},
{
"epoch": 3.133245382585752,
"grad_norm": 8.39884090423584,
"learning_rate": 9.056024899955537e-06,
"loss": 0.4821,
"step": 4750
},
{
"epoch": 3.149736147757256,
"grad_norm": 9.592146873474121,
"learning_rate": 9.050466874166297e-06,
"loss": 0.4572,
"step": 4775
},
{
"epoch": 3.16622691292876,
"grad_norm": 10.090729713439941,
"learning_rate": 9.044908848377057e-06,
"loss": 0.5059,
"step": 4800
},
{
"epoch": 3.1827176781002637,
"grad_norm": 7.96800422668457,
"learning_rate": 9.039350822587819e-06,
"loss": 0.4581,
"step": 4825
},
{
"epoch": 3.199208443271768,
"grad_norm": 10.114542961120605,
"learning_rate": 9.033792796798579e-06,
"loss": 0.5104,
"step": 4850
},
{
"epoch": 3.215699208443272,
"grad_norm": 7.575864791870117,
"learning_rate": 9.028234771009339e-06,
"loss": 0.4697,
"step": 4875
},
{
"epoch": 3.232189973614776,
"grad_norm": 8.376447677612305,
"learning_rate": 9.022676745220099e-06,
"loss": 0.477,
"step": 4900
},
{
"epoch": 3.2486807387862795,
"grad_norm": 7.300264835357666,
"learning_rate": 9.017118719430859e-06,
"loss": 0.4581,
"step": 4925
},
{
"epoch": 3.2651715039577835,
"grad_norm": 8.129483222961426,
"learning_rate": 9.011560693641619e-06,
"loss": 0.5413,
"step": 4950
},
{
"epoch": 3.2816622691292876,
"grad_norm": 8.351349830627441,
"learning_rate": 9.006002667852379e-06,
"loss": 0.5203,
"step": 4975
},
{
"epoch": 3.2981530343007917,
"grad_norm": 8.101301193237305,
"learning_rate": 9.000444642063139e-06,
"loss": 0.4434,
"step": 5000
},
{
"epoch": 3.3146437994722957,
"grad_norm": 8.380141258239746,
"learning_rate": 8.9948866162739e-06,
"loss": 0.5014,
"step": 5025
},
{
"epoch": 3.3311345646437993,
"grad_norm": 6.295933723449707,
"learning_rate": 8.98932859048466e-06,
"loss": 0.5022,
"step": 5050
},
{
"epoch": 3.3476253298153034,
"grad_norm": 8.170040130615234,
"learning_rate": 8.98377056469542e-06,
"loss": 0.4938,
"step": 5075
},
{
"epoch": 3.3641160949868074,
"grad_norm": 8.715851783752441,
"learning_rate": 8.97821253890618e-06,
"loss": 0.469,
"step": 5100
},
{
"epoch": 3.3806068601583115,
"grad_norm": 8.525429725646973,
"learning_rate": 8.972654513116942e-06,
"loss": 0.4733,
"step": 5125
},
{
"epoch": 3.397097625329815,
"grad_norm": 9.116293907165527,
"learning_rate": 8.967096487327702e-06,
"loss": 0.4529,
"step": 5150
},
{
"epoch": 3.413588390501319,
"grad_norm": 7.459149360656738,
"learning_rate": 8.961538461538462e-06,
"loss": 0.4919,
"step": 5175
},
{
"epoch": 3.430079155672823,
"grad_norm": 12.199692726135254,
"learning_rate": 8.955980435749222e-06,
"loss": 0.4397,
"step": 5200
},
{
"epoch": 3.4465699208443272,
"grad_norm": 5.766480922698975,
"learning_rate": 8.950422409959984e-06,
"loss": 0.4934,
"step": 5225
},
{
"epoch": 3.4630606860158313,
"grad_norm": 8.253438949584961,
"learning_rate": 8.944864384170742e-06,
"loss": 0.487,
"step": 5250
},
{
"epoch": 3.4795514511873353,
"grad_norm": 7.708489894866943,
"learning_rate": 8.939306358381502e-06,
"loss": 0.4843,
"step": 5275
},
{
"epoch": 3.496042216358839,
"grad_norm": 7.353459358215332,
"learning_rate": 8.933748332592264e-06,
"loss": 0.4652,
"step": 5300
},
{
"epoch": 3.512532981530343,
"grad_norm": 11.070839881896973,
"learning_rate": 8.928190306803024e-06,
"loss": 0.5014,
"step": 5325
},
{
"epoch": 3.529023746701847,
"grad_norm": 6.638571262359619,
"learning_rate": 8.922632281013784e-06,
"loss": 0.5049,
"step": 5350
},
{
"epoch": 3.5455145118733506,
"grad_norm": 5.465677261352539,
"learning_rate": 8.917074255224544e-06,
"loss": 0.4593,
"step": 5375
},
{
"epoch": 3.5620052770448547,
"grad_norm": 6.364098072052002,
"learning_rate": 8.911516229435306e-06,
"loss": 0.4506,
"step": 5400
},
{
"epoch": 3.5784960422163588,
"grad_norm": 6.888390064239502,
"learning_rate": 8.905958203646066e-06,
"loss": 0.4435,
"step": 5425
},
{
"epoch": 3.594986807387863,
"grad_norm": 8.262774467468262,
"learning_rate": 8.900400177856826e-06,
"loss": 0.501,
"step": 5450
},
{
"epoch": 3.611477572559367,
"grad_norm": 10.522833824157715,
"learning_rate": 8.894842152067586e-06,
"loss": 0.5003,
"step": 5475
},
{
"epoch": 3.627968337730871,
"grad_norm": 9.361923217773438,
"learning_rate": 8.889284126278348e-06,
"loss": 0.4184,
"step": 5500
},
{
"epoch": 3.6444591029023745,
"grad_norm": 11.22805404663086,
"learning_rate": 8.883726100489108e-06,
"loss": 0.4803,
"step": 5525
},
{
"epoch": 3.6609498680738786,
"grad_norm": 6.513778209686279,
"learning_rate": 8.878168074699868e-06,
"loss": 0.5071,
"step": 5550
},
{
"epoch": 3.6774406332453826,
"grad_norm": 7.134753227233887,
"learning_rate": 8.872610048910628e-06,
"loss": 0.4784,
"step": 5575
},
{
"epoch": 3.6939313984168867,
"grad_norm": 7.034178733825684,
"learning_rate": 8.867052023121388e-06,
"loss": 0.4687,
"step": 5600
},
{
"epoch": 3.7104221635883903,
"grad_norm": 5.566599369049072,
"learning_rate": 8.861493997332148e-06,
"loss": 0.5028,
"step": 5625
},
{
"epoch": 3.7269129287598943,
"grad_norm": 8.106322288513184,
"learning_rate": 8.855935971542908e-06,
"loss": 0.4644,
"step": 5650
},
{
"epoch": 3.7434036939313984,
"grad_norm": 7.244205951690674,
"learning_rate": 8.85037794575367e-06,
"loss": 0.4561,
"step": 5675
},
{
"epoch": 3.7598944591029024,
"grad_norm": 9.410643577575684,
"learning_rate": 8.84481991996443e-06,
"loss": 0.4611,
"step": 5700
},
{
"epoch": 3.7763852242744065,
"grad_norm": 5.8635687828063965,
"learning_rate": 8.83926189417519e-06,
"loss": 0.4702,
"step": 5725
},
{
"epoch": 3.7928759894459105,
"grad_norm": 7.882669448852539,
"learning_rate": 8.83370386838595e-06,
"loss": 0.4609,
"step": 5750
},
{
"epoch": 3.809366754617414,
"grad_norm": 8.896418571472168,
"learning_rate": 8.82814584259671e-06,
"loss": 0.4812,
"step": 5775
},
{
"epoch": 3.825857519788918,
"grad_norm": 6.063759803771973,
"learning_rate": 8.822587816807471e-06,
"loss": 0.5068,
"step": 5800
},
{
"epoch": 3.8423482849604222,
"grad_norm": 7.969833850860596,
"learning_rate": 8.817029791018231e-06,
"loss": 0.4726,
"step": 5825
},
{
"epoch": 3.858839050131926,
"grad_norm": 7.975457191467285,
"learning_rate": 8.811471765228991e-06,
"loss": 0.4773,
"step": 5850
},
{
"epoch": 3.87532981530343,
"grad_norm": 7.696924209594727,
"learning_rate": 8.805913739439751e-06,
"loss": 0.4699,
"step": 5875
},
{
"epoch": 3.891820580474934,
"grad_norm": 7.673986911773682,
"learning_rate": 8.800355713650513e-06,
"loss": 0.4245,
"step": 5900
},
{
"epoch": 3.908311345646438,
"grad_norm": 7.145058631896973,
"learning_rate": 8.794797687861273e-06,
"loss": 0.4492,
"step": 5925
},
{
"epoch": 3.924802110817942,
"grad_norm": 6.980532646179199,
"learning_rate": 8.789239662072033e-06,
"loss": 0.4476,
"step": 5950
},
{
"epoch": 3.941292875989446,
"grad_norm": 5.808567047119141,
"learning_rate": 8.783681636282793e-06,
"loss": 0.4633,
"step": 5975
},
{
"epoch": 3.9577836411609497,
"grad_norm": 9.074129104614258,
"learning_rate": 8.778123610493553e-06,
"loss": 0.4576,
"step": 6000
},
{
"epoch": 3.9742744063324538,
"grad_norm": 8.285755157470703,
"learning_rate": 8.772565584704313e-06,
"loss": 0.4938,
"step": 6025
},
{
"epoch": 3.990765171503958,
"grad_norm": 7.264706611633301,
"learning_rate": 8.767007558915073e-06,
"loss": 0.4727,
"step": 6050
},
{
"epoch": 4.0,
"eval_loss": 0.30439668893814087,
"eval_runtime": 1406.1275,
"eval_samples_per_second": 3.695,
"eval_steps_per_second": 1.848,
"eval_wer": 0.12903919180339615,
"step": 6064
},
{
"epoch": 4.007255936675461,
"grad_norm": 9.51659870147705,
"learning_rate": 8.761449533125835e-06,
"loss": 0.4168,
"step": 6075
},
{
"epoch": 4.0237467018469655,
"grad_norm": 8.642926216125488,
"learning_rate": 8.755891507336595e-06,
"loss": 0.4274,
"step": 6100
},
{
"epoch": 4.0402374670184695,
"grad_norm": 7.2832489013671875,
"learning_rate": 8.750333481547355e-06,
"loss": 0.427,
"step": 6125
},
{
"epoch": 4.056728232189974,
"grad_norm": 8.79689884185791,
"learning_rate": 8.744775455758115e-06,
"loss": 0.4192,
"step": 6150
},
{
"epoch": 4.073218997361478,
"grad_norm": 8.33678150177002,
"learning_rate": 8.739217429968877e-06,
"loss": 0.467,
"step": 6175
},
{
"epoch": 4.089709762532982,
"grad_norm": 9.119438171386719,
"learning_rate": 8.733659404179637e-06,
"loss": 0.4396,
"step": 6200
},
{
"epoch": 4.106200527704486,
"grad_norm": 5.395960807800293,
"learning_rate": 8.728101378390397e-06,
"loss": 0.4144,
"step": 6225
},
{
"epoch": 4.12269129287599,
"grad_norm": 7.0732102394104,
"learning_rate": 8.722543352601157e-06,
"loss": 0.4201,
"step": 6250
},
{
"epoch": 4.139182058047494,
"grad_norm": 7.840185165405273,
"learning_rate": 8.716985326811917e-06,
"loss": 0.4212,
"step": 6275
},
{
"epoch": 4.155672823218997,
"grad_norm": 10.627087593078613,
"learning_rate": 8.711427301022678e-06,
"loss": 0.4046,
"step": 6300
},
{
"epoch": 4.172163588390501,
"grad_norm": 6.872922897338867,
"learning_rate": 8.705869275233437e-06,
"loss": 0.4225,
"step": 6325
},
{
"epoch": 4.188654353562005,
"grad_norm": 9.158402442932129,
"learning_rate": 8.700311249444198e-06,
"loss": 0.4105,
"step": 6350
},
{
"epoch": 4.205145118733509,
"grad_norm": 6.458835124969482,
"learning_rate": 8.694753223654958e-06,
"loss": 0.3805,
"step": 6375
},
{
"epoch": 4.221635883905013,
"grad_norm": 8.311756134033203,
"learning_rate": 8.689195197865719e-06,
"loss": 0.3907,
"step": 6400
},
{
"epoch": 4.238126649076517,
"grad_norm": 7.731131076812744,
"learning_rate": 8.683637172076479e-06,
"loss": 0.4104,
"step": 6425
},
{
"epoch": 4.254617414248021,
"grad_norm": 8.254470825195312,
"learning_rate": 8.678079146287239e-06,
"loss": 0.4037,
"step": 6450
},
{
"epoch": 4.271108179419525,
"grad_norm": 6.898271560668945,
"learning_rate": 8.672521120498e-06,
"loss": 0.4715,
"step": 6475
},
{
"epoch": 4.287598944591029,
"grad_norm": 6.904751777648926,
"learning_rate": 8.66696309470876e-06,
"loss": 0.4213,
"step": 6500
},
{
"epoch": 4.304089709762533,
"grad_norm": 8.848785400390625,
"learning_rate": 8.66140506891952e-06,
"loss": 0.406,
"step": 6525
},
{
"epoch": 4.320580474934037,
"grad_norm": 8.48416805267334,
"learning_rate": 8.65584704313028e-06,
"loss": 0.4219,
"step": 6550
},
{
"epoch": 4.337071240105541,
"grad_norm": 10.504020690917969,
"learning_rate": 8.650289017341042e-06,
"loss": 0.4611,
"step": 6575
},
{
"epoch": 4.353562005277045,
"grad_norm": 9.928447723388672,
"learning_rate": 8.644730991551802e-06,
"loss": 0.4566,
"step": 6600
},
{
"epoch": 4.370052770448549,
"grad_norm": 8.590391159057617,
"learning_rate": 8.639172965762562e-06,
"loss": 0.4143,
"step": 6625
},
{
"epoch": 4.386543535620053,
"grad_norm": 9.111311912536621,
"learning_rate": 8.633614939973322e-06,
"loss": 0.4046,
"step": 6650
},
{
"epoch": 4.403034300791557,
"grad_norm": 8.099916458129883,
"learning_rate": 8.628056914184082e-06,
"loss": 0.4309,
"step": 6675
},
{
"epoch": 4.419525065963061,
"grad_norm": 8.059656143188477,
"learning_rate": 8.622498888394842e-06,
"loss": 0.4764,
"step": 6700
},
{
"epoch": 4.436015831134565,
"grad_norm": 7.070877552032471,
"learning_rate": 8.616940862605602e-06,
"loss": 0.4682,
"step": 6725
},
{
"epoch": 4.452506596306069,
"grad_norm": 10.665205955505371,
"learning_rate": 8.611382836816364e-06,
"loss": 0.4091,
"step": 6750
},
{
"epoch": 4.468997361477572,
"grad_norm": 7.845228672027588,
"learning_rate": 8.605824811027124e-06,
"loss": 0.4549,
"step": 6775
},
{
"epoch": 4.485488126649076,
"grad_norm": 8.280657768249512,
"learning_rate": 8.600266785237884e-06,
"loss": 0.4166,
"step": 6800
},
{
"epoch": 4.50197889182058,
"grad_norm": 9.942441940307617,
"learning_rate": 8.594708759448644e-06,
"loss": 0.4164,
"step": 6825
},
{
"epoch": 4.518469656992084,
"grad_norm": 7.303974151611328,
"learning_rate": 8.589150733659406e-06,
"loss": 0.4455,
"step": 6850
},
{
"epoch": 4.534960422163588,
"grad_norm": 9.158472061157227,
"learning_rate": 8.583592707870166e-06,
"loss": 0.4324,
"step": 6875
},
{
"epoch": 4.5514511873350925,
"grad_norm": 8.51202392578125,
"learning_rate": 8.578034682080926e-06,
"loss": 0.4393,
"step": 6900
},
{
"epoch": 4.5679419525065965,
"grad_norm": 8.030768394470215,
"learning_rate": 8.572476656291686e-06,
"loss": 0.4463,
"step": 6925
},
{
"epoch": 4.584432717678101,
"grad_norm": 9.200541496276855,
"learning_rate": 8.566918630502447e-06,
"loss": 0.4654,
"step": 6950
},
{
"epoch": 4.600923482849604,
"grad_norm": 8.542319297790527,
"learning_rate": 8.561360604713207e-06,
"loss": 0.4101,
"step": 6975
},
{
"epoch": 4.617414248021108,
"grad_norm": 7.467373847961426,
"learning_rate": 8.555802578923967e-06,
"loss": 0.4343,
"step": 7000
},
{
"epoch": 4.633905013192612,
"grad_norm": 6.186234474182129,
"learning_rate": 8.550244553134727e-06,
"loss": 0.4718,
"step": 7025
},
{
"epoch": 4.650395778364116,
"grad_norm": 9.750950813293457,
"learning_rate": 8.544686527345487e-06,
"loss": 0.4141,
"step": 7050
},
{
"epoch": 4.66688654353562,
"grad_norm": 7.7578253746032715,
"learning_rate": 8.539128501556247e-06,
"loss": 0.4167,
"step": 7075
},
{
"epoch": 4.683377308707124,
"grad_norm": 5.445390224456787,
"learning_rate": 8.533570475767007e-06,
"loss": 0.4324,
"step": 7100
},
{
"epoch": 4.699868073878628,
"grad_norm": 9.907746315002441,
"learning_rate": 8.52801244997777e-06,
"loss": 0.4258,
"step": 7125
},
{
"epoch": 4.716358839050132,
"grad_norm": 8.552475929260254,
"learning_rate": 8.52245442418853e-06,
"loss": 0.4233,
"step": 7150
},
{
"epoch": 4.732849604221636,
"grad_norm": 8.171768188476562,
"learning_rate": 8.51689639839929e-06,
"loss": 0.3845,
"step": 7175
},
{
"epoch": 4.74934036939314,
"grad_norm": 9.57198429107666,
"learning_rate": 8.51133837261005e-06,
"loss": 0.4482,
"step": 7200
},
{
"epoch": 4.765831134564644,
"grad_norm": 8.749096870422363,
"learning_rate": 8.50578034682081e-06,
"loss": 0.4512,
"step": 7225
},
{
"epoch": 4.782321899736147,
"grad_norm": 6.986504554748535,
"learning_rate": 8.500222321031571e-06,
"loss": 0.4268,
"step": 7250
},
{
"epoch": 4.7988126649076515,
"grad_norm": 7.843720436096191,
"learning_rate": 8.494664295242331e-06,
"loss": 0.4414,
"step": 7275
},
{
"epoch": 4.8153034300791555,
"grad_norm": 7.746464252471924,
"learning_rate": 8.489106269453091e-06,
"loss": 0.3829,
"step": 7300
},
{
"epoch": 4.83179419525066,
"grad_norm": 6.705915451049805,
"learning_rate": 8.483548243663851e-06,
"loss": 0.3767,
"step": 7325
},
{
"epoch": 4.848284960422164,
"grad_norm": 6.143075466156006,
"learning_rate": 8.477990217874613e-06,
"loss": 0.4318,
"step": 7350
},
{
"epoch": 4.864775725593668,
"grad_norm": 9.569985389709473,
"learning_rate": 8.472432192085373e-06,
"loss": 0.4184,
"step": 7375
},
{
"epoch": 4.881266490765172,
"grad_norm": 6.865624904632568,
"learning_rate": 8.466874166296131e-06,
"loss": 0.3741,
"step": 7400
},
{
"epoch": 4.897757255936676,
"grad_norm": 8.196345329284668,
"learning_rate": 8.461316140506893e-06,
"loss": 0.4535,
"step": 7425
},
{
"epoch": 4.914248021108179,
"grad_norm": 6.617861747741699,
"learning_rate": 8.455758114717653e-06,
"loss": 0.3955,
"step": 7450
},
{
"epoch": 4.930738786279683,
"grad_norm": 6.829433441162109,
"learning_rate": 8.450200088928413e-06,
"loss": 0.3856,
"step": 7475
},
{
"epoch": 4.947229551451187,
"grad_norm": 7.319567680358887,
"learning_rate": 8.444642063139173e-06,
"loss": 0.438,
"step": 7500
},
{
"epoch": 4.963720316622691,
"grad_norm": 7.047403812408447,
"learning_rate": 8.439084037349935e-06,
"loss": 0.4061,
"step": 7525
},
{
"epoch": 4.980211081794195,
"grad_norm": 9.969358444213867,
"learning_rate": 8.433526011560695e-06,
"loss": 0.392,
"step": 7550
},
{
"epoch": 4.996701846965699,
"grad_norm": 8.516345977783203,
"learning_rate": 8.427967985771455e-06,
"loss": 0.4134,
"step": 7575
},
{
"epoch": 5.0,
"eval_loss": 0.3021000921726227,
"eval_runtime": 1273.4915,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 2.04,
"eval_wer": 0.12830479329368777,
"step": 7580
},
{
"epoch": 5.013192612137203,
"grad_norm": 7.070035457611084,
"learning_rate": 8.422409959982215e-06,
"loss": 0.3749,
"step": 7600
},
{
"epoch": 5.029683377308707,
"grad_norm": 6.715669631958008,
"learning_rate": 8.416851934192976e-06,
"loss": 0.3547,
"step": 7625
},
{
"epoch": 5.046174142480211,
"grad_norm": 6.825494766235352,
"learning_rate": 8.411293908403736e-06,
"loss": 0.3513,
"step": 7650
},
{
"epoch": 5.062664907651715,
"grad_norm": 9.53160572052002,
"learning_rate": 8.405735882614496e-06,
"loss": 0.3648,
"step": 7675
},
{
"epoch": 5.0791556728232194,
"grad_norm": 5.551097393035889,
"learning_rate": 8.400177856825256e-06,
"loss": 0.3457,
"step": 7700
},
{
"epoch": 5.095646437994723,
"grad_norm": 8.277714729309082,
"learning_rate": 8.394619831036016e-06,
"loss": 0.3598,
"step": 7725
},
{
"epoch": 5.112137203166227,
"grad_norm": 7.182380199432373,
"learning_rate": 8.389061805246776e-06,
"loss": 0.3625,
"step": 7750
},
{
"epoch": 5.128627968337731,
"grad_norm": 7.097862243652344,
"learning_rate": 8.383503779457536e-06,
"loss": 0.4117,
"step": 7775
},
{
"epoch": 5.145118733509235,
"grad_norm": 5.849909782409668,
"learning_rate": 8.377945753668298e-06,
"loss": 0.3609,
"step": 7800
},
{
"epoch": 5.161609498680739,
"grad_norm": 7.363701343536377,
"learning_rate": 8.372387727879058e-06,
"loss": 0.3826,
"step": 7825
},
{
"epoch": 5.178100263852243,
"grad_norm": 6.545772075653076,
"learning_rate": 8.366829702089818e-06,
"loss": 0.3679,
"step": 7850
},
{
"epoch": 5.194591029023747,
"grad_norm": 11.929193496704102,
"learning_rate": 8.361271676300578e-06,
"loss": 0.3753,
"step": 7875
},
{
"epoch": 5.211081794195251,
"grad_norm": 6.836696624755859,
"learning_rate": 8.355713650511338e-06,
"loss": 0.3905,
"step": 7900
},
{
"epoch": 5.227572559366755,
"grad_norm": 9.196709632873535,
"learning_rate": 8.3501556247221e-06,
"loss": 0.364,
"step": 7925
},
{
"epoch": 5.244063324538258,
"grad_norm": 8.637734413146973,
"learning_rate": 8.34459759893286e-06,
"loss": 0.3797,
"step": 7950
},
{
"epoch": 5.260554089709762,
"grad_norm": 9.510890007019043,
"learning_rate": 8.33903957314362e-06,
"loss": 0.3592,
"step": 7975
},
{
"epoch": 5.277044854881266,
"grad_norm": 5.116422176361084,
"learning_rate": 8.33348154735438e-06,
"loss": 0.4121,
"step": 8000
},
{
"epoch": 5.29353562005277,
"grad_norm": 5.724400997161865,
"learning_rate": 8.327923521565142e-06,
"loss": 0.3679,
"step": 8025
},
{
"epoch": 5.310026385224274,
"grad_norm": 6.654912948608398,
"learning_rate": 8.322365495775902e-06,
"loss": 0.3784,
"step": 8050
},
{
"epoch": 5.326517150395778,
"grad_norm": 6.645880222320557,
"learning_rate": 8.316807469986662e-06,
"loss": 0.3809,
"step": 8075
},
{
"epoch": 5.3430079155672825,
"grad_norm": 7.6851091384887695,
"learning_rate": 8.311249444197422e-06,
"loss": 0.3949,
"step": 8100
},
{
"epoch": 5.3594986807387865,
"grad_norm": 6.103472709655762,
"learning_rate": 8.305691418408182e-06,
"loss": 0.3656,
"step": 8125
},
{
"epoch": 5.375989445910291,
"grad_norm": 7.165574073791504,
"learning_rate": 8.300133392618942e-06,
"loss": 0.3331,
"step": 8150
},
{
"epoch": 5.392480211081795,
"grad_norm": 5.427399635314941,
"learning_rate": 8.294575366829702e-06,
"loss": 0.349,
"step": 8175
},
{
"epoch": 5.408970976253298,
"grad_norm": 7.878599643707275,
"learning_rate": 8.289017341040463e-06,
"loss": 0.3861,
"step": 8200
},
{
"epoch": 5.425461741424802,
"grad_norm": 9.836017608642578,
"learning_rate": 8.283459315251223e-06,
"loss": 0.4138,
"step": 8225
},
{
"epoch": 5.441952506596306,
"grad_norm": 7.767301559448242,
"learning_rate": 8.277901289461984e-06,
"loss": 0.4012,
"step": 8250
},
{
"epoch": 5.45844327176781,
"grad_norm": 8.52707576751709,
"learning_rate": 8.272343263672744e-06,
"loss": 0.3942,
"step": 8275
},
{
"epoch": 5.474934036939314,
"grad_norm": 8.378703117370605,
"learning_rate": 8.266785237883505e-06,
"loss": 0.3718,
"step": 8300
},
{
"epoch": 5.491424802110818,
"grad_norm": 7.213738918304443,
"learning_rate": 8.261227212094265e-06,
"loss": 0.3959,
"step": 8325
},
{
"epoch": 5.507915567282322,
"grad_norm": 8.569686889648438,
"learning_rate": 8.255669186305025e-06,
"loss": 0.4553,
"step": 8350
},
{
"epoch": 5.524406332453826,
"grad_norm": 8.710777282714844,
"learning_rate": 8.250111160515785e-06,
"loss": 0.3511,
"step": 8375
},
{
"epoch": 5.540897097625329,
"grad_norm": 8.045042037963867,
"learning_rate": 8.244553134726547e-06,
"loss": 0.3977,
"step": 8400
},
{
"epoch": 5.557387862796833,
"grad_norm": 6.2842254638671875,
"learning_rate": 8.238995108937307e-06,
"loss": 0.374,
"step": 8425
},
{
"epoch": 5.573878627968337,
"grad_norm": 8.507267951965332,
"learning_rate": 8.233437083148067e-06,
"loss": 0.4065,
"step": 8450
},
{
"epoch": 5.5903693931398415,
"grad_norm": 7.953192234039307,
"learning_rate": 8.227879057358827e-06,
"loss": 0.4024,
"step": 8475
},
{
"epoch": 5.6068601583113455,
"grad_norm": 5.938470840454102,
"learning_rate": 8.222321031569587e-06,
"loss": 0.369,
"step": 8500
},
{
"epoch": 5.62335092348285,
"grad_norm": 8.270731925964355,
"learning_rate": 8.216763005780347e-06,
"loss": 0.3962,
"step": 8525
},
{
"epoch": 5.639841688654354,
"grad_norm": 8.280898094177246,
"learning_rate": 8.211204979991107e-06,
"loss": 0.3711,
"step": 8550
},
{
"epoch": 5.656332453825858,
"grad_norm": 8.00169849395752,
"learning_rate": 8.205646954201867e-06,
"loss": 0.4316,
"step": 8575
},
{
"epoch": 5.672823218997362,
"grad_norm": 6.38738489151001,
"learning_rate": 8.200088928412629e-06,
"loss": 0.409,
"step": 8600
},
{
"epoch": 5.689313984168866,
"grad_norm": 8.060171127319336,
"learning_rate": 8.194753223654958e-06,
"loss": 0.3912,
"step": 8625
},
{
"epoch": 5.70580474934037,
"grad_norm": 7.570400238037109,
"learning_rate": 8.18919519786572e-06,
"loss": 0.4043,
"step": 8650
},
{
"epoch": 5.722295514511873,
"grad_norm": 6.873922824859619,
"learning_rate": 8.18363717207648e-06,
"loss": 0.3764,
"step": 8675
},
{
"epoch": 5.738786279683377,
"grad_norm": 7.591399669647217,
"learning_rate": 8.17807914628724e-06,
"loss": 0.3745,
"step": 8700
},
{
"epoch": 5.755277044854881,
"grad_norm": 8.414856910705566,
"learning_rate": 8.172521120498e-06,
"loss": 0.4268,
"step": 8725
},
{
"epoch": 5.771767810026385,
"grad_norm": 9.1331148147583,
"learning_rate": 8.16696309470876e-06,
"loss": 0.3676,
"step": 8750
},
{
"epoch": 5.788258575197889,
"grad_norm": 6.92949104309082,
"learning_rate": 8.16140506891952e-06,
"loss": 0.3932,
"step": 8775
},
{
"epoch": 5.804749340369393,
"grad_norm": 6.618691444396973,
"learning_rate": 8.15584704313028e-06,
"loss": 0.371,
"step": 8800
},
{
"epoch": 5.821240105540897,
"grad_norm": 8.719780921936035,
"learning_rate": 8.150289017341042e-06,
"loss": 0.4156,
"step": 8825
},
{
"epoch": 5.837730870712401,
"grad_norm": 4.5113525390625,
"learning_rate": 8.144730991551802e-06,
"loss": 0.4222,
"step": 8850
},
{
"epoch": 5.8542216358839045,
"grad_norm": 7.771907329559326,
"learning_rate": 8.139172965762562e-06,
"loss": 0.3476,
"step": 8875
},
{
"epoch": 5.870712401055409,
"grad_norm": 8.649697303771973,
"learning_rate": 8.133614939973322e-06,
"loss": 0.3834,
"step": 8900
},
{
"epoch": 5.887203166226913,
"grad_norm": 11.093376159667969,
"learning_rate": 8.128056914184082e-06,
"loss": 0.3735,
"step": 8925
},
{
"epoch": 5.903693931398417,
"grad_norm": 7.354765892028809,
"learning_rate": 8.122498888394843e-06,
"loss": 0.3949,
"step": 8950
},
{
"epoch": 5.920184696569921,
"grad_norm": 8.032320022583008,
"learning_rate": 8.116940862605604e-06,
"loss": 0.3853,
"step": 8975
},
{
"epoch": 5.936675461741425,
"grad_norm": 7.220771312713623,
"learning_rate": 8.111382836816364e-06,
"loss": 0.3608,
"step": 9000
},
{
"epoch": 5.953166226912929,
"grad_norm": 7.930024147033691,
"learning_rate": 8.105824811027124e-06,
"loss": 0.3715,
"step": 9025
},
{
"epoch": 5.969656992084433,
"grad_norm": 10.353165626525879,
"learning_rate": 8.100266785237885e-06,
"loss": 0.3852,
"step": 9050
},
{
"epoch": 5.986147757255937,
"grad_norm": 7.248688220977783,
"learning_rate": 8.094708759448644e-06,
"loss": 0.3542,
"step": 9075
},
{
"epoch": 6.0,
"eval_loss": 0.3076770603656769,
"eval_runtime": 1501.2001,
"eval_samples_per_second": 3.461,
"eval_steps_per_second": 1.731,
"eval_wer": 0.12749874614888587,
"step": 9096
},
{
"epoch": 6.002638522427441,
"grad_norm": 6.4646382331848145,
"learning_rate": 8.089150733659404e-06,
"loss": 0.3508,
"step": 9100
},
{
"epoch": 6.019129287598944,
"grad_norm": 8.597702980041504,
"learning_rate": 8.083592707870165e-06,
"loss": 0.3326,
"step": 9125
},
{
"epoch": 6.035620052770448,
"grad_norm": 8.334925651550293,
"learning_rate": 8.078034682080925e-06,
"loss": 0.3246,
"step": 9150
},
{
"epoch": 6.052110817941952,
"grad_norm": 5.674637794494629,
"learning_rate": 8.072476656291685e-06,
"loss": 0.3472,
"step": 9175
},
{
"epoch": 6.068601583113456,
"grad_norm": 4.868644714355469,
"learning_rate": 8.066918630502445e-06,
"loss": 0.3483,
"step": 9200
},
{
"epoch": 6.08509234828496,
"grad_norm": 7.509459972381592,
"learning_rate": 8.061360604713207e-06,
"loss": 0.3372,
"step": 9225
},
{
"epoch": 6.101583113456464,
"grad_norm": 6.369452953338623,
"learning_rate": 8.055802578923967e-06,
"loss": 0.3294,
"step": 9250
},
{
"epoch": 6.1180738786279685,
"grad_norm": 6.736758232116699,
"learning_rate": 8.050244553134727e-06,
"loss": 0.317,
"step": 9275
},
{
"epoch": 6.1345646437994725,
"grad_norm": 5.772039890289307,
"learning_rate": 8.044686527345487e-06,
"loss": 0.3138,
"step": 9300
},
{
"epoch": 6.151055408970977,
"grad_norm": 8.263904571533203,
"learning_rate": 8.039128501556249e-06,
"loss": 0.3507,
"step": 9325
},
{
"epoch": 6.167546174142481,
"grad_norm": 7.171966552734375,
"learning_rate": 8.033570475767009e-06,
"loss": 0.3343,
"step": 9350
},
{
"epoch": 6.184036939313984,
"grad_norm": 7.375022888183594,
"learning_rate": 8.028012449977769e-06,
"loss": 0.328,
"step": 9375
},
{
"epoch": 6.200527704485488,
"grad_norm": 7.487870693206787,
"learning_rate": 8.022454424188529e-06,
"loss": 0.3433,
"step": 9400
},
{
"epoch": 6.217018469656992,
"grad_norm": 7.687648296356201,
"learning_rate": 8.016896398399289e-06,
"loss": 0.3625,
"step": 9425
},
{
"epoch": 6.233509234828496,
"grad_norm": 8.015031814575195,
"learning_rate": 8.011338372610049e-06,
"loss": 0.3585,
"step": 9450
},
{
"epoch": 6.25,
"grad_norm": 6.240160942077637,
"learning_rate": 8.005780346820809e-06,
"loss": 0.3272,
"step": 9475
},
{
"epoch": 6.266490765171504,
"grad_norm": 5.806061744689941,
"learning_rate": 8.00022232103157e-06,
"loss": 0.3368,
"step": 9500
},
{
"epoch": 6.282981530343008,
"grad_norm": 5.972324848175049,
"learning_rate": 7.99466429524233e-06,
"loss": 0.305,
"step": 9525
},
{
"epoch": 6.299472295514512,
"grad_norm": 7.271647930145264,
"learning_rate": 7.98910626945309e-06,
"loss": 0.3777,
"step": 9550
},
{
"epoch": 6.315963060686016,
"grad_norm": 8.508190155029297,
"learning_rate": 7.98354824366385e-06,
"loss": 0.3894,
"step": 9575
},
{
"epoch": 6.33245382585752,
"grad_norm": 7.478120803833008,
"learning_rate": 7.977990217874612e-06,
"loss": 0.3469,
"step": 9600
},
{
"epoch": 6.348944591029023,
"grad_norm": 7.627612113952637,
"learning_rate": 7.972432192085372e-06,
"loss": 0.3106,
"step": 9625
},
{
"epoch": 6.3654353562005275,
"grad_norm": 6.731842041015625,
"learning_rate": 7.966874166296132e-06,
"loss": 0.3331,
"step": 9650
},
{
"epoch": 6.3819261213720315,
"grad_norm": 5.524582862854004,
"learning_rate": 7.961316140506892e-06,
"loss": 0.3364,
"step": 9675
},
{
"epoch": 6.398416886543536,
"grad_norm": 5.552459239959717,
"learning_rate": 7.955758114717652e-06,
"loss": 0.3239,
"step": 9700
},
{
"epoch": 6.41490765171504,
"grad_norm": 8.744638442993164,
"learning_rate": 7.950200088928414e-06,
"loss": 0.3518,
"step": 9725
},
{
"epoch": 6.431398416886544,
"grad_norm": 6.690433502197266,
"learning_rate": 7.944642063139174e-06,
"loss": 0.323,
"step": 9750
},
{
"epoch": 6.447889182058048,
"grad_norm": 6.405791282653809,
"learning_rate": 7.939084037349934e-06,
"loss": 0.3255,
"step": 9775
},
{
"epoch": 6.464379947229552,
"grad_norm": 8.274345397949219,
"learning_rate": 7.933526011560694e-06,
"loss": 0.3573,
"step": 9800
},
{
"epoch": 6.480870712401055,
"grad_norm": 10.02904987335205,
"learning_rate": 7.927967985771454e-06,
"loss": 0.3627,
"step": 9825
},
{
"epoch": 6.497361477572559,
"grad_norm": 8.587586402893066,
"learning_rate": 7.922409959982214e-06,
"loss": 0.3399,
"step": 9850
},
{
"epoch": 6.513852242744063,
"grad_norm": 8.427138328552246,
"learning_rate": 7.916851934192974e-06,
"loss": 0.3729,
"step": 9875
},
{
"epoch": 6.530343007915567,
"grad_norm": 11.161705017089844,
"learning_rate": 7.911293908403736e-06,
"loss": 0.3749,
"step": 9900
},
{
"epoch": 6.546833773087071,
"grad_norm": 9.07302474975586,
"learning_rate": 7.905735882614496e-06,
"loss": 0.3293,
"step": 9925
},
{
"epoch": 6.563324538258575,
"grad_norm": 5.79204797744751,
"learning_rate": 7.900177856825256e-06,
"loss": 0.352,
"step": 9950
},
{
"epoch": 6.579815303430079,
"grad_norm": 8.09050178527832,
"learning_rate": 7.894619831036016e-06,
"loss": 0.3466,
"step": 9975
},
{
"epoch": 6.596306068601583,
"grad_norm": 6.807469844818115,
"learning_rate": 7.889061805246778e-06,
"loss": 0.3827,
"step": 10000
},
{
"epoch": 6.612796833773087,
"grad_norm": 5.356978416442871,
"learning_rate": 7.883503779457538e-06,
"loss": 0.3486,
"step": 10025
},
{
"epoch": 6.629287598944591,
"grad_norm": 5.796497821807861,
"learning_rate": 7.877945753668298e-06,
"loss": 0.3431,
"step": 10050
},
{
"epoch": 6.6457783641160955,
"grad_norm": 5.685765743255615,
"learning_rate": 7.872387727879058e-06,
"loss": 0.3423,
"step": 10075
},
{
"epoch": 6.662269129287599,
"grad_norm": Infinity,
"learning_rate": 7.867052023121387e-06,
"loss": 0.3711,
"step": 10100
},
{
"epoch": 6.678759894459103,
"grad_norm": 8.378765106201172,
"learning_rate": 7.861493997332147e-06,
"loss": 0.3662,
"step": 10125
},
{
"epoch": 6.695250659630607,
"grad_norm": 9.546087265014648,
"learning_rate": 7.855935971542909e-06,
"loss": 0.3005,
"step": 10150
},
{
"epoch": 6.711741424802111,
"grad_norm": 7.322122097015381,
"learning_rate": 7.850377945753669e-06,
"loss": 0.3464,
"step": 10175
},
{
"epoch": 6.728232189973615,
"grad_norm": 8.071023941040039,
"learning_rate": 7.844819919964429e-06,
"loss": 0.3444,
"step": 10200
},
{
"epoch": 6.744722955145119,
"grad_norm": 8.991476058959961,
"learning_rate": 7.839261894175189e-06,
"loss": 0.3779,
"step": 10225
},
{
"epoch": 6.761213720316623,
"grad_norm": 7.23716402053833,
"learning_rate": 7.83370386838595e-06,
"loss": 0.3075,
"step": 10250
},
{
"epoch": 6.777704485488127,
"grad_norm": 5.675558567047119,
"learning_rate": 7.82814584259671e-06,
"loss": 0.3185,
"step": 10275
},
{
"epoch": 6.79419525065963,
"grad_norm": 9.553876876831055,
"learning_rate": 7.82258781680747e-06,
"loss": 0.3661,
"step": 10300
},
{
"epoch": 6.810686015831134,
"grad_norm": 5.838205814361572,
"learning_rate": 7.81702979101823e-06,
"loss": 0.3378,
"step": 10325
},
{
"epoch": 6.827176781002638,
"grad_norm": 7.847379207611084,
"learning_rate": 7.811471765228992e-06,
"loss": 0.3443,
"step": 10350
},
{
"epoch": 6.843667546174142,
"grad_norm": 7.678672790527344,
"learning_rate": 7.805913739439752e-06,
"loss": 0.3265,
"step": 10375
},
{
"epoch": 6.860158311345646,
"grad_norm": 7.874334335327148,
"learning_rate": 7.80035571365051e-06,
"loss": 0.3456,
"step": 10400
},
{
"epoch": 6.87664907651715,
"grad_norm": 7.230392932891846,
"learning_rate": 7.794797687861272e-06,
"loss": 0.3702,
"step": 10425
},
{
"epoch": 6.8931398416886545,
"grad_norm": 7.550044536590576,
"learning_rate": 7.789239662072033e-06,
"loss": 0.3428,
"step": 10450
},
{
"epoch": 6.9096306068601585,
"grad_norm": 5.714991569519043,
"learning_rate": 7.783681636282793e-06,
"loss": 0.3442,
"step": 10475
},
{
"epoch": 6.926121372031663,
"grad_norm": 9.231269836425781,
"learning_rate": 7.778123610493553e-06,
"loss": 0.3805,
"step": 10500
},
{
"epoch": 6.942612137203167,
"grad_norm": 8.083724021911621,
"learning_rate": 7.772565584704314e-06,
"loss": 0.3497,
"step": 10525
},
{
"epoch": 6.959102902374671,
"grad_norm": 5.767257213592529,
"learning_rate": 7.767007558915074e-06,
"loss": 0.3101,
"step": 10550
},
{
"epoch": 6.975593667546174,
"grad_norm": 6.558534622192383,
"learning_rate": 7.761449533125834e-06,
"loss": 0.3563,
"step": 10575
},
{
"epoch": 6.992084432717678,
"grad_norm": 8.02295207977295,
"learning_rate": 7.755891507336594e-06,
"loss": 0.3648,
"step": 10600
},
{
"epoch": 7.0,
"eval_loss": 0.30907538533210754,
"eval_runtime": 1524.829,
"eval_samples_per_second": 3.408,
"eval_steps_per_second": 1.704,
"eval_wer": 0.12174894318263237,
"step": 10612
}
],
"logging_steps": 25,
"max_steps": 45480,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.897929131311104e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}